Hadoop 使用outputcollector的mapreduce字数示例
我正在尝试使用 OutputCollector 运行基本的 WordCount MapReduce 示例,但遇到了异常。异常信息如下:mapreduce.Job:Job Job_local1048833344_0001 失败,状态为失败,原因是:NA java.lang.Exception:java.io.IOException:map 中的键类型不匹配:预期为 org.apache.hadoop.io.Text,收到 org.apache.hadoop.io.LongWritable。(标签:hadoop、mapreduce、output、word-count、collectors)下面是我试图运行的代码:
import java.io.*;
import java.util.StringTokenizer;
import java.util.Iterator;
import org.apache.hadoop.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCountOutputCollector {
public static class WordCountOutputCollectorMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
word.set(tokenizer.nextToken());
output.collect(word, one);
}
}
}
public static class WordCountOutputCollectorReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
int sum = 0;
while (values.hasNext()) {
sum += values.next().get();
}
output.collect(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: wordcount <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "word count outputcollector");
job.setJarByClass(WordCountOutputCollector.class);
job.setMapperClass(WordCountOutputCollectorMapper.class);
job.setCombinerClass(WordCountOutputCollectorReducer.class);
job.setReducerClass(WordCountOutputCollectorReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//conf.setInputFormat(TextInputFormat.class);
//conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//JobClient.runJob(conf);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
import java.io.*;
导入java.util.StringTokenizer;
导入java.util.Iterator;
导入org.apache.hadoop.io.*;
导入org.apache.hadoop.conf.Configuration;
导入org.apache.hadoop.fs.Path;
导入org.apache.hadoop.mapreduce.Job;
导入org.apache.hadoop.mapreduce.Mapper;
导入org.apache.hadoop.mapreduce.Reducer;
导入org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
导入org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
导入org.apache.hadoop.mapred.OutputCollector;
导入org.apache.hadoop.io.ObjectWritable;
导入org.apache.hadoop.mapred.Reporter;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.util.GenericOptionsParser;
公共类WordCountOutputCollector{
公共静态类WordCountOutputCollectorMapper扩展映射器{
私有最终静态IntWritable one=新的IntWritable(1);
私有文本字=新文本();
公共void映射(LongWritable键、文本值、OutputCollector输出、Reporter报告器)引发IOException{
字符串行=value.toString();
StringTokenizer标记器=新的StringTokenizer(行);
while(tokenizer.hasMoreTokens()){
set(tokenizer.nextToken());
输出。收集(字,一);
}
}
}
公共静态类WordCountOutputCollectorReducer扩展{
公共void reduce(文本键、迭代器值、OutputCollector输出、Reporter报告器)引发IOException{
整数和=0;
while(values.hasNext()){
sum+=values.next().get();
}
collect(key,newintwriteable(sum));
}
}
公共静态void main(字符串[]args)引发异常{
Configuration conf=新配置();
String[]otherArgs=新的GenericOptionsParser(conf,args);
if(otherArgs.length!=2){
System.err.println(“用法:wordcount”);
系统出口(2);
}
Job Job=新作业(conf,“字数输出收集器”);
job.setJarByClass(WordCountOutputCollector.class);
setMapperClass(WordCountOutputCollectorMapper.class);
job.setCombinerClass(WordCountOutputCollectorReducer.class);
setReducerClass(WordCountOutputCollectorReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//conf.setInputFormat(TextInputFormat.class);
//conf.setOutputFormat(TextOutputFormat.class);
setInputPaths(作业,新路径(args[0]);
setOutputPath(作业,新路径(args[1]);
//runJob(conf);
系统退出(作业等待完成(真)?0:1;
}
}
试试这个:
import java.io.IOException;
导入java.io.PrintStream;
导入java.util.StringTokenizer;
导入org.apache.hadoop.conf.Configuration;
导入org.apache.hadoop.fs.Path;
导入org.apache.hadoop.io.IntWritable;
导入org.apache.hadoop.io.LongWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Job;
导入org.apache.hadoop.mapreduce.Mapper;
导入org.apache.hadoop.mapreduce.Mapper.Context;
导入org.apache.hadoop.mapreduce.Reducer;
导入org.apache.hadoop.mapreduce.Reducer.Context;
导入org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
导入org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
导入org.apache.hadoop.util.GenericOptionsParser;
公共类字数
{
公共静态类映射
扩展映射器
{
私有静态最终IntWritable one=新的IntWritable(1);
私有文本字=新文本();
公共void映射(LongWritable参数LongWritable、Text参数Text、Mapper.Context参数Mapper)
抛出IOException、InterruptedException
{
StringTokenizer localStringTokenizer=新的StringTokenizer(paramText.toString());
while(localStringTokenizer.hasMoreTokens())
{
this.word.set(localStringTokenizer.nextToken());
paramMapper.write(this.word,one);
}
}
}
公共静态类减少
伸缩减速机
{
私有IntWritable结果=新的IntWritable();
public void reduce(Text-paramText、Iterable-paramIterable、Reducer.Context-paramReducer)
抛出IOException、InterruptedException
{
int i=0;
for(IntWritable LocalinWritable:paramIterable){
i+=localIntWritable.get();
}
本结果集(i);
write(paramText,this.result);
}
}
公共静态void main(字符串[]paramArrayOfString)
抛出异常
{
配置localConfiguration=新配置();
String[]arrayOfString=新的GenericOptionsParser(localConfiguration,paramArrayOfString);
如果(arrayOfString.length!=2)
{
System.err.println(“用法:WordCount”);
系统出口(2);
}
Job localJob=新作业(localConfiguration,“wordcount”);
setJarByClass(WordCount.class);
setMapperClass(WordCount.Map.class);
setReducerClass(WordCount.Reduce.class);
setCombinerClass(WordCount.Reduce.class);
setOutputKeyClass(Text.class);
localJob.setOutputValueClass(IntWritable.class);
文件输入
import java.io.IOException;
import java.io.PrintStream;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount
{
public static class Map
extends Mapper<LongWritable, Text, Text, IntWritable>
{
private static final IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable paramLongWritable, Text paramText, Mapper<LongWritable, Text, Text, IntWritable>.Context paramMapper)
throws IOException, InterruptedException
{
StringTokenizer localStringTokenizer = new StringTokenizer(paramText.toString());
while (localStringTokenizer.hasMoreTokens())
{
this.word.set(localStringTokenizer.nextToken());
paramMapper.write(this.word, one);
}
}
}
public static class Reduce
extends Reducer<Text, IntWritable, Text, IntWritable>
{
private IntWritable result = new IntWritable();
public void reduce(Text paramText, Iterable<IntWritable> paramIterable, Reducer<Text, IntWritable, Text, IntWritable>.Context paramReducer)
throws IOException, InterruptedException
{
int i = 0;
for (IntWritable localIntWritable : paramIterable) {
i += localIntWritable.get();
}
this.result.set(i);
paramReducer.write(paramText, this.result);
}
}
public static void main(String[] paramArrayOfString)
throws Exception
{
Configuration localConfiguration = new Configuration();
String[] arrayOfString = new GenericOptionsParser(localConfiguration, paramArrayOfString).getRemainingArgs();
if (arrayOfString.length != 2)
{
System.err.println("Usage: WordCount <in> <out>");
System.exit(2);
}
Job localJob = new Job(localConfiguration, "wordcount");
localJob.setJarByClass(WordCount.class);
localJob.setMapperClass(WordCount.Map.class);
localJob.setReducerClass(WordCount.Reduce.class);
localJob.setCombinerClass(WordCount.Reduce.class);
localJob.setOutputKeyClass(Text.class);
localJob.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(localJob, new Path(arrayOfString[0]));
FileOutputFormat.setOutputPath(localJob, new Path(arrayOfString[1]));
System.exit(localJob.waitForCompletion(true) ? 0 : 1);
}
}
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
JobClient.runJob(conf);