Hadoop：我正在尝试用 MapReduce 将一个文件中所有数字相加，数字以空格分隔、分布在多行中，但我的输出出错了。输入文件为：
1 2 3 4
5 4 3 2
输出应为 键: sum 值: 24，而 MapReduce 实际生成的输出是 键: sum 值: 34。我在 Ubuntu 14.04 上用 OpenJDK 7 运行 jar 文件；jar 在 Eclipse Juno 中创建，编译使用的是 Oracle JDK 7。
NumberDriver.java
package numbersum;
import java.io.*;
//import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
//import org.apache.hadoop.mapreduce.Mapper;
//import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class NumberDriver {
    /**
     * Configures and submits the "number sum" MapReduce job.
     *
     * @param args generic Hadoop options followed by: &lt;input path&gt; &lt;output path&gt;
     * @throws IOException            if job setup fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if the job wait is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Strip Hadoop's generic options (-D, -fs, ...) so only the paths remain.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            // Tell the user what was expected instead of a bare "Error".
            System.err.println("Usage: numbersum <input path> <output path>");
            System.exit(2);
        }
        // Job.getInstance replaces the deprecated new Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "number sum");
        job.setJarByClass(NumberDriver.class);
        job.setMapperClass(NumberMapper.class);
        job.setReducerClass(NumberReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        // Exit 0 on success, 1 on job failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
NumberMapper.java
package numbersum;
import java.io.*;
import java.util.StringTokenizer;
//import org.apache.hadoop.conf.Configuration;
//import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
//import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
//import org.apache.hadoop.mapreduce.Reducer;
//import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
//import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//import org.apache.hadoop.util.GenericOptionsParser;
//import org.hsqldb.Tokenizer;
/**
 * Sums the whitespace-separated integers of each input line and emits
 * ("sum", line total). The reducer then adds the per-line totals.
 */
public class NumberMapper extends Mapper<LongWritable, Text, Text, IntWritable>
{
    /**
     * Emits the sum of the integers found in one input line.
     *
     * Bug fix: the running total used to be an instance field that was never
     * reset, so it carried over between map() calls and inflated later
     * outputs (the expected 24 became 34). The total is now a local variable
     * initialized to 0 on every invocation.
     *
     * @throws NumberFormatException if a token is not a valid integer
     */
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
    {
        int sum = 0; // reset for each record — was a never-reset instance field
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens())
        {
            sum += Integer.parseInt(itr.nextToken());
        }
        context.write(new Text("sum"), new IntWritable(sum));
    }
}
（以下原为 NumberMapper.java 的机器翻译乱码，已按上文原始代码还原）
package numbersum;
import java.io.*;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class NumberMapper extends Mapper <LongWritable, Text, Text, IntWritable>
{
int sum;
public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException
{
StringTokenizer itr=new StringTokenizer(value.toString());
while(itr.hasMoreTokens())
{
sum+=Integer.parseInt(itr.nextToken());
}
context.write(new Text("sum"),new IntWritable(sum));
}
}
NumberReducer.java
package numbersum;
import java.io.*;
//import java.util.StringTokenizer;
//import org.apache.hadoop.conf.Configuration;
//import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
//import org.apache.hadoop.mapreduce.Job;
//import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
//import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
//import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Adds up all partial sums emitted by the mapper for a given key and
 * writes the grand total under that same key.
 */
public class NumberReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /** Accumulates every value for {@code key} and emits (key, total). */
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable partial : values) {
            total += partial.get();
        }
        context.write(key, new IntWritable(total));
    }
}
（以下原为 NumberReducer.java 的机器翻译乱码，已按上文原始代码还原）
package numbersum;
import java.io.*;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class NumberReducer extends Reducer <Text, IntWritable, Text, IntWritable>
{
public void reduce(Text key,Iterable<IntWritable> values, Context context)throws IOException, InterruptedException
{
int sum=0;
for(IntWritable value:values)
{
sum+=value.get();
}
context.write(key,new IntWritable(sum));
}
}
回答一：我想你是忘了在 map 函数的开头把 sum 重置为 0：

public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException
{
sum = 0;
...
回答二：我最好的猜测是：

int sum; // <-- 为什么声明为类成员？
public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException
{
int sum = 0; // 为什么不在这里声明并初始化？
StringTokenizer itr=new StringTokenizer(value.toString());
...

说明：实例变量只在对象创建时被默认初始化为零，之后不会在每次 map() 调用时重置。
提问者评论：非常感谢，成功了——把它声明为类成员时它的值不会被刷新。请把这个发布为答案，我会采纳，@AnishGupta。