Hadoop: I am trying to add up all the numbers in a file of space-separated numbers spread over multiple lines, using MapReduce

My output is wrong. The input file is:

1 2 3 4
5 4 3 2

The expected output is key: sum, value: 24

The output MapReduce actually produces is key: sum, value: 34

I run the jar with OpenJDK 7 on Ubuntu 14.04; the jar was built in Eclipse Juno, and the code was compiled with Oracle JDK 7.

NumberDriver.java

package numbersum;

import java.io.*;
//import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
//import org.apache.hadoop.mapreduce.Mapper;
//import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class NumberDriver {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // TODO Auto-generated method stub
            Configuration conf=new Configuration();
            String[] otherArgs=new GenericOptionsParser(conf,args).getRemainingArgs();
            if(otherArgs.length!=2)
            {
                System.err.println("Error");
                System.exit(2);
            }
            Job job=new Job(conf, "number sum");
            job.setJarByClass(NumberDriver.class);
            job.setMapperClass(NumberMapper.class);
            job.setReducerClass(NumberReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
            System.exit(job.waitForCompletion(true)?0:1);
    }

}
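
A side note on the driver, unrelated to the wrong sum: on Hadoop 2.x the Job constructor used above is marked deprecated, and the factory method is the usual replacement. A minimal sketch of the equivalent line, assuming a Hadoop 2.x dependency:

    Job job = Job.getInstance(conf, "number sum");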
NumberMapper.java

package numbersum;
import java.io.*;
import java.util.StringTokenizer;

//import org.apache.hadoop.conf.Configuration;
//import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
//import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
//import org.apache.hadoop.mapreduce.Reducer;
//import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
//import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//import org.apache.hadoop.util.GenericOptionsParser;
//import org.hsqldb.Tokenizer;

public class NumberMapper extends Mapper <LongWritable, Text, Text, IntWritable> 
    {
        int sum;
        public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException
        {
            StringTokenizer itr=new StringTokenizer(value.toString());
            while(itr.hasMoreTokens())
            {
                sum+=Integer.parseInt(itr.nextToken());
            }
            context.write(new Text("sum"),new IntWritable(sum));
        }
    }
NumberReducer.java

package numbersum;
import java.io.*;
//import java.util.StringTokenizer;

//import org.apache.hadoop.conf.Configuration;
//import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
//import org.apache.hadoop.mapreduce.Job;
//import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
//import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
//import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//import org.apache.hadoop.util.GenericOptionsParser;

public class NumberReducer extends Reducer <Text, IntWritable, Text, IntWritable>
    {
        public void reduce(Text key,Iterable<IntWritable> values, Context context)throws IOException, InterruptedException
        {
            int sum=0;
            for(IntWritable value:values)
                {
                    sum+=value.get();
                }
            context.write(key,new IntWritable(sum));
        }
    }
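
One optional aside on this reducer (it has no bearing on the wrong result): because it only sums IntWritable values and its input and output types match the map output types, it could also be registered as a combiner in the driver, a common pattern for sum-style jobs. A hypothetical extra line in NumberDriver would be:

    job.setCombinerClass(NumberReducer.class);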

I think you forgot to set sum to 0 at the beginning of the map function:

public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException
{
    sum = 0;
...

My best guess is:

    int sum; // <-- Why a class member?
    public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException
    {
        int sum = 0; //Why not here?
        StringTokenizer itr=new StringTokenizer(value.toString());
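
To make the failure mode concrete: because sum is an instance field, it survives across map() calls. The first input line (1 2 3 4) emits ("sum", 10); on the second line (5 4 3 2) the old total is still there, so the mapper emits ("sum", 10 + 14 = 24); the reducer then adds 10 + 24 = 34, which is exactly the wrong value observed. A minimal corrected mapper, keeping the original package and class names and simply making the accumulator a local variable, might look like this:

package numbersum;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class NumberMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Local variable: the running total starts at 0 for every input line.
        int sum = 0;
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            sum += Integer.parseInt(itr.nextToken());
        }
        // Emit one partial sum per line; the reducer adds the partial sums together.
        context.write(new Text("sum"), new IntWritable(sum));
    }
}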

No, instance variables default to zero values.

Thanks a lot... it worked. Declaring it as a class member doesn't refresh its value. Accepting it as the answer @AnishGupta