Java MapReduce program throws IOException "Type mismatch in key from map"

Hi, please find my code below, which is throwing the exception:

package HadoopMapReduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class HospitalsMapReduce {

    public static class TokenizerMapper
            extends Mapper<Text, Text, Text, Text> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();
        private Text val = new Text();

        public void map(Text key, Text value, Reducer.Context context) throws IOException, InterruptedException {
            System.out.println("This is Value " + value);

            String rec[] = value.toString().split(",");
            String disease=rec[0];
            String name = rec[1];
            String loc = rec[2];
            int budget = Integer.parseInt(rec[3]);
            int rating = Integer.parseInt(rec[4]);
            String val1=1+","+name+","+budget+","+rating;



            if (loc.equalsIgnoreCase("Pune")) {
                word.set(disease);
                val.set(val1);
                context.write(word, val);
            }

        }
    }

    public static class IntSumReducer
            extends Reducer<Text, Text, Text, Text> {

        private Text result = new Text();

        public void reduce(Text key, Iterator<Text> values,
                Reducer.Context context
        ) throws IOException, InterruptedException {


            int sum = 0;
            int budget=0;
            float avgBudget=0;
            while(values.hasNext())
             {
                  String value[]=values.next().toString().split(",");
                  sum=sum+Integer.parseInt(value[0]);
                  budget=budget+ Integer.parseInt(value[2]);                 
             }

            avgBudget=budget/sum;

            result.set(sum+" "+avgBudget);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000");
        FileSystem hdfs = FileSystem.get(conf);
        Path output = new Path("/test/output2/");
        if (hdfs.exists(output)) {
            hdfs.delete(output, true);
        }
        Job job = Job.getInstance(conf, "Hospital count");
        job.setJarByClass(HospitalCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, Text.class,Text.class);

        FileInputFormat.addInputPath(job, new Path("/test/hospital"));
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Please see the updated code below:

package HadoopMapReduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class HospitalsMapReduce {

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, Text> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();
        private Text val = new Text();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            System.out.println("This is Value " + value);

            String rec[] = value.toString().split(",");
            String disease=rec[0];
            String name = rec[1];
            String loc = rec[2];
            int budget = Integer.parseInt(rec[3]);
            int rating = Integer.parseInt(rec[4]);
            String val1=1+","+name+","+budget+","+rating;



            if (loc.equalsIgnoreCase("Pune")) {
                word.set(disease);
                val.set(val1);
                context.write(word, val);
            }

        }
    }

    public static class IntSumReducer
            extends Reducer<Text, Iterator<Text>, Text, Text> {

        private Text result = new Text();

        public void reduce(Text key, Iterator<Text> values,
                Context context
        ) throws IOException, InterruptedException {


            int sum = 0;
            int budget=0;
            float avgBudget=0;

             System.out.println("This is Reducer Jobs");

            while(values.hasNext())
             {
                  String value[]=values.next().toString().split(",");
                   System.out.println("This is Value " + value);
                  sum=sum+Integer.parseInt(value[0]);
                  budget=budget+ Integer.parseInt(value[2]);                 
             }

            avgBudget=budget/sum;

            result.set(sum+" "+avgBudget);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000");
        FileSystem hdfs = FileSystem.get(conf);
        Path output = new Path("/test/output2/");
        if (hdfs.exists(output)) {
            hdfs.delete(output, true);
        }
        Job job = Job.getInstance(conf, "Hospital_count");
        job.setJarByClass(HospitalsMapReduce.class);
        job.setMapperClass(TokenizerMapper.class);
        //job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, Text.class, IntWritable.class);

        FileInputFormat.addInputPath(job, new Path("/test/hospital/"));
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

But now my problem is that the reducer function does not get executed. My output only shows the output of the map function.

I would summarize your question as:

My keys and values are all strings (Text), but the map/reduce framework thinks I am supplying numbers (LongWritable).

I agree that the source code would seem to make that impossible, since all of the mapper/reducer keys and values are Text.

So you may want to look at how your application jar file is packaged, to check whether the correct version is actually being shipped to the Hadoop cluster. Otherwise, your code does not look like it could end with the given exception.
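
For what it is worth, this particular message usually appears when the mapper's declared input key type does not match what the input format actually delivers. Neither version of the job calls job.setInputFormatClass, so the default TextInputFormat applies, and it hands the mapper a LongWritable byte offset as the key rather than Text. A minimal sketch of a mapper signature matching that assumption (the body would carry the same CSV parsing as in the question):

// assumes: import org.apache.hadoop.io.LongWritable;
public static class TokenizerMapper
        extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // parse the CSV line from 'value' and emit (disease, "1,name,budget,rating")
        // exactly as in the question; the byte-offset key is simply ignored
    }
}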

It looks like your combiner is causing the problem. You have used your reducer function as the combiner, but the output format of your map function and of your combiner function are not the same, and that must not happen. The combiner is invoked on the output of the map function, and its output becomes the input to further combiner passes or to the reduce operation. The reducer expects key-value pairs of the same format in the data that reaches it, whether or not that data has passed through a combiner.
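
By contrast, reusing a reducer as a combiner is only safe when the operation is associative and the value type coming in is the same as the value type going out, as in the classic word-count style sum below (a generic sketch, not tied to the hospital data):

// Safe to register as both combiner and reducer: input values and the output
// value are both IntWritable, and summing is associative and commutative.
public static class SumReducer
        extends Reducer<Text, IntWritable, Text, IntWritable> {

    private final IntWritable total = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        total.set(sum);
        context.write(key, total);
    }
}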

Additionally, from the code above, computing the average inside the combiner function is not the right thing to do; the average will never come out correct.


First, remove the combiner, since it exists only to improve performance. Once you know your code works correctly end to end, you can reintroduce it.
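
To see why averaging in a combiner goes wrong, here is a small numeric illustration with made-up budgets: the average of partial averages is not the overall average.

public class AverageOfAveragesDemo {
    public static void main(String[] args) {
        // three hypothetical budgets: 100, 200 and 300; the true average is 200
        double trueAvg = (100 + 200 + 300) / 3.0;           // 200.0
        // if a combiner pre-averages {100, 200} on one map task and {300} on another,
        // the reducer ends up averaging the two partial averages instead
        double partial1 = (100 + 200) / 2.0;                // 150.0
        double partial2 = 300.0;
        double avgOfAverages = (partial1 + partial2) / 2.0; // 225.0, not 200.0
        System.out.println(trueAvg + " vs " + avgOfAverages);
    }
}

If you do want a combiner later, have it emit partial (count, budgetSum) pairs and let the reducer divide the total budget by the total count; that stays correct no matter how the records are grouped.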

Your reducer definition should look like this:

public static class IntSumReducer
            extends Reducer<Text, Text, Text, Text> {

    public void reduce(Text key, Iterator<Text> values,
                Context context) throws IOException, InterruptedException {

     //your logic
    }

}

Use Hadoop's Iterable instead of a Java Iterator. In the new org.apache.hadoop.mapreduce API, the framework calls reduce(Text key, Iterable<Text> values, Context context); a reduce method declared with an Iterator parameter does not override it, so the default identity reduce runs and your output only shows the mapper's key-value pairs.

Change the reducer definition and code as shown below:

public static class IntSumReducer extends Reducer<Text, Text, Text, Text> {

    private Text result = new Text();

    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        System.out.println("This is Red Value ");
        int sum = 0;
        int budget = 0;
        float avgBudget = 0;
        System.out.println("This is Reducer Jobs");
        for (Text val : values) {
            String value[] = val.toString().split(",");
            System.out.println("This is Reduce Value " + val);
            sum = sum + Integer.parseInt(value[0]);
            budget = budget + Integer.parseInt(value[2]);
        }
        // cast before dividing, otherwise integer division truncates the average
        avgBudget = (float) budget / sum;
        result.set(sum + " " + avgBudget);
        context.write(key, result);
    }
}