Java MapReduce program throws IOException "Type mismatch in key from map"

Hi, please find my code below, which is throwing the exception:

package HadoopMapReduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class HospitalsMapReduce {

    public static class TokenizerMapper
            extends Mapper<Text, Text, Text, Text> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();
        private Text val = new Text();

        public void map(Text key, Text value, Reducer.Context context) throws IOException, InterruptedException {
            System.out.println("This is Value " + value);

            String rec[] = value.toString().split(",");
            String disease=rec[0];
            String name = rec[1];
            String loc = rec[2];
            int budget = Integer.parseInt(rec[3]);
            int rating = Integer.parseInt(rec[4]);
            String val1=1+","+name+","+budget+","+rating;



            if (loc.equalsIgnoreCase("Pune")) {
                word.set(disease);
                val.set(val1);
                context.write(word, val);
            }

        }
    }

    public static class IntSumReducer
            extends Reducer<Text, Text, Text, Text> {

        private Text result = new Text();

        public void reduce(Text key, Iterator<Text> values,
                Reducer.Context context
        ) throws IOException, InterruptedException {


            int sum = 0;
            int budget=0;
            float avgBudget=0;
            while(values.hasNext())
             {
                  String value[]=values.next().toString().split(",");
                  sum=sum+Integer.parseInt(value[0]);
                  budget=budget+ Integer.parseInt(value[2]);                 
             }

            avgBudget=budget/sum;

            result.set(sum+" "+avgBudget);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000");
        FileSystem hdfs = FileSystem.get(conf);
        Path output = new Path("/test/output2/");
        if (hdfs.exists(output)) {
            hdfs.delete(output, true);
        }
        Job job = Job.getInstance(conf, "Hospital count");
        job.setJarByClass(HospitalCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, Text.class,Text.class);

        FileInputFormat.addInputPath(job, new Path("/test/hospital"));
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Please see the updated code below:

package HadoopMapReduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class HospitalsMapReduce {

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, Text> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();
        private Text val = new Text();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            System.out.println("This is Value " + value);

            String rec[] = value.toString().split(",");
            String disease=rec[0];
            String name = rec[1];
            String loc = rec[2];
            int budget = Integer.parseInt(rec[3]);
            int rating = Integer.parseInt(rec[4]);
            String val1=1+","+name+","+budget+","+rating;



            if (loc.equalsIgnoreCase("Pune")) {
                word.set(disease);
                val.set(val1);
                context.write(word, val);
            }

        }
    }

    public static class IntSumReducer
            extends Reducer<Text, Iterator<Text>, Text, Text> {

        private Text result = new Text();

        public void reduce(Text key, Iterator<Text> values,
                Context context
        ) throws IOException, InterruptedException {


            int sum = 0;
            int budget=0;
            float avgBudget=0;

             System.out.println("This is Reducer Jobs");

            while(values.hasNext())
             {
                  String value[]=values.next().toString().split(",");
                   System.out.println("This is Value " + value);
                  sum=sum+Integer.parseInt(value[0]);
                  budget=budget+ Integer.parseInt(value[2]);                 
             }

            avgBudget=budget/sum;

            result.set(sum+" "+avgBudget);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000");
        FileSystem hdfs = FileSystem.get(conf);
        Path output = new Path("/test/output2/");
        if (hdfs.exists(output)) {
            hdfs.delete(output, true);
        }
        Job job = Job.getInstance(conf, "Hospital_count");
        job.setJarByClass(HospitalsMapReduce.class);
        job.setMapperClass(TokenizerMapper.class);
        //job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, Text.class, IntWritable.class);

        FileInputFormat.addInputPath(job, new Path("/test/hospital/"));
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

But now my problem is that the reducer function does not get executed. My output only shows the output of the map function.

I would summarize your question as:

My keys and values are all strings (Text), but the map/reduce framework thinks I am supplying numbers (LongWritable).

I agree that the source code would seem to make that impossible, since all of the mapper/reducer keys and values are Text.

So you may want to look at how your application jar file is packaged, to check whether the correct version is actually being shipped to the Hadoop cluster. Otherwise, your code does not look like it could end with the given exception.
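
For what it is worth, this particular message usually appears when the mapper's declared input key type does not match what the input format actually delivers. Neither version of the job calls job.setInputFormatClass, so the default TextInputFormat applies, and it hands the mapper a LongWritable byte offset as the key rather than Text. A minimal sketch of a mapper signature matching that assumption (the body would carry the same CSV parsing as in the question):

// assumes: import org.apache.hadoop.io.LongWritable;
public static class TokenizerMapper
        extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // parse the CSV line from 'value' and emit (disease, "1,name,budget,rating")
        // exactly as in the question; the byte-offset key is simply ignored
    }
}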

It looks like your combiner is causing the problem. You have used your reducer function as the combiner, but the output format of your map function and of your combiner function are not the same, and that must not happen. The combiner is invoked on the output of the map function, and its output becomes the input to further combiner passes or to the reduce operation. The reducer expects key-value pairs of the same format in the data that reaches it, whether or not that data has passed through a combiner.
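
By contrast, reusing a reducer as a combiner is only safe when the operation is associative and the value type coming in is the same as the value type going out, as in the classic word-count style sum below (a generic sketch, not tied to the hospital data):

// Safe to register as both combiner and reducer: input values and the output
// value are both IntWritable, and summing is associative and commutative.
public static class SumReducer
        extends Reducer<Text, IntWritable, Text, IntWritable> {

    private final IntWritable total = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        total.set(sum);
        context.write(key, total);
    }
}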

Additionally, from the code above, computing the average inside the combiner function is not the right thing to do; the average will never come out correct.


First, remove the combiner, since it exists only to improve performance. Once you know your code works correctly end to end, you can reintroduce it.
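
To see why averaging in a combiner goes wrong, here is a small numeric illustration with made-up budgets: the average of partial averages is not the overall average.

public class AverageOfAveragesDemo {
    public static void main(String[] args) {
        // three hypothetical budgets: 100, 200 and 300; the true average is 200
        double trueAvg = (100 + 200 + 300) / 3.0;           // 200.0
        // if a combiner pre-averages {100, 200} on one map task and {300} on another,
        // the reducer ends up averaging the two partial averages instead
        double partial1 = (100 + 200) / 2.0;                // 150.0
        double partial2 = 300.0;
        double avgOfAverages = (partial1 + partial2) / 2.0; // 225.0, not 200.0
        System.out.println(trueAvg + " vs " + avgOfAverages);
    }
}

If you do want a combiner later, have it emit partial (count, budgetSum) pairs and let the reducer divide the total budget by the total count; that stays correct no matter how the records are grouped.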

Your reducer definition should look like this:

public static class IntSumReducer
            extends Reducer<Text, Text, Text, Text> {

    public void reduce(Text key, Iterator<Text> values,
                Context context) throws IOException, InterruptedException {

     //your logic
    }

}

Use Hadoop's Iterable instead of a Java Iterator. In the new org.apache.hadoop.mapreduce API, the framework calls reduce(Text key, Iterable<Text> values, Context context); a reduce method declared with an Iterator parameter does not override it, so the default identity reduce runs and your output only shows the mapper's key-value pairs.

Change the reducer definition and code as shown below:

public static class IntSumReducer extends Reducer<Text, Text, Text, Text> {

    private Text result = new Text();

    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        System.out.println("This is Red Value ");
        int sum = 0;
        int budget = 0;
        float avgBudget = 0;
        System.out.println("This is Reducer Jobs");
        for (Text val : values) {
            String value[] = val.toString().split(",");
            System.out.println("This is Reduce Value " + val);
            sum = sum + Integer.parseInt(value[0]);
            budget = budget + Integer.parseInt(value[2]);
        }
        // cast before dividing, otherwise integer division truncates the average
        avgBudget = (float) budget / sum;
        result.set(sum + " " + avgBudget);
        context.write(key, result);
    }
}