Chaining jobs in Java Hadoop: type mismatch
I want to map -> reduce -> map -> reduce. Here is what I am trying to do: I have this input TSV file:
1 2
2 1
2 3
3 2
4 2
4 3
After my first map/reduce job, I have this:
1 0
2 -1
3 -1
4 2
After my second map/reduce job, I should have this (the output file).
My code compiles, but for the second job I get this error:
Error: java.io.IOException: Type mismatch in value from map: expected org.apache.hadoop.io.IntWritable, received org.apache.hadoop.io.Text
I don't understand, because I am not sending Text values to my second job. Here is my full code:
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.util.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;

public class problem {

    public static class DiffMapper extends Mapper<Object, Text, Text, IntWritable> {
        Text key = new Text();
        private final static IntWritable one = new IntWritable(1);
        private final static IntWritable minus = new IntWritable(-1);

        public void map(Object offset, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString(), "\t");
            while (itr.hasMoreTokens()) {
                if (itr.countTokens() % 2 == 0) {
                    key.set(itr.nextElement().toString());
                    context.write(key, one);
                }
                else {
                    key.set(itr.nextElement().toString());
                    context.write(key, minus);
                }
            }
        }
    }

    public static class DiffReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static class CountMapper extends Mapper<Text, IntWritable, IntWritable, IntWritable> {
        IntWritable key2 = new IntWritable();
        private final static IntWritable one = new IntWritable(1);

        public void mapCount(Text offset, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString(), "\t");
            while (itr.hasMoreElements()) {
                String node = itr.nextElement().toString();
                Integer diff = Integer.parseInt(itr.nextElement().toString());
                key2.set(diff);
                context.write(key2, one);
            }
        }
    }

    public static class CountReducer extends Reducer<IntWritable,IntWritable,LongWritable,IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduceCount(LongWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf1 = new Configuration();
        Job job = Job.getInstance(conf1, "problem");
        job.setJarByClass(problem.class);
        job.setMapperClass(DiffMapper.class);
        job.setReducerClass(DiffReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        Path outputPath = new Path("Diff");
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, outputPath);
        outputPath.getFileSystem(conf1).delete(outputPath);
        job.waitForCompletion(true);
        //System.exit(job.waitForCompletion(true) ? 0 : 1);

        Configuration conf2 = new Configuration();
        Job job2 = Job.getInstance(conf2, "problem");
        job2.setJarByClass(problem.class);
        job2.setMapperClass(CountMapper.class);
        job2.setReducerClass(CountReducer.class);
        job2.setOutputKeyClass(LongWritable.class);
        job2.setOutputValueClass(IntWritable.class);

        Path outputPath2 = new Path(args[1]);
        FileInputFormat.addInputPath(job2, outputPath);
        FileOutputFormat.setOutputPath(job2, new Path(args[1]));
        outputPath2.getFileSystem(conf2).delete(outputPath2, true);
        System.exit(job2.waitForCompletion(true) ? 0 : 1);
    }
}
By default, all mappers use TextInputFormat. Therefore, the input keys are LongWritable and the input values are Text.
Your error comes from declaring IntWritable as the mapper's input value type.
Your second mapper is no different from the first in this respect, so both mapper definitions need to be extends Mapper<LongWritable, Text, ...>, i.e. take LongWritable keys and Text values as input.
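A minimal sketch of what that fix could look like (my own illustration, not the answerer's code): the second job reads the first job's text output, so CountMapper takes LongWritable/Text as input and parses each "node<TAB>diff" line itself. As an extra change on my part, I have also renamed mapCount/reduceCount to map/reduce and added @Override; otherwise the framework never calls these methods, falls back to the identity Mapper, and forwards the raw Text value, which produces exactly this kind of type mismatch.

    // Hypothetical corrected second-stage classes (untested sketch)
    public static class CountMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
        private final IntWritable key2 = new IntWritable();
        private final static IntWritable one = new IntWritable(1);

        @Override // must be named "map" so it actually overrides Mapper.map()
        public void map(LongWritable offset, Text value, Context context) throws IOException, InterruptedException {
            // each input line is one line of the first job's output: "node<TAB>diff"
            String[] parts = value.toString().split("\t");
            key2.set(Integer.parseInt(parts[1]));
            context.write(key2, one);
        }
    }

    public static class CountReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override // must be named "reduce" so it actually overrides Reducer.reduce()
        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

With these signatures the map output types match the reduce output types, so in the driver job2.setOutputKeyClass(IntWritable.class) would replace the current LongWritable setting; otherwise you would have to call job2.setMapOutputKeyClass and job2.setMapOutputValueClass explicitly.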