
Hadoop: why does my TupleWritable become empty when passed to the Reducer?


I have a map(Object key, Text value, Context context) that writes a TupleWritable via context.write(). In reduce(Text key, Iterable<TupleWritable> values, Context context) I read the TupleWritable back, but it is empty. My code is below. This has me confused; any help would be appreciated.

package boc.competition.team1;

import java.io.IOException;
import java.util.HashMap;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.join.TupleWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class App 
{
    public static class SCSTransMap extends Mapper<Object,Text,Text,TupleWritable>{
        private Text name = new Text();

        @Override
        public void map(Object key,Text value,Context context) throws IOException,InterruptedException{
            IntWritable i = new IntWritable(1);
            TupleWritable result = new TupleWritable(new IntWritable[] { i, new IntWritable(3)});
            System.out.println(result.get(0)+"====="+result.get(1));
            // ------ here it prints the right values: 1=====3
            context.write(name, result);
        }
    }
    public static class reducer extends Reducer<Text,TupleWritable,Text,Text>{
        @Override
        public void reduce(Text key,Iterable<TupleWritable> values,Context context) throws IOException,InterruptedException{

            for(TupleWritable tuple:values) {
                System.out.println(tuple.get(0)+"====="+tuple.get(1));
                // and here it prints 0=====0
            }

        }
    }

    public static void main( String[] args ) throws Exception
    {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf,"team1Job");
        job.setJarByClass(App.class);
        job.setReducerClass(reducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(TupleWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        MultipleInputs.addInputPath(job, new Path("C:\\Program Files\\PuTTY\\data\\scs\\Scs_Journal.csv"), TextInputFormat.class,SCSTransMap.class);
        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));

        System.exit(job.waitForCompletion(true)?0:1);
    }
}

I used a user-defined Writable class instead of TupleWritable to pass the values from map to reduce. Here is the user-defined Writable:

package boc.competition.team1;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class IntPairWritable implements Writable {
        private IntWritable value1;
        private IntWritable value2;

        public IntPairWritable() {
            value1 = new IntWritable();
            value2 = new IntWritable();
        }

        public IntPairWritable(int value1, int value2) {
            this.value1 = new IntWritable(value1);
            this.value2 = new IntWritable(value2);
        }

        public int getInt1() {
            return value1.get();
        }

        public int getInt2() {
            return value2.get();
        }

        @Override
        public String toString() {
            return value1.toString()+" "+value2.toString();
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            value1.readFields(in);
            value2.readFields(in);
        }

        @Override
        public void write(DataOutput out) throws IOException {
            value1.write(out);
            value2.write(out);
        }
}
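
For reference, here is a minimal sketch (not part of the original post) of how the mapper and reducer from the question could be rewritten around IntPairWritable; the only driver change this assumes is setting job.setMapOutputValueClass(IntPairWritable.class) instead of TupleWritable.class:

public static class SCSTransMap extends Mapper<Object, Text, Text, IntPairWritable> {
    private Text name = new Text();

    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Both integers travel inside one self-describing Writable,
        // so nothing depends on the join framework's bookkeeping.
        context.write(name, new IntPairWritable(1, 3));
    }
}

public static class reducer extends Reducer<Text, IntPairWritable, Text, Text> {
    @Override
    public void reduce(Text key, Iterable<IntPairWritable> values, Context context) throws IOException, InterruptedException {
        for (IntPairWritable pair : values) {
            // both fields survive the shuffle: prints 1=====3
            System.out.println(pair.getInt1() + "=====" + pair.getInt2());
        }
    }
}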

According to the TupleWritable.java source file:

 * This is *not* a general-purpose tuple type. In almost all cases, users are
 * encouraged to implement their own serializable types, which can perform
 * better validation and provide more efficient encoding than this class is
 * capable. TupleWritable relies on the join framework for type safety and
 * assumes its instances will rarely be persisted, assumptions not only
 * incompatible with, but contrary to the general case.
See also this answer from Chris Douglas:

You need access to TupleWritable::setWritten(int). If you want to use
TupleWritable outside of the join package, you will need to make it (and
probably related methods such as clearWritten(int)) public and recompile.


It is safe to say that TupleWritable is not intended to be a public class for use in MapReduce jobs.
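
To see why the original job prints empty values, here is a small stand-alone sketch (not from the original post; it assumes a Hadoop 2.x org.apache.hadoop.mapreduce.lib.join.TupleWritable on the classpath) that reproduces the shuffle's serialize/deserialize round trip locally:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.lib.join.TupleWritable;

public class TupleWritableRoundTrip {
    public static void main(String[] args) throws Exception {
        TupleWritable tuple = new TupleWritable(
                new IntWritable[] { new IntWritable(1), new IntWritable(3) });

        // The public constructor never marks the positions as "written";
        // only the package-private setWritten(int) used by the join framework does.
        System.out.println("has(0)=" + tuple.has(0) + ", has(1)=" + tuple.has(1)); // both false

        // Simulate what happens between map and reduce: write, then read back.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        tuple.write(new DataOutputStream(buffer));

        TupleWritable copy = new TupleWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        // Fields whose "written" bit is unset are skipped during serialization,
        // so the payload never makes it across; the reducer in the question
        // consequently sees only empty/default values (0=====0).
        System.out.println(copy.get(0) + "=====" + copy.get(1));
    }
}

This is exactly the restriction the javadoc and the quoted answer describe: without access to setWritten(int), values placed in a TupleWritable by user code never survive the shuffle, which is why a custom Writable such as IntPairWritable above is the practical fix.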

@Ben Watson There is other code that sets the variable `name`; it is not really relevant to this question, so I did not post it. Sorry, I did not mean to delete my comment. Thanks for the information.