Java: Can't access HashMap in mapper (MapReduce)


I want to replace values in the mapper's input data using a vocabulary (CSV) defined in another file, so I tried to load the CSV data into a HashMap and refer to it inside the mapper.

The Java code and CSV below are a simplified version of my program. The code works in my local environment (Mac OS X, pseudo-distributed mode) but not on my EC2 instance (Ubuntu, pseudo-distributed mode).

In detail, I get this standard output during the run:

cat:4
human:2
flamingo:1
This means the file reader successfully loaded the CSV data into the HashMap.
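
For reference, the list CSV is of this shape, a header line followed by name,leg rows (the exact file isn't shown here, so this is only an illustrative example consistent with the parsing code and the stdout above):

name,leg
cat,4
human,2
flamingo,1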

However, the mapper maps nothing, so I get empty output in the EC2 environment, even though locally it maps 3 * (number of lines in the input file) elements and produces the following:

test,cat
test,flamingo
test,human
Does anyone have an answer or a hint?

Test.java

import java.io.IOException;
import java.util.StringTokenizer;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.DataInput; 
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.io.WritableUtils;

public class Test {

  public static HashMap<String, Integer> map  = new HashMap<String, Integer>();

  public static class Mapper1 extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
      for(Map.Entry<String, Integer> e : map.entrySet()) {
        context.write(new Text(e.getKey()), new Text("test"));
      }
    }
  }

  public static class Reducer1 extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> vals, Context context) throws IOException, InterruptedException {
      context.write(new Text("test"), key);
    }
  }

  public static class CommaTextOutputFormat extends TextOutputFormat<Text, Text> {
    @Override
    public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
      Configuration conf = job.getConfiguration();
      String extension = ".txt";
      Path file = getDefaultWorkFile(job, extension);
      FileSystem fs = file.getFileSystem(conf);
      FSDataOutputStream fileOut = fs.create(file, false);
      return new LineRecordWriter<Text, Text>(fileOut, ",");
    }
  }

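  // Read the name,leg pairs from the list CSV at list_path into the static map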
  public static void get_list(String list_path){
    try {
      FileReader fr = new FileReader(list_path);
      BufferedReader br = new BufferedReader(fr);
      String line = null, name = null;
      int leg = 0;

      while ((line = br.readLine()) != null) {
        if (!line.startsWith("name") && !line.trim().isEmpty()) {
          String[] name_leg = line.split(",", 0);
          name = name_leg[0];
          leg = Integer.parseInt(name_leg[1]);
          map.put(name, leg);
        }
      }
      br.close();
    }
    catch(IOException ex) {
      System.err.println(ex.getMessage());
      ex.printStackTrace();
    }

    for(Map.Entry<String, Integer> e : map.entrySet()) {
      System.out.println(e.getKey() + ":" + e.getValue());
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    if (args.length != 3) {
      System.err.println(
        "Need 3 arguments: <input dir> <output base dir> <list path>");
      System.exit(1);
    }

    get_list(args[2]);
    Job job = Job.getInstance(conf, "test");

    job.setJarByClass(Test.class);
    job.setMapperClass(Mapper1.class);
    job.setReducerClass(Reducer1.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);

    // mapper output
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // reducer output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // formatter
    job.setOutputFormatClass(CommaTextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    if(!job.waitForCompletion(true)){
      System.exit(1);
    }

    System.out.println("All Finished");
    System.exit(0);
  }
}
================================= I modified my code as below, referring to @Rahul Sharma's answer. Now it works in both environments.

Thank you very much, @Rahul Sharma and @Serhiy, for the precise answer and the helpful comments.

Test.java

import java.io.IOException;
import java.util.StringTokenizer;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.DataInput; 
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.net.URI;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.io.WritableUtils;

public class Test {

  public static HashMap<String, Integer> map  = new HashMap<String, Integer>();

  public static class Mapper1 extends Mapper<LongWritable, Text, Text, Text> {

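    // In each mapper, read the list file from the distributed cache and populate the static map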
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      URI[] files = context.getCacheFiles();
      Path list_path = new Path(files[0]);

      try {
        FileSystem fs = list_path.getFileSystem(context.getConfiguration());
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(list_path)));
        String line = null, name = null;
        int leg = 0;

        while ((line = br.readLine()) != null) {
          if (!line.startsWith("name") && !line.trim().isEmpty()) {
            String[] name_leg = line.split(",", 0);
            name = name_leg[0];
            leg = Integer.parseInt(name_leg[1]);
            map.put(name, leg);
          }
        }
        br.close();
      }
      catch(IOException ex) {
        System.err.println(ex.getMessage());
        ex.printStackTrace();
      }

      for(Map.Entry<String, Integer> e : map.entrySet()) {
        System.out.println(e.getKey() + ":" + e.getValue());
      }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
      for(Map.Entry<String, Integer> e : map.entrySet()) {
        context.write(new Text(e.getKey()), new Text("test"));
      }
    }

  }

  public static class Reducer1 extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> vals, Context context) throws IOException, InterruptedException {
      context.write(new Text("test"), key);
    }
  }

  // Writer
  public static class CommaTextOutputFormat extends TextOutputFormat<Text, Text> {
    @Override
    public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
      Configuration conf = job.getConfiguration();
      String extension = ".txt";
      Path file = getDefaultWorkFile(job, extension);
      FileSystem fs = file.getFileSystem(conf);
      FSDataOutputStream fileOut = fs.create(file, false);
      return new LineRecordWriter<Text, Text>(fileOut, ",");
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    if (args.length != 3) {
      System.err.println(
        "Need 3 arguments: <input dir> <output base dir> <list path>");
      System.exit(1);
    }

    Job job = Job.getInstance(conf, "test");
    job.addCacheFile(new Path(args[2]).toUri());

    job.setJarByClass(Test.class);
    job.setMapperClass(Mapper1.class);
    job.setReducerClass(Reducer1.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);

    // mapper output
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // reducer output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // formatter
    job.setOutputFormatClass(CommaTextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    if(!job.waitForCompletion(true)){
      System.exit(1);
    }

    System.out.println("All Finished");
    System.exit(0);
  }
}

First, you need to learn a bit more about how the MapReduce framework works.

Your program behaves as expected in local mode because the mapper, the reducer, and the job are all launched in the same JVM. In pseudo-distributed or fully distributed mode, however, each component is given its own JVM. The values you put into the HashMap with get_list are not visible to the mapper and the reducer because they live in separate JVMs.

To make it work in cluster mode, use the distributed cache:

  • In the job's main class, add the file to the distributed cache (the matching mapper-side read is sketched below):

    JobConf job = new JobConf();
    DistributedCache.addCacheArchive(new URI(args[2]), job);
    

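The snippet above uses the older JobConf / DistributedCache API and covers only the driver side. A minimal sketch of the matching mapper-side step (reading the cached file in setup() and filling the map) could look like the following, assuming the plain CSV is added with DistributedCache.addCacheFile() for a single file rather than addCacheArchive(); the OP's revised code above achieves the same thing with the newer job.addCacheFile() / context.getCacheFiles() API:

    // Sketch only (old API, deprecated in Hadoop 2).
    // Requires: import org.apache.hadoop.filecache.DistributedCache;
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();
      // Local paths of the files previously added to the distributed cache
      Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
      if (cacheFiles != null && cacheFiles.length > 0) {
        BufferedReader br = new BufferedReader(new FileReader(cacheFiles[0].toString()));
        String line;
        while ((line = br.readLine()) != null) {
          if (!line.startsWith("name") && !line.trim().isEmpty()) {
            String[] nameLeg = line.split(",", 0);
            map.put(nameLeg[0], Integer.parseInt(nameLeg[1]));
          }
        }
        br.close();
      }
    }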