Java Can';t在mapper、MapReduce中访问hashmap
我想用另一个文件中定义的词汇表(csv)替换映射器中输入数据的值。因此,我尝试将csv数据放入HashMap,并在映射器中引用它 下面的java代码和csv是我程序的简化版本。这段代码在我的本地环境中运行(Mac OS X,伪分布式模式),但在我的EC2实例中运行(ubuntu,伪分布式模式) 详细地说,我在过程中得到了这个标准:Java Can';t在mapper、MapReduce中访问hashmap,java,hadoop,mapreduce,Java,Hadoop,Mapreduce,我想用另一个文件中定义的词汇表(csv)替换映射器中输入数据的值。因此,我尝试将csv数据放入HashMap,并在映射器中引用它 下面的java代码和csv是我程序的简化版本。这段代码在我的本地环境中运行(Mac OS X,伪分布式模式),但在我的EC2实例中运行(ubuntu,伪分布式模式) 详细地说,我在过程中得到了这个标准: cat:4 human:2 flamingo:1 这意味着filereader成功地将csv数据放入HashMap 然而,映射器没有映射任何内容,因此我在EC2环境
cat:4
human:2
flamingo:1
这意味着filereader成功地将csv数据放入HashMap
然而,映射器没有映射任何内容,因此我在EC2环境中得到了空输出,尽管它映射了3*(输入文件的行数)元素,并在本地生成了以下内容:
test,cat
test,flamingo
test,human
有人有答案或提示吗
Test.java
import java.io.IOException;
import java.util.StringTokenizer;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.DataInput;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.io.WritableUtils;
public class Test {
public static HashMap<String, Integer> map = new HashMap<String, Integer>();
public static class Mapper1 extends Mapper<LongWritable, Text, Text, Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
for(Map.Entry<String, Integer> e : map.entrySet()) {
context.write(new Text(e.getKey()), new Text("test"));
}
}
}
public static class Reducer1 extends Reducer<Text, Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable<Text> vals, Context context) throws IOException, InterruptedException {
context.write(new Text("test"), key);
}
}
public static class CommaTextOutputFormat extends TextOutputFormat<Text, Text> {
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
Configuration conf = job.getConfiguration();
String extension = ".txt";
Path file = getDefaultWorkFile(job, extension);
FileSystem fs = file.getFileSystem(conf);
FSDataOutputStream fileOut = fs.create(file, false);
return new LineRecordWriter<Text, Text>(fileOut, ",");
}
}
public static void get_list(String list_path){
try {
FileReader fr = new FileReader(list_path);
BufferedReader br = new BufferedReader(fr);
String line = null, name = null;
int leg = 0;
while ((line = br.readLine()) != null) {
if (!line.startsWith("name") && !line.trim().isEmpty()) {
String[] name_leg = line.split(",", 0);
name = name_leg[0];
leg = Integer.parseInt(name_leg[1]);
map.put(name, leg);
}
}
br.close();
}
catch(IOException ex) {
System.err.println(ex.getMessage());
ex.printStackTrace();
}
for(Map.Entry<String, Integer> e : map.entrySet()) {
System.out.println(e.getKey() + ":" + e.getValue());
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
if (args.length != 3) {
System.err.println(
"Need 3 arguments: <input dir> <output base dir> <list path>");
System.exit(1);
}
get_list(args[2]);
Job job = Job.getInstance(conf, "test");
job.setJarByClass(Test.class);
job.setMapperClass(Mapper1.class);
job.setReducerClass(Reducer1.class);
job.setNumReduceTasks(1);
job.setInputFormatClass(TextInputFormat.class);
// mapper output
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// reducer output
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// formtter
job.setOutputFormatClass(CommaTextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if(!job.waitForCompletion(true)){
System.exit(1);
}
System.out.println("All Finished");
System.exit(0);
}
}
import java.io.IOException;
import java.util.StringTokenizer;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.DataInput;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.net.URI;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.io.WritableUtils;
public class Test {
public static HashMap<String, Integer> map = new HashMap<String, Integer>();
public static class Mapper1 extends Mapper<LongWritable, Text, Text, Text> {
@Override
protected void setup(Context context) throws IOException, InterruptedException {
URI[] files = context.getCacheFiles();
Path list_path = new Path(files[0]);
try {
FileSystem fs = list_path.getFileSystem(context.getConfiguration());
BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(list_path)));
String line = null, name = null;
int leg = 0;
while ((line = br.readLine()) != null) {
if (!line.startsWith("name") && !line.trim().isEmpty()) {
String[] name_leg = line.split(",", 0);
name = name_leg[0];
leg = Integer.parseInt(name_leg[1]);
map.put(name, leg);
}
}
br.close();
}
catch(IOException ex) {
System.err.println(ex.getMessage());
ex.printStackTrace();
}
for(Map.Entry<String, Integer> e : map.entrySet()) {
System.out.println(e.getKey() + ":" + e.getValue());
}
}
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
for(Map.Entry<String, Integer> e : map.entrySet()) {
context.write(new Text(e.getKey()), new Text("test"));
}
}
}
public static class Reducer1 extends Reducer<Text, Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable<Text> vals, Context context) throws IOException, InterruptedException {
context.write(new Text("test"), key);
}
}
// Writer
public static class CommaTextOutputFormat extends TextOutputFormat<Text, Text> {
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
Configuration conf = job.getConfiguration();
String extension = ".txt";
Path file = getDefaultWorkFile(job, extension);
FileSystem fs = file.getFileSystem(conf);
FSDataOutputStream fileOut = fs.create(file, false);
return new LineRecordWriter<Text, Text>(fileOut, ",");
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
if (args.length != 3) {
System.err.println(
"Need 3 arguments: <input dir> <output base dir> <list path>");
System.exit(1);
}
Job job = Job.getInstance(conf, "test");
job.addCacheFile(new Path(args[2]).toUri());
job.setJarByClass(Test.class);
job.setMapperClass(Mapper1.class);
job.setReducerClass(Reducer1.class);
job.setNumReduceTasks(1);
job.setInputFormatClass(TextInputFormat.class);
// mapper output
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// reducer output
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// formtter
job.setOutputFormatClass(CommaTextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if(!job.waitForCompletion(true)){
System.exit(1);
}
System.out.println("All Finished");
System.exit(0);
}
}
=================================
我参考@Rahul Sharma的答案,修改代码如下。然后我的代码在这两种环境中都能工作
非常感谢@Rahul Sharma和@Serhiy的精确回答和有用的评论
Test.java
import java.io.IOException;
import java.util.StringTokenizer;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.DataInput;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.io.WritableUtils;
public class Test {
public static HashMap<String, Integer> map = new HashMap<String, Integer>();
public static class Mapper1 extends Mapper<LongWritable, Text, Text, Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
for(Map.Entry<String, Integer> e : map.entrySet()) {
context.write(new Text(e.getKey()), new Text("test"));
}
}
}
public static class Reducer1 extends Reducer<Text, Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable<Text> vals, Context context) throws IOException, InterruptedException {
context.write(new Text("test"), key);
}
}
public static class CommaTextOutputFormat extends TextOutputFormat<Text, Text> {
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
Configuration conf = job.getConfiguration();
String extension = ".txt";
Path file = getDefaultWorkFile(job, extension);
FileSystem fs = file.getFileSystem(conf);
FSDataOutputStream fileOut = fs.create(file, false);
return new LineRecordWriter<Text, Text>(fileOut, ",");
}
}
public static void get_list(String list_path){
try {
FileReader fr = new FileReader(list_path);
BufferedReader br = new BufferedReader(fr);
String line = null, name = null;
int leg = 0;
while ((line = br.readLine()) != null) {
if (!line.startsWith("name") && !line.trim().isEmpty()) {
String[] name_leg = line.split(",", 0);
name = name_leg[0];
leg = Integer.parseInt(name_leg[1]);
map.put(name, leg);
}
}
br.close();
}
catch(IOException ex) {
System.err.println(ex.getMessage());
ex.printStackTrace();
}
for(Map.Entry<String, Integer> e : map.entrySet()) {
System.out.println(e.getKey() + ":" + e.getValue());
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
if (args.length != 3) {
System.err.println(
"Need 3 arguments: <input dir> <output base dir> <list path>");
System.exit(1);
}
get_list(args[2]);
Job job = Job.getInstance(conf, "test");
job.setJarByClass(Test.class);
job.setMapperClass(Mapper1.class);
job.setReducerClass(Reducer1.class);
job.setNumReduceTasks(1);
job.setInputFormatClass(TextInputFormat.class);
// mapper output
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// reducer output
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// formtter
job.setOutputFormatClass(CommaTextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if(!job.waitForCompletion(true)){
System.exit(1);
}
System.out.println("All Finished");
System.exit(0);
}
}
import java.io.IOException;
import java.util.StringTokenizer;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.DataInput;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.net.URI;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.io.WritableUtils;
public class Test {
public static HashMap<String, Integer> map = new HashMap<String, Integer>();
public static class Mapper1 extends Mapper<LongWritable, Text, Text, Text> {
@Override
protected void setup(Context context) throws IOException, InterruptedException {
URI[] files = context.getCacheFiles();
Path list_path = new Path(files[0]);
try {
FileSystem fs = list_path.getFileSystem(context.getConfiguration());
BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(list_path)));
String line = null, name = null;
int leg = 0;
while ((line = br.readLine()) != null) {
if (!line.startsWith("name") && !line.trim().isEmpty()) {
String[] name_leg = line.split(",", 0);
name = name_leg[0];
leg = Integer.parseInt(name_leg[1]);
map.put(name, leg);
}
}
br.close();
}
catch(IOException ex) {
System.err.println(ex.getMessage());
ex.printStackTrace();
}
for(Map.Entry<String, Integer> e : map.entrySet()) {
System.out.println(e.getKey() + ":" + e.getValue());
}
}
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
for(Map.Entry<String, Integer> e : map.entrySet()) {
context.write(new Text(e.getKey()), new Text("test"));
}
}
}
public static class Reducer1 extends Reducer<Text, Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable<Text> vals, Context context) throws IOException, InterruptedException {
context.write(new Text("test"), key);
}
}
// Writer
public static class CommaTextOutputFormat extends TextOutputFormat<Text, Text> {
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
Configuration conf = job.getConfiguration();
String extension = ".txt";
Path file = getDefaultWorkFile(job, extension);
FileSystem fs = file.getFileSystem(conf);
FSDataOutputStream fileOut = fs.create(file, false);
return new LineRecordWriter<Text, Text>(fileOut, ",");
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
if (args.length != 3) {
System.err.println(
"Need 3 arguments: <input dir> <output base dir> <list path>");
System.exit(1);
}
Job job = Job.getInstance(conf, "test");
job.addCacheFile(new Path(args[2]).toUri());
job.setJarByClass(Test.class);
job.setMapperClass(Mapper1.class);
job.setReducerClass(Reducer1.class);
job.setNumReduceTasks(1);
job.setInputFormatClass(TextInputFormat.class);
// mapper output
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// reducer output
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// formtter
job.setOutputFormatClass(CommaTextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if(!job.waitForCompletion(true)){
System.exit(1);
}
System.out.println("All Finished");
System.exit(0);
}
}
import java.io.IOException;
导入java.util.StringTokenizer;
导入java.io.FileReader;
导入java.io.BufferedReader;
导入java.io.DataInput;
导入java.util.HashMap;
导入java.util.Map;
导入java.util.Map.Entry;
导入java.net.URI;
导入java.io.InputStreamReader;
导入org.apache.hadoop.conf.Configuration;
导入org.apache.hadoop.fs.Path;
导入org.apache.hadoop.io.IntWritable;
导入org.apache.hadoop.io.LongWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.io.writeable;
导入org.apache.hadoop.mapreduce.Job;
导入org.apache.hadoop.mapreduce.Mapper;
导入org.apache.hadoop.mapreduce.Reducer;
导入org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
导入org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
导入org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
导入org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
导入org.apache.hadoop.mapreduce.TaskAttemptContext;
导入org.apache.hadoop.fs.FileSystem;
导入org.apache.hadoop.fs.FSDataOutputStream;
导入org.apache.hadoop.mapreduce.RecordWriter;
导入org.apache.hadoop.io.WritableUtils;
公开课考试{
public static HashMap map=new HashMap();
公共静态类Mapper1扩展了Mapper{
@凌驾
受保护的无效设置(上下文上下文)引发IOException、InterruptedException{
URI[]files=context.getCacheFiles();
路径列表_Path=新路径(文件[0]);
试一试{
FileSystem fs=list_path.getFileSystem(context.getConfiguration());
BufferedReader br=新的BufferedReader(新的InputStreamReader(fs.open(list_path));
字符串行=null,名称=null;
int leg=0;
而((line=br.readLine())!=null){
如果(!line.startsWith(“name”)和(&!line.trim().isEmpty()){
String[]name_leg=line.split(“,”,0);
name=name_leg[0];
leg=Integer.parseInt(name_leg[1]);
地图。放置(名称、腿);
}
}
br.close();
}
捕获(IOEX异常){
System.err.println(例如getMessage());
例如printStackTrace();
}
对于(Map.Entry e:Map.entrySet()){
System.out.println(e.getKey()+“:”+e.getValue());
}
}
@凌驾
受保护的void映射(LongWritable键、文本值、上下文)引发IOException、InterruptedException{
对于(Map.Entry e:Map.entrySet()){
write(新文本(例如getKey()),新文本(“test”);
}
}
}
公共静态类Reducer 1扩展了Reducer{
@凌驾
受保护的void reduce(文本键、Iterable VAL、上下文上下文)引发IOException、InterruptedException{
编写(新文本(“测试”),键);
}
}
//作家
公共静态类CommaTextOutputFormat扩展了TextOutputFormat{
@凌驾
公共RecordWriter getRecordWriter(TaskAttemptContext作业)引发IOException、InterruptedException{
Configuration=job.getConfiguration();
字符串扩展名=“.txt”;
路径文件=getDefaultWorkFile(作业,扩展名);
FileSystem fs=file.getFileSystem(conf);
FSDataOutputStream fileOut=fs.create(文件,false);
返回新的LineRecordWriter(文件输出,“,”);
}
}
公共静态void main(字符串[]args)引发异常{
Configuration conf=新配置();
如果(参数长度!=3){
System.err.println(
“需要3个参数:”;
系统出口(1);
}
Job Job=Job.getInstance(conf,“test”);
job.addCacheFile(新路径(args[2]).toUri());
job.setJarByClass(Test.class);
job.setMapperClass(Mapper1.class);
job.setReducerClass(Reducer1.class);
作业。setNumReduceTasks(1);
setInputFormatClass(TextInputFormat.class);
//映射器输出
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
//减速器输出
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//福米特
setOutputFormatClass(CommaTextOutputFormat.class);
addInputPath(作业,新路径(args[0]);
setOutputPath(作业,新路径(args[1]);
如果(!job.waitForCompletion(true)){
系统出口(1);
}
系统输出打印项次(“全部完成”);
系统出口(0);
}
}
首先,您需要了解有关mapreduce框架的更多信息
您的程序在本地模式下的行为与预期一致,因为映射器、还原器和作业都是在同一JVM上启动的。在这种情况下,对于伪分布式模式或分布式模式,将为每个组件分配单独的JVM。使用get_list放入hashMap的值对mapper和reducer不可见,因为它们位于单独的JVM中用于使其在群集模式下工作
JobConf job = new JobConf();<br>
DistributedCache.addCacheArchive(new URI(args[2]), job);
我相信OP告诉了我即使