Java 带有2个键的Mapreduce
我刚开始学习 MapReduce 作业。作为作业的一部分,我必须修改代码,使其接受另一个文本文件作为输入,并且输出必须显示地点和年份,以及该数量的最大值、最小值和平均值。(标签:java、eclipse、hadoop、mapreduce、cloudera)这是我的输入中一行的示例:
2009-01-0760468012694,2.5207754,0.06572168,0.025668362,0.972051954,0.037000279,0.022319018,0.003641149,0.002936745,0.016723641
输出应该是这样的:
卡尔加里2009年平均值为:最大值:最小值:
这是我的代码,它给出了txt文件并计算平均值、最小值和最大值:
public class AverageMinMax {
public static class Map extends Mapper<LongWritable,Date,Text,Text> {
//private static final FloatWritable rep= new FloatWritable(1);
public void map(LongWritable key,Text value,Context context)
throws IOException, InterruptedException {
context.write(new Text("Map_Output"), value);
};
}
public static class Combiner extends Reducer<Text,Text,Text,Text>
{
public void reduce(Text key,Iterable<Text> values,Context context) throws IOException,InterruptedException
{
Integer NumberOfValues=0;
double sum=0D;
double min=0D;
double max=0D;
//double min=values.get(0);
Iterator<Text> itr = values.iterator();
//convertString=values(0);
while(itr.hasNext())
{
String TexttoString = itr.next().toString();
Double value = Double.parseDouble(TexttoString);
if(value<min)
{
min=value;
}
if(value>max)
{
max=value;
}
NumberOfValues++;
sum+=value;
}
Double average = sum/NumberOfValues;
context.write(new Text("Combiner_output"), new Text(average + "," + NumberOfValues+","+min+","+max));
};
}
public static class Reduce extends
Reducer<Text,Text,Text,Text> {
public void reduce(Text key, Iterable<Text> values,
Context context) throws IOException, InterruptedException {
Integer totalNumberOfValues= 0;
Double sum=0.00;
Double min=0D;
Double max=0D;
Iterator<Text> itr = values.iterator();
while(itr.hasNext())
{
String TexttoString = itr.next().toString();
String[] split_String = TexttoString.split(",");
Double average = Double.parseDouble(split_String[0]);
Integer NumberOfValues = Integer.parseInt(split_String[1]);
Double minValue=Double.parseDouble(split_String[2]);
Double maxValue=Double.parseDouble(split_String[3]);
if(minValue<min)
{
min=minValue;
}
if(maxValue>max)
{
max=maxValue;
}
sum+=(average*NumberOfValues);
totalNumberOfValues+=NumberOfValues;
}
Double average= sum/totalNumberOfValues;
context.write(new Text("Average and Minimum and Max is"), new Text(average.toString()+" and "+ min.toString()+" and "+ max.toString()));
};
}
/**
 * Configures and runs the job, then exits with a status that reflects the
 * outcome: 0 on success, 1 if the job failed, 2 on bad usage.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: AverageMinMax <input path> <output path>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    // The job name is passed once here instead of being set and then overwritten.
    Job job = new Job(conf, "MapReduceAssignment");
    job.setJarByClass(AverageMinMax.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Combiner.class);
    job.setReducerClass(Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Propagate failure to the caller; the result was previously discarded,
    // so a failed job still exited with status 0.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
公共类AverageInMax{
公共静态类映射扩展映射器{
//private static final FloatWritable rep=新的FloatWritable(1);
公共void映射(可长写键、文本值、上下文)
抛出IOException、InterruptedException{
写入(新文本(“映射输出”),值);
};
}
公共静态类组合器扩展了Reducer
{
公共void reduce(文本键、Iterable值、上下文上下文)引发IOException、InterruptedException
{
整数NumberOfValues=0;
双和=0D;
双分钟=0D;
双最大值=0D;
//双最小值=值。获取(0);
迭代器itr=values.Iterator();
//convertString=值(0);
while(itr.hasNext())
{
字符串TexttoString=itr.next().toString();
Double value=Double.parseDouble(TexttoString);
如果(最大值)
{
最大值=最大值;
}
NumberOfValues++;
总和+=数值;
}
双平均值=总和/数值;
write(新文本(“组合器输出”)、新文本(平均值+”、“+NumberOfValues+”、“+min+”、“+max));
};
}
公共静态类Reduce扩展
减速器{
public void reduce(文本键、Iterable值、,
上下文)抛出IOException、InterruptedException{
整数totalNumberOfValues=0;
双倍和=0.00;
双分钟=0D;
双最大值=0D;
迭代器itr=values.Iterator();
while(itr.hasNext())
{
字符串TexttoString=itr.next().toString();
String[]split_String=TexttoString.split(“,”);
Double average=Double.parseDouble(拆分字符串[0]);
整数NumberOfValues=Integer.parseInt(拆分字符串[1]);
Double minValue=Double.parseDouble(拆分字符串[2]);
Double maxValue=Double.parseDouble(split_String[3]);
if(最小值最大值)
{
最大值=最大值;
}
总和+=(平均值*数值);
totalNumberOfValues+=NumberOfValues;
}
双平均值=总和/总数值;
context.write(新文本(“平均值和最小值以及最大值”),新文本(平均值.toString()+”和“+min.toString()+”和“+Max.toString()));
};
}
公共静态void main(字符串[]args)引发异常{
Configuration conf=新配置();
Job Job=新作业(conf,“AverageMinMax.class”);
job.setJarByClass(AverageMinMax.class);
job.setJobName(“MapReduceAsignment”);
//JobConf conf=newjobconf(Hadoop\u map\u reduce.class);
//conf.setJobName(“Hadoop_赋值”);
//Configuration conf=新配置();
//Job Job=新作业(conf,“maxmin”);
//setJarByClass(Hadoop\u map\u reduce.class);
//FileSystem fs=FileSystem.get(conf);
/*如果(fs.存在(新路径(args[1])){
fs.delete(新路径(args[1]),true);
}*/
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
//作业。setNumReduceTasks(1);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setCombinerClass(Combiner.class);
setInputFormatClass(TextInputFormat.class);
setOutputFormatClass(TextOutputFormat.class);
addInputPath(作业,新路径(args[0]);
//setOutputPath(作业,新路径(args[1]);
//addInputPath(作业,新路径(“/home/cloudera/Desktop/assign2”);
setOutputPath(作业,新路径(args[1]);
//setOutputPath(作业,新路径(“user/cloudera/output”);
job.waitForCompletion(true);
}
}
所以,我的第一个问题是我不知道如何在映射器中转换日期,以及如何找到2个键并显示在输出中。我的意思是如何重写这个代码
谢谢你的帮助。
好的,看来你的代码有不少问题。我马上想到两个:
- 您的映射器和组合器输出的键都是固定字符串(例如 'Combiner_output')。这行不通。您希望的键很可能是城市名称,在您的例子中就是“卡尔加里”。这很容易通过 value.toString().split(',')[0] 得到(即把 value 按 , 字符拆分后,从所得列表中取第一个元素)。
- 你的代码根本没有输出城市名称。可以在 reducer 中执行
context.write(new Text(key.toString() + " Average and Minimum and Max is"), new Text(average.toString() + " and " + min.toString() + " and " + max.toString()));
来做到这一点,其中 key 是上一点中提到的城市名称键。
一般来说,我建议您从什么是Mapreduce开始,它的设计权衡,以及如何在Hadoop架构的范围内充分利用它。您的问题并不完全清楚。因此,我的假设如下:
// Mapper-body sketch: builds a "location year" key and collects the numeric
// readings of one comma-separated input line.
// NOTE(review): the field positions (location at index 0, date at index 2,
// readings from index 5 on) are the answer's stated assumption - confirm
// against the real input format.
String line = value.toString();
String[] tokens = line.split(",");
// The year is the first dash-separated component of the date field.
String[] date = tokens[2].split("-");
String year = date[0];
String location = tokens[0];
Text locationYear = new Text(location + " " + year);
// NOTE(review): ArrayListOfDoublesWritable is not defined in this file;
// presumably it comes from an external library - verify it is on the classpath.
ArrayListOfDoublesWritable readings = new ArrayListOfDoublesWritable();
// tokens is an array, so its size is the field "length", not a method call.
for (int i = 5; i < tokens.length; i++) {
    readings.add(Double.parseDouble(tokens[i]));
}
context.write(locationYear, readings);