Java 带有2个键的Mapreduce_Java_Eclipse_Hadoop_Mapreduce_Cloudera

Java 带有2个键的Mapreduce

java eclipse hadoop mapreduce

Java 带有2个键的Mapreduce,java,eclipse,hadoop,mapreduce,cloudera,Java,Eclipse,Hadoop,Mapreduce,Cloudera,我只是在学习地图缩小的工作。我对我的作业做了一件事，我必须更改代码以接受另一个文本文件作为输入，输出必须显示位置和年份以及最大、最小和平均数量。这是我输入的一行的示例： 2009-01-0760468012694,2.5207754,0.06572168,0.025668362,0.972051954,0.037000279,0.022319018,0.003641149,0.002936745,0.016723641 输出应该是这样的：卡尔加里2009年平均值为：最大值：最小值：这是我的代

我刚开始学习 MapReduce 的工作原理。在我的作业中，我需要修改代码，使其接受另一个文本文件作为输入，并且输出必须显示地点和年份，以及最大值、最小值和平均值。下面是输入文件中一行的示例：

2009-01-0760468012694,2.5207754,0.06572168,0.025668362,0.972051954,0.037000279,0.022319018,0.003641149,0.002936745,0.016723641

输出应该是这样的：

卡尔加里2009年平均值为：最大值：最小值：

这是我的代码，它给出了txt文件并计算平均值、最小值和最大值：

public class AverageMinMax {



public static class Map extends Mapper<LongWritable,Date,Text,Text> {


    //private static final FloatWritable rep= new  FloatWritable(1);
        public void map(LongWritable key,Text value,Context context)
        throws IOException, InterruptedException {
                context.write(new Text("Map_Output"), value);
        };
    }
      public static class Combiner extends Reducer<Text,Text,Text,Text>
      {
      public void reduce(Text key,Iterable<Text> values,Context context) throws IOException,InterruptedException
          {
             Integer NumberOfValues=0;
             double sum=0D;
             double min=0D;
             double max=0D;
             //double min=values.get(0);
              Iterator<Text> itr = values.iterator();
              //convertString=values(0);
              while(itr.hasNext())
              {
                  String TexttoString = itr.next().toString();
                  Double value = Double.parseDouble(TexttoString);
                  if(value<min)
                  {
                      min=value;
                  }
                  if(value>max)
                  {
                      max=value;
                  }
                  NumberOfValues++;
                  sum+=value;
              }
               Double average = sum/NumberOfValues;
                context.write(new Text("Combiner_output"), new Text(average + "," + NumberOfValues+","+min+","+max));
          };
      }
 public static class Reduce extends
       Reducer<Text,Text,Text,Text> {
      public void reduce(Text key, Iterable<Text> values,
        Context context) throws IOException, InterruptedException {
           Integer totalNumberOfValues= 0;
          Double sum=0.00;
          Double min=0D;
          Double max=0D;
          Iterator<Text> itr = values.iterator();
            while(itr.hasNext())
          {
              String TexttoString = itr.next().toString();
              String[] split_String = TexttoString.split(",");
              Double average = Double.parseDouble(split_String[0]);
              Integer NumberOfValues = Integer.parseInt(split_String[1]);
              Double minValue=Double.parseDouble(split_String[2]);
              Double maxValue=Double.parseDouble(split_String[3]);
              if(minValue<min)
              {
                  min=minValue;
              }
              if(maxValue>max)
              {
                  max=maxValue;
              }
              sum+=(average*NumberOfValues);
              totalNumberOfValues+=NumberOfValues;   
          } 
          Double average= sum/totalNumberOfValues;
          context.write(new Text("Average and Minimum and Max is"), new Text(average.toString()+" and "+ min.toString()+" and "+ max.toString()));
          };
     }
     /**
      * Configures and runs the job.
      *
      * <p>The process exit status reflects the outcome: 0 when the job succeeds,
      * 1 when it fails, 2 when the command line is incomplete.
      *
      * @param args {@code args[0]} is the input path, {@code args[1]} the output path
      * @throws Exception if the job cannot be configured or submitted
      */
     public static void main(String[] args) throws Exception {
         if (args.length < 2) {
             System.err.println("Usage: AverageMinMax <input path> <output path>");
             System.exit(2);
         }

         Configuration conf = new Configuration();
         Job job = new Job(conf, "AverageMinMax.class");
         job.setJarByClass(AverageMinMax.class);
         job.setJobName("MapReduceAssignment");

         job.setMapperClass(Map.class);
         job.setCombinerClass(Combiner.class);
         job.setReducerClass(Reduce.class);

         // Mapper, combiner and reducer all emit Text keys and Text values.
         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(Text.class);

         job.setInputFormatClass(TextInputFormat.class);
         job.setOutputFormatClass(TextOutputFormat.class);

         FileInputFormat.addInputPath(job, new Path(args[0]));
         FileOutputFormat.setOutputPath(job, new Path(args[1]));

         // Propagate the job result so that calling scripts can detect a failure.
         System.exit(job.waitForCompletion(true) ? 0 : 1);
     }

公共类AverageInMax{
公共静态类映射扩展映射器{
//private static final FloatWritable rep=新的FloatWritable（1）；
公共void映射（可长写键、文本值、上下文）
抛出IOException、InterruptedException{
写入（新文本（“映射输出”），值）；
};
}
公共静态类组合器扩展了Reducer
{
公共void reduce（文本键、Iterable值、上下文上下文）引发IOException、InterruptedException
{
整数NumberOfValues=0；
双和=0D；
双分钟=0D；
双最大值=0D；
//双最小值=值。获取（0）；
迭代器itr=values.Iterator（）；
//convertString=值（0）；
while（itr.hasNext（））
{
字符串TexttoString=itr.next（）.toString（）；
Double value=Double.parseDouble（TexttoString）；
如果（最大值）
{
最大值=最大值；
}
NumberOfValues++；
总和+=数值；
}
双平均值=总和/数值；
write（新文本（“组合器输出”）、新文本（平均值+”、“+NumberOfValues+”、“+min+”、“+max））；
};
}
公共静态类Reduce扩展
减速器{
public void reduce（文本键、Iterable值、，
上下文）抛出IOException、InterruptedException{
整数totalNumberOfValues=0；
双倍和=0.00；
双分钟=0D；
双最大值=0D；
迭代器itr=values.Iterator（）；
while（itr.hasNext（））
{
字符串TexttoString=itr.next（）.toString（）；
String[]split_String=TexttoString.split（“，”）；
Double average=Double.parseDouble（拆分字符串[0]）；
整数NumberOfValues=Integer.parseInt（拆分字符串[1]）；
Double minValue=Double.parseDouble（拆分字符串[2]）；
Double maxValue=Double.parseDouble（split_String[3]）；
if（最小值最大值）
{
最大值=最大值；
}
总和+=（平均值*数值）；
totalNumberOfValues+=NumberOfValues；
} 
双平均值=总和/总数值；
context.write（新文本（“平均值和最小值以及最大值”），新文本（平均值.toString（）+”和“+min.toString（）+”和“+Max.toString（）））；
};
}
公共静态void main（字符串[]args）引发异常{
Configuration conf=新配置（）；
Job Job=新作业（conf，“AverageMinMax.class”）；
job.setJarByClass（AverageMinMax.class）；
job.setJobName（“MapReduceAsignment”）；
//JobConf conf=newjobconf（Hadoop\u map\u reduce.class）；
//conf.setJobName（“Hadoop_赋值”）；
//Configuration conf=新配置（）；
//Job Job=新作业（conf，“maxmin”）；
//setJarByClass（Hadoop\u map\u reduce.class）；
//FileSystem fs=FileSystem.get（conf）；
/*如果（fs.存在（新路径（args[1]））{
fs.delete（新路径（args[1]），true）；
}*/
job.setOutputKeyClass（Text.class）；
job.setOutputValueClass（Text.class）；
//作业。setNumReduceTasks（1）；
job.setMapperClass（Map.class）；
job.setReducerClass（Reduce.class）；
job.setCombinerClass（Combiner.class）；
setInputFormatClass（TextInputFormat.class）；
setOutputFormatClass（TextOutputFormat.class）；
addInputPath（作业，新路径（args[0]）；
//setOutputPath（作业，新路径（args[1]）；
//addInputPath（作业，新路径（“/home/cloudera/Desktop/assign2”）；
setOutputPath（作业，新路径（args[1]）；
//setOutputPath（作业，新路径（“user/cloudera/output”）；
job.waitForCompletion（true）；
}

}

所以，我的第一个问题是我不知道如何在映射器中转换日期，以及如何找到2个键并显示在输出中。我的意思是如何重写这个代码

谢谢你的帮助

好的，看来你有很多问题。我马上想到两个：

您的映射器输出键是
```
'Combiner_output'
```
。这行不通。你需要的键应该是有意义的内容，例如城市的名称——在你的例子中是“卡尔加里”。这很容易通过以下方式得到：
```
value.toString().split(",")[0]
```
（即从
```
,
```
字符上拆分
```
value
```
后形成的列表中获取第一个元素）

你的代码根本没有输出城市名称。通过执行

context.write(new Text(key.toString() + " Average and Minimum and Max is"), new Text(average.toString() + " and " + min.toString() + " and " + max.toString())); （在 reducer 中执行），其中 key
是上一点中得到的城市名称


关于如何在Java中从字符串中提取日期，请查看以下文章：
一般来说，我建议您从什么是Mapreduce开始，它的设计权衡，以及如何在Hadoop架构的范围内充分利用它。
您的问题并不完全清楚。因此，我的假设如下：

你有收藏吗
// Body of a map() method: builds a composite "location year" key from one CSV
// record and emits the record's numeric readings under that key.
// NOTE(review): the column positions used below (location in column 0, date in
// column 2, readings from column 5 onwards) are the answer author's assumption
// about the input layout -- confirm against the real file.
String line = value.toString();
String[] tokens = line.split(",");
String[] date = tokens[2].split("-");   // the year is the first "-"-separated part
String year = date[0];
String location = tokens[0];

// Composite key: records are grouped by location AND year.
Text locationYear = new Text(location + " " + year);

ArrayListOfDoublesWritable readings = new ArrayListOfDoublesWritable();
// Arrays expose their size through the 'length' field, not a method.
for (int i = 5; i < tokens.length; i++) {
    readings.add(Double.parseDouble(tokens[i]));
}

context.write(locationYear, readings);