Java MapReduce with 2 keys


I'm just learning how MapReduce jobs work. I've done one part of my assignment, where I have to change the code to accept another text file as input, and the output has to show the location and year along with the maximum, minimum, and average amounts. This is a sample line of my input:
2009-01-0760468012694,2.5207754,0.06572168,0.025668362,0.972051954,0.037000279,0.022319018,0.003641149,0.002936745,0.016723641

The output should look something like this:
Calgary 2009 Average is: Max: Min:

And here is my code, which reads the txt file and computes the average, min, and max:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class AverageMinMax {



public static class Map extends Mapper<LongWritable,Text,Text,Text> {


    //private static final FloatWritable rep= new  FloatWritable(1);
        public void map(LongWritable key,Text value,Context context)
        throws IOException, InterruptedException {
                context.write(new Text("Map_Output"), value);
        };
    }
      public static class Combiner extends Reducer<Text,Text,Text,Text>
      {
      public void reduce(Text key,Iterable<Text> values,Context context) throws IOException,InterruptedException
          {
             Integer NumberOfValues=0;
             double sum=0D;
             // Start min high and max low so the first value replaces both.
             double min=Double.MAX_VALUE;
             double max=-Double.MAX_VALUE;
             //double min=values.get(0);
              Iterator<Text> itr = values.iterator();
              //convertString=values(0);
              while(itr.hasNext())
              {
                  String TexttoString = itr.next().toString();
                  Double value = Double.parseDouble(TexttoString);
                  if(value<min)
                  {
                      min=value;
                  }
                  if(value>max)
                  {
                      max=value;
                  }
                  NumberOfValues++;
                  sum+=value;
              }
               Double average = sum/NumberOfValues;
                context.write(new Text("Combiner_output"), new Text(average + "," + NumberOfValues+","+min+","+max));
          };
      }
 public static class Reduce extends
       Reducer<Text,Text,Text,Text> {
      public void reduce(Text key, Iterable<Text> values,
        Context context) throws IOException, InterruptedException {
           Integer totalNumberOfValues= 0;
          Double sum=0.00;
          // Start min high and max low so the first combiner output replaces both.
          Double min=Double.MAX_VALUE;
          Double max=-Double.MAX_VALUE;
          Iterator<Text> itr = values.iterator();
            while(itr.hasNext())
          {
              String TexttoString = itr.next().toString();
              String[] split_String = TexttoString.split(",");
              Double average = Double.parseDouble(split_String[0]);
              Integer NumberOfValues = Integer.parseInt(split_String[1]);
              Double minValue=Double.parseDouble(split_String[2]);
              Double maxValue=Double.parseDouble(split_String[3]);
              if(minValue<min)
              {
                  min=minValue;
              }
              if(maxValue>max)
              {
                  max=maxValue;
              }
              sum+=(average*NumberOfValues);
              totalNumberOfValues+=NumberOfValues;   
          } 
          Double average= sum/totalNumberOfValues;
          context.write(new Text("Average and Minimum and Max is"), new Text(average.toString()+" and "+ min.toString()+" and "+ max.toString()));
          };
     }
     public static void main(String[] args) throws Exception {

         Configuration conf = new Configuration();
         Job job = new Job(conf, "AverageMinMax");
         job.setJarByClass(AverageMinMax.class);
         job.setJobName("MapReduceAssignment");

         /* Optionally clear the output directory before running:
         FileSystem fs = FileSystem.get(conf);
         if (fs.exists(new Path(args[1]))) {
             fs.delete(new Path(args[1]), true);
         } */

         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(Text.class);

         job.setMapperClass(Map.class);
         job.setReducerClass(Reduce.class);
         job.setCombinerClass(Combiner.class);

         job.setInputFormatClass(TextInputFormat.class);
         job.setOutputFormatClass(TextOutputFormat.class);

         FileInputFormat.addInputPath(job, new Path(args[0]));
         FileOutputFormat.setOutputPath(job, new Path(args[1]));

         job.waitForCompletion(true);
     }
}

So, my first problem is that I don't know how to convert the date in the mapper, nor how to produce the two keys and show them in the output. I mean, how should I rewrite this code?


Thanks for any help.

OK, it sounds like you have a number of problems. Two come to mind immediately:

  • Your mapper output key is 'Map_Output'. That won't work: you want the key to be something meaningful, probably the name of the city, 'Calgary' in your example. That is easy to get with value.toString().split(",")[0] (i.e., taking the first element of the array formed by splitting value on the ',' character); see the mapper sketch after this list.
  • Your code never outputs the city name at all. You can fix that by doing context.write(new Text(key.toString() + " average and min and max is"), new Text(average.toString() + " and " + min.toString() + " and " + max.toString())) in your reducer, where key is the city name from the previous point.
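A minimal sketch of that mapper change (assuming, per the first point, that the city name is the first comma-separated field of each line):

    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Assumption: the first comma-separated field is the city name, e.g. "Calgary".
            String city = value.toString().split(",")[0];
            // Pass the whole line through as the value; downstream code still has to
            // split out the numeric fields before parsing them as doubles.
            context.write(new Text(city), value);
        }
    }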
As for how to extract a date from a string in Java, java.text.SimpleDateFormat is the standard tool.
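A minimal, self-contained sketch (the yyyy-MM-dd pattern is an assumption based on your sample input line):

    import java.text.SimpleDateFormat;
    import java.util.Calendar;
    import java.util.Date;

    public class YearFromLine {
        public static void main(String[] args) throws Exception {
            String line = "2009-01-07...";            // stand-in for one input line
            String datePart = line.substring(0, 10);  // "2009-01-07"
            Date date = new SimpleDateFormat("yyyy-MM-dd").parse(datePart);
            Calendar cal = Calendar.getInstance();
            cal.setTime(date);
            System.out.println(cal.get(Calendar.YEAR)); // prints 2009
        }
    }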


In general, I'd suggest starting with what MapReduce is, what its design trade-offs are, and how to make the best use of it within the constraints of the Hadoop architecture.

Your question isn't entirely clear, so here are my assumptions:

  • You have a collection of readings per location and date, and you want your statistics grouped by location and year. If so, your mapper can emit a composite "location year" key along these lines:
    String line = value.toString();
    String[] tokens = line.split(",");
    String[] date = tokens[2].split("-");
    String year = date[0];
    String location = tokens[0];
    
    Text locationYear = new Text(location + " " + year);
    
    ArrayListOfDoublesWritable readings = new ArrayListOfDoublesWritable();
    for(int i = 5; i < tokens.length; i++)
    {
      readings.add(Double.parseDouble(tokens[i]));
    }
    
    context.write(locationYear, readings);
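
Note that ArrayListOfDoublesWritable is not part of stock Hadoop, so treat it as a placeholder for whatever array-of-doubles Writable you have available. If you don't have one, a simpler alternative (my suggestion, not part of the answer above) is to emit each reading as its own DoubleWritable and fold them up in the reducer:

    // Mapper would then be declared as Mapper<LongWritable, Text, Text, DoubleWritable>:
    for (int i = 5; i < tokens.length; i++) {
        context.write(locationYear, new DoubleWritable(Double.parseDouble(tokens[i])));
    }

    // Matching reducer: one min/max/average line per "location year" key.
    public static class MinMaxAvgReducer extends Reducer<Text, DoubleWritable, Text, Text> {
        public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
                throws IOException, InterruptedException {
            double min = Double.MAX_VALUE, max = -Double.MAX_VALUE, sum = 0;
            long count = 0;
            for (DoubleWritable v : values) {
                double d = v.get();
                if (d < min) min = d;
                if (d > max) max = d;
                sum += d;
                count++;
            }
            // key already reads like "Calgary 2009", matching the desired output.
            context.write(key, new Text("Average is: " + (sum / count)
                    + " Max: " + max + " Min: " + min));
        }
    }

This produces more intermediate data than the array-of-doubles approach, but it needs no custom Writable.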