Java MapReduce: Calculating the Average Rating and Total Review Count per Product

I'm practicing MapReduce and I have an Amazon .tsv file containing a list of reviews with product ratings. One product has many reviews, and each review has a rating. A review also contains other data such as the user ID, product name, review headline, and so on. I want to run MapReduce on this file to produce three columns of output: product ID, total number of reviews, and the product's average rating.

Link to the file I'm using for testing: (it's sample_us.tsv)

Here is what I've written so far, but I'm running into several errors. Please let me know if you see any fixes, or better logic that achieves the same goal. By the way, I've been using Hadoop.

Mapper:

package stubs;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ReviewMapper extends Mapper<LongWritable, Text, Text, IntWritable>
{

  @Override
  public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException
  {

      int productIndex = 3; //index for productID
      int ratingIndex = 7; //index for star_rating

      String input = value.toString();
      String [] line = input.split("\\t");

      String productID = line[productIndex];
      String ratingVal = line[ratingIndex];


      if((productID.length() > 0) && (ratingVal.length() == 1))
      {
         int starRating = Integer.valueOf(ratingVal);
         context.write(new Text(productID), new IntWritable(starRating));
      }
  }
}
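
One hedged hardening note on the mapper above: if an input line has fewer tab-separated fields than expected (for example a blank or truncated line), indexing into the split array throws an ArrayIndexOutOfBoundsException, and a single non-digit character in the rating column would make Integer.valueOf throw. A minimal defensive sketch of the map body, assuming the same column indices and that valid ratings are the digits 1-5:

      String[] line = value.toString().split("\\t");
      // Skip lines that do not contain enough fields to hold a rating.
      if (line.length <= ratingIndex) {
          return;
      }
      String productID = line[productIndex];
      String ratingVal = line[ratingIndex];
      // Only emit when the rating is a single digit 1-5 (this also skips a header row).
      if (!productID.isEmpty() && ratingVal.matches("[1-5]")) {
          context.write(new Text(productID), new IntWritable(Integer.parseInt(ratingVal)));
      }
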
Then my Reducer:

package stubs;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class ReviewReducer extends Reducer<Text, IntWritable, Text, Text> {

  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException
  {
      int reviewCount = 0;
      int combineRating = 0;
      for(IntWritable value : values)
      {
          reviewCount++;
          combineRating += value.get();
      }

      int avgRating = (combineRating/reviewCount);
      String reviews = Integer.toString(reviewCount);
      String ratings = Integer.toString(avgRating);
      String result = reviews+ "\t" +ratings;

      context.write(key,  new Text(result));
  }
}
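
Also note that combineRating / reviewCount is integer division, so an average of 4.6 would be written out as 4. If a fractional average is wanted, a hedged variant of the end of reduce():

      // Compute the average as a double so e.g. 4.6 is not truncated to 4.
      double avgRating = (double) combineRating / reviewCount;
      String result = reviewCount + "\t" + String.format("%.2f", avgRating);
      context.write(key, new Text(result));
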
Finally, the Driver:

package stubs;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class AvgRatingReviews {

  public static void main(String[] args) throws Exception {

    if (args.length != 2) {
    System.out.printf("Usage: AvgWordLength <input dir> <output dir>\n");
    System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(AvgRatingReviews.class);  
    job.setJobName("Review Results");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job,  new Path(args[1]));

    job.setMapperClass(ReviewMapper.class);
    job.setReducerClass(ReviewReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
  }
}
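
One likely source of runtime errors with this driver: the mapper emits (Text, IntWritable) while the job's output value class is declared as Text, and Hadoop assumes the map output types match the job output types unless they are declared explicitly, which typically fails with a type-mismatch error from the map. A hedged sketch of the extra calls (placed before waitForCompletion, with org.apache.hadoop.io.IntWritable imported):

    // Declare the intermediate (map output) types, since they differ from
    // the job's final output types (Text, Text).
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);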

Can you edit the question to include the errors you're getting? Do you have to use MapReduce? Spark, Pig, or Hive would require less code.