Java Hadoop MapReduce多值
我试着做一个电影推荐系统,一直在关注这个网站 是否可以使用Hadoop Map和Reduce将上述代码转换为Java?Java Hadoop MapReduce多值,java,hadoop,mapreduce,Java,Hadoop,Mapreduce,我试着做一个电影推荐系统,一直在关注这个网站 是否可以使用Hadoop Map和Reduce将上述代码转换为Java? userid作为键 按用户分级的电影数量,分级编号计数,(电影ID,电影分级)作为值。 谢谢大家! 是的,您可以将其转换为map reduce程序 映射器逻辑: 假设输入的格式为(用户ID、电影ID、电影分级)(例如17,70,3),您可以在逗号(,)上拆分每一行,并将“用户ID”作为键,将(电影ID、电影分级)作为值。例如,对于记录:(17,70,3),可以发出键:(17)和
以 userid 作为键,
以(该用户评分的电影数量,评分总和,(电影ID,电影评分))
作为值。
谢谢大家! 是的,您可以将其转换为map reduce程序 映射器逻辑:
package com.myorg.hadooptests;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class MovieRatings {
public static class MovieRatingsMapper
extends Mapper<LongWritable, Text , IntWritable, Text>{
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String valueStr = value.toString();
int index = valueStr.indexOf(',');
if(index != -1) {
try
{
IntWritable keyUserID = new IntWritable(Integer.parseInt(valueStr.substring(0, index)));
context.write(keyUserID, new Text(valueStr.substring(index + 1)));
}
catch(Exception e)
{
// You could get a NumberFormatException
}
}
}
}
public static class MovieRatingsReducer
extends Reducer<IntWritable, Text, IntWritable, Text> {
public void reduce(IntWritable key, Iterable<Text> values,
Context context) throws IOException, InterruptedException {
int movieCount = 0;
int movieRatingCount = 0;
String movieValues = "";
for (Text value : values) {
String[] tokens = value.toString().split(",");
if(tokens.length == 2)
{
movieRatingCount += Integer.parseInt(tokens[1].trim()); // You could get a NumberFormatException
movieCount++;
movieValues = movieValues.concat(value.toString() + " ");
}
}
context.write(key, new Text(Integer.toString(movieCount) + "," + Integer.toString(movieRatingCount) + ",(" + movieValues.trim() + ")"));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "CompositeKeyExample");
job.setJarByClass(MovieRatings.class);
job.setMapperClass(MovieRatingsMapper.class);
job.setReducerClass(MovieRatingsReducer.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path("/in/in2.txt"));
FileOutputFormat.setOutputPath(job, new Path("/out/"));
System.exit(job.waitForCompletion(true) ? 0:1);
}
}
我得到了输出:
17 1,3,(70,3)
35 1,1,(21,1)
49 3,7,(70,4 21,1 19,2)
87 2,3,(21,2 19,1)
98 1,2,(19,2)
是的,您可以将其转换为map reduce程序 映射器逻辑:
package com.myorg.hadooptests;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class MovieRatings {
public static class MovieRatingsMapper
extends Mapper<LongWritable, Text , IntWritable, Text>{
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String valueStr = value.toString();
int index = valueStr.indexOf(',');
if(index != -1) {
try
{
IntWritable keyUserID = new IntWritable(Integer.parseInt(valueStr.substring(0, index)));
context.write(keyUserID, new Text(valueStr.substring(index + 1)));
}
catch(Exception e)
{
// You could get a NumberFormatException
}
}
}
}
public static class MovieRatingsReducer
extends Reducer<IntWritable, Text, IntWritable, Text> {
public void reduce(IntWritable key, Iterable<Text> values,
Context context) throws IOException, InterruptedException {
int movieCount = 0;
int movieRatingCount = 0;
String movieValues = "";
for (Text value : values) {
String[] tokens = value.toString().split(",");
if(tokens.length == 2)
{
movieRatingCount += Integer.parseInt(tokens[1].trim()); // You could get a NumberFormatException
movieCount++;
movieValues = movieValues.concat(value.toString() + " ");
}
}
context.write(key, new Text(Integer.toString(movieCount) + "," + Integer.toString(movieRatingCount) + ",(" + movieValues.trim() + ")"));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "CompositeKeyExample");
job.setJarByClass(MovieRatings.class);
job.setMapperClass(MovieRatingsMapper.class);
job.setReducerClass(MovieRatingsReducer.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path("/in/in2.txt"));
FileOutputFormat.setOutputPath(job, new Path("/out/"));
System.exit(job.waitForCompletion(true) ? 0:1);
}
}
我得到了输出:
17 1,3,(70,3)
35 1,1,(21,1)
49 3,7,(70,4 21,1 19,2)
87 2,3,(21,2 19,1)
98 1,2,(19,2)
您希望输出的具体内容是什么?基本上就像上面的示例一样:17 1,3,(70,3)
用户ID,该用户评分的电影数量,评分总和,(movieid,movie rating)
很抱歉。不清楚您期望的输入和输出是什么。如果您只是希望输出与输入相等,那么为什么需要MapReduce?抱歉。输入是用户ID,movieid,ratings
,因此我想计算每个用户评分的电影数量。例如,如果输入是 (userid,movie id,movie rating)=(17,70,3),那么输出将是 (userid,该用户评分的电影数量,评分总和,(movie id,movie rating))=(17,1,3,(70,3))。您希望输出的是什么?基本上就像上面的示例:17 1,3,(70,3)
userid,按用户分级的电影,分级数计数,(movieid,movie rating
很抱歉。不清楚您期望的输入和输出是什么。如果您只是希望输出与输入相等,那么为什么需要MapReduce?抱歉。输入是用户ID,movieid,ratings
,因此我想计算一个用户分级的电影数量。例如,如果输入是s(userid,movie id,movie rating)=(17,70,3),那么输出将是(userid,用户分级的电影数量,分级数计数,(movie id,movie ratings)=(17,1,3,(70,3))