
Java Hadoop MapReduce with multiple values


I'm trying to build a movie recommendation system and have been following this website.

Is it possible to convert the above code to Java using Hadoop Map and Reduce, with userid as the key, and (number of movies rated by the user, running total of the ratings, (movie ID, movie rating)) as the value?

Thank you all!

Yes, you can convert this into a MapReduce program.

Mapper logic:

  • Assuming the input is in the format (userID, movieID, movieRating), e.g. 17,70,3, you can split each line on the comma (,) and emit the userID as the key and (movieID, movieRating) as the value. For example, for the record 17,70,3 you would emit key 17 and value 70,3.

Reducer logic:

  • You keep three variables: movieCount (integer), movieRatingCount (integer), and movieValues (string).
  • For each value, you parse out the movie rating. For example, for the value 70,3 you would parse movieRating = 3.
  • For each valid record, you increment movieCount, add the parsed rating to movieRatingCount, and append the value to the movieValues string.
  • This gives you the desired output (see the worked sample after this list).
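
For illustration, here is a small input file consistent with the sample output shown further below. The original question does not include the input, so these records are reconstructed from that output:

    17,70,3
    35,21,1
    49,70,4
    49,21,1
    49,19,2
    87,21,2
    87,19,1
    98,19,2

For the record 17,70,3 the mapper emits key 17 with value 70,3; the reducer for user 49 then sees the three values 70,4, 21,1 and 19,2 and produces movieCount = 3, movieRatingCount = 4 + 1 + 2 = 7, and movieValues = "70,4 21,1 19,2".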

Here is the code that implements this:

    package com.myorg.hadooptests;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;

    public class MovieRatings {

        // Emits (userID, "movieID,rating") for every input line "userID,movieID,rating".
        public static class MovieRatingsMapper
                extends Mapper<LongWritable, Text, IntWritable, Text> {

            public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

                String valueStr = value.toString();
                int index = valueStr.indexOf(',');

                if (index != -1) {
                    try {
                        // Everything before the first comma is the user ID (the key);
                        // the rest ("movieID,rating") passes through unchanged as the value.
                        IntWritable keyUserID = new IntWritable(Integer.parseInt(valueStr.substring(0, index)));
                        context.write(keyUserID, new Text(valueStr.substring(index + 1)));
                    } catch (Exception e) {
                        // Skip malformed records (e.g. a NumberFormatException on a non-numeric user ID)
                    }
                }
            }
        }

        // For each user, counts the rated movies, sums the ratings, and
        // concatenates the "movieID,rating" pairs.
        public static class MovieRatingsReducer
                extends Reducer<IntWritable, Text, IntWritable, Text> {

            public void reduce(IntWritable key, Iterable<Text> values,
                               Context context) throws IOException, InterruptedException {

                int movieCount = 0;        // number of movies this user rated
                int movieRatingCount = 0;  // running total of this user's ratings
                String movieValues = "";   // all "movieID,rating" pairs, space-separated

                for (Text value : values) {
                    String[] tokens = value.toString().split(",");
                    if (tokens.length == 2) {
                        movieRatingCount += Integer.parseInt(tokens[1].trim()); // may throw NumberFormatException on bad input
                        movieCount++;
                        movieValues = movieValues.concat(value.toString() + " ");
                    }
                }

                context.write(key, new Text(Integer.toString(movieCount) + "," + Integer.toString(movieRatingCount) + ",(" + movieValues.trim() + ")"));
            }
        }

        public static void main(String[] args) throws Exception {

            Configuration conf = new Configuration();

            Job job = Job.getInstance(conf, "CompositeKeyExample");
            job.setJarByClass(MovieRatings.class);
            job.setMapperClass(MovieRatingsMapper.class);
            job.setReducerClass(MovieRatingsReducer.class);

            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(Text.class);

            // Input and output paths are hardcoded here; adjust them for your cluster.
            FileInputFormat.addInputPath(job, new Path("/in/in2.txt"));
            FileOutputFormat.setOutputPath(job, new Path("/out/"));

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
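
To launch the job, a typical invocation would look like the following, assuming the class has been packaged into a jar (the name hadooptests.jar here is hypothetical; the original does not give one):

    hadoop jar hadooptests.jar com.myorg.hadooptests.MovieRatings

Since the input and output paths are hardcoded in main(), no further arguments are needed.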
    
I got this output:

    17      1,3,(70,3)
    35      1,1,(21,1)
    49      3,7,(70,4 21,1 19,2)
    87      2,3,(21,2 19,1)
    98      1,2,(19,2)
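
This is the content of the reducer output file, which (assuming the default single reducer, and therefore a single part file) could be viewed with something like:

    hdfs dfs -cat /out/part-r-00000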
    


What exactly do you expect the output to be?

Basically like the example above: 17 1,3,(70,3), i.e. userid, number of movies rated by the user, sum of the ratings, (movieid, movie rating).

Sorry, it's not clear what input and output you expect. If you just want the output to be identical to the input, why do you need MapReduce?

Sorry. The input is userid,movieid,rating, so I want to count the number of movies each user has rated. For example, if the input is (userid, movie id, movie rating) = (17,70,3), then the output would be (userid, number of movies rated by the user, sum of the ratings, (movie id, movie rating)) = (17,1,3,(70,3)).