
Java MapReduce to produce the N most frequent words


I have a MapReduce program that prints the number of times each word occurs in a document. I want to change this code so that it produces the N most frequent words, where N is an input parameter. The code is below:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;

    public class WordCount {

        public static void main(String[] args) throws Exception {
            Configuration c = new Configuration();
            String[] files = new GenericOptionsParser(c, args).getRemainingArgs();
            Path input = new Path(files[0]);
            Path output = new Path(files[1]);

            Job j = new Job(c, "wordcount");
            j.setJarByClass(WordCount.class);
            j.setMapperClass(MapForWordCount.class);
            j.setReducerClass(ReduceForWordCount.class);
            j.setOutputKeyClass(Text.class);
            j.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(j, input);
            FileOutputFormat.setOutputPath(j, output);
            System.exit(j.waitForCompletion(true) ? 0 : 1);
        }

        // Mapper: emits (WORD, 1) for every token longer than three characters.
        public static class MapForWordCount extends Mapper<LongWritable, Text, Text, IntWritable> {
            @Override
            public void map(LongWritable key, Text value, Context con) throws IOException, InterruptedException {
                String line = value.toString();
                // Split on any run of characters that is neither a letter nor a digit.
                String[] words = line.split("[^\\p{L}0-9]+");
                for (String word : words) {
                    if (word.length() <= 3) {
                        continue; // ignore short words
                    }
                    con.write(new Text(word.toUpperCase().trim()), new IntWritable(1));
                }
            }
        }

        // Reducer: sums the 1s for each word and writes (word, total count).
        public static class ReduceForWordCount extends Reducer<Text, IntWritable, Text, IntWritable> {
            @Override
            public void reduce(Text word, Iterable<IntWritable> values, Context con) throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable value : values) {
                    sum += value.get();
                }
                con.write(word, new IntWritable(sum));
            }
        }
    }
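
One common way to turn this into a top-N job is to keep the word count as-is, buffer the totals inside the reducer, and only emit the N largest ones in cleanup(), with the job forced to a single reduce task so every word's total passes through the same reducer. The sketch below is one possible modification, not part of the original post: the class name TopNReducer and the configuration key "top.n" are made up for illustration, and N would be set in the driver with c.setInt("top.n", Integer.parseInt(files[2])) together with j.setNumReduceTasks(1).

    // Minimal sketch of a top-N reducer (TopNReducer and "top.n" are illustrative
    // names, not from the original code). Requires java.util.Map and java.util.TreeMap.
    public static class TopNReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Sorted map from total count to word; the smallest entry is evicted
        // whenever the map grows beyond N.
        private final TreeMap<Integer, String> topWords = new TreeMap<>();
        private int n;

        @Override
        protected void setup(Context con) {
            // N is read from the job configuration, e.g. set in the driver
            // with c.setInt("top.n", Integer.parseInt(files[2])).
            n = con.getConfiguration().getInt("top.n", 10);
        }

        @Override
        public void reduce(Text word, Iterable<IntWritable> values, Context con) {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            topWords.put(sum, word.toString());
            if (topWords.size() > n) {
                topWords.remove(topWords.firstKey()); // drop the current smallest count
            }
        }

        @Override
        protected void cleanup(Context con) throws IOException, InterruptedException {
            // Emit the surviving entries, most frequent word first.
            for (Map.Entry<Integer, String> e : topWords.descendingMap().entrySet()) {
                con.write(new Text(e.getValue()), new IntWritable(e.getKey()));
            }
        }
    }

Note that keying the TreeMap on the count silently drops words that tie on frequency; a more careful version would key on a (count, word) pair or use a priority queue. The same buffering idea can also be applied per mapper (a local top N emitted in the mapper's cleanup()) to cut down the data sent to the single reducer.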
Please reformat your code: the indentation is hard to read and there are far too many blank lines.

Wouldn't it be better to change the Hadoop job itself for this analysis, rather than post-processing the data in Java?

Agreed with @tomayotatogy. A cursory search suggests using separate Hadoop mappers and reducers for this elsewhere, e.g. why not use a chained MapReduce?
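
To make the commenters' suggestion concrete, one way to keep everything inside Hadoop is to chain two jobs: the existing word count, followed by a second job that inverts each (word, count) pair, sorts by count, and emits only the first N records from a single reducer. The driver sketch below is an assumption-laden illustration, not a tested solution: CountSwapMapper, TopNOnlyReducer and DescendingIntComparator are hypothetical helper classes that would still have to be written, and N is again passed through the configuration.

    // Hypothetical two-job driver for the chained approach hinted at in the comments.
    // CountSwapMapper (emits (count, word)), TopNOnlyReducer (stops after N records)
    // and DescendingIntComparator (sorts IntWritable keys high-to-low) are assumed
    // helper classes, not real Hadoop library classes.
    public static void main(String[] args) throws Exception {
        Configuration c = new Configuration();
        String[] files = new GenericOptionsParser(c, args).getRemainingArgs();
        c.setInt("top.n", Integer.parseInt(files[2])); // N passed as a third argument

        Path input = new Path(files[0]);
        Path counts = new Path(files[1] + "_counts");  // intermediate word-count output
        Path output = new Path(files[1]);

        // Job 1: the unchanged word count from the question.
        Job countJob = Job.getInstance(c, "wordcount");
        countJob.setJarByClass(WordCount.class);
        countJob.setMapperClass(MapForWordCount.class);
        countJob.setReducerClass(ReduceForWordCount.class);
        countJob.setOutputKeyClass(Text.class);
        countJob.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(countJob, input);
        FileOutputFormat.setOutputPath(countJob, counts);
        if (!countJob.waitForCompletion(true)) {
            System.exit(1);
        }

        // Job 2: invert to (count, word), sort counts descending, keep the first N.
        Job topNJob = Job.getInstance(c, "topN");
        topNJob.setJarByClass(WordCount.class);
        topNJob.setMapperClass(CountSwapMapper.class);                 // hypothetical
        topNJob.setReducerClass(TopNOnlyReducer.class);                // hypothetical
        topNJob.setSortComparatorClass(DescendingIntComparator.class); // hypothetical
        topNJob.setNumReduceTasks(1); // all counts must meet in one reducer
        topNJob.setOutputKeyClass(IntWritable.class);
        topNJob.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(topNJob, counts);
        FileOutputFormat.setOutputPath(topNJob, output);
        System.exit(topNJob.waitForCompletion(true) ? 0 : 1);
    }

Whether this beats the single-reducer cleanup() approach sketched above depends mostly on data volume; for the word frequencies of a single document, the extra job is usually overkill.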