Warning: file_get_contents(/data/phpspider/zhask/data//catemap/0/hadoop/6.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Hadoop JobConf类已弃用,需要更新示例_Hadoop_Mapreduce_Cloudera - Fatal编程技术网

Hadoop JobConf类已弃用,需要更新示例

Hadoop JobConf类已弃用,需要更新示例,hadoop,mapreduce,cloudera,Hadoop,Mapreduce,Cloudera,我正在编写hadoop程序,我真的不想使用不推荐的类。 在网上的任何地方,我都找不到更新过的程序 org.apache.hadoop.conf.Configuration 阶级 代替 org.apache.hadoop.mapred.JobConf 班级 public static void main(String[] args) throws Exception { JobConf conf = new JobConf(Test.class); conf.setJob

我正在编写hadoop程序,我真的不想使用不推荐的类。 在网上的任何地方,我都找不到更新过的程序

org.apache.hadoop.conf.Configuration

阶级 代替

org.apache.hadoop.mapred.JobConf

班级

   // NOTE(review): this is the OLD, deprecated org.apache.hadoop.mapred API.
   // JobConf, JobClient, setInputFormat/setOutputFormat were superseded by
   // org.apache.hadoop.mapreduce.Job and the lib.input/lib.output classes.
   public static void main(String[] args) throws Exception {
     // JobConf (deprecated) carries both configuration and job wiring.
     JobConf conf = new JobConf(Test.class);
     conf.setJobName("TESST");

     // Key/value types emitted by the reducer (and, by default, the mapper).
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(IntWritable.class);

     conf.setMapperClass(Map.class);
     // Combiner runs reducer logic map-side to shrink shuffle traffic.
     conf.setCombinerClass(Reduce.class);
     conf.setReducerClass(Reduce.class);

     // Old-API input/output formats (org.apache.hadoop.mapred.*).
     conf.setInputFormat(TextInputFormat.class);
     conf.setOutputFormat(TextOutputFormat.class);

     // args[0] = input path, args[1] = output path (must not already exist).
     FileInputFormat.setInputPaths(conf, new Path(args[0]));
     FileOutputFormat.setOutputPath(conf, new Path(args[1]));

     // Blocks until the job finishes; throws on failure (old API).
     JobClient.runJob(conf);
   }
这就是我的main()的样子。
有没有人可以为我提供使用新 API 改写后的版本?

这里是经典的字数统计(word count)示例。您会注意到代码中有一些可能并不需要的额外导入,阅读代码后您就能分辨出哪些是必需的。

有什么不同?我使用了 Tool 接口和 GenericOptionsParser 来解析作业的命令行参数(即通过 hadoop jar 提交时传入的参数)。

在mapper中,您会注意到一个run对象。您可以去掉它,当您为Map方法提供代码时,它通常在默认情况下被调用。我把它放在那里是为了给你信息,你可以进一步控制映射阶段。这都是使用新的API。我希望你觉得它有用。还有什么问题,请告诉我

import java.io.IOException;
import java.util.*;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Classic word count using the new {@code org.apache.hadoop.mapreduce} API.
 * Implements {@link Tool} so generic Hadoop options (-D, -files, ...) passed on
 * the command line ({@code hadoop jar ...}) are parsed and applied automatically.
 */
public class Inception extends Configured implements Tool{

 /** Tokenizes each input line on whitespace and emits (word, 1) pairs. */
 public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    // Reused across calls to avoid allocating one Text per token.
    private Text word = new Text();

    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            context.write(word, one);
        }
    }

  /**
   * Explicit override of the mapper run loop. This matches what the framework
   * does by default (setup, map each record, cleanup); it is written out here
   * only to show where extra per-task control could be inserted.
   */
  public void run (Context context) throws IOException, InterruptedException {
        setup(context);
        while (context.nextKeyValue()) {
              map(context.getCurrentKey(), context.getCurrentValue(), context);
            }
        cleanup(context);
  }
 }

 /** Sums all the 1-counts emitted for a given word. */
 public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        context.write(key, new IntWritable(sum));
    }
 }

/**
 * Configures and runs the job.
 *
 * @param args args[0] = input path, args[1] = output path (must not exist yet)
 * @return 0 on success, 1 on job failure
 */
public int run(String[] args) throws Exception {

    // BUG FIX: was Job.getInstance(new Configuration()), which silently
    // discarded the configuration populated by ToolRunner/GenericOptionsParser.
    // getConf() (from Configured) returns the parsed configuration.
    Job job = Job.getInstance(getConf());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // BUG FIX: was WordCount.class — the enclosing class is Inception, so the
    // wrong jar could be shipped to the cluster.
    job.setJarByClass(Inception.class);

    // BUG FIX: was job.submit(); return 0; — that returns before the job runs
    // and always reports success. Block and propagate the real outcome.
    return job.waitForCompletion(true) ? 0 : 1;
    }

 public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // ToolRunner applies GenericOptionsParser to args, stores the result in
    // conf, and hands the remaining args to run().
    // BUG FIX: was ToolRunner.run(new WordCount(), ...) — wrong class.
    int exitCode = ToolRunner.run(conf, new Inception(), args);
    System.exit(exitCode);
 }
}
import java.io.IOException;
导入java.util.*;
导入org.apache.commons.io.FileUtils;
导入org.apache.hadoop.conf.*;
导入org.apache.hadoop.fs.Path;
导入org.apache.hadoop.io.*;
导入org.apache.hadoop.mapreduce.Job;
导入org.apache.hadoop.mapreduce.Mapper;
导入org.apache.hadoop.mapreduce.Reducer;
导入org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
导入org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
导入org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
导入org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
导入org.apache.hadoop.util.Tool;
导入org.apache.hadoop.util.ToolRunner;
导入org.apache.hadoop.util.GenericOptionsParser;
公共类初始扩展配置的实现工具{
公共静态类映射扩展映射器{
私有最终静态IntWritable one=新的IntWritable(1);
私有文本字=新文本();
公共void映射(LongWritable键、文本值、上下文上下文)引发IOException、InterruptedException{
字符串行=value.toString();
StringTokenizer标记器=新的StringTokenizer(行);
while(tokenizer.hasMoreTokens()){
set(tokenizer.nextToken());
上下文。写(单词,一);
}
}
公共void运行(上下文上下文)引发IOException、InterruptedException{
设置(上下文);
while(context.nextKeyValue()){
映射(context.getCurrentKey(),context.getCurrentValue(),context);
}
清理(上下文);
}
}
公共静态类Reduce扩展Reducer{
公共void reduce(文本键、Iterable值、上下文)
抛出IOException、InterruptedException{
整数和=0;
for(可写入值:值){
sum+=val.get();
}
write(key,newintwriteable(sum));
}
}
公共int运行(字符串[]args)引发异常{
Job Job=Job.getInstance(新配置());
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
setInputFormatClass(TextInputFormat.class);
setOutputFormatClass(TextOutputFormat.class);
setInputPaths(作业,新路径(args[0]);
setOutputPath(作业,新路径(args[1]);
job.setJarByClass(WordCount.class);
job.submit();
返回0;
}
公共静态void main(字符串[]args)引发异常{
Configuration conf=新配置();
String[]otherArgs=新的GenericOptionsParser(conf,args);
运行(new WordCount(),otherArgs);
}
}
还以经典字数为例:

org.apache.hadoop.mapred.JobConf
是旧版本,在新版本中我们使用
Configuration
Job
来实现

请使用
org.apache.hadoop.mapreduce.lib.*
(它是新的API)而不是
org.apache.hadoop.mapred.TextInputFormat
(它是旧的)

Mapper
Reducer
并不是什么新功能,请参见
main
功能,它包括相对全面的配置,可以根据您的具体要求随意更改

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * Mapper that splits each input line into whitespace-delimited tokens and
 * emits one (token, 1) pair per token.
 */
class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
  // Writable instances are created once in setup() and reused for every
  // record, avoiding per-token allocations.
  private Text outputKey;
  private IntWritable outputVal;

  @Override
  public void setup(Context context) {
    outputKey = new Text();
    outputVal = new IntWritable(1); // each token counts as a single occurrence
  }

  @Override
  public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    // The input key (byte offset) is ignored; only the line text matters.
    final StringTokenizer tokens = new StringTokenizer(value.toString());
    while (tokens.hasMoreTokens()) {
      final String token = tokens.nextToken();
      outputKey.set(token);
      context.write(outputKey, outputVal);
    }
  }
}

/**
 * Reducer (also usable as a combiner) that adds up all partial counts for a
 * word and emits the total.
 */
class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  // Single reusable output value, initialized once per task in setup().
  private IntWritable result;

  @Override
  public void setup(Context context) {
    result = new IntWritable();
  }

  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    int total = 0;
    // Accumulate every partial count shuffled to this key.
    for (IntWritable count : values) {
      total += count.get();
    }
    result.set(total);
    context.write(key, result);
  }
}

/**
 * Driver for the word-count job using the new {@code mapreduce} API.
 * Usage: WordCount &lt;input path&gt; &lt;output path&gt;
 */
public class WordCount {
  public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    if(args.length != 2) {
      // Exactly two positional arguments are required.
      System.err.println("Usage: <in> <out>");
      System.exit(2);
    }

    Job job = Job.getInstance(conf, "Word Count");

    // Ship the jar that contains this driver class to the cluster.
    job.setJarByClass(WordCount.class);

    // Mapper, combiner, and reducer. The reducer doubles as the combiner,
    // which is safe because integer addition is associative and commutative.
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    /* Optional, set customer defined Partioner:
     * job.setPartitionerClass(MyPartioner.class);
     */

    // Intermediate (map output) and final (reduce output) key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Wire up the input and output locations from the command line.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // by default, Hadoop use TextInputFormat and TextOutputFormat
    // any customer defined input and output class must implement InputFormat/OutputFormat interface
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Block until the job finishes and surface success/failure as exit code.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
  }
}
import java.io.IOException;
导入java.util.StringTokenizer;
导入org.apache.hadoop.conf.Configuration;
导入org.apache.hadoop.fs.Path;
导入org.apache.hadoop.io.IntWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Job;
导入org.apache.hadoop.mapreduce.Mapper;
导入org.apache.hadoop.mapreduce.Reducer;
导入org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
导入org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
导入org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
导入org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
类TokenizerMapper扩展映射器{
私有文本输出密钥;
私有IntWriteOutputVal;
@凌驾
公共无效设置(上下文){
outputKey=新文本();
outputVal=新的可写整数(1);
}
@凌驾
公共void映射(对象键、文本值、上下文上下文)引发IOException、InterruptedException{
StringTokenizer stk=新的StringTokenizer(value.toString());
while(stk.hasMoreTokens()){
outputKey.set(stk.nextToken());
write(outputKey,outputVal);
}
}
}
类IntSumReducer扩展了Reducer{
私有可写结果;
@凌驾
公共无效设置(上下文){
结果=新的IntWritable();
}
@凌驾
公共void reduce(文本键、Iterable值、上下文上下文)引发IOException、InterruptedException{
整数和=0;
for(可写入值:值){
sum+=val.get();
}
结果集(总和);
编写(键、结果);
}
}
公共类字数{
公共静态void main(字符串[]args)引发IOException、ClassNotFoundException、InterruptedException{
配置器