Hadoop: Need help implementing a custom FileInputFormat class

I'm trying to implement some Map/Reduce jobs with Hadoop for a university assignment, but at the moment I'm completely stuck on implementing a custom FileInputFormat class that passes the whole content of a file to my mapper.

I took the example from "Hadoop: The Definitive Guide" without any changes. I can compile my source code, but when I run it, it throws the following exception (currently I'm using Hadoop 1.0.2 on Debian 5.0):

Exception in thread "main" java.lang.RuntimeException: java.lang.NoSuchMethodException: org.myorg.ExampleFileInputFormat$WholeFileInputFormat.<init>()
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:115)
    at org.apache.hadoop.mapred.JobConf.getInputFormat(JobConf.java:575)
    at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:989)
    at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:981)
    at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:174)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:897)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:850)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:396)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1093)
    at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:850)
    at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:824)
    at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1261)
    at org.myorg.ExampleFileInputFormat.run(ExampleFileInputFormat.java:163)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:79)
    at org.myorg.ExampleFileInputFormat.main(ExampleFileInputFormat.java:172)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
Caused by: java.lang.NoSuchMethodException: org.myorg.ExampleFileInputFormat$WholeFileInputFormat.<init>()
    at java.lang.Class.getConstructor0(Class.java:2706)
    at java.lang.Class.getDeclaredConstructor(Class.java:1985)
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:109)
    ... 21 more

I'm a bit frustrated because I don't understand what's happening, and my web searches haven't turned up anything. Maybe some of you can take a look at my source. It's stripped down at the moment for debugging purposes:

package org.myorg;
/*
 * 
 * 
 */
import java.io.IOException;
import java.util.*;
import java.io.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.util.*;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.conf.Configured;


public class ExampleFileInputFormat extends Configured implements Tool {

     /*
      *  <generics>
      */
    public class WholeFileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {

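        // Returning false keeps every file in a single split, so one mapper
        // receives the file's entire content.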
        @Override
        protected boolean isSplitable(FileSystem fs, Path filename) {
            return false;
        }

        @Override
        public RecordReader<NullWritable, BytesWritable> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
            return new WholeFileRecordReader((FileSplit) split, job);   
        }
    }

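    // Reads a whole file as a single record: NullWritable key, the raw file bytes as value.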
    public class WholeFileRecordReader implements RecordReader<NullWritable, BytesWritable> {

        private FileSplit fileSplit;
        private Configuration conf;
        private boolean processed = false;

        public WholeFileRecordReader(FileSplit fileSplit, Configuration conf) throws IOException {
            this.fileSplit = fileSplit;
            this.conf = conf;
        }

        @Override
        public NullWritable createKey() {
            return NullWritable.get();
        }

        @Override
        public BytesWritable createValue() {
            return new BytesWritable();
        }

        @Override
        public long getPos() throws IOException {
            return processed ? fileSplit.getLength() : 0;
        }

        @Override
        public float getProgress() throws IOException {
            return processed ? 1.0f : 0.0f;
        }

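        // First call: load the entire file into 'value' and report one record;
        // subsequent calls return false because the single record is already consumed.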
        @Override
        public boolean next(NullWritable key, BytesWritable value) throws IOException {
            if (!processed) {
                  byte[] contents = new byte[(int) fileSplit.getLength()];
                  Path file = fileSplit.getPath();
                  FileSystem fs = file.getFileSystem(conf);
                  FSDataInputStream in = null;
                  try {
                    in = fs.open(file);
                    IOUtils.readFully(in, contents, 0, contents.length);
                    value.set(contents, 0, contents.length);
                  } finally {
                    IOUtils.closeStream(in);
                  }
                  processed = true;
                  return true;
            }
            return false;
        }

        @Override
        public void close() throws IOException {
            // do nothing
        }
    }
      /* </generics> */


    /* 
     * <Task1>: 
     * */
    public static class ExampleMap extends MapReduceBase implements Mapper<NullWritable, BytesWritable, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(NullWritable key, BytesWritable value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            output.collect(new Text("test"), one);
        }
    }

    public static class ExampleReduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
      public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
        int sum = 0;
        while (values.hasNext()) {
          sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));
      }
    }
    /* </Task1> */
    /*
     * <run>
     **/
    public int run(String[] args) throws Exception {

        if (args.length != 3) {
            printUsage();
            return 1;
        }

        String useCase = args[0];
        String inputPath = args[1];
        String outputPath = args[2];

        deleteOldOutput(outputPath);        


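        // JobConf(Class) also tells Hadoop which jar to ship to the cluster:
        // the one containing the given class.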
        JobConf conf = new JobConf(ExampleFileInputFormat.class);
        FileOutputFormat.setOutputPath(conf, new Path(outputPath));      
        FileInputFormat.setInputPaths(conf, new Path(inputPath));

        /* conf: Task1 */
        if (useCase.equals("cc_p")) {
            conf.setJobName("WordCount");
            /* Output: Key:Text -> Value:Integer */
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(IntWritable.class);
            conf.setOutputFormat(TextOutputFormat.class);
            /* Input: Key.Text -> Value:Text */
            conf.setInputFormat(WholeFileInputFormat.class);
            conf.setMapperClass(ExampleMap.class);
            conf.setReducerClass(ExampleReduce.class);
        }
        /* default-option: Exit */
        else {
            printUsage();
            return 1;
        }

        JobClient.runJob(conf);
        return 0;
    }
    /* </run> */

    /*
     * <Main>
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new ExampleFileInputFormat(), args);
        System.exit(res);
    }
    /* </Main> */

    /* 
    * <Helper>
    */
    private void printUsage() {
        System.out.println("usage: [usecase] [input-path] [output-path]");
        return;
    }

    private void deleteOldOutput(String outputPath) throws IOException {
        // Delete the output directory if it exists already
        Path outputDir = new Path(outputPath);
        FileSystem.get(getConf()).delete(outputDir, true);
    }
    /* </Helper> */
}

The problem is that WholeFileInputFormat and WholeFileRecordReader are non-static inner classes, so their implicit constructors take the enclosing ExampleFileInputFormat instance as an argument. Hadoop's ReflectionUtils.newInstance() looks for a no-argument constructor, doesn't find one, and throws the NoSuchMethodException for WholeFileInputFormat.<init>(). Declare both nested classes static (or move them into their own source files):

public static class WholeFileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {
    ...
}
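
For illustration, here is a minimal, self-contained sketch (class names are my own, not from the original code) showing why reflection finds no no-arg constructor on a non-static inner class. Running it prints the Nested constructor and then fails with the same kind of NoSuchMethodException as the job submission:

import java.lang.reflect.Constructor;

public class InnerClassDemo {
    public class Inner {}          // implicit constructor is Inner(InnerClassDemo)
    public static class Nested {}  // implicit constructor is Nested()

    public static void main(String[] args) throws Exception {
        // Succeeds: a static nested class really has a no-arg constructor.
        Constructor<Nested> ok = Nested.class.getDeclaredConstructor();
        System.out.println("Nested: " + ok);

        // Throws java.lang.NoSuchMethodException: InnerClassDemo$Inner.<init>()
        // because the only declared constructor is Inner(InnerClassDemo) --
        // exactly what ReflectionUtils.newInstance() runs into above.
        Inner.class.getDeclaredConstructor();
    }
}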