Hadoop: need help implementing a custom FileInputFormat class
I'm trying to implement some Map/Reduce jobs with Hadoop for a university assignment, but at the moment I'm completely stuck on implementing a custom FileInputFormat class that passes the whole content of a file into the mapper.

I took the example from "Hadoop: The Definitive Guide" without making any changes. I can compile my source code, but when I run it, it throws this exception (at the moment I'm using Hadoop 1.0.2 on Debian 5.0):
Exception in thread "main" java.lang.RuntimeException: java.lang.NoSuchMethodException: org.myorg.ExampleFileInputFormat$WholeFileInputFormat.<init>()
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:115)
at org.apache.hadoop.mapred.JobConf.getInputFormat(JobConf.java:575)
at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:989)
at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:981)
at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:174)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:897)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:850)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1093)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:850)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:824)
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1261)
at org.myorg.ExampleFileInputFormat.run(ExampleFileInputFormat.java:163)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:79)
at org.myorg.ExampleFileInputFormat.main(ExampleFileInputFormat.java:172)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
Caused by: java.lang.NoSuchMethodException: org.myorg.ExampleFileInputFormat$WholeFileInputFormat.<init>()
at java.lang.Class.getConstructor0(Class.java:2706)
at java.lang.Class.getDeclaredConstructor(Class.java:1985)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:109)
... 21 more

I'm a bit frustrated because I don't understand what is going on, and I can't find anything about it searching the web. Maybe some of you could take a look at my source. It's stripped down for debugging purposes at the moment:
package org.myorg;
/*
*
*
*/
import java.io.IOException;
import java.util.*;
import java.io.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.util.*;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.conf.Configured;
public class ExampleFileInputFormat extends Configured implements Tool {
    /*
     * <generics>
     */
    public class WholeFileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {
        @Override
        protected boolean isSplitable(FileSystem fs, Path filename) {
            return false;
        }

        @Override
        public RecordReader<NullWritable, BytesWritable> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
            return new WholeFileRecordReader((FileSplit) split, job);
        }
    }
    public class WholeFileRecordReader implements RecordReader<NullWritable, BytesWritable> {
        private FileSplit fileSplit;
        private Configuration conf;
        private boolean processed = false;

        public WholeFileRecordReader(FileSplit fileSplit, Configuration conf) throws IOException {
            this.fileSplit = fileSplit;
            this.conf = conf;
        }

        @Override
        public NullWritable createKey() {
            return NullWritable.get();
        }

        @Override
        public BytesWritable createValue() {
            return new BytesWritable();
        }

        @Override
        public long getPos() throws IOException {
            return processed ? fileSplit.getLength() : 0;
        }

        @Override
        public float getProgress() throws IOException {
            return processed ? 1.0f : 0.0f;
        }

        @Override
        public boolean next(NullWritable key, BytesWritable value) throws IOException {
            if (!processed) {
                byte[] contents = new byte[(int) fileSplit.getLength()];
                Path file = fileSplit.getPath();
                FileSystem fs = file.getFileSystem(conf);
                FSDataInputStream in = null;
                try {
                    in = fs.open(file);
                    IOUtils.readFully(in, contents, 0, contents.length);
                    value.set(contents, 0, contents.length);
                } finally {
                    IOUtils.closeStream(in);
                }
                processed = true;
                return true;
            }
            return false;
        }

        @Override
        public void close() throws IOException {
            // do nothing
        }
    }
    /* </generics> */

    /*
     * <Task1>:
     */
    public static class ExampleMap extends MapReduceBase implements Mapper<NullWritable, BytesWritable, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(NullWritable key, BytesWritable value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            output.collect(new Text("test"), one);
        }
    }

    public static class ExampleReduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }
    /* </Task1> */
    /*
     * <run>
     */
    public int run(String[] args) throws Exception {
        if (args.length != 3) {
            printUsage();
            return 1;
        }
        String useCase = args[0];
        String inputPath = args[1];
        String outputPath = args[2];
        deleteOldOutput(outputPath);

        JobConf conf = new JobConf(ExampleFileInputFormat.class);
        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
        FileInputFormat.setInputPaths(conf, new Path(inputPath));

        /* conf: Task1 */
        if (useCase.equals("cc_p")) {
            conf.setJobName("WordCount");
            /* Output: Key:Text -> Value:Integer */
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(IntWritable.class);
            conf.setOutputFormat(TextOutputFormat.class);
            /* Input: Key:Text -> Value:Text */
            conf.setInputFormat(WholeFileInputFormat.class);
            conf.setMapperClass(ExampleMap.class);
            conf.setReducerClass(ExampleReduce.class);
        }
        /* default option: exit */
        else {
            printUsage();
            return 1;
        }
        JobClient.runJob(conf);
        return 0;
    }
    /* </run> */
    /*
     * <Main>
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new ExampleFileInputFormat(), args);
        System.exit(res);
    }
    /* </Main> */

    /*
     * <Helper>
     */
    private void printUsage() {
        System.out.println("usage: [usecase] [input-path] [output-path]");
        return;
    }

    private void deleteOldOutput(String outputPath) throws IOException {
        // Delete the output directory if it already exists
        Path outputDir = new Path(outputPath);
        FileSystem.get(getConf()).delete(outputDir, true);
    }
    /* </Helper> */
}
The stack trace tells you what is wrong: Hadoop instantiates the input format via reflection (ReflectionUtils.newInstance), and that requires a no-argument constructor. WholeFileInputFormat is declared as a non-static inner class, so its constructor takes a hidden reference to the enclosing ExampleFileInputFormat instance; there is no <init>() for the reflection call to find, hence the NoSuchMethodException. Declare both nested classes as static:

public static class WholeFileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {
    ...
}

public static class WholeFileRecordReader implements RecordReader<NullWritable, BytesWritable> {
    ...
}

WholeFileRecordReader has to become static as well: once WholeFileInputFormat no longer carries an enclosing instance, the expression new WholeFileRecordReader(...) inside it can only refer to a static nested class.
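If you want to see the underlying Java rule in isolation, here is a minimal sketch, independent of Hadoop (the class names are made up for illustration):

import java.lang.reflect.Constructor;

public class ReflectionDemo {
    class Inner {}           // non-static: the only constructor is Inner(ReflectionDemo)
    static class Nested {}   // static: has a genuine no-arg constructor

    public static void main(String[] args) throws Exception {
        // Works: Nested declares an implicit no-arg constructor.
        Constructor<Nested> ok = Nested.class.getDeclaredConstructor();
        System.out.println(ok.newInstance());

        // Throws NoSuchMethodException, just like in the Hadoop job:
        // Inner's implicit constructor takes the enclosing ReflectionDemo
        // instance as a hidden parameter, so no no-arg constructor exists.
        Constructor<Inner> fails = Inner.class.getDeclaredConstructor();
    }
}

With the static modifier in place, the job should submit with the usual invocation (the jar name here is just an example):

hadoop jar example.jar org.myorg.ExampleFileInputFormat cc_p <input-path> <output-path>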