hadoop CustomInputFormat未被调用
我编写了一个自定义输入格式(InputFormat),并在 Job 中配置了它,但这个 InputFormat 始终没有被调用。我加入了一些 SOP(System.out.println)语句,以便在代码运行时打印,但没有一条被打印出来。即使我在驱动类中把自定义 InputFormat 注释掉,输出仍然保持不变。我遗漏了什么? 驱动类 hadoop CustomInputFormat未被调用,hadoop,mapreduce,Hadoop,Mapreduce,我编写了一个自定义输入格式(InputFormat),并在 Job 中配置了它,但这个 InputFormat 始终没有被调用。我加入了一些 SOP(System.out.println)语句,以便在代码运行时打印,但没有一条被打印出来。即使我在驱动类中把自定义 InputFormat 注释掉,输出仍然保持不变。我遗漏了什么? 驱动类 public class TestDriver { public static void main(String args[]) throws IOException, InterruptedException, ClassNotFound
/**
 * Entry point that configures and launches the "Custom Format" MapReduce job.
 *
 * <p>The job reads {@code In\VISA_Details.csv} through {@link CustomInputFormat},
 * runs {@link CustomInputFormatmapper} and {@link CustomInputFormatReducer}, and
 * writes text output to a fixed local directory.
 */
public class TestDriver {

    /**
     * Builds the job, blocks until it finishes and prints the boolean result of
     * {@code waitForCompletion} to standard output.
     *
     * @param args command-line arguments (not read)
     * @throws IOException            propagated from job setup or execution
     * @throws InterruptedException   propagated from {@code waitForCompletion}
     * @throws ClassNotFoundException propagated from {@code waitForCompletion}
     */
    public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException {
        final Job customFormatJob = new Job(new Configuration(), "Custom Format");

        // Registers the project's file system class under the fs.file.impl key.
        customFormatJob.getConfiguration().set("fs.file.impl", "com.learn.WinLocalFileSystem");

        // Input side: custom format feeding the mapper.
        customFormatJob.setInputFormatClass(CustomInputFormat.class);
        customFormatJob.setMapperClass(CustomInputFormatmapper.class);
        customFormatJob.setMapOutputKeyClass(LongWritable.class);
        customFormatJob.setMapOutputValueClass(LongWritable.class);

        // Output side: reducer writing plain text.
        customFormatJob.setReducerClass(CustomInputFormatReducer.class);
        customFormatJob.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(customFormatJob, new Path("In\\VISA_Details.csv"));
        FileOutputFormat.setOutputPath(
                customFormatJob,
                new Path("C:\\Users\\Desktop\\Hadoop learning\\output\\run1"));

        final boolean succeeded = customFormatJob.waitForCompletion(true);
        System.out.println(succeeded);
    }
}
自定义输入格式
自定义记录阅读器
import java.io.IOException;
导入org.apache.hadoop.conf.Configuration;
导入org.apache.hadoop.fs.FSDataInputStream;
导入org.apache.hadoop.fs.FileSystem;
导入org.apache.hadoop.fs.Path;
导入org.apache.hadoop.io.LongWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.io.compress.CompressionCodec;
导入org.apache.hadoop.io.compress.CompressionCodecFactory;
导入org.apache.hadoop.mapred.FileSplit;
导入org.apache.hadoop.mapreduce.InputSplit;
导入org.apache.hadoop.mapreduce.RecordReader;
导入org.apache.hadoop.mapreduce.TaskAttemptContext;
导入org.apache.hadoop.util.LineReader;
公共类CustomRecordReader扩展了RecordReader{
专用压缩编解码器工厂压缩编解码器;
私有final int NLINESTOPROCESS=3;
私人长期启动;
私人长pos;
私人长尾;
专用线路阅读器;
私有整数maxLineLength;
私钥;
私有文本值;
@凌驾
public void close()引发IOException{
//TODO自动生成的方法存根
}
@凌驾
公共对象getCurrentKey()引发IOException、InterruptedException{
//TODO自动生成的方法存根
返回null;
}
@凌驾
公共对象getCurrentValue()引发IOException、InterruptedException{
//TODO自动生成的方法存根
返回null;
}
@凌驾
public float getProgress()引发IOException、InterruptedException{
//TODO自动生成的方法存根
返回0;
}
@凌驾
公共void初始化(InputSplit InputSplit,TaskAttemptContext TaskAttemptContext)
抛出IOException、InterruptedException{
System.out.println(“------------内部初始化:这不是打印------------”;
FileSplit split=(FileSplit)inputsplit;
配置作业=taskattemptcontext.getConfiguration();
maxLineLength=job.getInt(“mapred.linerecordreader.maxlength”,2147483647);
start=split.getStart();
end=start+split.getLength();
路径文件=split.getPath();
compressionCodecs=新的CompressionCodecFactory(作业);
CompressionCodec codec=compressionCodecs.getCodec(文件);
FileSystem fs=file.getFileSystem(作业);
FSDataInputStream fileIn=fs.open(split.getPath());
布尔skipFirstLine=false;
如果(编解码器!=null)
{
in=新的LineReader(codec.createInputStream(fileIn),作业);
末端=9223372036854775807L;
}否则
{
如果(启动!=0L)
{
skipFirstLine=true;
开始--;
fileIn.seek(开始);
}
in=新的行读取器(文件输入,作业);
}
if(skipFirstLine)
start+=in.readLine(newtext(),0,(int)Math.min(2147483647L,end-start));
pos=开始;
}
@凌驾
公共布尔值nextKeyValue()引发IOException、InterruptedException{
System.out.println(“------------INSIDE-nextKeyValue()--------------”;
if(key==null){
key=新的LongWritable();
}
如果(值==null){
值=新文本();
}
按键设置(pos);
value.clear();
最终文本换行符=新文本(“\n”);
Text newVal=新文本();
int newSize=0;
对于(int i=0;i
我来回答自己的问题,因为这能帮助其他遇到同样问题的人。问题出在我导入的包上。
下面列出我犯的错误:
CustomInputFormat 类
1) 缺少 @Override 注解
2) 导入的是 org.apache.hadoop.mapred.InputSplit,而应当导入 org.apache.hadoop.mapreduce.InputSplit
CustomRecordReader 类
1) 导入的是 org.apache.hadoop.mapred.* 中的类,而应当导入 org.apache.hadoop.mapreduce.* 中的类。
我猜你认为它不起作用,是因为没有看到打印语句的输出?
@DebD,你是否尝试过添加 job.setJarByClass(TestDriver.class)?
@irW:我是在 Eclipse 中运行它,而不是在虚拟机中运行,所以 SOP 语句应该会显示出来。
@SSaikia_JtheRocker:我不是在作业中运行它,所以不需要加那一行。即使加上,自定义的 InputFormat 也不会被选用。
也许是你使用的 Windows 文件系统那部分出了问题,那是我不熟悉的代码。你能给我们看一下这次运行的日志吗?
import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
public class CustomInputFormat extends TextInputFormat{
public RecordReader createRecordReader(InputSplit split, TaskAttemptContext context)
{
System.out.println(" ------------ INSIDE createRecordReader()--------------");
return new CustomRecordReader();
}
}
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.LineReader;
public class CustomRecordReader extends RecordReader {
private CompressionCodecFactory compressionCodecs;
private final int NLINESTOPROCESS = 3;
private long start;
private long pos;
private long end;
private LineReader in;
private int maxLineLength;
private LongWritable key;
private Text value;
@Override
public void close() throws IOException {
// TODO Auto-generated method stub
}
@Override
public Object getCurrentKey() throws IOException, InterruptedException {
// TODO Auto-generated method stub
return null;
}
@Override
public Object getCurrentValue() throws IOException, InterruptedException {
// TODO Auto-generated method stub
return null;
}
@Override
public float getProgress() throws IOException, InterruptedException {
// TODO Auto-generated method stub
return 0;
}
@Override
public void initialize(InputSplit inputsplit,TaskAttemptContext taskattemptcontext)
throws IOException, InterruptedException {
System.out.println(" ---------- INSIDE INITILISE: THIS IS NOT PRINTING----------");
FileSplit split = (FileSplit)inputsplit;
Configuration job = taskattemptcontext.getConfiguration();
maxLineLength = job.getInt("mapred.linerecordreader.maxlength", 2147483647);
start = split.getStart();
end = start + split.getLength();
Path file = split.getPath();
compressionCodecs = new CompressionCodecFactory(job);
CompressionCodec codec = compressionCodecs.getCodec(file);
FileSystem fs = file.getFileSystem(job);
FSDataInputStream fileIn = fs.open(split.getPath());
boolean skipFirstLine = false;
if(codec != null)
{
in = new LineReader(codec.createInputStream(fileIn), job);
end = 9223372036854775807L;
} else
{
if(start != 0L)
{
skipFirstLine = true;
start--;
fileIn.seek(start);
}
in = new LineReader(fileIn, job);
}
if(skipFirstLine)
start += in.readLine(new Text(), 0, (int)Math.min(2147483647L, end - start));
pos = start;
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
System.out.println(" ---------- INSIDE nextKeyValue()------------");
if(key==null){
key = new LongWritable();
}
if(value==null){
value = new Text();
}
key.set(pos);
value.clear();
final Text newLine = new Text("\n");
Text newVal = new Text();
int newSize = 0;
for(int i =0;i<NLINESTOPROCESS;i++){
Text v = new Text();
while(pos<end){
newSize = in.readLine(v, maxLineLength,Math.max((int)Math.min(Integer.MAX_VALUE, end-pos),maxLineLength));
value.append(v.getBytes(),0, v.getLength());
value.append(newLine.getBytes(),0, newLine.getLength());
if (newSize == 0) {
break;
}
pos += newSize;
if (newSize < maxLineLength) {
break;
}
}
}
return false;
}
}
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class CustomInputFormatmapper extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
public void map(LongWritable key, Text val, Context context)throws IOException, InterruptedException{
String value = val.toString();
String[] totalRows = value.split("\n");
int count =totalRows.length;
context.write(new LongWritable(Long.valueOf(count)), new LongWritable(1L));
}
}
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
public class CustomInputFormatReducer extends Reducer<LongWritable, LongWritable, LongWritable, LongWritable> {
public void reduce(LongWritable key, Iterable<LongWritable> val, Context context) throws IOException, InterruptedException{
System.out.println(" --------REDUCER--------");
long count =0;
for(LongWritable vals: val){
count++;
}
context.write(key, new LongWritable(count));
}
}