hadoop CustomInputFormat未被调用_Hadoop_Mapreduce

hadoop CustomInputFormat未被调用

hadoop mapreduce

hadoop CustomInputFormat未被调用,hadoop,mapreduce,Hadoop,Mapreduce,我已经编写了一个自定义输入格式，并在job中配置了它。仍然没有调用inputformat。我保留了一些SOP，以便在运行代码时打印，但没有一个正在打印。即使我在driver类中注释自定义inputformat，输出仍然保持不变。我在哪里失踪驾驶舱 public class TestDriver { public static void main(String args[]) throws IOException, InterruptedException, ClassNotFound

我编写了一个自定义输入格式（InputFormat），并在 job 中配置了它，但该 InputFormat 始终没有被调用。我在代码中加入了一些 System.out.println（SOP）语句，期望运行时能打印出来，但一条也没有打印。即使我在 driver 类中把设置自定义 InputFormat 的那一行注释掉，输出也完全相同。我遗漏了什么？

驱动类（Driver）

/**
 * Driver that configures and submits the "Custom Format" MapReduce job.
 *
 * <p>The job reads {@code In\VISA_Details.csv} through {@link CustomInputFormat},
 * runs {@link CustomInputFormatmapper} and {@link CustomInputFormatReducer}, and
 * writes text output to the hard-coded output directory below.
 */
public class TestDriver {

    /**
     * Builds the job, submits it, waits for completion and prints the boolean
     * returned by {@code waitForCompletion}.
     *
     * @param args command-line arguments (not used)
     * @throws IOException            propagated from job setup or submission
     * @throws InterruptedException   propagated from {@code waitForCompletion}
     * @throws ClassNotFoundException propagated from {@code waitForCompletion}
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

        Configuration conf = new Configuration();
        Job job = new Job(conf, "Custom Format");

        // Tell Hadoop which jar carries the job classes; without this the job
        // only works when every class is already on the local classpath.
        job.setJarByClass(TestDriver.class);

        job.setMapperClass(CustomInputFormatmapper.class);
        job.setReducerClass(CustomInputFormatReducer.class);
        job.setInputFormatClass(CustomInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(LongWritable.class);

        // The reducer emits <LongWritable, LongWritable>; declare it explicitly
        // instead of relying on the framework defaults.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(LongWritable.class);

        // Project-specific local file system implementation for the file:// scheme.
        job.getConfiguration().set("fs.file.impl", "com.learn.WinLocalFileSystem");

        Path inPath = new Path("In\\VISA_Details.csv");
        Path outPath = new Path("C:\\Users\\Desktop\\Hadoop learning\\output\\run1");

        FileInputFormat.setInputPaths(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);

        System.out.println(job.waitForCompletion(true));
    }
}

自定义输入格式（CustomInputFormat）与自定义记录读取器（CustomRecordReader）

import java.io.IOException；
导入org.apache.hadoop.conf.Configuration；
导入org.apache.hadoop.fs.FSDataInputStream；
导入org.apache.hadoop.fs.FileSystem；
导入org.apache.hadoop.fs.Path；
导入org.apache.hadoop.io.LongWritable；
导入org.apache.hadoop.io.Text；
导入org.apache.hadoop.io.compress.CompressionCodec；
导入org.apache.hadoop.io.compress.CompressionCodecFactory；
导入org.apache.hadoop.mapred.FileSplit；
导入org.apache.hadoop.mapreduce.InputSplit；
导入org.apache.hadoop.mapreduce.RecordReader；
导入org.apache.hadoop.mapreduce.TaskAttemptContext；
导入org.apache.hadoop.util.LineReader；
公共类CustomRecordReader扩展了RecordReader{
专用压缩编解码器工厂压缩编解码器；
私有final int NLINESTOPROCESS=3；
私人长期启动；
私人长pos；
私人长尾；
专用线路阅读器；
私有整数maxLineLength；
私钥；
私有文本值；
@凌驾
public void close（）引发IOException{
//TODO自动生成的方法存根
}
@凌驾
公共对象getCurrentKey（）引发IOException、InterruptedException{
//TODO自动生成的方法存根
返回null；
}
@凌驾
公共对象getCurrentValue（）引发IOException、InterruptedException{
//TODO自动生成的方法存根
返回null；
}
@凌驾
public float getProgress（）引发IOException、InterruptedException{
//TODO自动生成的方法存根
返回0；
}
@凌驾
公共void初始化（InputSplit InputSplit，TaskAttemptContext TaskAttemptContext）
抛出IOException、InterruptedException{
System.out.println（“------------内部初始化：这不是打印------------”；
FileSplit split=（FileSplit）inputsplit；
配置作业=taskattemptcontext.getConfiguration（）；
maxLineLength=job.getInt（“mapred.linerecordreader.maxlength”，2147483647）；
start=split.getStart（）；
end=start+split.getLength（）；
路径文件=split.getPath（）；
compressionCodecs=新的CompressionCodecFactory（作业）；
CompressionCodec codec=compressionCodecs.getCodec（文件）；
FileSystem fs=file.getFileSystem（作业）；
FSDataInputStream fileIn=fs.open（split.getPath（））；
布尔skipFirstLine=false；
如果（编解码器！=null）
{
in=新的LineReader（codec.createInputStream（fileIn），作业）；
末端=9223372036854775807L；
}否则
{
如果（启动！=0L）
{
skipFirstLine=true；
开始--；
fileIn.seek（开始）；
}
in=新的行读取器（文件输入，作业）；
}
if（skipFirstLine）
start+=in.readLine（newtext（），0，（int）Math.min（2147483647L，end-start））；
pos=开始；
}
@凌驾
公共布尔值nextKeyValue（）引发IOException、InterruptedException{
System.out.println（“------------INSIDE-nextKeyValue（）--------------”；
if（key==null）{
key=新的LongWritable（）；
}
如果（值==null）{
值=新文本（）；
}
按键设置（pos）；
value.clear（）；
最终文本换行符=新文本（“\n”）；
Text newVal=新文本（）；
int newSize=0；
对于（int i=0；i我正在回答我自己的问题，因为这将帮助其他人解决我面临的问题。我导入的软件包有一个问题。
下面列出我犯的错误：
CustomInputFormat 类
1） 缺少 @Override 注解
2） 错误地导入了 org.apache.hadoop.mapred.InputSplit，而应当导入 org.apache.hadoop.mapreduce.InputSplit
CustomRecordReader 类
1） 错误地从 org.apache.hadoop.mapred.* 导入，而应当从 org.apache.hadoop.mapreduce.* 导入；
我假设您认为它不起作用，是因为您没有看到打印语句？@DebD，您是否尝试过加上 job.setJarByClass(TestDriver.class)？@irW:我是在 eclipse 中运行它，而不是在 VM 中运行，所以 SOP 语句应该会显示出来。@SSaikia_JtheRocker:我不是在作业中运行它，所以我不需要加那一行。即使我加了那一行，自定义的 inputformat 也不会被选用。也许您使用的 windows 文件系统出了问题，那是我不熟悉的代码？您能给我们看一下这次运行的日志吗？
    import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class CustomInputFormat extends TextInputFormat{

    public RecordReader createRecordReader(InputSplit split, TaskAttemptContext context)
    {
        System.out.println(" ------------ INSIDE createRecordReader()--------------");
        return new CustomRecordReader();
    }
}

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.LineReader;

public class CustomRecordReader extends RecordReader {

    private CompressionCodecFactory compressionCodecs;
    private final int NLINESTOPROCESS = 3;
    private long start;
    private long pos;
    private long end;
    private LineReader in;
    private int maxLineLength;
    private LongWritable key;
    private Text value;

    @Override
    public void close() throws IOException {
        // TODO Auto-generated method stub

    }

    @Override
    public Object getCurrentKey() throws IOException, InterruptedException {
        // TODO Auto-generated method stub
        return null;
    }

    @Override
    public Object getCurrentValue() throws IOException, InterruptedException {
        // TODO Auto-generated method stub
        return null;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        // TODO Auto-generated method stub
        return 0;
    }

    @Override
    public void initialize(InputSplit inputsplit,TaskAttemptContext taskattemptcontext) 
            throws IOException, InterruptedException {
        System.out.println(" ---------- INSIDE INITILISE:  THIS IS NOT PRINTING----------");
        FileSplit split = (FileSplit)inputsplit;
        Configuration job = taskattemptcontext.getConfiguration();
        maxLineLength = job.getInt("mapred.linerecordreader.maxlength", 2147483647);
        start = split.getStart();
        end = start + split.getLength();
        Path file = split.getPath();
        compressionCodecs = new CompressionCodecFactory(job);
        CompressionCodec codec = compressionCodecs.getCodec(file);
        FileSystem fs = file.getFileSystem(job);
        FSDataInputStream fileIn = fs.open(split.getPath());
        boolean skipFirstLine = false;
        if(codec != null)
        {
            in = new LineReader(codec.createInputStream(fileIn), job);
            end = 9223372036854775807L;
        } else
        {
            if(start != 0L)
            {
                skipFirstLine = true;
                start--;
                fileIn.seek(start);
            }
            in = new LineReader(fileIn, job);
        }
        if(skipFirstLine)
            start += in.readLine(new Text(), 0, (int)Math.min(2147483647L, end - start));
        pos = start;

    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {

        System.out.println(" ---------- INSIDE nextKeyValue()------------");
        if(key==null){
            key = new LongWritable();
        }
        if(value==null){
            value = new Text();
        }
        key.set(pos);
        value.clear();

        final Text newLine = new Text("\n");
        Text newVal = new Text();
         int newSize = 0;

        for(int i =0;i<NLINESTOPROCESS;i++){
             Text v = new Text();

             while(pos<end){
                 newSize = in.readLine(v, maxLineLength,Math.max((int)Math.min(Integer.MAX_VALUE, end-pos),maxLineLength));
                 value.append(v.getBytes(),0, v.getLength());
                 value.append(newLine.getBytes(),0, newLine.getLength());

                 if (newSize == 0) {
                        break;
                    }
                    pos += newSize;
                    if (newSize < maxLineLength) {
                        break;
                    }

             }
        }


        return false;
    }

}

    import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CustomInputFormatmapper extends Mapper<LongWritable, Text, LongWritable, LongWritable> {

    public void map(LongWritable key, Text val, Context context)throws IOException, InterruptedException{

        String value = val.toString();
        String[] totalRows = value.split("\n");
        int count =totalRows.length;

        context.write(new LongWritable(Long.valueOf(count)), new LongWritable(1L));

    }
}

    import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class CustomInputFormatReducer extends Reducer<LongWritable, LongWritable, LongWritable, LongWritable> {

    public void reduce(LongWritable key, Iterable<LongWritable> val, Context context) throws IOException, InterruptedException{
        System.out.println(" --------REDUCER--------");
        long count =0;
        for(LongWritable vals: val){
            count++;
        }
        context.write(key, new LongWritable(count));
    }

}