FileInputFormat，其中文件名为键，文本内容为值_File_Input_Map_Format_Hadoop

FileInputFormat，其中文件名为键，文本内容为值

file input map hadoop

FileInputFormat，其中文件名为键，文本内容为值,file,input,map,format,hadoop,File,Input,Map,Format,Hadoop,我想将整个文件用作MAP处理的单个记录，文件名作为键。我读过以下帖子：虽然顶级答案的理论是可靠的，但实际上没有提供任何代码或“操作方法” 这是我的自定义FileInputFormat和相应的RecordReader，它们可以编译，但不会生成任何记录数据。谢谢你的帮助 public class CommentsInput extends FileInputFormat<Text,Text> { protected boolean isSplitable(FileSyste

我想将整个文件作为 map 处理的单条记录，并以文件名作为键。
我读过以下帖子：
虽然顶级答案的理论是可靠的，但实际上没有提供任何代码或“操作方法”

这是我的自定义 FileInputFormat 和相应的 RecordReader，它们可以编译，但不会生成任何记录数据。
谢谢你的帮助

public class CommentsInput
    extends FileInputFormat<Text,Text> {
protected boolean isSplitable(FileSystem fs, Path filename)
{
    return false;
}
@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext ctx)
        throws IOException, InterruptedException {
    return new CommentFileRecordReader((FileSplit) split, ctx.getConfiguration());
}

公共类注释输入
扩展FileInputFormat{
受保护的布尔isSplitable（文件系统fs，路径文件名）
{
返回false；
}
@凌驾
公共RecordReader createRecordReader（InputSplit拆分，TaskAttemptContext ctx）
抛出IOException、InterruptedException{
返回新的CommentFileRecordReader（（FileSplit）split，ctx.getConfiguration（））；
}

/////////////////////////

/**
 * Record reader that emits exactly one record per split.
 *
 * <p>The key is the split's file path rendered as a string and the value is the
 * complete content of the file. If a compression codec matches the path, the
 * content is read through that codec's input stream.
 */
public class CommentFileRecordReader
    extends RecordReader<Text,Text> {

    /** Size in bytes of the scratch buffer used while draining the input stream. */
    private static final int BUFFER_SIZE = 64 * 1024;

    private InputStream in;
    private long start;
    private long length;
    private long position;
    private Text key;
    private Text value;
    private boolean processed;
    private FileSplit fileSplit;
    private Configuration conf;

    /**
     * Stores the split and configuration; no stream is opened here.
     *
     * @param fileSplit the split (whole file) this reader will consume
     * @param conf      configuration used to resolve the file system and codecs
     * @throws IOException never thrown by this body; kept for interface compatibility
     */
    public CommentFileRecordReader(FileSplit fileSplit, Configuration conf) throws IOException {
        this.fileSplit = fileSplit;
        this.conf = conf;
    }

    /**
     * Resets the reader state for {@code split} and opens its input stream.
     *
     * @param split   the split to read; cast to {@link FileSplit}
     * @param context the task attempt context supplying the configuration
     * @throws IOException          if the file cannot be opened
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void initialize(InputSplit split,
                           TaskAttemptContext context)
            throws IOException, InterruptedException {
        // Assign the field: the original shadowed it with a local of the same name.
        this.conf = context.getConfiguration();

        this.fileSplit = (FileSplit) split;
        this.start = fileSplit.getStart();
        this.length = fileSplit.getLength();
        this.position = 0;
        this.processed = false;

        // The key/value objects must exist before nextKeyValue() fills them.
        this.key = new Text();
        this.value = new Text();

        // Release a stream left over from an earlier initialize() before reopening.
        IOUtils.closeStream(this.in);
        this.in = openStream();
    }

    /**
     * Fills {@code key} and {@code value} with the single record of this split.
     *
     * <p>On the first call the key is set to the file path string and the value
     * to the full content of the stream; every later call returns {@code false}
     * without touching the arguments.
     *
     * @param key   receives the file path as a string
     * @param value receives the file content
     * @return {@code true} if the record was produced by this call, {@code false} otherwise
     * @throws IOException if the file cannot be opened or read
     */
    public boolean next(Text key, Text value) throws IOException {
        if (processed) {
            return false;
        }
        // Allow use without a prior initialize(): fall back to the constructor's split/conf.
        if (in == null) {
            in = openStream();
        }

        // Mutate the caller's objects; reassigning the parameters would be invisible to the caller.
        key.set(fileSplit.getPath().toString());
        value.clear();

        byte[] buffer = new byte[BUFFER_SIZE];
        try {
            int read;
            // Read to end-of-stream rather than the split length, which is the
            // on-disk size and differs from the decoded size when a codec is used.
            while ((read = in.read(buffer)) != -1) {
                value.append(buffer, 0, read);
            }
        } finally {
            IOUtils.closeStream(in);
            in = null;
        }

        position = length;
        processed = true;
        return true;
    }

    /**
     * Advances to the single record of this split.
     *
     * @return {@code true} the first time it is called, {@code false} afterwards
     * @throws IOException if the file cannot be opened or read
     */
    @Override
    public boolean nextKeyValue() throws IOException {
        if (key == null) {
            key = new Text();
        }
        if (value == null) {
            value = new Text();
        }
        return next(key, value);
    }

    /** Returns the current key object; {@code null} until the reader is initialized or advanced. */
    @Override
    public Text getCurrentKey() {
        return key;
    }

    /** Returns the current value object; {@code null} until the reader is initialized or advanced. */
    @Override
    public Text getCurrentValue() {
        return value;
    }

    /** Returns our progress within the split, as a float between 0 and 1. */
    @Override
    public float getProgress() {
        if (length == 0) {
            return 0.0f;
        }
        return Math.min(1.0f, position / (float) length);
    }

    /** Closes the input stream if it is still open. */
    @Override
    public void close() throws IOException {
        if (in != null) {
            in.close();
            in = null;
        }
    }

    /** Opens the split's file, wrapping it in a codec stream when a codec matches the path. */
    private InputStream openStream() throws IOException {
        Path path = fileSplit.getPath();
        FileSystem fs = path.getFileSystem(conf);
        FSDataInputStream raw = fs.open(path);
        try {
            CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
            if (codec != null) {
                return codec.createInputStream(raw);
            }
            return raw;
        } catch (IOException e) {
            // Do not leak the raw stream when the codec wrapper cannot be created.
            IOUtils.closeStream(raw);
            throw e;
        }
    }
}

公共类CommentFileRecordReader
扩展RecordReader{
私有输入流；
私人长期启动；
私人长；
私人多头头寸；
私钥；
私有文本值；
私有布尔处理；
私有文件分割文件分割；
私有配置配置；
公共CommentFileRecordReader（FileSplit FileSplit，配置配置）引发IOException
{
this.fileSplit=fileSplit；
this.conf=conf；
}
/**文件输入流的样板初始化代码*/
@凌驾
公共无效初始化（InputSplit split，
TaskAttemptContext（上下文）
抛出IOException、InterruptedException{
conf=context.getConfiguration（）；
fileSplit=（fileSplit）拆分；
this.start=fileSplit.getStart（）；
this.length=fileSplit.getLength（）；
这个位置=0；
此参数=假；
Path Path=fileSplit.getPath（）；
FileSystem fs=path.getFileSystem（conf）；
FSDataInputStream in=fs.open（路径）；
CompressionCodecFactory codecs=新的CompressionCodecFactory（conf）；
CompressionCodec codec=codecs.getCodec（路径）；
如果（编解码器！=null）
this.in=codec.createInputStream（in）；
其他的
this.in=in；
//如果使用可写内容：
//键=新文本（）；
//值=新文本（）；
}
公共布尔next（文本键、文本值）引发IOException
{
如果（！已处理）
{
key=新文本（fileSplit.getPath（）.toString（））；
Path file=fileSplit.getPath（）；
FileSystem fs=file.getFileSystem（conf）；
FSDataInputStream in=null；
byte[]contents=新字节[（int）fileSplit.getLength（）]；
尝试
{
in=fs.open（文件）；
IOUtils.readFully（in，contents，0，contents.length）；
value.set（contents.toString（））；
}
最后
{
IOUtils.closeStream（in）；
}
已处理=真；
返回true；
}
返回false；
}
@凌驾
公共布尔值nextKeyValue（）引发IOException{
//TODO解析下一个键值，更新位置并返回true。
返回false；
}
@凌驾
公共文本getCurrentKey（）{
返回键；
}
@凌驾
公共文本getCurrentValue（）{
返回值；
}
/**以介于0和1之间的浮点形式返回分割内的进度*/
@凌驾
公共进度（）{
如果（长度==0）
返回0.0f；
返回数学最小值（1.0f，位置/（浮动）长度）；
}
@凌驾
public void close（）引发IOException{
if（in！=null）
in.close（）；
}
}

您需要定义自己的键（key）类，并确保您的类使用它。您可以查阅如何定义自己的键类；文件名可以通过调用 getName（）方法获得，然后用它来构造您的键。

您是否找到了实现此目的的方法？您是否找到了正确的解决方案？