Reading a txt file in Hadoop and writing it to HBase


I am trying to read data from a txt file and write it into HBase.

Job Class
    Job job = new Job(conf, "HWriterJob");
    job.setJarByClass(HWriterJob.class);
    FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));

    job.setMapperClass(TokenizerMapper.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);

    TableMapReduceUtil.initTableReducerJob(table, null, job);

Mapper Class
    @Override
    public void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {

        String line = value.toString();

        StringTokenizer st = new StringTokenizer(line, "|");
        String[] result = new String[st.countTokens()];
        int i = 0;
        while (st.hasMoreTokens()) {
            result[i] = st.nextToken();
            i++;
        }

        Map<ImmutableBytesWritable, Put> resultSet = writeToHBase(result);

        for (Map.Entry<ImmutableBytesWritable, Put> entry : resultSet.entrySet()) {
            context.write(new Text(entry.getValue().getRow()), entry.getValue());
        }
    }

Reducer Class

    public void reduce(Text key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {

        for (Put val : values) {
            context.write(key, val);
        }
    }
But I have not been successful.


I get the following error:

    java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.io.Text

Apparently MapReduce defaults to LongWritable as the MapOutputKeyClass; in your case it should be Text, hence the error.

Try setting job.setMapOutputKeyClass(Text.class), and also set job.setMapOutputValueClass appropriately.
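
For reference, a minimal sketch of those two driver calls; the Put value class here is an assumption carried over from the original job setup:

    // Declare the mapper's output types explicitly; they can differ from the
    // job's final output types whenever a reduce stage is involved.
    job.setMapOutputKeyClass(Text.class);   // mapper emits Text keys
    job.setMapOutputValueClass(Put.class);  // mapper emits Put values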

@Job Class

    Job job = new Job(conf, "HWriterJob");
    job.setJarByClass(HWriterJob.class);
    FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));

    job.setMapperClass(TokenizerMapper.class);
    TextInputFormat.setInputPaths(job, new Path(args[0]));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    job.setOutputFormatClass(TableOutputFormat.class);

    job.setNumReduceTasks(0);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
@Mapper

    Map<ImmutableBytesWritable, Put> resultSet = writeToHBase(result);

    for (Map.Entry<ImmutableBytesWritable, Put> entry : resultSet.entrySet()) {
        context.write(entry.getKey(), entry.getValue());
    }
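
Putting the pieces together, here is a sketch of what the corrected, map-only TokenizerMapper could look like. The writeToHBase helper body, the "cf" column family, and the first-field-as-row-key layout are assumptions for illustration; adapt them to your schema:

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.StringTokenizer;

    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    // With TextInputFormat the input key is the byte offset (LongWritable),
    // not Text -- declaring it as Text is what triggered the original
    // ClassCastException.
    public class TokenizerMapper
            extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            // Split the pipe-delimited line into fields.
            StringTokenizer st = new StringTokenizer(value.toString(), "|");
            String[] result = new String[st.countTokens()];
            int i = 0;
            while (st.hasMoreTokens()) {
                result[i++] = st.nextToken();
            }

            Map<ImmutableBytesWritable, Put> resultSet = writeToHBase(result);

            for (Map.Entry<ImmutableBytesWritable, Put> entry : resultSet.entrySet()) {
                context.write(entry.getKey(), entry.getValue());
            }
        }

        // Hypothetical stand-in for the asker's writeToHBase helper:
        // first field becomes the row key, the rest land in one column family.
        private Map<ImmutableBytesWritable, Put> writeToHBase(String[] fields) {
            Map<ImmutableBytesWritable, Put> puts = new HashMap<ImmutableBytesWritable, Put>();
            byte[] row = Bytes.toBytes(fields[0]);
            Put put = new Put(row);
            for (int c = 1; c < fields.length; c++) {
                put.add(Bytes.toBytes("cf"), Bytes.toBytes("col" + c),
                        Bytes.toBytes(fields[c]));
            }
            puts.put(new ImmutableBytesWritable(row), put);
            return puts;
        }
    }

Note that TableOutputFormat also needs to know which table to write to, e.g. via conf.set(TableOutputFormat.OUTPUT_TABLE, "mytable") before the job is created.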