Hadoop 读取txt文件并写入HBase
我正在尝试从一个txt文件读取数据并写入HBase。
Job Class
// Configure the job that reads lines from a text file and writes Puts to HBase.
Job job = new Job(conf, "HWriterJob");
job.setJarByClass(HWriterJob.class);
FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
job.setMapperClass(TokenizerMapper.class);
// MapReduce defaults the map output key class to LongWritable, but the mapper
// emits Text keys — declare the map output types explicitly, otherwise the
// framework throws "ClassCastException: LongWritable cannot be cast to Text".
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Put.class);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(Put.class);
TableMapReduceUtil.initTableReducerJob(table, null, job);
Mapper Class
@Override
// Splits one pipe-delimited input line into fields and emits the resulting
// HBase Puts, keyed by each Put's row key wrapped in a Text.
//
// NOTE(review): with the default TextInputFormat the framework delivers
// LongWritable keys, so declaring the input key as Text here (and in the
// enclosing Mapper's generics) causes the reported ClassCastException at
// runtime — the input key type should be LongWritable (or Object).
// NOTE(review): the job's output key class is ImmutableBytesWritable, but
// this writes a Text key; emitting entry.getKey() instead (as in the fixed
// version below) keeps the emitted type consistent with the job config.
public void map(Text key, Text value, Context context)
throws IOException, InterruptedException {
// Tokenize the line on '|'; countTokens() presizes the field array.
String line = value.toString();
StringTokenizer st = new StringTokenizer(line, "|");
String result[] = new String[st.countTokens()];
int i = 0;
while (st.hasMoreTokens()) {
result[i] = st.nextToken();
i++;
}
// writeToHBase converts the parsed fields into row-key -> Put pairs
// (defined elsewhere in this class — not visible here).
Map<ImmutableBytesWritable,Put> resultSet = writeToHBase(result);
for (Map.Entry<ImmutableBytesWritable,Put> entry : resultSet.entrySet()) {
context.write(new Text(entry.getValue().getRow()), entry.getValue());
}
}
Reducer Class
/**
 * Identity reducer: forwards every incoming {@link Put} to the output
 * unchanged, keyed by the same Text key it arrived with.
 */
public void reduce(Text key, Iterable<Put> values, Context context)
throws IOException, InterruptedException {
// No aggregation needed — each mutation is emitted as-is.
for (Put mutation : values) {
context.write(key, mutation);
}
}
作业类
Job job = new Job(conf, "HWriterJob");
job.setJarByClass(HWriterJob.class);
FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
job.setMapperClass(TokenizerMapper.class);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(Put.class);
TableMapReduceUtil.initTableReducerJob(table, null, job);
映射器类
@Override
public void map(Text key, Text value, Context context)
        throws IOException, InterruptedException {
    String line = value.toString();
    StringTokenizer st = new StringTokenizer(line, "|");
    String result[] = new String[st.countTokens()];
    int i = 0;
    while (st.hasMoreTokens()) {
        result[i] = st.nextToken();
        i++;
    }
    Map<ImmutableBytesWritable,Put> resultSet = writeToHBase(result);
    for (Map.Entry<ImmutableBytesWritable,Put> entry : resultSet.entrySet()) {
        context.write(new Text(entry.getValue().getRow()), entry.getValue());
    }
}
减速器类
public void reduce(Text key, Iterable<Put> values, Context context)
        throws IOException, InterruptedException {
    for (Put val : values) {
        context.write(key, val);
    }
}
但我也没有成功
我得到以下错误：java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.io.Text。显然 MapReduce 默认使用 LongWritable 作为 MapOutputKeyClass，而在你的情况下它应该是 Text，因此出现该错误。请尝试设置 job.setMapOutputKeyClass(Text.class)，并相应地设置 job.setMapOutputValueClass。
@job Class
@Job Class
// Map-only job: the mapper writes Puts straight to HBase via TableOutputFormat.
Job job = new Job(conf, "HWriterJob");
job.setJarByClass(HWriterJob.class);
job.setMapperClass(TokenizerMapper.class);
// Set the input path exactly once. The original set it twice, from two
// different argument arrays (otherArgs[0] and args[0]) — a latent
// inconsistency if the two arrays differ.
TextInputFormat.setInputPaths(job, new Path(otherArgs[0]));
job.setInputFormatClass(TextInputFormat.class);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(Put.class);
// NOTE(review): TableOutputFormat also requires the target table name in the
// configuration (conf.set(TableOutputFormat.OUTPUT_TABLE, ...)) — confirm it
// is set before the Job is created.
job.setOutputFormatClass(TableOutputFormat.class);
job.setNumReduceTasks(0); // map-only: Puts go directly to the table
System.exit(job.waitForCompletion(true) ? 0 : 1);
@Mapper
// Emit each row-key -> Put pair produced from the parsed line. Emitting
// the entry's ImmutableBytesWritable key matches the job's output key class.
Map<ImmutableBytesWritable,Put> resultSet = writeToHBase(result);
for (Map.Entry<ImmutableBytesWritable,Put> row : resultSet.entrySet()) {
context.write(row.getKey(), row.getValue());
}
Job job = new Job(conf, "HWriterJob");
job.setJarByClass(HWriterJob.class);
FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
job.setMapperClass(TokenizerMapper.class);
TextInputFormat.setInputPaths(job, new Path(args[0]));
job.setInputFormatClass(TextInputFormat.class);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(Put.class);
job.setOutputFormatClass(TableOutputFormat.class);
job.setNumReduceTasks(0);
System.exit(job.waitForCompletion(true) ? 0 : 1);
@制图员
Map<ImmutableBytesWritable,Put> resultSet = writeToHBase(result);
for (Map.Entry<ImmutableBytesWritable,Put> entry : resultSet.entrySet()) {
    context.write(entry.getKey(), entry.getValue());
}