Hadoop completebulkload抛出RetriesExhaustedException
我用importtsv命令导入了一个8GB的csv文件。然后我运行了下面这个命令：
./hadoop jar /usr/local/hbase/hbase-0.94.10.jar completebulkload /app/hadoop/tmp/df/data/fb333 fb
过了一会儿,它在下面给出了这个错误
ERROR mapreduce.LoadIncrementalHFiles: Encountered unrecoverable error from region server
org.apache.hadoop.hbase.client.RetriesExhaustedException: Failed after attempts=14, exceptions:
Wed Oct 09 22:59:34 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36234 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:00:35 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36283 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:01:37 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36325 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:02:38 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, org.apache.hadoop.ipc.RemoteException: org.apache.hadoop.hbase.RegionTooBusyException: failed to get a lock in 60000ms
at org.apache.hadoop.hbase.regionserver.HRegion.lock(HRegion.java:5889)
at org.apache.hadoop.hbase.regionserver.HRegion.lock(HRegion.java:5875)
at org.apache.hadoop.hbase.regionserver.HRegion.startBulkRegionOperation(HRegion.java:5834)
at org.apache.hadoop.hbase.regionserver.HRegion.bulkLoadHFiles(HRegion.java:3628)
at org.apache.hadoop.hbase.regionserver.HRegion.bulkLoadHFiles(HRegion.java:3611)
at org.apache.hadoop.hbase.regionserver.HRegionServer.bulkLoadHFiles(HRegionServer.java:2930)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(WritableRpcEngine.java:320)
at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1426)
Wed Oct 09 23:03:40 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36381 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:04:42 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36419 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:05:46 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36448 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:06:51 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36480 remote=localhost/127.0.0.1:50334]
如何克服这个问题？就在昨天，经过一些努力，我成功地使用MapReduce生成了HFiles，并使用LoadIncrementalHFiles以编程方式加载到了HBase中。所以我希望能在这里帮到你。你能先试试这些方法吗？
如果这仍然不能解决问题,请提供更多日志供我检查。这是我用于导入csv文件的代码示例
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Map-only MapReduce job that reads a CSV file and writes each line into the
 * HBase table "sodata" as a Put (column family "st", qualifiers TITLE/BODY/TAGS,
 * row key taken from the first CSV column).
 */
public class SampleUploader {

    private static final String NAME = "SampleUploader";

    /**
     * Mapper that parses one CSV line per call and emits a {@code Put} keyed by
     * the first column. Expects at least 4 comma-separated columns:
     * row-key, title, body, tags.
     */
    static class Uploader extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        // Emit a status update every this many processed lines.
        private long checkpoint = 100;
        // Number of lines processed so far by this mapper instance.
        private long count = 0;

        @Override
        public void map(LongWritable key, Text line, Context context)
                throws IOException {
            // Split the CSV line. The -1 limit preserves trailing empty columns
            // (e.g. an empty tags field); the default split would drop them and
            // values[3] would throw ArrayIndexOutOfBoundsException.
            // NOTE(review): a plain split cannot handle quoted fields that
            // themselves contain commas; a real CSV parser is needed for that.
            String[] values = line.toString().split(",", -1);
            if (values.length < 4) {
                // Malformed line: report and skip it rather than crash the task.
                context.setStatus("Skipping malformed line at offset " + key.get());
                return;
            }
            // Strip literal double quotes; replace() is the literal (non-regex)
            // equivalent of the original replaceAll("\"", "").
            String rowStr = values[0].replace("\"", "");
            String titleStr = values[1].replace("\"", "");
            String bodyStr = values[2].replace("\"", "");
            String tagsStr = values[3].replace("\"", "");

            byte[] row = Bytes.toBytes(rowStr.trim());

            Put put = new Put(row);
            put.add(Bytes.toBytes("st"), Bytes.toBytes("TITLE"), Bytes.toBytes(titleStr));
            put.add(Bytes.toBytes("st"), Bytes.toBytes("BODY"), Bytes.toBytes(bodyStr));
            put.add(Bytes.toBytes("st"), Bytes.toBytes("TAGS"), Bytes.toBytes(tagsStr));

            // Uncomment below to disable WAL. This will improve performance but
            // means you will experience data loss in the case of a RegionServer
            // crash.
            // put.setWriteToWAL(false);

            try {
                context.write(new ImmutableBytesWritable(row), put);
            } catch (InterruptedException e) {
                // Restore the interrupt flag and surface the failure instead of
                // silently swallowing it.
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while writing Put for row " + rowStr, e);
            }

            // Set status every checkpoint lines so the framework shows progress.
            if (++count % checkpoint == 0) {
                context.setStatus("Emitting Put " + count);
            }
        }
    }

    /**
     * Job configuration: a map-only job reading a local CSV file and writing
     * Puts directly into the "sodata" table.
     *
     * @param conf cluster/HBase configuration to base the job on
     * @return the configured, unsubmitted Job
     * @throws IOException if job setup fails
     */
    public static Job configureJob(Configuration conf) throws IOException {
        Path inputPath = new Path("/home/coder/Downloads/Train3.csv");
        String tableName = "sodata";
        Job job = new Job(conf, NAME + "_" + tableName);
        job.setJarByClass(Uploader.class);
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(Uploader.class);
        // Writes go straight to the table; the job is map-only, so no reducers.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
        return job;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.master", "localhost:54310");
        // This is set high not to miss any line due to memory restrictions.
        conf.set("hbase.client.write.buffer", "1000000000");
        Job job = configureJob(conf);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
import java.io.IOException;
导入org.apache.hadoop.conf.Configuration;
导入org.apache.hadoop.fs.Path;
导入org.apache.hadoop.hbase.HBaseConfiguration;
导入org.apache.hadoop.hbase.client.Put;
导入org.apache.hadoop.hbase.KeyValue;
导入org.apache.hadoop.hbase.io.ImmutableBytesWritable;
导入org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
导入org.apache.hadoop.hbase.util.Bytes;
导入org.apache.hadoop.io.LongWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Job;
导入org.apache.hadoop.mapreduce.Mapper;
导入org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
导入org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
导入org.apache.hadoop.util.GenericOptionsParser;
公共类样本上传器{
私有静态最终字符串NAME=“SampleUploader”;
静态类上载程序扩展
制图员{
专用长检查点=100;
私有长计数=0;
@凌驾
公共void映射(可长写键、文本行、上下文)
抛出IOException{
//分割CSV行
字符串[]值=line.toString().split(“,”);
字符串rowStr=值[0]。replaceAll(“\”,“);
字符串titleStr=值[1]。replaceAll(“\”,”);
字符串bodyStr=值[2]。replaceAll(“\”,“);
字符串tagsStr=values[3]。replaceAll(“\”,”);
//提取每个值
byte[]行=Bytes.toBytes(rowStr.trim());
字节[]标题=字节.字节(标题);
byte[]body=Bytes.toBytes(bodyStr);
字节[]标记=字节.toBytes(tagsStr);
Put Put=新Put(行);
试一试{
put.add(Bytes.toBytes(“st”)、Bytes.toBytes(“TITLE”)、TITLE);
put.add(Bytes.toBytes(“st”)、Bytes.toBytes(“BODY”)、BODY;
添加(Bytes.toBytes(“st”)、Bytes.toBytes(“TAGS”)、TAGS);
}捕获(异常e1){
系统输出打印项次(“输出EXC”);
e1.printStackTrace();
}
//取消下面的注释以禁用WAL。这将提高性能,但
//意味着
//在RegionServer的情况下,您将遇到数据丢失
//撞车。
//put.setWriteToWAL(false);
试一试{
write(新的ImmutableBytesWritable(行),put);
}捕捉(中断异常e){
System.out.println(“WRITE EXC”);
e、 printStackTrace();
}
//设置每个检查点行的状态
如果(++计数%检查点==0){
context.setStatus(“发出Put”+计数);
}
}
}
/**
*作业配置。
*/
公共静态作业配置作业(配置配置)引发IOException{
路径输入路径=新路径(“/home/coder/Downloads/Train3.csv”);
字符串tableName=“sodata”;
作业作业=新作业(conf,NAME+“”+tableName);
setJarByClass(Uploader.class);
setInputPath(作业,inputPath);
setInputFormatClass(TextInputFormat.class);
setMapperClass(Uploader.class);
TableMapReduceUtil.initTableReducerJob(tableName,null,job);
job.setNumReduceTasks(0);
返回工作;
}
公共静态void main(字符串[]args)引发异常{
Configuration=HBaseConfiguration.create();
conf.set(“hbase.master”、“localhost:54310”);
conf.set(“hbase.client.write.buffer”,“100000000”);//该值设置得很高,不会由于内存限制而漏掉任何一行。
作业作业=配置作业(conf);
系统退出(作业等待完成(真)?0:1;
}
}
我选择了另一种方式并离开了命令行方式。现在我已经通过编程完成了。