Hadoop completebulkload gives RetriesExhaustedException


I imported an 8 GB csv file with the importtsv command. Then I ran this command:


./hadoop jar /usr/local/hbase/hbase-0.94.10.jar completebulkload /app/hadoop/tmp/df/data/fb333 fb

After a while it gave the error below:


ERROR mapreduce.LoadIncrementalHFiles: Encountered unrecoverable error from region server
org.apache.hadoop.hbase.client.RetriesExhaustedException: Failed after attempts=14, exceptions:
Wed Oct 09 22:59:34 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36234 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:00:35 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36283 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:01:37 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36325 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:02:38 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, org.apache.hadoop.ipc.RemoteException: org.apache.hadoop.hbase.RegionTooBusyException: failed to get a lock in 60000ms
    at org.apache.hadoop.hbase.regionserver.HRegion.lock(HRegion.java:5889)
    at org.apache.hadoop.hbase.regionserver.HRegion.lock(HRegion.java:5875)
    at org.apache.hadoop.hbase.regionserver.HRegion.startBulkRegionOperation(HRegion.java:5834)
    at org.apache.hadoop.hbase.regionserver.HRegion.bulkLoadHFiles(HRegion.java:3628)
    at org.apache.hadoop.hbase.regionserver.HRegion.bulkLoadHFiles(HRegion.java:3611)
    at org.apache.hadoop.hbase.regionserver.HRegionServer.bulkLoadHFiles(HRegionServer.java:2930)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(WritableRpcEngine.java:320)
    at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1426)

Wed Oct 09 23:03:40 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36381 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:04:42 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36419 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:05:46 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36448 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:06:51 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36480 remote=localhost/127.0.0.1:50334]


How can I overcome this problem?

Just yesterday, after some effort, I managed to generate HFiles with MapReduce and load them into HBase programmatically using LoadIncrementalHFiles, so I hope I can help you here.

Could you try these things first:

  • Before running completebulkload, check that HFiles were actually generated in the output folder. Assuming your output folder is "output" and the column family name is "d", you should have HFiles under output/d/.

  • If they are there, run the completebulkload command. Suppose you still get the exception above: check whether the HFiles are still in the output folder. If they are not, then in most cases the data has been loaded into HBase even though the console shows an exception; check the HBase table's row count (see the small check sketch after the code sample below).

    The reason I suggest this is that I hit a similar problem where LoadIncrementalHFiles loaded the files into HBase and removed them from the output folder, but then still tried to read HFiles from that folder, which may be why you are seeing "timeout while waiting for channel to be ready for read".


  • If this still does not solve the problem, please share more logs for me to look at.

    Here is a code sample I use for importing a csv file:

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    
    public class SampleUploader {

        private static final String NAME = "SampleUploader";

        static class Uploader extends
                Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

            private long checkpoint = 100;
            private long count = 0;

            @Override
            public void map(LongWritable key, Text line, Context context)
                    throws IOException {

                // Split CSV line
                String[] values = line.toString().split(",");

                String rowStr = values[0].replaceAll("\"", "");
                String titleStr = values[1].replaceAll("\"", "");
                String bodyStr = values[2].replaceAll("\"", "");
                String tagsStr = values[3].replaceAll("\"", "");

                // Extract each value
                byte[] row = Bytes.toBytes(rowStr.trim());
                byte[] title = Bytes.toBytes(titleStr);
                byte[] body = Bytes.toBytes(bodyStr);
                byte[] tags = Bytes.toBytes(tagsStr);

                Put put = new Put(row);
                try {
                    put.add(Bytes.toBytes("st"), Bytes.toBytes("TITLE"), title);
                    put.add(Bytes.toBytes("st"), Bytes.toBytes("BODY"), body);
                    put.add(Bytes.toBytes("st"), Bytes.toBytes("TAGS"), tags);
                } catch (Exception e1) {
                    System.out.println("PUT EXC");
                    e1.printStackTrace();
                }

                // Uncomment below to disable WAL. This will improve performance but
                // means you will experience data loss in the case of a RegionServer crash.
                // put.setWriteToWAL(false);

                try {
                    context.write(new ImmutableBytesWritable(row), put);
                } catch (InterruptedException e) {
                    System.out.println("WRITE EXC");
                    e.printStackTrace();
                }

                // Set status every checkpoint lines
                if (++count % checkpoint == 0) {
                    context.setStatus("Emitting Put " + count);
                }
            }
        }

        /**
         * Job configuration.
         */
        public static Job configureJob(Configuration conf) throws IOException {
            Path inputPath = new Path("/home/coder/Downloads/Train3.csv");
            String tableName = "sodata";
            Job job = new Job(conf, NAME + "_" + tableName);
            job.setJarByClass(Uploader.class);
            FileInputFormat.setInputPaths(job, inputPath);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(Uploader.class);

            TableMapReduceUtil.initTableReducerJob(tableName, null, job);
            job.setNumReduceTasks(0);
            return job;
        }

        public static void main(String[] args) throws Exception {
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.master", "localhost:54310");
            // This is set high not to miss any line due to memory restrictions.
            conf.set("hbase.client.write.buffer", "1000000000");
            Job job = configureJob(conf);

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
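
    Note that this sample writes the Puts straight into the table (a map-only job wired up through TableMapReduceUtil.initTableReducerJob), so it does not go through HFiles or completebulkload at all.

    For the checks in the first two bullets, here is a minimal sketch of what I mean. It assumes the bulk-load output directory from your question, the table name "fb", and a column family called "d" as in the example above, so adjust the paths and names to your setup:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.ResultScanner;
    import org.apache.hadoop.hbase.client.Scan;

    public class BulkLoadCheck {

        public static void main(String[] args) throws Exception {
            Configuration conf = HBaseConfiguration.create();

            // 1. List any HFiles left under the column-family directory of the
            //    bulk-load output ("d" is just the example family name from above).
            FileSystem fs = FileSystem.get(conf);
            Path familyDir = new Path("/app/hadoop/tmp/df/data/fb333/d");
            if (fs.exists(familyDir)) {
                for (FileStatus f : fs.listStatus(familyDir)) {
                    System.out.println(f.getPath() + " (" + f.getLen() + " bytes)");
                }
            } else {
                System.out.println("No HFiles left under " + familyDir);
            }

            // 2. Count the rows of the target table to see whether the data
            //    was loaded despite the exception on the console.
            HTable table = new HTable(conf, "fb");
            ResultScanner scanner = table.getScanner(new Scan());
            long rows = 0;
            for (Result r : scanner) {
                rows++;
            }
            System.out.println("Row count of fb: " + rows);
            scanner.close();
            table.close();
        }
    }

    For a table of this size, the count command in the hbase shell or the RowCounter MapReduce job will be faster than this client-side scan; the scan is only meant to show the idea.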
    
    I went a different route and dropped the command-line approach; I have done it programmatically now.
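
    A minimal sketch of what that programmatic route can look like, assuming the prepared HFiles sit under the bulk-load output directory from the question and that the target table "fb" already exists (HBase 0.94 client API), so adjust names and paths to your environment:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

    public class ProgrammaticBulkLoad {

        public static void main(String[] args) throws Exception {
            Configuration conf = HBaseConfiguration.create();

            // Table the HFiles were prepared for (name taken from the question).
            HTable table = new HTable(conf, "fb");

            // LoadIncrementalHFiles is the class behind the completebulkload tool;
            // calling doBulkLoad drives the same bulk load from your own code.
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
            loader.doBulkLoad(new Path("/app/hadoop/tmp/df/data/fb333"), table);

            table.close();
        }
    }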