使用flume 1.4和自定义接收器将数据流式传输到hbase 0.98.1
标签:hbase, apache-zookeeper, flume, znodes
我试图使用 Apache Flume 和自定义接收器将数据加载到 HBase 中,但出现以下错误:
ERROR async.HBaseClient: The znode for the -ROOT- region doesn't exist! 14/05/14
17:12:08 ERROR lifecycle.LifecycleSupervisor: Unable to start SinkRunner: {
policy:org.apache.flume.sink.DefaultSinkProcessor@923288b counterGroup:{ name:null
counters:{} } } - Exception follows. org.apache.flume.FlumeException: Interrupted while waiting for Hbase Callbacks at org.apache.flume.sink.hbase.AsyncHBaseSink.start(AsyncHBaseSink.java:379)
注意:我的hbase运行正常,我可以使用java客户端创建表、放置数据和获取数据
这是我的 flume.conf:
# A single-node Flume configuration
# Tails a CSV file with an exec source, buffers the events in a file channel,
# and writes them to HBase through the AsyncHBaseSink.
# Name the components on this agent
agent3.sources = source1
agent3.sinks = sink1
agent3.channels = channel1
# Describe/configure source1
agent3.sources.source1.type = exec
agent3.sources.source1.command = tail -f /tmp/testGenerate.csv
# Describe sink1
agent3.sinks.sink1.type = org.apache.flume.sink.hbase.AsyncHBaseSink
agent3.sinks.sink1.table = AdreamLumiHB
agent3.sinks.sink1.columnFamily = lumiCF
agent3.sinks.sink1.batchSize = 5000
#agent3.sinks.sink1.serializer = com.hbase.log.util.SplittingSerializer
agent3.sinks.sink1.serializer = org.apache.flume.sink.hbase.SplittingSerializer
agent3.sinks.sink1.zookeeperQuorum = localhost
agent3.sinks.sink1.znodeParent = /hbase
#agent3.sinks.sink1.serializer = org.apache.flume.sink.hbase.SimpleAsyncHbaseEventSerializer
#agent3.sinks.sink1.serializer.regex =
#agent3.sinks.sink1.serializer.regexIgnoreCase = true
# Column qualifiers, in the same order as the comma-separated fields of each event
agent3.sinks.sink1.serializer.columns = id,nom,valeur,batiment,etage,piece
# Use a channel which buffers events to a file
# -- The component type name, needs to be FILE.
agent3.channels.channel1.type = FILE
# checkpointDir ~/.flume/file-channel/checkpoint The directory where checkpoint file will be stored
# dataDirs ~/.flume/file-channel/data The directory where log files will be stored
# The maximum size of transaction supported by the channel
agent3.channels.channel1.transactionCapacity = 1000000
# Amount of time (in millis) between checkpoints
agent3.channels.channel1.checkpointInterval = 30000
# Max size (in bytes) of a single log file
agent3.channels.channel1.maxFileSize = 2146435071
# Maximum capacity of the channel
agent3.channels.channel1.capacity = 10000000
#keep-alive 3 Amount of time (in sec) to wait for a put operation
#write-timeout 3 Amount of time (in sec) to wait for a write operation
# Bind the source and sink to the channel
agent3.sources.source1.channels = channel1
agent3.sinks.sink1.channel = channel1
这是我的hbase-site.xml
<!-- HBase site configuration: single local node, data on HDFS, ZooKeeper on localhost:2181 -->
<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://localhost:8020/hbase-0.98.1</value>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>/home/alpha/hadoop_data/hbase-${user.name}</value>
</property>
<!-- Declared once; the original file repeated this property with the same value. -->
<property>
<name>hbase.master.info.bindAddress</name>
<value>0.0.0.0</value>
</property>
<property>
<name>hbase.master</name>
<value>localhost:60000</value>
<description>The host and port that the HBase master runs at.</description>
</property>
<property>
<name>hbase.master.info.port</name>
<value>60010</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>localhost</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/home/alpha/hadoop_data/hbase-data</value>
</property>
</configuration>
这是自定义序列化器(供 AsyncHBaseSink 使用)的代码:
package org.apache.flume.sink.hbase;
import java.util.ArrayList;
import java.util.List;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;
import org.hbase.async.AtomicIncrementRequest;
import org.hbase.async.PutRequest;
import org.apache.flume.conf.ComponentConfiguration;
/**
* A serializer for the AsyncHBaseSink, which splits the event body into
* multiple columns and inserts them into a row whose key is available in
* the headers
*
* Originally from https://blogs.apache.org/flume/entry/streaming_data_into_apache_hbase
*
*/
/**
 * A serializer for the AsyncHBaseSink that splits a comma-separated event body
 * into one column per field and writes the fields to the row whose key is
 * carried in the event's {@code rowKey} header.
 *
 * <p>Column qualifiers come from the {@code columns} property (comma-separated),
 * matched to the body fields by position.
 *
 * <p>Originally from https://blogs.apache.org/flume/entry/streaming_data_into_apache_hbase
 */
public class SplittingSerializer implements AsyncHbaseEventSerializer {
    /**
     * Charset for every String/byte[] conversion, so the bytes written do not
     * depend on the JVM's platform default. Fully qualified to avoid a new import.
     */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    private byte[] table;
    private byte[] colFam;
    private Event currentEvent;
    private byte[][] columnNames;
    private final List<PutRequest> puts = new ArrayList<PutRequest>();
    private final List<AtomicIncrementRequest> incs = new ArrayList<AtomicIncrementRequest>();
    private byte[] currentRowKey;
    private final byte[] eventCountCol = "eventCount".getBytes(UTF_8);

    /**
     * Stores the target table and column family used for all generated requests.
     *
     * @param table name of the table to write to
     * @param cf column family to write to
     */
    @Override
    public void initialize(byte[] table, byte[] cf) {
        this.table = table;
        this.colFam = cf;
    }

    /**
     * Sets the event to serialize and extracts its row key from the headers.
     *
     * @param event the event whose body will be split by {@link #getActions()}
     * @throws FlumeException if the event has no {@code rowKey} header
     */
    @Override
    public void setEvent(Event event) {
        // Remember the event and verify that the row key IS present.
        this.currentEvent = event;
        String rowKeyStr = currentEvent.getHeaders().get("rowKey");
        if (rowKeyStr == null) {
            throw new FlumeException("No row key found in headers!");
        }
        currentRowKey = rowKeyStr.getBytes(UTF_8);
    }

    /**
     * Builds one put per comma-separated field of the current event body.
     *
     * <p>{@link String#split(String)} drops trailing empty fields, so no put is
     * generated for them.
     *
     * @return the puts for the current event; the same list instance is reused
     *         across calls
     * @throws FlumeException if the body has more fields than configured columns
     */
    @Override
    public List<PutRequest> getActions() {
        String eventStr = new String(currentEvent.getBody(), UTF_8);
        String[] cols = eventStr.split(",");
        puts.clear();
        // Fail with a descriptive error instead of an ArrayIndexOutOfBoundsException
        // when the event carries more fields than there are configured columns.
        if (cols.length > columnNames.length) {
            throw new FlumeException("Event body has " + cols.length
                    + " fields but only " + columnNames.length
                    + " columns are configured");
        }
        for (int i = 0; i < cols.length; i++) {
            puts.add(new PutRequest(table, currentRowKey, colFam,
                    columnNames[i], cols[i].getBytes(UTF_8)));
        }
        return puts;
    }

    /**
     * Builds a single increment of the {@code eventCount} column in the
     * {@code totalEvents} row, counting events received.
     *
     * @return a one-element list; the same list instance is reused across calls
     */
    @Override
    public List<AtomicIncrementRequest> getIncrements() {
        incs.clear();
        incs.add(new AtomicIncrementRequest(table, "totalEvents".getBytes(UTF_8),
                colFam, eventCountCol));
        return incs;
    }

    /**
     * Drops all references held by this serializer.
     *
     * <p>NOTE(review): {@code columnNames} is only repopulated by
     * {@link #configure(Context)} - confirm the sink reconfigures the
     * serializer before reusing it after a clean-up.
     */
    @Override
    public void cleanUp() {
        table = null;
        colFam = null;
        currentEvent = null;
        columnNames = null;
        currentRowKey = null;
    }

    /**
     * Reads the comma-separated column qualifiers from the {@code columns} property.
     *
     * @param context serializer configuration
     * @throws FlumeException if the {@code columns} property is not set
     */
    @Override
    public void configure(Context context) {
        String cols = context.getString("columns");
        if (cols == null) {
            throw new FlumeException(
                    "Missing required serializer property 'columns'");
        }
        String[] names = cols.split(",");
        byte[][] parsed = new byte[names.length][];
        for (int i = 0; i < names.length; i++) {
            // Trim so "id, nom" does not produce a qualifier with a leading space.
            parsed[i] = names[i].trim().getBytes(UTF_8);
        }
        columnNames = parsed;
    }

    /** No component-level configuration is used. */
    @Override
    public void configure(ComponentConfiguration conf) {
    }
}
package org.apache.flume.sink.hbase;
导入java.util.ArrayList;
导入java.util.List;
导入org.apache.flume.Context;
导入org.apache.flume.Event;
导入org.apache.flume.FlumeException;
导入org.hbase.async.AtomicIncrementRequest;
导入org.hbase.async.PutRequest;
导入org.apache.flume.conf.ComponentConfiguration;
/**
*AsyncHBaseSink的序列化程序,它将事件主体拆分为
*多个列并将它们插入到一行中,该行的键在中可用
*标题
*
*源于https://blogs.apache.org/flume/entry/streaming_data_into_apache_hbase
*
*/
公共类拆分序列化程序实现AsyncHbaseEventSerializer{
专用字节[]表;
专用字节[]colFam;
私人事件;
专用字节[][]列名称;
private final List put=new ArrayList();
私有最终列表incs=new ArrayList();
私有字节[]currentRowKey;
私有最终字节[]eventCountCol=“eventCount”.getBytes();
私用线绳;
@凌驾
公共无效初始化(字节[]表,字节[]cf){
this.table=表格;
this.colFam=cf;
}
@凌驾
公共无效设置事件(事件){
//设置事件并验证行键不存在
this.currentEvent=事件;
字符串rowKeyStr=currentEvent.getHeaders().get(“rowKey”);
if(rowKeyStr==null){
抛出新的FlumeException(“在标题中找不到行键!”);
}
currentRowKey=rowKeyStr.getBytes();
}
@凌驾
公共列表getActions(){
//拆分事件主体并获取列的值
String eventStr=新字符串(currentEvent.getBody());
字符串[]cols=eventStr.split(“,”);
//字符串[]cols=eventStr.split(regEx);
//字符串[]cols=eventStr.split(\\s+);
//字符串[]cols=eventStr.split(\\t”);
//字符串[]cols=eventStr.split(delim);
puts.clear();
/*字符串[]columnFamilyName;
字节[]bCol;
字节[]bFam*/
for(int i=0;i
请告诉我应该把这段自定义代码(编译后的 jar)放在 Flume 的 lib 目录中的什么位置。谢谢。
package org.apache.flume.sink.hbase;
import java.util.ArrayList;
import java.util.List;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;
import org.hbase.async.AtomicIncrementRequest;
import org.hbase.async.PutRequest;
import org.apache.flume.conf.ComponentConfiguration;
/**
* A serializer for the AsyncHBaseSink, which splits the event body into
* multiple columns and inserts them into a row whose key is available in
* the headers
*
* Originally from https://blogs.apache.org/flume/entry/streaming_data_into_apache_hbase
*
*/
/**
 * A serializer for the AsyncHBaseSink that splits a comma-separated event body
 * into one column per field and writes the fields to the row whose key is
 * carried in the event's {@code rowKey} header.
 *
 * <p>Column qualifiers come from the {@code columns} property (comma-separated),
 * matched to the body fields by position.
 *
 * <p>Originally from https://blogs.apache.org/flume/entry/streaming_data_into_apache_hbase
 */
public class SplittingSerializer implements AsyncHbaseEventSerializer {
    /**
     * Charset for every String/byte[] conversion, so the bytes written do not
     * depend on the JVM's platform default. Fully qualified to avoid a new import.
     */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    private byte[] table;
    private byte[] colFam;
    private Event currentEvent;
    private byte[][] columnNames;
    private final List<PutRequest> puts = new ArrayList<PutRequest>();
    private final List<AtomicIncrementRequest> incs = new ArrayList<AtomicIncrementRequest>();
    private byte[] currentRowKey;
    private final byte[] eventCountCol = "eventCount".getBytes(UTF_8);

    /**
     * Stores the target table and column family used for all generated requests.
     *
     * @param table name of the table to write to
     * @param cf column family to write to
     */
    @Override
    public void initialize(byte[] table, byte[] cf) {
        this.table = table;
        this.colFam = cf;
    }

    /**
     * Sets the event to serialize and extracts its row key from the headers.
     *
     * @param event the event whose body will be split by {@link #getActions()}
     * @throws FlumeException if the event has no {@code rowKey} header
     */
    @Override
    public void setEvent(Event event) {
        // Remember the event and verify that the row key IS present.
        this.currentEvent = event;
        String rowKeyStr = currentEvent.getHeaders().get("rowKey");
        if (rowKeyStr == null) {
            throw new FlumeException("No row key found in headers!");
        }
        currentRowKey = rowKeyStr.getBytes(UTF_8);
    }

    /**
     * Builds one put per comma-separated field of the current event body.
     *
     * <p>{@link String#split(String)} drops trailing empty fields, so no put is
     * generated for them.
     *
     * @return the puts for the current event; the same list instance is reused
     *         across calls
     * @throws FlumeException if the body has more fields than configured columns
     */
    @Override
    public List<PutRequest> getActions() {
        String eventStr = new String(currentEvent.getBody(), UTF_8);
        String[] cols = eventStr.split(",");
        puts.clear();
        // Fail with a descriptive error instead of an ArrayIndexOutOfBoundsException
        // when the event carries more fields than there are configured columns.
        if (cols.length > columnNames.length) {
            throw new FlumeException("Event body has " + cols.length
                    + " fields but only " + columnNames.length
                    + " columns are configured");
        }
        for (int i = 0; i < cols.length; i++) {
            puts.add(new PutRequest(table, currentRowKey, colFam,
                    columnNames[i], cols[i].getBytes(UTF_8)));
        }
        return puts;
    }

    /**
     * Builds a single increment of the {@code eventCount} column in the
     * {@code totalEvents} row, counting events received.
     *
     * @return a one-element list; the same list instance is reused across calls
     */
    @Override
    public List<AtomicIncrementRequest> getIncrements() {
        incs.clear();
        incs.add(new AtomicIncrementRequest(table, "totalEvents".getBytes(UTF_8),
                colFam, eventCountCol));
        return incs;
    }

    /**
     * Drops all references held by this serializer.
     *
     * <p>NOTE(review): {@code columnNames} is only repopulated by
     * {@link #configure(Context)} - confirm the sink reconfigures the
     * serializer before reusing it after a clean-up.
     */
    @Override
    public void cleanUp() {
        table = null;
        colFam = null;
        currentEvent = null;
        columnNames = null;
        currentRowKey = null;
    }

    /**
     * Reads the comma-separated column qualifiers from the {@code columns} property.
     *
     * @param context serializer configuration
     * @throws FlumeException if the {@code columns} property is not set
     */
    @Override
    public void configure(Context context) {
        String cols = context.getString("columns");
        if (cols == null) {
            throw new FlumeException(
                    "Missing required serializer property 'columns'");
        }
        String[] names = cols.split(",");
        byte[][] parsed = new byte[names.length][];
        for (int i = 0; i < names.length; i++) {
            // Trim so "id, nom" does not produce a qualifier with a leading space.
            parsed[i] = names[i].trim().getBytes(UTF_8);
        }
        columnNames = parsed;
    }

    /** No component-level configuration is used. */
    @Override
    public void configure(ComponentConfiguration conf) {
    }
}