Hadoop 加缪与卡夫卡合作的例子
我的用例是我想将Avro数据从Kafka推送到HDFS。Camus似乎是正确的工具,但我无法使它正常工作。我是Camus的新手,正在尝试让Camus自带的示例运行起来,然而仍然遇到一些问题。下面是DummyLogKafkaProducerClient的代码段:
package com.linkedin.camus.example.schemaregistry;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import com.linkedin.camus.etl.kafka.coders.KafkaAvroMessageEncoder;
import com.linkedin.camus.example.records.DummyLog;
public class DummyLogKafkaProducerClient {

    /**
     * Sends 500 Avro-encoded {@code DummyLog} records to the "DUMMY_LOG" Kafka topic
     * using Camus's KafkaAvroMessageEncoder.
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("metadata.broker.list", "localhost:6667");
        // props.put("serializer.class", "kafka.serializer.StringEncoder");
        // props.put("partitioner.class", "example.producer.SimplePartitioner");
        // props.put("request.required.acks", "1");
        ProducerConfig config = new ProducerConfig(props);
        Producer<String, byte[]> producer = new Producer<String, byte[]>(config);
        try {
            KafkaAvroMessageEncoder encoder = get_DUMMY_LOG_Encoder();
            for (int i = 0; i < 500; i++) {
                KeyedMessage<String, byte[]> data =
                        new KeyedMessage<String, byte[]>("DUMMY_LOG", encoder.toBytes(getDummyLog()));
                producer.send(data);
            }
        } finally {
            // FIX: the producer was never closed, leaking its network connections.
            producer.close();
        }
    }

    /**
     * Builds a fully-populated {@code DummyLog} record with a random id,
     * the current wall-clock time, and a small sample map.
     *
     * @return a complete DummyLog record safe to pass to {@code build()}
     */
    public static DummyLog getDummyLog() {
        Random random = new Random();
        Map<CharSequence, CharSequence> muchoStuff = new HashMap<CharSequence, CharSequence>();
        muchoStuff.put("macho1", "abcd");
        muchoStuff.put("macho2", "xyz");
        // BUG FIX: the original called DummyLog.newBuilder().build() FIRST and set the
        // fields afterwards. build() validates required fields, so with a schema whose
        // fields have no defaults it throws the exact AvroRuntimeException seen in the
        // stack trace ("Field id type:LONG ... not set and has no default value").
        // Set every field on the builder, then build.
        return DummyLog.newBuilder()
                .setId(random.nextLong())
                .setLogTime(new Date().getTime())
                .setMuchoStuff(muchoStuff)
                .build();
    }

    /**
     * Creates and initializes a KafkaAvroMessageEncoder for the "DUMMY_LOG" topic,
     * wired to the example DummySchemaRegistry.
     *
     * @return an initialized encoder whose {@code toBytes} accepts DummyLog records
     */
    public static KafkaAvroMessageEncoder get_DUMMY_LOG_Encoder() {
        KafkaAvroMessageEncoder encoder = new KafkaAvroMessageEncoder("DUMMY_LOG", null);
        Properties props = new Properties();
        props.put(KafkaAvroMessageEncoder.KAFKA_MESSAGE_CODER_SCHEMA_REGISTRY_CLASS,
                "com.linkedin.camus.example.schemaregistry.DummySchemaRegistry");
        encoder.init(props, "DUMMY_LOG");
        return encoder;
    }
}
package com.linkedin.camus.example.schemaregistry;
导入java.util.Date;
导入java.util.HashMap;
导入java.util.Map;
导入java.util.Properties;
导入java.util.Random;
导入kafka.javaapi.producer.producer;
导入kafka.producer.KeyedMessage;
导入kafka.producer.ProducerConfig;
导入com.linkedin.camus.etl.kafka.coders.kafkaavromessagencoder;
导入com.linkedin.camus.example.records.DummyLog;
公共类DummyLogKafkaProducerClient{
公共静态void main(字符串[]args){
Properties props=新属性();
put(“metadata.broker.list”,“localhost:6667”);
//put(“serializer.class”、“kafka.serializer.StringEncoder”);
//props.put(“partitioner.class”、“example.producer.SimplePartitioner”);
//道具放置(“请求.要求.确认”,“1”);
ProducerConfig config=新的ProducerConfig(道具);
生产者生产者=新生产者(配置);
KafkaAvroMessageEncoder=get_DUMMY_LOG_encoder();
对于(int i=0;i<500;i++){
KeyedMessage data=新的KeyedMessage(“DUMMY_LOG”,encoder.toBytes(getDummyLog());
生产者。发送(数据);
}
}
公共静态DummyLog getDummyLog(){
随机=新随机();
DummyLog DummyLog=DummyLog.newBuilder().build();
setId(random.nextLong());
setLogTime(newdate().getTime());
Map machoStuff=新HashMap();
machoStuff.put(“macho1”、“abcd”);
machoStuff.put(“macho2”,“xyz”);
dummyLog.setMuchoStuff(machoStuff);
返回dummyLog;
}
公共静态Kafkaavromessagencoder get_DUMMY_LOG_Encoder(){
KafkaAvroMessageEncoder编码器=新的KafkaAvroMessageEncoder(“伪日志”,null);
Properties props=新属性();
put(KafkaAvroMessageEncoder.KAFKA_MESSAGE_CODER_SCHEMA_REGISTRY_CLASS,“com.linkedin.camus.example.schemaregistry.DummySchemaRegistry”);
encoder.init(道具,“虚拟日志”);
返回编码器;
}
}
我还添加了DummySchemareRegistry的默认无参数构造函数,因为它给出了实例化异常
package com.linkedin.camus.example.schemaregistry;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import com.linkedin.camus.example.records.DummyLog;
import com.linkedin.camus.example.records.DummyLog2;
import com.linkedin.camus.schemaregistry.MemorySchemaRegistry;
/**
 * This is a little dummy registry that just uses a memory-backed schema registry to store two dummy
 * Avro schemas. You can use this with camus.properties.
 */
public class DummySchemaRegistry extends MemorySchemaRegistry<Schema> {
    /**
     * Constructor invoked when the registry is created with a Hadoop Configuration.
     * The configuration is not used here, so delegate to the no-arg constructor
     * instead of duplicating the registration logic.
     *
     * @param conf Hadoop configuration (ignored)
     */
    public DummySchemaRegistry(Configuration conf) {
        this();
    }

    /**
     * No-arg constructor, required because Camus instantiates the registry class
     * reflectively via {@code Class.newInstance()} (see the InstantiationException
     * mentioned in the question).
     */
    public DummySchemaRegistry() {
        super();
        // BUG FIX: DummyLog.newBuilder().build().getSchema() throws
        // AvroRuntimeException ("Field id ... not set and has no default value")
        // when the schema declares required fields without defaults — that is the
        // failure shown at DummySchemaRegistry.<init> in the stack trace. Avro's
        // generated classes expose their schema statically as SCHEMA$, so no
        // record instance needs to be built just to register the schema.
        super.register("DUMMY_LOG", DummyLog.SCHEMA$);
        super.register("DUMMY_LOG_2", DummyLog2.SCHEMA$);
    }
}
package com.linkedin.camus.example.schemaregistry;
导入org.apache.avro.Schema;
导入org.apache.hadoop.conf.Configuration;
导入com.linkedin.camus.example.records.DummyLog;
导入com.linkedin.camus.example.records.DummyLog2;
导入com.linkedin.camus.schemaregistry.MemorySchemaRegistry;
/**
*这是一个小型虚拟注册表,它只使用一个内存支持的模式注册表来存储两个虚拟Avro模式。你
*可以将其与camus.properties一起使用
*/
公共类DummySchemaRegistry扩展了MemorySchemaRegistry{
公共DummySchemareRegistry(配置配置){
超级();
super.register(“DUMMY_LOG”,DummyLog.newBuilder().build().getSchema());
super.register(“DUMMY_LOG_2”,DummyLog2.newBuilder().build())
.getSchema());
}
公共部门架构登记(){
超级();
super.register(“DUMMY_LOG”,DummyLog.newBuilder().build().getSchema());
super.register(“DUMMY_LOG_2”,DummyLog2.newBuilder().build().getSchema());
}
}
下面是运行程序后得到的异常跟踪
线程“main”中出现异常
com.linkedin.camus.coders.MessageEncoderException:
org.apache.avro.AvroRuntimeException:
org.apache.avro.AvroRuntimeException:字段id类型:长位置:0未设置
并且在上没有默认值
com.linkedin.camus.etl.kafka.coders.kafkavromessageencoder.init(kafkavromessageencoder.java:55)
在
com.linkedin.camus.example.schemaregistry.DummyLogKafkaProducerClient.get_DUMMY_LOG_Encoder(DummyLogKafkaProducerClient.java:57)
在
com.linkedin.camus.example.schemaregistry.DummyLogKafkaProducerClient.main(DummyLogKafkaProducerClient.java:32)
原因:org.apache.avro.AvroRuntimeException:
org.apache.avro.AvroRuntimeException:字段id类型:长位置:0未设置
并且在上没有默认值
com.linkedin.camus.example.records.DummyLog$Builder.build(DummyLog.java:214)
在
com.linkedin.camus.example.schemaregistry.DummySchemaRegistry.(DummySchemaRegistry.java:16)
在sun.reflect.nativeConstructor附件mpl.newInstance0(本机
方法)在
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
在
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
位于java.lang.reflect.Constructor.newInstance(Constructor.java:408)
位于java.lang.Class.newInstance(Class.java:438)
com.linkedin.camus.etl.kafka.coders.KafkaAvroMessageEncoder.init(KafkaAvroMessageEncoder.java:52)
... 2更多原因:org.apache.avro.AvroRuntimeException:字段id
类型:长位置:0未设置,并且没有默认值
org.apache.avro.data.RecordBuilderBase.defaultValue(RecordBuilderBase.java:151)
在
com.linkedin.camus.example.records.DummyLog$Builder.build(DummyLog.java:209)
... 9更多
我想Camus希望Avro模式的字段具有默认值。注意异常信息说"字段id 类型:长(long)",所以字段类型应该是"long"而不是"int"。我已将dummyLog.avsc更改为如下内容并重新编译: { "namespace": "com.linkedin.camus.example.records", "type": "record", "name": "DummyLog", "doc": "Logs for not so important stuff.", "fields": [ { "name": "id", "type": "long", "default": 0 }, { "name": "logTime", "type": "long", "default": 0 } ] } 让我知道它是否适合你。谢谢,
Ambarish您可以按如下方式默认任何字符串或长字段
{"type":"record","name":"CounterData","namespace":"org.avro.usage.tutorial","fields":[{"name":"word","type":["string","null"]},{"name":"count","type":["long","null"]}]}
Camus在使用下面这种注册方式时无法正常工作(build()会因必填字段缺少默认值而抛出异常):
super.register("DUMMY_LOG_2", LogEvent.newBuilder().build().getSchema());
改用生成类的静态模式字段SCHEMA$后即可正常工作:
super.register("logEventAvro", LogEvent.SCHEMA$);