Java 从POJO到Avro记录的通用转换

Java 从POJO到Avro记录的通用转换,java,avro,Java,Avro,我正在寻找一种以通用方式将POJO转换为avro对象的方法。该实现对于POJO类的任何更改都应该是健壮的。我已经实现了这一点,但明确地填写了avro记录(参见下面的示例) 有没有办法摆脱硬编码字段名,只需从对象中填充avro记录?反射是唯一的方法,还是avro提供了这种开箱即用的功能 import java.util.Date; import java.util.HashMap; import java.util.Map; import org.apache.avro.Schema; impo

我正在寻找一种以通用方式将POJO转换为avro对象的方法。该实现对于POJO类的任何更改都应该是健壮的。我已经实现了这一点,但明确地填写了avro记录(参见下面的示例)

有没有办法摆脱硬编码字段名,只需从对象中填充avro记录?反射是唯一的方法,还是avro提供了这种开箱即用的功能

import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.reflect.ReflectData;

/**
 * Example that reflects an Avro schema from a POJO class and then fills a
 * generic record by naming every field explicitly.
 */
public class PojoToAvroExample {

    /** Parent type holding two map-valued public fields. */
    static class PojoParent {
        public final Map<String, String> aMap = new HashMap<>();
        public final Map<String, Integer> anotherMap = new HashMap<>();
    }

    /** Sample POJO adding an id and a timestamp to the parent's fields. */
    static class Pojo extends PojoParent {
        public String uid;
        public Date eventTime;
    }

    /**
     * Builds a {@link Pojo} populated with fixed sample values and the current time.
     *
     * @return the populated sample instance
     */
    static Pojo createPojo() {
        final Pojo sample = new Pojo();
        sample.uid = "123";
        sample.eventTime = new Date();
        sample.aMap.put("key", "val");
        sample.anotherMap.put("key", 42);
        return sample;
    }

    /**
     * Copies the four known fields from {@code source} into {@code target}.
     *
     * <p>TODO: to be replaced by generic variant:
     * something like avroRecord.importValuesFrom(foo);
     */
    private static void copyFields(Pojo source, Record target) {
        target.put("uid", source.uid);
        target.put("eventTime", source.eventTime);
        target.put("aMap", source.aMap);
        target.put("anotherMap", source.anotherMap);
    }

    /**
     * Prints the reflected schema, the empty record, and the filled record.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Derive the Avro schema for Pojo via reflection.
        final Schema pojoSchema = ReflectData.get().getSchema(Pojo.class);
        System.out.println("extracted avro schema: " + pojoSchema);

        // Create a record bound to that schema; no values are set yet.
        final Record filled = new Record(pojoSchema);
        System.out.println("corresponding empty avro record: " + filled);

        copyFields(createPojo(), filled);
        System.out.println("expected avro record: " + filled);
    }
}
import java.util.Date;
导入java.util.HashMap;
导入java.util.Map;
导入org.apache.avro.Schema;
导入org.apache.avro.generic.GenericData.Record;
导入org.apache.avro.reflect.ReflectData;
公共类Pojotoavroe示例{
静态类PojoParent{
公共最终映射aMap=newhashmap();
public final Map anotherMap=new HashMap();
}
静态类Pojo扩展了PojoParent{
公共字符串uid;
公共日期事件时间;
}
静态Pojo createPojo(){
Pojo foo=新的Pojo();
foo.uid=“123”;
foo.eventTime=新日期();
foo.aMap.put(“key”、“val”);
foo.anotherMap.put(“键”,42);
返回foo;
}
公共静态void main(字符串[]args){
//提取与Pojo类对应的avro模式
Schema Schema=ReflectData.get().getSchema(Pojo.class);
System.out.println(“提取的avro模式:“+schema”);
//创建与模式对应的avro记录
记录avroRecord=新记录(模式);
System.out.println(“对应的空avro记录:+avroRecord”);
Pojo foo=createPojo();
//TODO:将由通用变量替换:
//类似avroRecord.importValuesFrom(foo);
avroRecord.put(“uid”,foo.uid);
avroRecord.put(“eventTime”,foo.eventTime);
avroRecord.put(“aMap”,foo.aMap);
avroRecord.put(“anotherMap”,foo.anotherMap);
System.out.println(“预期avro记录:+avroRecord”);
}
}

我自己也需要这样的东西。您需要的库位于 Avro 的 jar 文件中,但奇怪的是,似乎没有办法从 avro-tools 命令行调用它。

调用方式为:java GenerateSchemaFromPOJO com.example.pojo.Person output-schema-file.json(第一个参数是类的全限定名,第二个参数是输出的模式文件)

import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;

import org.apache.avro.Schema;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.avro.AvroFactory;
import com.fasterxml.jackson.dataformat.avro.AvroSchema;
import com.fasterxml.jackson.dataformat.avro.schema.AvroSchemaGenerator;
import com.fasterxml.jackson.dataformat.avro.schema.VisitorFormatWrapperImpl;

/**
 * Command-line tool that generates an Avro schema for a class through Jackson's
 * Avro schema generator and writes it, pretty-printed, to a file.
 *
 * <p>Usage: {@code java GenerateSchemaFromPOJO classname output-schema-file.json}
 */
public class GenerateSchemaFromPOJO {

    /**
     * Entry point. Exits with status 1 on wrong argument count or on any failure,
     * including a failure to flush/close the output file.
     *
     * @param args {@code args[0]} is the class name passed to {@link Class#forName(String)};
     *             {@code args[1]} is the path of the schema file to write
     */
    public static void main(String[] args) {
        // Validate arguments before doing any work, so no resource handling is involved.
        if (args.length != 2) {
            System.out.println("Usage: java " + GenerateSchemaFromPOJO.class.getCanonicalName() + " classname output-schema-file.json");
            System.exit(1);
            return;
        }
        final String className = args[0];
        final String outputFile = args[1];

        try {
            Class<?> clazz = Class.forName(className);

            AvroFactory avroFactory = new AvroFactory();
            ObjectMapper mapper = new ObjectMapper(avroFactory);

            AvroSchemaGenerator gen = new AvroSchemaGenerator();
            mapper.acceptJsonFormatVisitor(clazz, gen);
            AvroSchema schemaWrapper = gen.getGeneratedSchema();

            Schema avroSchema = schemaWrapper.getAvroSchema();
            String asJson = avroSchema.toString(true);

            // try-with-resources closes the writer before the catch block runs, so a
            // failed flush/close is reported as an error instead of being logged and
            // followed by a successful exit status.
            try (Writer outputWriter = new FileWriter(outputFile)) {
                outputWriter.write(asJson);
            }
        } catch (Exception ex) {
            // Top-level boundary of a CLI tool: report everything and signal failure.
            System.err.println("caught " + ex);
            ex.printStackTrace();
            System.exit(1);
        }
    }
}
导入java.io.FileWriter;
导入java.io.IOException;
导入java.io.Writer;
导入org.apache.avro.Schema;
导入com.fasterxml.jackson.databind.ObjectMapper;
导入com.fasterxml.jackson.dataformat.avro.AvroFactory;
导入com.fasterxml.jackson.dataformat.avro.AvroSchema;
导入com.fasterxml.jackson.dataformat.avro.schema.AvroSchemaGenerator;
导入com.fasterxml.jackson.dataformat.avro.schema.VisitorFormatWrapperImpl;
公共类GenerateSchemaFromPOJO{
公共静态void main(字符串[]args){
字符串className=null;
字符串outputFile=null;
Writer outputWriter=null;
试一试{
如果(参数长度!=2){
System.out.println(“用法:java”+GenerateSchemaFromPOJO.class.getCanonicalName()+“classname输出模式文件.json”);
系统出口(1);
}
className=args[0];
outputFile=args[1];
Class clazz=Class.forName(className);
AvroFactory AvroFactory=新AvroFactory();
ObjectMapper mapper=新的ObjectMapper(avroFactory);
AvroSchemaGenerator gen=新的AvroSchemaGenerator();
mapper.acceptJsonFormatVisitor(clazz,gen);
AvroSchema schemawraper=gen.getGeneratedSchema();
Schema avroSchema=schemawraper.getAvroSchema();
字符串asJson=avroSchema.toString(true);
outputWriter=新文件编写器(outputFile);
write(asJson);
}捕获(例外情况除外){
系统错误打印项次(“捕获”+ex);
例如printStackTrace();
系统出口(1);
}最后{
如果(outputWriter!=null){
试一试{
outputWriter.close();
}捕获(IOE异常){
System.err.println(“在尝试将outputWriter关闭到“+outputFile”时捕获“+e+”);;
e、 printStackTrace();
}
}
}
}
}

以下是转换的一般方法

/**
 * Serializes {@code v} to Avro binary, using the schema that
 * {@code ReflectData} derives from {@code cls}.
 *
 * @param v   the value to encode
 * @param cls the class from which the schema is reflected
 * @param <V> the value type
 * @return the encoded bytes
 * @throws RuntimeException wrapping any exception raised while writing or flushing
 */
public static <V> byte[] toBytesGeneric(final V v, final Class<V> cls) {
    final Schema reflected = ReflectData.get().getSchema(cls);
    final DatumWriter<V> datumWriter = new ReflectDatumWriter<>(reflected);

    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(sink, null);

    try {
        datumWriter.write(v, encoder);
        // The bytes are read from the sink only after the encoder has been flushed.
        encoder.flush();
    } catch (final Exception cause) {
        throw new RuntimeException(cause);
    }

    return sink.toByteArray();
}

/**
 * Example call: encodes a freshly constructed {@code PojoClass}; the resulting
 * byte array is discarded.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final PojoClass sample = new PojoClass();
    toBytesGeneric(sample, PojoClass.class);
}
publicstaticbyte[]toBytesGeneric(final V V,final Class cls){
final ByteArrayOutputStream bout=新ByteArrayOutputStream();
最终模式Schema=ReflectData.get().getSchema(cls);
最终DatumWriter=新的反射DatumWriter(模式);
final BinaryEncoder binEncoder=EncoderFactory.get().BinaryEncoder(大约,空);
试一试{
writer.write(v,binEncoder);
binEncoder.flush();
}捕获(最终异常e){
抛出新的运行时异常(e);
}
返回bout.toByteArray();
}
公共静态void main(字符串[]args){
PojoClass pojoObject=新PojoClass();
toBytesGeneric(pojoObject,PojoClass.class);
}
您正在使用Spring吗

我利用 Spring 的功能为此构建了一个映射器。不过,也可以使用原生的反射工具(reflection utils)构建这样的映射器:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.reflect.ReflectData;
import org.springframework.beans.PropertyAccessorFactory;
import org.springframework.util.Assert;

public class GenericRecordMapper {

    public static GenericData.Record mapObjectToRecord(Object object) {
        Assert.notNull(object, "object must not be null");
        final Schema schema = ReflectData.get().getSchema(object.getClass());
        final GenericData.Record record = new GenericData.Record(schema);
        schema.getFields().forEach(r -> record.put(r.name(), PropertyAccessorFactory.forDirectFieldAccess(object).getPropertyValue(r.name())));
        return record;
    }

    public static <T> T mapRecordToObject(GenericData.Record record, T object) {
        Assert.notNull(record, "record must not be null");
        Assert.notNull(object, "object must not be null");
        final Schema schema = ReflectData.get().getSchema(object.getClass());
        Assert.isTrue(schema.getFields().equals(record.getSchema().getFields()), "Schema fields didn't match");
        record.getSchema().getFields().forEach(d -> PropertyAccessorFactory.forDirectFieldAccess(object).setPropertyValue(d.name(), record.get(d.name()) == null ? record.get(d.name()) : record.get(d.name()).toString()));
        return object;
    }

}
反序列化

// Deserialize the payload and treat the result as a generic Avro record.
// NOTE(review): avroDeserializer and serialized are not defined in this snippet;
// presumably a Kafka-style Avro deserializer and its byte[] input. Confirm.
GenericData.Record deserialized = (GenericData.Record) avroDeserializer.deserialize("topic", serialized);

// Copy the record's field values into a freshly constructed target object.
YourPojo yourPojo = GenericRecordMapper.mapRecordToObject(deserialized, new YourPojo());
使用 Jackson 的 Avro 数据格式模块(jackson-dataformat-avro),将 pojo 转换为 byte[] 非常容易,用法类似于 jackson/json:

// Serialize pojo to bytes with a writer bound to the given schema.
// NOTE(review): avroMapper, schema and pojo are not defined in this snippet;
// avroMapper is presumably a Jackson AvroMapper. Confirm.
byte[] avroData = avroMapper.writer(schema).writeValueAsBytes(pojo);
P.S.

jackson不仅处理JSON,还处理XML/Avro/Protobuf/YAML等,具有非常相似的类和API。

作为对我在 @TranceMaster 回答下的评论的补充,下面的修改版本适用于基本类型和 Java 的 Set 集合:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.reflect.ReflectData;
import org.springframework.beans.PropertyAccessorFactory;
import org.springframework.util.Assert;

public class GenericRecordMapper {

    public static GenericData.Record mapObjectToRecord(Object object) {
        Assert.notNull(object, "object must not be null");
        final Schema schema = ReflectData.get().getSchema(object.getClass());
        System.out.println(schema);
        final GenericData.Record record = new GenericData.Record(schema);
        schema.getFields().forEach(r -> record.put(r.name(), PropertyAccessorFactory.forDirectFieldAccess(object).getPropertyValue(r.name())));
        return record;
    }

    public static <T> T mapRecordToObject(GenericData.Record record, T object) {
        Assert.notNull(record, "record must not be null");
        Assert.notNull(object, "object must not be null");

        final Schema schema = ReflectData.get().getSchema(object.getClass());
        Assert.isTrue(schema.getFields().equals(record.getSchema().getFields()), "Schema fields didn't match");

        record
                .getSchema()
                .getFields()
                .forEach(field ->
                    PropertyAccessorFactory
                            .forDirectFieldAccess(object)
                            .setPropertyValue(field.name(), record.get(field.name()))
                );
        return object;
    }
}
import org.apache.avro.Schema;
导入org.apache.avro.generic.GenericData;
导入org.apache.avro.reflect.ReflectData;
导入org.springframework.beans.PropertyAccessorFactory;
导入org.springframework.util.Assert;
公共类GenericRecordMapper{
公共静态GenericData.Record mapObjectToRecord(对象){
Assert.notNull(对象,“对象不能为null”);
最终模式=Re
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.reflect.ReflectData;
import org.springframework.beans.PropertyAccessorFactory;
import org.springframework.util.Assert;

public class GenericRecordMapper {

    public static GenericData.Record mapObjectToRecord(Object object) {
        Assert.notNull(object, "object must not be null");
        final Schema schema = ReflectData.get().getSchema(object.getClass());
        System.out.println(schema);
        final GenericData.Record record = new GenericData.Record(schema);
        schema.getFields().forEach(r -> record.put(r.name(), PropertyAccessorFactory.forDirectFieldAccess(object).getPropertyValue(r.name())));
        return record;
    }

    public static <T> T mapRecordToObject(GenericData.Record record, T object) {
        Assert.notNull(record, "record must not be null");
        Assert.notNull(object, "object must not be null");

        final Schema schema = ReflectData.get().getSchema(object.getClass());
        Assert.isTrue(schema.getFields().equals(record.getSchema().getFields()), "Schema fields didn't match");

        record
                .getSchema()
                .getFields()
                .forEach(field ->
                    PropertyAccessorFactory
                            .forDirectFieldAccess(object)
                            .setPropertyValue(field.name(), record.get(field.name()))
                );
        return object;
    }
}