Java:通过 Flink Collector 收集包含 Map<String, Object> 字段的对象时出现的问题
我遇到了一个问题:通过 Flink flatMap 的 Collector 收集对象后,下游算子拿到的值不正确——Map 中的值只是一个空的对象引用(例如 java.lang.Object@863dce2),而不是实际的值。(标签:java, java-8, apache-flink, flink-streaming)作业代码如下:
// Pipeline: keep only records that carry a "user_id", expand each record into
// aggregate tuples, render every tuple as a JSON string and print it.
dataStream.filter(new FilterFunction<GenericRecord>() {
    @Override
    public boolean filter(GenericRecord record) throws Exception {
        // Records without a user_id are dropped.
        return record.get("user_id") != null;
    }
}).flatMap(new ProfileEventAggregateFlatMapFunction(aggConfig))
        .map(new MapFunction<ProfileEventAggregateEmittedTuple, String>() {
            // Built once per function instance instead of once per record:
            // constructing an ObjectMapper is expensive. Kept transient and
            // created lazily so it is never shipped with the serialized function.
            private transient ObjectMapper mapper;

            /**
             * Serializes the tuple to JSON, reading private fields directly.
             *
             * @param profileEventAggregateEmittedTupleNew tuple produced by the flatMap step
             * @return the JSON representation of the tuple
             * @throws Exception if Jackson cannot serialize the tuple; the failure is
             *         propagated instead of being printed and replaced by a null
             *         element, which would only fail later (and less clearly) downstream
             */
            @Override
            public String map(
                    ProfileEventAggregateEmittedTuple profileEventAggregateEmittedTupleNew)
                    throws Exception {
                if (mapper == null) {
                    mapper = new ObjectMapper();
                    mapper.setVisibility(PropertyAccessor.FIELD, Visibility.ANY);
                }
                return mapper.writeValueAsString(profileEventAggregateEmittedTupleNew);
            }
        }).print();
/**
 * Flat-map function that feeds each incoming {@code GenericRecord} to a
 * {@code ProfileEventAggregateTupleEmitter} and forwards every tuple the
 * emitter returns to the collector.
 *
 * NOTE(review): this excerpt is truncated - the try block below has no
 * catch/finally clause and the closing brace of the class is missing.
 */
public class ProfileEventAggregateFlatMapFunction extends
RichFlatMapFunction<GenericRecord, ProfileEventAggregateEmittedTuple> {
// Aggregation engine, built from the configuration string in the constructor.
private final ProfileEventAggregateTupleEmitter aggregator;
// Mapper obtained from the shared pool; not used anywhere in the visible code.
ObjectMapper mapper = ObjectMapperPool.getInstance().get();
/**
 * Creates the function and its aggregator.
 *
 * @param config configuration passed straight to the aggregator's constructor
 * @throws IOException declared by this constructor; presumably raised while the
 *         aggregator reads the configuration - confirm against the emitter
 */
public ProfileEventAggregateFlatMapFunction(String config) throws IOException {
this.aggregator = new ProfileEventAggregateTupleEmitter(config);
}
/**
 * Runs the aggregates for one event and emits each resulting tuple.
 *
 * @param event     the incoming record
 * @param collector receives every tuple returned by the aggregator
 */
@Override
public void flatMap(GenericRecord event,
Collector<ProfileEventAggregateEmittedTuple> collector) throws Exception {
try {
List<ProfileEventAggregateEmittedTuple> aggregateTuples = aggregator.runAggregates(event);
// Emit the tuples one by one, in the order the aggregator returned them.
for (ProfileEventAggregateEmittedTuple tuple : aggregateTuples) {
collector.collect(tuple);
}
}}
下面的调试输出是我在 MapFunction 的 map() 方法中实际收到的对象。
请帮我弄清楚发生了什么,为什么我没有得到正确的数据。相关的 POJO 类如下:
public class ProfileEventAggregateEmittedTuple implements Cloneable, Serializable {
private String profileType;
private String key;
private String businessType;
private String name;
private List<ProfileEventAggregate> aggregates = new ArrayList<ProfileEventAggregate>();
private long startTime;
private long endTime;
public String getProfileType() {
return profileType;
}
public void setProfileType(String profileType) {
this.profileType = profileType;
}
public String getKey() {
return key;
}
public void setKey(String key) {
this.key = key;
}
public String getBusinessType() {
return businessType;
}
public void setBusinessType(String businessType) {
this.businessType = businessType;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public List<ProfileEventAggregate> getAggregates() {
return aggregates;
}
public void addAggregate(ProfileEventAggregate aggregate) {
this.aggregates.add(aggregate);
}
public void setAggregates(List<ProfileEventAggregate> aggregates) {
this.aggregates = aggregates;
}
public long getStartTime() {
return startTime;
}
public void setStartTime(long startTime) {
this.startTime = startTime;
}
public long getEndTime() {
return endTime;
}
public void setEndTime(long endTime) {
this.endTime = endTime;
}
@Override
public ProfileEventAggregateEmittedTuple clone() {
ProfileEventAggregateEmittedTuple clone = new ProfileEventAggregateEmittedTuple();
clone.setProfileType(this.profileType);
clone.setKey(this.key);
clone.setBusinessType(this.businessType);
clone.setName(this.name);
for (ProfileEventAggregate aggregate : this.aggregates) {
clone.addAggregate(aggregate.clone());
}
return clone;
}
public class ProfileEventAggregate implements Cloneable, Serializable {
private String entityType;
private Map<String, Object> dimension =new LinkedHashMap<String, Object>();
private Map<String, Object> metrics = new LinkedHashMap<String, Object>();
public Map<String, Object> getDimension() {
return dimension;
}
public void setDimension(Map<String, Object> dimension) {
this.dimension.putAll(dimension);
}
public void addDimension(String dimensionKey, Object dimensionValue) {
this.dimension.put(dimensionKey, dimensionValue);
}
public Map<String, Object> getMetrics() {
return metrics;
}
public void addMetric(String metricKey, Object metricValue) {
this.metrics.put(metricKey, metricValue);
}
public void setMetrics(Map<String, Object> metrics) {
this.metrics.putAll(metrics);
}
public String getEntityType() {
return entityType;
}
public void setEntityType(String entityType) {
this.entityType = entityType;
}
@Override
public ProfileEventAggregate clone() {
ProfileEventAggregate clone = new ProfileEventAggregate();
clone.setEntityType(this.entityType);
clone.getDimension().putAll(this.getDimension());
clone.getMetrics().putAll(this.metrics);
return clone;
}
public class profileEventAggregateeMitteTuple实现了可克隆、可序列化的{
私有字符串类型;
私钥;
私有字符串业务类型;
私有字符串名称;
私有列表聚合=新的ArrayList();
私人长启动时间;
私人长时间;
公共字符串getProfileType(){
返回配置文件类型;
}
公共void setProfileType(字符串profileType){
this.profileType=profileType;
}
公共字符串getKey(){
返回键;
}
公共无效设置键(字符串键){
this.key=key;
}
公共字符串getBusinessType(){
返回业务类型;
}
公共业务类型(字符串业务类型){
this.businessType=businessType;
}
公共字符串getName(){
返回名称;
}
公共void集合名(字符串名){
this.name=名称;
}
公共列表getAggregates(){
返回总量;
}
公共void addAggregate(ProfileEventAggregate聚合){
此.聚合.添加(聚合);
}
公共集合(列出集合){
这是。骨料=骨料;
}
公共长getStartTime(){
返回起始时间;
}
公共无效设置开始时间(长开始时间){
this.startTime=startTime;
}
公共长getEndTime(){
返回结束时间;
}
公共无效设置结束时间(长结束时间){
this.endTime=endTime;
}
@凌驾
公共配置文件eventaggregateemittedtuple clone(){
ProfileEventAggregateMittedTuple克隆=新建ProfileEventAggregateMittedTuple();
clone.setProfileType(this.profileType);
clone.setKey(this.key);
clone.setBusinessType(this.businessType);
clone.setName(this.name);
for(ProfileEventAggregate聚合:this.aggregates){
clone.addAggregate(aggregate.clone());
}
返回克隆;
}
公共类ProfileEventAggregate实现了可克隆、可序列化{
私有字符串entityType;
私有映射维度=新建LinkedHashMap();
私有映射度量=新LinkedHashMap();
公共地图getDimension(){
返回维度;
}
公共void setDimension(映射维度){
this.dimension.putAll(dimension);
}
public void addDimension(字符串dimensionKey,对象dimensionValue){
this.dimension.put(dimensionKey,dimensionValue);
}
公共映射getMetrics(){
回报指标;
}
public void addMetric(字符串metricKey,对象metricValue){
this.metrics.put(metricKey,metricValue);
}
公共无效集合度量(映射度量){
this.metrics.putAll(metrics);
}
公共字符串getEntityType(){
返回实体类型;
}
公共void setEntityType(字符串entityType){
this.entityType=entityType;
}
@凌驾
公共配置文件EventAggregate克隆(){
ProfileEventAggregate克隆=新建ProfileEventAggregate();
clone.setEntityType(this.entityType);
clone.getDimension().putAll(this.getDimension());
clone.getMetrics().putAll(this.metrics);
返回克隆;
}
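如果没有启用对象重用(object reuse),对象会被配置的序列化器(看起来是 Avro?)复制。
在您的例子中,您使用了 `Map<String, Object>`,而对这种类型无法推断出合理的模式(schema)。
最简单的修复方法是启用 `enableObjectReuse`。
否则,请确保序列化器与数据匹配。为此,您可以添加一个使用 `AvroSerializer#copy` 的单元测试;
如果想继续使用 Avro reflect,请确保您的 POJO 已正确注解;更好的做法是采用模式优先(schema-first)的方式,即由 Avro 模式生成 Java POJO,并使用 Avro specific。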
让我们讨论一些备选方案:
- 使用 `GenericRecord`。直接访问 `GenericRecord`,而不是将其转换为 Java 类型。当整条记录的结构不固定时(例如,作业接受任意输入并将其写入 S3),这通常是唯一的办法。
- 反规范化模式。不再使用 `class Event { int id; Map<String, Object> data; }`,而是使用类似 `class EventInformation { int id; String predicate; Object value; }` 的结构。处理时需要把所有信息分组到一起。不过,在 Avro 中仍会遇到同样的类型问题。
- 使用宽模式(wide schema)。沿着上一种思路,如果事先知道所有可能的谓词,就可以据此构造一个宽模式 `class Event { int id; Long predicate1; Integer predicate2; … String predicateN; }`,其中所有字段都可以为空,而且大多数字段实际上都是 `null`。对 `null` 进行编码的开销非常小。
- 放弃 Avro。Avro 是完全类型化的,您可能需要更具动态性的方案。Protobuf 提供了 `Any` 来支持任意子消息。
- 使用 Kryo。Kryo 可以序列化任意对象树,但代价是速度较慢且开销较大。
如果您还想把数据写出去,还需要考虑一种在数据中加入类型信息的方案,以便能够正确地反序列化。可以参考相关的示例解决方案;当然,实现方式不止一种。
评论:
- 谢谢。我已经添加了 POJO 类,请告诉我其中有什么问题。
- 正如我担心的那样:您使用了 `Map<String, Object>`,而对它无法推断出合理的模式。对于 metrics,您可能应该使用例如 `Map<String, Double>` 或 `Map<String, Long>`;如果 `dimension` 需要保存任意值,您可能不得不退而使用例如 `Map<String, String>`。请注意,总体而言,这类模式在序列化成本和计算方面效率都很低;通常,使用带可空类型的宽模式要好得多。
- 我希望把它做成通用的。我没有为每个事件使用特定的类,因为事件种类很多,每出现一种新事件我都必须修改代码。我有……
profileEventAggregateEmittedTuple = {ProfileEventAggregateEmittedTuple@7935}
profileType = "userprofile"
key = "1152473"
businessType = "keyless"
name = "consumer"
aggregates = {GenericData$Array@7948} size = 1
0 = {ProfileEventAggregate@7950} "geo_id {geo_id=java.lang.Object@863dce2} {keyless_select_destination_cnt=java.lang.Object@7cdb4bfc, total_estimated_distance=java.lang.Object@52e81f57}"
entityType = "geo_id"
dimension = {HashMap@7952} size = 1
"geo_id" -> {Object@7957}
key = "geo_id"
value = {Object@7957}
Class has no fields
metrics = {HashMap@7953} size = 2
"keyless_select_destination_cnt" -> {Object@7962}
key = "keyless_select_destination_cnt"
value = {Object@7962}
Class has no fields
"total_estimated_distance" -> {Object@7963}
public class ProfileEventAggregateEmittedTuple implements Cloneable, Serializable {
private String profileType;
private String key;
private String businessType;
private String name;
private List<ProfileEventAggregate> aggregates = new ArrayList<ProfileEventAggregate>();
private long startTime;
private long endTime;
public String getProfileType() {
return profileType;
}
public void setProfileType(String profileType) {
this.profileType = profileType;
}
public String getKey() {
return key;
}
public void setKey(String key) {
this.key = key;
}
public String getBusinessType() {
return businessType;
}
public void setBusinessType(String businessType) {
this.businessType = businessType;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public List<ProfileEventAggregate> getAggregates() {
return aggregates;
}
public void addAggregate(ProfileEventAggregate aggregate) {
this.aggregates.add(aggregate);
}
public void setAggregates(List<ProfileEventAggregate> aggregates) {
this.aggregates = aggregates;
}
public long getStartTime() {
return startTime;
}
public void setStartTime(long startTime) {
this.startTime = startTime;
}
public long getEndTime() {
return endTime;
}
public void setEndTime(long endTime) {
this.endTime = endTime;
}
@Override
public ProfileEventAggregateEmittedTuple clone() {
ProfileEventAggregateEmittedTuple clone = new ProfileEventAggregateEmittedTuple();
clone.setProfileType(this.profileType);
clone.setKey(this.key);
clone.setBusinessType(this.businessType);
clone.setName(this.name);
for (ProfileEventAggregate aggregate : this.aggregates) {
clone.addAggregate(aggregate.clone());
}
return clone;
}
public class ProfileEventAggregate implements Cloneable, Serializable {
private String entityType;
private Map<String, Object> dimension =new LinkedHashMap<String, Object>();
private Map<String, Object> metrics = new LinkedHashMap<String, Object>();
public Map<String, Object> getDimension() {
return dimension;
}
public void setDimension(Map<String, Object> dimension) {
this.dimension.putAll(dimension);
}
public void addDimension(String dimensionKey, Object dimensionValue) {
this.dimension.put(dimensionKey, dimensionValue);
}
public Map<String, Object> getMetrics() {
return metrics;
}
public void addMetric(String metricKey, Object metricValue) {
this.metrics.put(metricKey, metricValue);
}
public void setMetrics(Map<String, Object> metrics) {
this.metrics.putAll(metrics);
}
public String getEntityType() {
return entityType;
}
public void setEntityType(String entityType) {
this.entityType = entityType;
}
@Override
public ProfileEventAggregate clone() {
ProfileEventAggregate clone = new ProfileEventAggregate();
clone.setEntityType(this.entityType);
clone.getDimension().putAll(this.getDimension());
clone.getMetrics().putAll(this.metrics);
return clone;
}