Google bigquery 用不同的键名连接两个表
Google bigquery 用不同的键名连接两个表(标签:google-bigquery、google-cloud-dataflow、apache-beam)。尝试实现以下场景:(1)用相同的键连接两个表(A、B);(2)过滤表(C);(3)将步骤 1 的结果与步骤 2 的结果合并。这里两边的键名不同,但值相同(例如:第一个表的列名为“id”,第二个表的列名为“Fid”,但两列的值相同)。使用 Cloud Dataflow 执行代码时出现以下错误:严重:2018-12-03T13:52:47.634Z:java.lang.IllegalStateException:应为唯一键,但发现键 127348#null,带有值 {HEADER_ID=18219955,ORDER_TYPE_ID=2124,ORDE…(错误信息在此处被截断)
// Keys a row by its PARTY_ID column rendered as text. A row that has no PARTY_ID value
// receives the literal key "null", because String.format("%s", null) prints "null".
WithKeys<String, TableRow> headerKey = WithKeys
    .of((TableRow row) -> String.format("%s", row.get("PARTY_ID")))
    .withKeyType(TypeDescriptors.strings());

// Each keying step carries its own name. Re-using one name ("WithKeys") for several
// top-level steps makes Beam report non-stable, non-unique transform names.
PCollection<KV<String,TableRow>> mainInput = p
    .apply("ReadCustomerAccount", BigQueryIO.readTableRows().from(options.getCustAccount()))
    .apply("KeyCustomerAccountByPartyId", headerKey);
PCollection<KV<String,TableRow>> sideInput = p
    .apply("ReadCustomerParty", BigQueryIO.readTableRows().from(options.getPartyTable()))
    .apply("KeyCustomerPartyByPartyId", headerKey);

// Join the two keyed tables; the join itself is implemented in CommonFunctions.
PCollection<TableRow> result = CommonFunctions.innerJoinBQTbls("InnerJoin", mainInput, sideInput);
// Stage 1 projection: flatten every row produced by the first join into one customer record.
@SuppressWarnings("serial")
PCollection<TableRow> finalResultCollection = result.apply("Process", ParDo.of(new DoFn<TableRow, TableRow>()
{
    @ProcessElement
    public void processElement(ProcessContext c)
    {
        TableRow joined = c.element();
        TableRow account = (TableRow) joined.get("main");
        TableRow party = (TableRow) joined.get("side");
        TableRow targetRow = new TableRow();
        targetRow.set("partyID", Integer.valueOf(joined.get("key").toString()));
        targetRow.set("accountNumber", account.get("ACCOUNT_NUMBER"));
        targetRow.set("customerName", party.get("PARTY_NAME"));
        targetRow.set("updatedDate", joined.get("updatedDate"));
        // Carried along so the order join below has a column to match SOLD_TO_ORG_ID against.
        // NOTE(review): assumes the customer-account table exposes CUST_ACCOUNT_ID - confirm.
        targetRow.set("custAccountId", account.get("CUST_ACCOUNT_ID"));
        c.output(targetRow);
    }
}));

// Stage 2: read the order headers and keep only those shipped from organisation 934.
PCollection<TableRow> headerData = p.apply("ReadInvoice", BigQueryIO.readTableRows().from(options.getOrderHeaderAll()));
PCollection<TableRow> pc934Collection = headerData.apply(Filter.by(
    (TableRow t) -> {
        // A row without SHIP_FROM_ORG_ID is rejected instead of failing with a NullPointerException.
        Object shipFromOrg = t.get("SHIP_FROM_ORG_ID");
        return shipFromOrg != null && "934".equals(shipFromOrg.toString());
    }
));

// Stage 3: the two sides store the same value under different column names, so each side
// needs its own key function. A single "%s#%s" key over both columns cannot work: the order
// rows have no CUST_ACCOUNT_ID (keys like "127348#null") and the customer rows have neither
// column (every key "null#null").
WithKeys<String, TableRow> soltoOrg = WithKeys.of(
    (TableRow row) -> String.valueOf(row.get("SOLD_TO_ORG_ID")))
    .withKeyType(TypeDescriptors.strings());
WithKeys<String, TableRow> custAccountKey = WithKeys.of(
    (TableRow row) -> String.valueOf(row.get("custAccountId")))
    .withKeyType(TypeDescriptors.strings());

// Rows with no key value are dropped first; otherwise they would all share the key "null".
PCollection<KV<String,TableRow>> customerHeaderAccount = pc934Collection
    .apply("DropOrdersWithoutSoldToOrg", Filter.by((TableRow row) -> row.get("SOLD_TO_ORG_ID") != null))
    .apply("KeyOrdersBySoldToOrg", soltoOrg);
PCollection<KV<String,TableRow>> customerHeaderAll = finalResultCollection
    .apply("DropCustomersWithoutAccountId", Filter.by((TableRow row) -> row.get("custAccountId") != null))
    .apply("KeyCustomersByAccountId", custAccountKey);

// Orders (many rows per customer) are passed first and customers last. The reported
// "unique key expected" failure named an order row, and orders used to be the last argument.
// NOTE(review): assumes innerJoinBQTbls requires unique keys only on its last argument and
// that custAccountId is unique per customer row - confirm against CommonFunctions.
PCollection<TableRow> secondResult = CommonFunctions.innerJoinBQTbls("InnerJoin1", customerHeaderAccount, customerHeaderAll);

// Final projection: "main" is now the order header row and "side" the customer record
// built in stage 1, so customer fields are read under the names stage 1 wrote them with.
@SuppressWarnings("serial")
PCollection<TableRow> secondResultCollection = secondResult.apply("ProcessOrders", ParDo.of(new DoFn<TableRow, TableRow>()
{
    @ProcessElement
    public void processElement(ProcessContext c)
    {
        TableRow joined = c.element();
        TableRow order = (TableRow) joined.get("main");
        TableRow customer = (TableRow) joined.get("side");
        TableRow targetRow = new TableRow();
        targetRow.set("orderNumber", order.get("ORDER_NUMBER"));
        targetRow.set("headerId", Integer.valueOf(order.get("HEADER_ID").toString()));
        // The join key is the sold-to/account id, not the party id, so partyID comes from the customer record.
        targetRow.set("partyID", Integer.valueOf(customer.get("partyID").toString()));
        targetRow.set("accountNumber", customer.get("accountNumber"));
        targetRow.set("customerName", customer.get("customerName"));
        targetRow.set("updatedDate", joined.get("updatedDate"));
        c.output(targetRow);
    }
}));
WithKeys<String, TableRow> headerKey = WithKeys.of( (TableRow row) -> String.format("%s",row.get("PARTY_ID"))).withKeyType(TypeDescriptors.strings());
PCollection<KV<String,TableRow>> mainInput = p.apply("ReadCustomerAccount",BigQueryIO.readTableRows().from(options.getCustAccount())).apply("WithKeys", headerKey);
PCollection<KV<String,TableRow>> sideInput = p.apply("ReadCustomerParty",BigQueryIO.readTableRows().from(options.getPartyTable())).apply("WithKeys", headerKey);
PCollection<TableRow> result = CommonFunctions.innerJoinBQTbls("InnerJoin",mainInput,sideInput);
@SuppressWarnings("serial")
PCollection<TableRow> finalResultCollection = result.apply("Process", ParDo.of(new DoFn<TableRow, TableRow>()
{
@ProcessElement
public void processElement(ProcessContext c)
{
TableRow keyString = c.element();
TableRow mainList = (TableRow) keyString.get("main");
TableRow sideList = (TableRow) keyString.get("side");
TableRow targetRow = new TableRow();
targetRow.set("partyID", Integer.valueOf(keyString.get("key").toString()));
targetRow.set("accountNumber", mainList.get("ACCOUNT_NUMBER"));
targetRow.set("customerName", sideList.get("PARTY_NAME"));
targetRow.set("updatedDate",keyString.get("updatedDate"));
c.output(targetRow);
}
}));
PCollection<TableRow> headerData = p.apply("ReadInvoice",BigQueryIO.readTableRows().from(options.getOrderHeaderAll()));
PCollection<TableRow> pc934Collection = headerData.apply(Filter.by(
(TableRow t) -> {
String orgCode = t.get("SHIP_FROM_ORG_ID").toString();
if (orgCode.equals("934")) {
return true;
}
return false;
}
));
WithKeys<String, TableRow> soltoOrg = WithKeys.of(
(TableRow row) ->
String.format("%s#%s",
row.get("SOLD_TO_ORG_ID"),
row.get("CUST_ACCOUNT_ID")))
.withKeyType(TypeDescriptors.strings());
PCollection<KV<String,TableRow>> customerHeaderAccount = pc934Collection.apply("WithKeys", soltoOrg);
PCollection<KV<String,TableRow>> customerHeaderAll = finalResultCollection.apply("WithKeys", soltoOrg);
PCollection<TableRow> secondResult = CommonFunctions.innerJoinBQTbls("InnerJoin1",customerHeaderAll,customerHeaderAccount);
@SuppressWarnings("serial")
PCollection<TableRow> secondResultCollection = secondResult.apply("Process", ParDo.of(new DoFn<TableRow, TableRow>()
{
@ProcessElement
public void processElement(ProcessContext c)
{
TableRow keyString = c.element();
TableRow mainList = (TableRow) keyString.get("main");
TableRow sideList = (TableRow) keyString.get("side");
TableRow targetRow = new TableRow();
targetRow.set("orderNumber", mainList.get("ORDER_NUMBER"));
targetRow.set("headerId", Integer.valueOf(mainList.get("HEADER_ID").toString()));
targetRow.set("partyID", Integer.valueOf(keyString.get("key").toString()));
targetRow.set("accountNumber", mainList.get("ACCOUNT_NUMBER"));
targetRow.set("customerName", sideList.get("PARTY_NAME"));
targetRow.set("updatedDate",keyString.get("updatedDate"));
c.output(targetRow);
}
}));
您的某个键很可能为 NULL。您可以通过不将其作为主键来解决此问题:主键不能为 NULL;如果是复合主键,则其中任何一列都不能为 NULL。请改为将其设置为唯一索引,并另外使用一个“自动编号”字段作为主键。