Google BigQuery 用不同的键名连接两个表

Google BigQuery 用不同的键名连接两个表,google-bigquery,google-cloud-dataflow,apache-beam,Google Bigquery,Google Cloud Dataflow,Apache Beam,尝试实现以下场景 用相同的键连接两个表(A,B) 过滤表(c) 将步骤1的结果与步骤2的结果合并。这里它有不同的键名,但值相同(例如:第一个表列名为“id”,第二个表列名为“Fid”,但两个值相同) 使用云数据流执行代码时出现以下错误 严重:2018-12-03T13:52:47.634Z:java.lang.IllegalStateException:应为唯一键,但发现键 127348#null,对应的值为 {HEADER_ID=18219955,ORDER_TYPE_ID=2124,ORDE

尝试实现以下场景

  • 用相同的键连接两个表(A,B)
  • 过滤表(c)
  • 将步骤1的结果与步骤2的结果合并。这里它有不同的键名,但值相同(例如:第一个表列名为“id”,第二个表列名为“Fid”,但两个值相同)
  • 使用云数据流执行代码时出现以下错误

    严重:2018-12-03T13:52:47.634Z:java.lang.IllegalStateException:应为唯一键,但发现键 127348#null,对应的值为 {HEADER_ID=18219955, ORDER_TYPE_ID=2124, ORDER_NUMBER=729637, ORDER_DATE=10/29/2018 4:01:25 PM, TRANSACTIONAL_CURR_CODE=USD, CUST_PO_NUMBER=942634, SOLD_TO_ORG_ID=127348, SHIP_FROM_ORG_ID=934, PRICE_LIST_ID=7035, CREATION_DATE=2018-10-29 16:10:41 UTC, LAST_UPDATE_DATE=2018-10-29 16:10:13,预订状态_=UTC} 和 {HEADER_ID=18219945, ORDER_TYPE_ID=2124, ORDER_NUMBER=729636, ORDER_DATE=10/29/2018 3:56:05 PM, TRANSACTIONAL_CURR_CODE=USD, CUST_PO_NUMBER=941674, SOLD_TO_ORG_ID=127348, SHIP_FROM_ORG_ID=934, PRICE_LIST_ID=7035, CREATION_DATE=2018-10-29 15:10:20 UTC, LAST_UPDATE_DATE=2018-10-29 16:10:34,预订状态代码=UTC},所在窗口为 org.apache.beam.sdk.transforms.windowing.GlobalWindow@6c5cc8ee。位于 org.apache.beam.runners.dataflow.BatchViewOverrides$BatchViewAsMultimap$ToIsmRecordForMapLikeDoFn.processElement(BatchViewOverrides.java:442)

    以下是我尝试的全部代码:

    // ---------------------------------------------------------------------------------------
    // Step 1: join customer accounts with parties. Both tables use the same column name
    // (PARTY_ID), so a single key function can be shared by both inputs.
    // ---------------------------------------------------------------------------------------
    WithKeys<String, TableRow> headerKey =
        WithKeys.of((TableRow row) -> String.valueOf(row.get("PARTY_ID")))
            .withKeyType(TypeDescriptors.strings());

    // Every transform gets its own name: the original reused "WithKeys" and "Process" several
    // times at the same pipeline level, which makes Beam complain about non-unique names.
    PCollection<KV<String, TableRow>> mainInput =
        p.apply("ReadCustomerAccount", BigQueryIO.readTableRows().from(options.getCustAccount()))
            .apply("KeyAccountsByPartyId", headerKey);
    PCollection<KV<String, TableRow>> sideInput =
        p.apply("ReadCustomerParty", BigQueryIO.readTableRows().from(options.getPartyTable()))
            .apply("KeyPartiesByPartyId", headerKey);

    PCollection<TableRow> result = CommonFunctions.innerJoinBQTbls("InnerJoin", mainInput, sideInput);

    // Flatten each joined pair into one customer row. The join output is read exactly as the
    // original code read it: "key" holds the join key, "main"/"side" hold the two source rows.
    @SuppressWarnings("serial")
    PCollection<TableRow> finalResultCollection =
        result.apply("BuildCustomerRow", ParDo.of(new DoFn<TableRow, TableRow>() {
          @ProcessElement
          public void processElement(ProcessContext c) {
            TableRow joined = c.element();
            TableRow accountRow = (TableRow) joined.get("main");
            TableRow partyRow = (TableRow) joined.get("side");

            TableRow targetRow = new TableRow();
            targetRow.set("partyID", Integer.valueOf(joined.get("key").toString()));
            targetRow.set("accountNumber", accountRow.get("ACCOUNT_NUMBER"));
            targetRow.set("customerName", partyRow.get("PARTY_NAME"));
            targetRow.set("updatedDate", joined.get("updatedDate"));
            // Carried through so that step 3 has something to join on. The original output
            // dropped it and then tried to read it back from this row.
            // NOTE(review): assumes the customer-account table exposes CUST_ACCOUNT_ID, as the
            // original key function implied -- confirm against the table schema.
            targetRow.set("custAccountId", accountRow.get("CUST_ACCOUNT_ID"));

            c.output(targetRow);
          }
        }));

    // ---------------------------------------------------------------------------------------
    // Step 2: read the order headers and keep only those shipped from org 934.
    // ---------------------------------------------------------------------------------------
    PCollection<TableRow> headerData =
        p.apply("ReadInvoice", BigQueryIO.readTableRows().from(options.getOrderHeaderAll()));

    // String.valueOf tolerates a missing SHIP_FROM_ORG_ID (the original threw a
    // NullPointerException on such rows); a null simply does not match "934".
    PCollection<TableRow> pc934Collection =
        headerData.apply("FilterShipFromOrg934",
            Filter.by((TableRow t) -> "934".equals(String.valueOf(t.get("SHIP_FROM_ORG_ID")))));

    // ---------------------------------------------------------------------------------------
    // Step 3: join the filtered order headers with the customers from step 1.
    //
    // The join column has a different NAME on each side but carries the same VALUE, so each
    // side needs its own key function. The original applied one composite key
    // ("SOLD_TO_ORG_ID#CUST_ACCOUNT_ID") to both sides; since neither row type has both
    // columns, headers were keyed "<id>#null" and customers "null#null" -- the keys could
    // never match, and every header of one customer collided on the same "<id>#null" key.
    // ---------------------------------------------------------------------------------------
    WithKeys<String, TableRow> orderCustomerKey =
        WithKeys.of((TableRow row) -> String.valueOf(row.get("SOLD_TO_ORG_ID")))
            .withKeyType(TypeDescriptors.strings());
    WithKeys<String, TableRow> customerAccountKey =
        WithKeys.of((TableRow row) -> String.valueOf(row.get("custAccountId")))
            .withKeyType(TypeDescriptors.strings());

    PCollection<KV<String, TableRow>> customerHeaderAccount =
        pc934Collection.apply("KeyOrdersBySoldToOrg", orderCustomerKey);
    PCollection<KV<String, TableRow>> customerHeaderAll =
        finalResultCollection.apply("KeyCustomersByAccountId", customerAccountKey);

    // The order headers go in as the MAIN input and the customers as the SIDE input. The
    // reported IllegalStateException comes from the side input being materialised as a map
    // with unique keys; one customer has many orders, so the headers cannot be the side.
    // NOTE(review): presumes one step-1 row per customer account -- confirm.
    PCollection<TableRow> secondResult =
        CommonFunctions.innerJoinBQTbls("InnerJoin1", customerHeaderAccount, customerHeaderAll);

    @SuppressWarnings("serial")
    PCollection<TableRow> secondResultCollection =
        secondResult.apply("BuildOrderCustomerRow", ParDo.of(new DoFn<TableRow, TableRow>() {
          @ProcessElement
          public void processElement(ProcessContext c) {
            TableRow joined = c.element();
            TableRow orderRow = (TableRow) joined.get("main");    // filtered order header
            TableRow customerRow = (TableRow) joined.get("side"); // row built in step 1

            TableRow targetRow = new TableRow();
            targetRow.set("orderNumber", orderRow.get("ORDER_NUMBER"));
            targetRow.set("headerId", Integer.valueOf(orderRow.get("HEADER_ID").toString()));
            // Customer attributes come from the step-1 row under the names step 1 gave them.
            // The original read ACCOUNT_NUMBER / PARTY_NAME, which no longer exist here, and
            // parsed the join key as the party id.
            targetRow.set("partyID", customerRow.get("partyID"));
            targetRow.set("accountNumber", customerRow.get("accountNumber"));
            targetRow.set("customerName", customerRow.get("customerName"));
            targetRow.set("updatedDate", joined.get("updatedDate"));

            c.output(targetRow);
          }
        }));
    
    WithKeys<String, TableRow> headerKey = WithKeys.of((TableRow row) -> String.format("%s", row.get("PARTY_ID"))).withKeyType(TypeDescriptors.strings());
    PCollection<KV<String, TableRow>> mainInput = p.apply("ReadCustomerAccount", BigQueryIO.readTableRows().from(options.getCustAccount())).apply("WithKeys", headerKey);
    PCollection<KV<String, TableRow>> sideInput = p.apply("ReadCustomerParty", BigQueryIO.readTableRows().from(options.getPartyTable())).apply("WithKeys", headerKey);
    PCollection<TableRow> result = CommonFunctions.innerJoinBQTbls("InnerJoin", mainInput, sideInput);
    @SuppressWarnings("serial")
    PCollection<TableRow> finalResultCollection = result.apply("Process", ParDo.of(new DoFn<TableRow, TableRow>()
    {
    @ProcessElement
    public void processElement(ProcessContext c)
    {
    TableRow keyString = c.element();
    TableRow mainList = (TableRow) keyString.get("main");
    TableRow sideList = (TableRow) keyString.get("side");
    TableRow targetRow = new TableRow();
    targetRow.set("partyID", Integer.valueOf(keyString.get("key").toString()));
    targetRow.set("accountNumber", mainList.get("ACCOUNT_NUMBER"));
    targetRow.set("customerName", sideList.get("PARTY_NAME"));
    targetRow.set("updatedDate", keyString.get("updatedDate"));
    c.output(targetRow);
    }
    }));
    PCollection<TableRow> headerData = p.apply("ReadInvoice", BigQueryIO.readTableRows().from(options.getOrderHeaderAll()));
    PCollection<TableRow> pc934Collection = headerData.apply(Filter.by(
    (TableRow t) -> {
    String orgCode = t.get("SHIP_FROM_ORG_ID").toString();
    if (orgCode.equals("934")) {
    return true;
    }
    return false;
    }
    ));
    WithKeys<String, TableRow> soltoOrg = WithKeys.of(
    (TableRow row) ->
    String.format("%s#%s",
    row.get("SOLD_TO_ORG_ID"),
    row.get("CUST_ACCOUNT_ID")))
    .withKeyType(TypeDescriptors.strings());
    PCollection<KV<String, TableRow>> customerHeaderAccount = pc934Collection.apply("WithKeys", soltoOrg);
    PCollection<KV<String, TableRow>> customerHeaderAll = finalResultCollection.apply("WithKeys", soltoOrg);
    PCollection<TableRow> secondResult = CommonFunctions.innerJoinBQTbls("InnerJoin1", customerHeaderAll, customerHeaderAccount);
    @SuppressWarnings("serial")
    PCollection<TableRow> secondResultCollection = secondResult.apply("Process", ParDo.of(new DoFn<TableRow, TableRow>()
    {
    @ProcessElement
    public void processElement(ProcessContext c)
    {
    TableRow keyString = c.element();
    TableRow mainList = (TableRow) keyString.get("main");
    TableRow sideList = (TableRow) keyString.get("side");
    TableRow targetRow = new TableRow();
    targetRow.set("orderNumber", mainList.get("ORDER_NUMBER"));
    targetRow.set("headerId", Integer.valueOf(mainList.get("HEADER_ID").toString()));
    targetRow.set("partyID", Integer.valueOf(keyString.get("key").toString()));
    targetRow.set("accountNumber", mainList.get("ACCOUNT_NUMBER"));
    targetRow.set("customerName", sideList.get("PARTY_NAME"));
    targetRow.set("updatedDate", keyString.get("updatedDate"));
    c.output(targetRow);
    }
    }));
    
    您的某个键很可能为空(null)。主键不能为空;如果是复合主键,则其任何组成部分都不能为空。您可以不把该列用作主键,而是将其设置为唯一索引来解决此问题;例如,改用“自动编号”字段作为主键。