Google Cloud Dataflow: Beam SQL join fails at com.nitesh.gcp.feature.beamSQL1.main(beamSQL1.java:55) when combining two PCollections into a PCollectionTuple

Tags: google-cloud-dataflow, apache-beam
When I combine two PCollections into a PCollectionTuple and run a join over the two tagged collections with output.apply(SqlTransform.query("...join...")), the pipeline basically fails at:

    at com.nitesh.gcp.feature.beamSQL1.main(beamSQL1.java:55)

Command used:

    mvn compile exec:java -Pdirect-runner -Dexec.mainClass=com.nitesh.gcp.feature.beamSQL1 -Dexec.args="--tempLocation=gs://dataflow-8431909583/tempsql --project=dataflowtest-233007"

Some additions to the question above: the same code works fine when I run it without the join, but as soon as I perform the join I get the error. Can somebody help me with this? Thanks in advance.

Comment: Could you post a more detailed error log, such as the stack trace or the line number that throws the exception?

Reply: Thanks @ihji for the response. Please find the error details below:
    INFO: SQL:
    SELECT emp.empno, emp.ename, emp.job, emp.mgr, emp.hiredate, emp.sal, emp.comm,
           emp.deptno, dept.deptno AS deptno0, dept.dname, dept.location
    FROM beam.emp AS emp
    INNER JOIN beam.dept AS dept ON emp.deptno = dept.deptno
        at com.nitesh.gcp.feature.beamSQL1.main(beamSQL1.java:55)
Comment: Thanks, but that alone does not help much. We would need the full stack trace to find the source of the problem. You could also try it with a different runner to see whether that helps.
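One way to capture that full stack trace is Maven's standard -e switch; everything else in the command stays the same as above:

    mvn compile exec:java -e -Pdirect-runner -Dexec.mainClass=com.nitesh.gcp.feature.beamSQL1 -Dexec.args="--tempLocation=gs://dataflow-8431909583/tempsql --project=dataflowtest-233007"

The complete source of beamSQL1.java: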
package com.nitesh.gcp.feature;

import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.sql.SqlTransform;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TupleTag;

import java.util.stream.Collectors;


public class beamSQL1 {

    public static final String EMPHEADER = "empno,ename,job,mgr,hiredate,sal,comm,deptno";
    public static final String DEPTHEADER = "deptno,dname,location";
    public static final Schema EMPSCHEMA = Schema.builder()
            .addStringField("empno")
            .addStringField("ename")
            .addStringField("job")
            .addStringField("mgr")
            .addStringField("hiredate")
            .addStringField("sal")
            .addStringField("comm")
            .addStringField("deptno")
            .build();
    public static final Schema DEPTSCHEMA = Schema.builder()
            .addStringField("deptno")
            .addStringField("dname")
            .addStringField("location")
            .build();
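    // Note: every field, including the join key deptno, is declared as a string,
    // so the SQL join below compares deptno values as strings.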


    public static void main(String[] args) {
        PipelineOptionsFactory.register(DataflowPipelineOptions.class);
        DataflowPipelineOptions options = PipelineOptionsFactory
                .fromArgs(args)
                .withValidation()
                .as(DataflowPipelineOptions.class);

        Pipeline pipeline = Pipeline.create(options);
        PCollection<String> employee = pipeline.apply("Read Employees From GCS", TextIO.read().from("gs://amazon-test/sqlData/employee.txt"));
        PCollection<String> department = pipeline.apply("Read Departments From GCS", TextIO.read().from("gs://amazon-test/sqlData/department.txt"));
        PCollection<Row> employeeRow = employee.apply("Transform Employees To Rows", ParDo.of(new RowParDo())).setRowSchema(EMPSCHEMA);
        PCollection<Row> departmentRow = department.apply("Transform Departments To Rows", ParDo.of(new RowParDoForDept())).setRowSchema(DEPTSCHEMA);
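        // The tuple tag ids ("emp", "dept") become the table names referenced in the SQL query.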
        PCollectionTuple output = PCollectionTuple.of(new TupleTag<>("emp"), employeeRow).and(new TupleTag<>("dept"), departmentRow);


        output.apply(
                SqlTransform.query(
                        // "SELECT emp.empno,emp.ename,dept.deptno,dept.dname FROM emp JOIN dept ON emp.deptno = dept.deptno"))
                        "SELECT * from emp JOIN dept ON emp.deptno = dept.deptno"))

                /* p2.apply("Transform Sql", SqlTransform.query(
                         "SELECT * " +
                                 "FROM PCOLLECTION order by sal desc LIMIT 14")
                 )*/
                .apply("TransForm To String", ParDo.of(new RowToString()))
                .apply("Write To GCS", TextIO.write().to("gs://amazon-test/sqlData/output/outputSql.csv").withoutSharding());

        pipeline.run();
    }

    // ParDo for employee String -> Row (SQL)
    public static class RowParDo extends DoFn<String, Row> {
        @ProcessElement
        public void processElement(ProcessContext c) {
            if (!c.element().equalsIgnoreCase(EMPHEADER)) {
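                // Split on commas that are outside double quotes, so quoted fields may contain commas.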
                String[] vals = c.element().split(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");
                Row appRow = Row
                        .withSchema(EMPSCHEMA)
                        .addValues(vals[0], vals[1], vals[2], vals[3], vals[4], vals[5], vals[6], vals[7])
                        .build();
                c.output(appRow);
            }
        }
    }

    //ParDo for Row (SQL) -> String
    public static class RowToString extends DoFn<Row, String> {
        @ProcessElement
        public void processElement(ProcessContext c) {
            String line = c.element().getValues()
                    .stream()
                    .map(Object::toString)
                    .collect(Collectors.joining(","));
            c.output(line);
        }
    }

    // ParDo for department String -> Row (SQL)
    public static class RowParDoForDept extends DoFn<String, Row> {
        @ProcessElement
        public void processElement(ProcessContext c) {
            if (!c.element().equalsIgnoreCase(DEPTHEADER)) {
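                // Split on commas that are outside double quotes, so quoted fields may contain commas.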
                String[] vals = c.element().split(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");
                Row appRow = Row
                        .withSchema(DEPTSCHEMA)
                        .addValues(vals[0], vals[1], vals[2])
                        .build();
                c.output(appRow);
            }
        }
    }


}
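Not part of the original post, but as a debugging aid one could log each Row and its schema before the join to confirm that both inputs were converted as expected. This hypothetical snippet would go in main() right after departmentRow is built, before the SqlTransform is applied:

        // Debug only: print every department Row and its schema.
        // A DoFn<Row, Void> emits nothing, so the default VoidCoder suffices for its output.
        departmentRow.apply("Peek Departments", ParDo.of(new DoFn<Row, Void>() {
            @ProcessElement
            public void processElement(ProcessContext c) {
                System.out.println(c.element().getSchema() + " -> " + c.element());
            }
        }));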