Google Cloud Dataflow / Apache Beam:在 com.nitesh.gcp.feature.beamSQL1.main(beamSQL1.java:55) 处,当我将两个 PCollection 组合到 PCollectionTuple 并对两个元组执行联接(output.apply(SqlTransform.query("... JOIN ..."))) 时,管道基本上失败了。使用的命令:mvn compile exec:java -Pdirect-runner -Dexec.mainClass=com.nitesh.gcp.feature.beamSQL1 -Dexec.args="--tempLocation=gs://dataflow-8431909583/tempsql --project=dataflowtest-233007"。

补充说明:同一代码在没有 join 时运行正常,但执行 join 时就会出错。有人能在这方面帮助我吗?提前致谢。

回复(@ihji):您可以发布更详细的错误日志(如完整堆栈跟踪或引发异常的行号)吗?需要查看完整的堆栈跟踪以找到问题的根源,还可以与其他 runner 一起尝试,看看是否有帮助。

错误详细信息(INFO 日志中的 SQL):
SELECT emp.empno, emp.ename, emp.job, emp.mgr, emp.hiredate, emp.sal, emp.comm, emp.deptno, dept.deptno AS deptno0, dept.dname, dept.location FROM beam.emp AS emp INNER JOIN beam.dept AS dept ON emp.deptno = dept.deptno
—— 异常位于 com.nitesh.gcp.feature.beamSQL1.main(beamSQL1.java:55)。
package com.nitesh.gcp.feature;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.sql.SqlTransform;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TupleTag;

import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * Beam pipeline that reads employee and department CSV files from GCS,
 * converts each line into a schema'd {@link Row}, joins the two
 * PCollections with Beam SQL (emp JOIN dept ON deptno), and writes the
 * joined rows back to GCS as a single CSV file.
 */
public class beamSQL1 {
    public static final String EMPHEADER = "empno,ename,job,mgr,hiredate,sal,comm,deptno";
    public static final String DEPTHEADER = "deptno,dname,location";

    // Splits a CSV line on commas that are outside double quotes.
    // Compiled once instead of re-compiling the regex for every element.
    private static final Pattern CSV_SPLITTER =
            Pattern.compile(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");

    public static final Schema EMPSCHEMA = Schema.builder()
            .addStringField("empno")
            .addStringField("ename")
            .addStringField("job")
            .addStringField("mgr")
            .addStringField("hiredate")
            .addStringField("sal")
            .addStringField("comm")
            .addStringField("deptno")
            .build();

    public static final Schema DEPTSCHEMA = Schema.builder()
            .addStringField("deptno")
            .addStringField("dname")
            .addStringField("location")
            .build();

    public static void main(String[] args) {
        PipelineOptionsFactory.register(DataflowPipelineOptions.class);
        DataflowPipelineOptions options = PipelineOptionsFactory
                .fromArgs(args)
                .withValidation()
                .as(DataflowPipelineOptions.class);
        Pipeline pipeline = Pipeline.create(options);

        // FIX: transform names must be unique within a pipeline. The original
        // applied "Read From GCS" and "Transform To Row" twice each, which
        // forces Beam to auto-uniquify names (non-stable, breaks Dataflow
        // job update and confuses the error you see at construction time).
        PCollection<String> employee = pipeline.apply("Read Employees From GCS",
                TextIO.read().from("gs://amazon-test/sqlData/employee.txt"));
        PCollection<String> department = pipeline.apply("Read Departments From GCS",
                TextIO.read().from("gs://amazon-test/sqlData/department.txt"));

        PCollection<Row> employeeRow = employee
                .apply("Employee To Row", ParDo.of(new RowParDo()))
                .setRowSchema(EMPSCHEMA);
        PCollection<Row> departmentRow = department
                .apply("Department To Row", ParDo.of(new RowParDoForDept()))
                .setRowSchema(DEPTSCHEMA);

        // The tuple tags ("emp", "dept") become the table names that the
        // SQL query below refers to.
        PCollectionTuple output = PCollectionTuple
                .of(new TupleTag<>("emp"), employeeRow)
                .and(new TupleTag<>("dept"), departmentRow);

        output.apply("Join With Beam SQL", SqlTransform.query(
                        "SELECT * from emp JOIN dept ON emp.deptno = dept.deptno"))
                .apply("TransForm To String", ParDo.of(new RowToString()))
                .apply("Write To GCS", TextIO.write()
                        .to("gs://amazon-test/sqlData/output/outputSql.csv")
                        .withoutSharding());
        pipeline.run();
    }

    /** Converts one employee CSV line into a {@link Row} with {@link #EMPSCHEMA}. */
    public static class RowParDo extends DoFn<String, Row> {
        @ProcessElement
        public void processElement(ProcessContext c) {
            String line = c.element();
            // Skip the header row.
            if (line.equalsIgnoreCase(EMPHEADER)) {
                return;
            }
            String[] vals = CSV_SPLITTER.split(line);
            // Guard: blank/short lines made the original throw
            // ArrayIndexOutOfBoundsException; drop them instead.
            if (vals.length < EMPSCHEMA.getFieldCount()) {
                return;
            }
            Row appRow = Row
                    .withSchema(EMPSCHEMA)
                    .addValues(vals[0], vals[1], vals[2], vals[3], vals[4], vals[5], vals[6], vals[7])
                    .build();
            c.output(appRow);
        }
    }

    /** Serializes a joined {@link Row} back into one comma-separated line. */
    public static class RowToString extends DoFn<Row, String> {
        @ProcessElement
        public void processElement(ProcessContext c) {
            String line = c.element().getValues()
                    .stream()
                    .map(Object::toString)
                    .collect(Collectors.joining(","));
            c.output(line);
        }
    }

    /** Converts one department CSV line into a {@link Row} with {@link #DEPTSCHEMA}. */
    public static class RowParDoForDept extends DoFn<String, Row> {
        @ProcessElement
        public void processElement(ProcessContext c) {
            String line = c.element();
            // Skip the header row.
            if (line.equalsIgnoreCase(DEPTHEADER)) {
                return;
            }
            String[] vals = CSV_SPLITTER.split(line);
            // Same malformed-line guard as RowParDo.
            if (vals.length < DEPTSCHEMA.getFieldCount()) {
                return;
            }
            Row appRow = Row
                    .withSchema(DEPTSCHEMA)
                    .addValues(vals[0], vals[1], vals[2])
                    .build();
            c.output(appRow);
        }
    }
}