Java 触发一个接一个扩展PTransforms的类?
我有两个类扩展了Java 触发一个接一个扩展PTransforms的类?,java,google-cloud-platform,cloud,apache-beam,dataflow,Java,Google Cloud Platform,Cloud,Apache Beam,Dataflow,我有两个类扩展了PTransform,分别称为CompositeCall2和CompositeCall 我必须先调用CompositeCall,然后在CompositeCall中完成工作后,我必须在数据流程序中调用CompositeCall2。我正在制作数据流作业的模板,所以并行处理正在进行,这使得我的工作很困难 代码:Tester.java package Testing2; import java.util.List; import org.apache.beam.runners.dat
PTransform
,分别称为CompositeCall2
和CompositeCall
我必须先调用CompositeCall
,然后在CompositeCall
中完成工作后,我必须在数据流程序中调用CompositeCall2
。我正在制作数据流作业的模板,所以并行处理正在进行,这使得我的工作很困难
代码:Tester.java
package Testing2;
import java.util.List;
import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.runners.direct.DirectRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.Validation;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.PDone;
import com.google.api.services.bigquery.model.TableRow;
public class Tester {
    /**
     * Pipeline options for the template. Uses {@link ValueProvider} so the
     * values can be supplied at template execution time rather than at
     * template construction time.
     */
    public interface FileData extends PipelineOptions {
        @Description("name Of the File")
        @Validation.Required
        ValueProvider<String> getInputFile();
        void setInputFile(ValueProvider<String> value);

        @Description("Path Of File From Where We need To Read Data")
        @Validation.Required
        ValueProvider<String> getOutputFile();
        void setOutputFile(ValueProvider<String> value);
    }

    /**
     * Builds and stages a Dataflow template that applies CompositeCall and
     * CompositeCall2. Note: the two composite transforms are applied to
     * independent Create sources, so Beam runs them in parallel; to force
     * CompositeCall2 to wait for CompositeCall, use
     * {@code org.apache.beam.sdk.transforms.Wait.on(...)} on its input.
     */
    public static void main(String[] args) throws InterruptedException {
        DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
        options.setProject("testing1-180111");
        options.setTempLocation("gs://kishan-bucket/staging");
        options.setTemplateLocation("gs://kiss-bucket/templates/Test1");
        options.setRunner(DataflowRunner.class);
        Pipeline p = Pipeline.create(options);
        // Bug fix: the original applied two transforms both named "Creating File".
        // Beam requires stable unique transform names within a pipeline —
        // duplicates log warnings and break pipeline update. The unused
        // `PDone dta` local was also dropped.
        p.apply("Creating File 1", Create.of("Kishan")).apply(new CompositeCall(p));
        p.apply("Creating File 2", Create.of("Kishan")).apply(new CompositeCall2(p));
        p.run().waitUntilFinish();
    }
}
包测试2;
导入java.util.List;
导入org.apache.beam.runners.dataflow.DataflowRunner;
导入org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
导入org.apache.beam.runners.direct.DirectRunner;
导入org.apache.beam.sdk.Pipeline;
导入org.apache.beam.sdk.io.TextIO;
导入org.apache.beam.sdk.options.Description;
导入org.apache.beam.sdk.options.PipelineOptions;
导入org.apache.beam.sdk.options.pipelineoptions工厂;
导入org.apache.beam.sdk.options.Validation;
导入org.apache.beam.sdk.options.ValueProvider;
导入org.apache.beam.sdk.transforms.Create;
导入org.apache.beam.sdk.transforms.View;
导入org.apache.beam.sdk.values.PCollection;
导入org.apache.beam.sdk.values.PCollectionView;
导入org.apache.beam.sdk.values.PDone;
导入com.google.api.services.bigquery.model.TableRow;
公共类测试员{
公共接口文件数据扩展管道选项{
@说明(“文件名”)
@验证。必需
ValueProvider getInputFile();
作废setInputFile(ValueProvider值);
@说明(“我们需要从中读取数据的文件路径”)
@验证。必需
ValueProvider getOutputFile();
作废setOutputFile(ValueProvider值);
}
公共静态void main(字符串[]args)引发InterruptedException{
DataflowPipelineOptions=PipelineOptions工厂.as(DataflowPipelineOptions.class);
选项。setProject(“测试1-180111”);
选项。设置位置(“gs://kishan bucket/staging”);
options.setTemplateLocation(“gs://kiss bucket/templates/Test1”);
options.setRunner(DataflowRunner.class);
Pipeline p=Pipeline.create(选项);
PDone dta=p.begin().apply(“创建文件”,Create.of(“Kishan”)).apply(新复合调用(p));
p、 apply(“创建文件”,Create.of(“Kishan”)).apply(新的CompositeCall2(p));
p、 run().waitUntilFinish();
}
}
这就是我调用这些转换的方式。
这两个类都在做相同的工作,只是在文件中打印数据并写入数据
package Testing2;
import java.util.List;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Composite transform that logs and writes each input element to GCS,
 * returning the input unchanged so downstream transforms can still consume it.
 */
public class CompositeCall2 extends PTransform<PCollection<String>, PCollection<String>> {
    private static final long serialVersionUID = 1L;

    private static final Logger LOG = LoggerFactory.getLogger(CompositeCall2.class);

    // NOTE(review): storing the Pipeline inside a PTransform is an anti-pattern —
    // the field is never read here, and a PTransform should not depend on the
    // Pipeline it is applied to. Kept static (and therefore excluded from
    // serialization) only to preserve the existing constructor contract.
    static Pipeline p;

    public CompositeCall2(Pipeline p1) {
        // Bug fix: the original wrote `this.p = p1`, assigning a static field
        // through an instance reference, which is misleading. Assign it
        // directly as a static.
        p = p1;
    }

    @Override
    public PCollection<String> expand(PCollection<String> input) {
        // Side branch: prefix/log each element and write it out. The unused
        // local `PCollection<String> data` from the original was removed.
        input.apply(ParDo.of(new testing()))
             .apply(TextIO.write().to("gs://kiss-bucket/test1.txt"));
        LOG.info("Enter Second Stage Called");
        // Return the untouched input so callers can chain further transforms.
        return input;
    }

    /** DoFn that logs each element and emits it with a "Data Is" prefix. */
    static class testing extends DoFn<String, String> {
        @ProcessElement
        public void processElement(ProcessContext c) throws InterruptedException {
            LOG.info("Enter Second Stage");
            c.output("Data Is" + c.element());
        }
    }
}
import java.util.List;
导入org.apache.beam.sdk.Pipeline;
导入org.apache.beam.sdk.io.TextIO;
导入org.apache.beam.sdk.transforms.DoFn;
导入org.apache.beam.sdk.transforms.ptTransform;
导入org.apache.beam.sdk.transforms.ParDo;
导入org.apache.beam.sdk.values.PCollection;
导入org.apache.beam.sdk.values.PCollectionView;
导入org.slf4j.Logger;
导入org.slf4j.LoggerFactory;
公共类CompositeCall2扩展了PTransform{
私有静态最终长serialVersionUID=1L;
静态管道p;
公共合成CALL 2(管道p1){
这个p=p1;
}
私有静态最终记录器LOG=LoggerFactory.getLogger(CompositeCall2.class);
@凌驾
公共PCollection展开(PCollection输入){
p收集数据;
input.apply(ParDo.of(newtesting()).apply(TextIO.write())to(“gs://kiss bucket/test1.txt”);
LOG.info(“进入调用的第二阶段”);
返回输入;
}
静态类测试扩展了DoFn{
@过程元素
public void processElement(ProcessContext c)引发InterruptedException{
日志信息(“进入第二阶段”);
c、 输出(“数据为”+c.element());
}
}
}
如何同步流程，以便在一次转换完成后再运行另一次转换？这个问题有点旧，但我今天遇到了它，想与其他有类似问题的人分享——Beam 提供了 Wait.on 转换，它允许你等待前一次转换完成后再应用后续操作。