
Java: triggering classes that extend PTransform one after another?

Tags: java, google-cloud-platform, cloud, apache-beam, dataflow

I have two classes that extend PTransform, called CompositeCall2 and CompositeCall.

CompositeCall has to be invoked first, and only after it has finished its work should CompositeCall2 be invoked in the Dataflow program. I am building a template for the Dataflow job, so the processing runs in parallel, which is what makes this difficult.

Code: Tester.java

package Testing2;

import java.util.List;

import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.runners.direct.DirectRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.Validation;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.PDone;
import com.google.api.services.bigquery.model.TableRow;

public class Tester {

    public interface FileData extends PipelineOptions {
         @Description("name Of the File")
         @Validation.Required
         ValueProvider<String> getInputFile();
         void setInputFile(ValueProvider<String> value);

         @Description("Path Of File From Where We need To Read Data")
         @Validation.Required
         ValueProvider<String> getOutputFile();
         void setOutputFile(ValueProvider<String> value);

    }

    public static void main(String[] args) throws InterruptedException {

        DataflowPipelineOptions options=PipelineOptionsFactory.as(DataflowPipelineOptions.class);
        options.setProject("testing1-180111");
        options.setTempLocation("gs://kishan-bucket/staging");
        options.setTemplateLocation("gs://kiss-bucket/templates/Test1");
        options.setRunner(DataflowRunner.class);
        Pipeline p = Pipeline.create(options);

        PDone dta = p.begin().apply("Creating File",Create.of("Kishan")).apply(new CompositeCall(p));
        p.apply("Creating File",Create.of("Kishan")).apply(new CompositeCall2(p));
        p.run().waitUntilFinish();  
    }
}
This is how I am calling them. Both classes do the same work: they simply print the data and write it to a file.

Code: CompositeCall2.java

package Testing2;

import java.util.List;

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CompositeCall2 extends PTransform <PCollection<String>,PCollection<String>> {


    private static final long serialVersionUID = 1L;
    static Pipeline p;

    public CompositeCall2(Pipeline p1) {
        this.p = p1;
    }
    private static final Logger LOG = LoggerFactory.getLogger(CompositeCall2.class);

    @Override
    public PCollection<String> expand(PCollection<String> input) {
        PCollection<String> data;
        input.apply(ParDo.of(new testing())).apply(TextIO.write().to("gs://kiss-bucket/test1.txt"));
        LOG.info("Enter Second Stage Called");
        return input;           
    }

    static class testing extends DoFn<String,String>{

        @ProcessElement
        public void processElement(ProcessContext c) throws InterruptedException{
            LOG.info("Enter Second Stage");
            c.output("Data Is"+c.element());
        }
    }
}

How can I synchronize the pipeline so that one transform runs only after the other has completed?

This is a bit old, but I came across it today and thought I'd share it for anyone with a similar problem: there is a transform that lets you wait for a previous transform to complete before applying subsequent steps.

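The transform being referred to is presumably Wait.on from the Beam Java SDK (org.apache.beam.sdk.transforms.Wait). Below is a minimal sketch of how the two composites from the question could be sequenced with it. It assumes that CompositeCall, like CompositeCall2, takes the Pipeline in its constructor and returns a PCollection<String>, and that the composite returns the collection produced by its ParDo rather than the untouched input, so the signal actually reflects the finished work; the class and step names here are illustrative, not from the original post. If the first stage ends in a file write, waiting on the filenames collection from TextIO.write().withOutputFilenames() would be the more robust signal.

package Testing2;

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Wait;
import org.apache.beam.sdk.values.PCollection;

public class SequencedTester {

    public static void main(String[] args) {
        Pipeline p = Pipeline.create();

        // First stage: its output PCollection doubles as the "done" signal,
        // so CompositeCall should return the collection its ParDo produced.
        PCollection<String> firstStageDone =
                p.apply("Create input 1", Create.of("Kishan"))
                 .apply("First stage", new CompositeCall(p));

        // Second stage: Wait.on holds these elements back until firstStageDone
        // is complete (per window), so CompositeCall2 only starts processing
        // after CompositeCall has finished its work.
        p.apply("Create input 2", Create.of("Kishan"))
         .apply(Wait.on(firstStageDone))
         .apply("Second stage", new CompositeCall2(p));

        p.run().waitUntilFinish();
    }
}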