Google Cloud Dataflow:如何与其他命令行参数一起显式指定 DirectRunner?

Google cloud dataflow 如何使用其他命令行参数显式设置直接运行程序?,google-cloud-dataflow,apache-beam,apache-beam-io,Google Cloud Dataflow,Apache Beam,Apache Beam Io,我编写了这个管道,但当我将其作为jar运行时,当我在build.gradle中指定了直接运行程序时,以及当我尝试传递参数--runner=direct或 --runner=Directrunner。下面是我的代码和build.gradle文件。我正在运行gradle任务fatJar来创建jar,导航到build/libs文件夹来运行jar,并看到这个错误。这是我正在使用的命令java-jar filepipeline-all-1.0-SNAPSHOT.jar--input=“../testdat

我编写了这个管道,但把它作为 jar 运行时会报错:无论是在 build.gradle 中指定 DirectRunner,还是传递参数 --runner=direct 或 --runner=DirectRunner,结果都一样。下面是我的代码和 build.gradle 文件。我运行 gradle 任务 fatJar 来创建 jar,然后进入 build/libs 文件夹运行该 jar,就会看到这个错误。这是我正在使用的命令:
java -jar filepipeline-all-1.0-SNAPSHOT.jar --input="../testdata" --output="./manifest.json" --runner=DirectRunner
在此问题上的任何帮助都将不胜感激

我的文件夹结构如下所示: --src --main --java --com.pipeline --BeamPipeline.java

build.gradle:

// Build script for the com.pipeline.BeamPipeline project.
plugins {
    id 'java'
}

group 'com.dustin'
version '1.0-SNAPSHOT'

sourceCompatibility = 1.8

repositories {
    mavenCentral()
}

// Builds a self-contained ("fat") jar: the project's own classes (`with jar`) plus the
// unpacked contents of every artifact in the `compile` configuration.
task fatJar(type: Jar) {
    manifest {
        attributes 'Implementation-Title': 'Gradle Jar File',
                'Implementation-Version': version,
                'Main-Class': 'com.pipeline.BeamPipeline'
    }
    baseName = project.name + '-all'
    from { configurations.compile.collect { it.isDirectory() ? it : zipTree(it) } }
    with jar
}


apply plugin: 'application'
// Must be the fully qualified class name (package + class), not the source path;
// it now matches the Main-Class manifest attribute used by fatJar.
mainClassName = 'com.pipeline.BeamPipeline'

dependencies {
    // NOTE(review): dependencies declared with `runtime` are not part of
    // `configurations.compile`, so fatJar does not bundle the direct runner
    // (or the slf4j binding below).
    runtime group: 'org.apache.beam', name: 'beam-runners-direct-java', version:'2.8.0'
    compile group: 'org.apache.beam', name: 'beam-sdks-java-core', version:'2.8.0'
    runtime group: 'org.slf4j', name: 'slf4j-jdk14', version:'1.7.25'

    testCompile group: 'junit', name: 'junit', version: '4.12'

    compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.0'
    compile group: 'commons-io', name: 'commons-io', version: '2.6'
    compile group: 'commons-codec', name:'commons-codec', version:'1.12'
    compileOnly 'org.projectlombok:lombok:1.18.6'
    compile group: 'com.google.code.gson', name: 'gson', version: '2.7'
    compile group: 'org.json', name: 'json', version: '20180813'
    annotationProcessor 'org.projectlombok:lombok:1.18.6'
}
管道:

package com.pipeline;

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineRunner;
import org.apache.beam.sdk.io.FileIO;
import org.apache.beam.sdk.options.*;
import org.apache.beam.sdk.transforms.*;
import org.apache.beam.sdk.values.KV;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;



public class BeamPipeline {
    private static final Logger log = LoggerFactory.getLogger(BeamPipeline.class);
    public static interface MyOptions extends PipelineOptions {

        @Validation.Required
        @Description("Input Path(with gs:// prefix)")
        String getInput();
        void setInput(String value);

        @Validation.Required
        @Description("Output Path (with gs:// prefix)")
        String getOutput();
        void setOutput(String value);

    }


    public static void main(String[] args) {

        MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
        Pipeline p = Pipeline.create(options);

        File dir = new File(options.getInput());
        String output = options.getOutput();

        for (File file : dir.listFiles()) {
                String inputString = file.toString();
                p
                        .apply("Match Files", FileIO.match().filepattern(inputString))
                        .apply("Read Files", FileIO.readMatches())
                        .apply(MapElements.via(new SimpleFunction<FileIO.ReadableFile, KV<String, String>>() {
                            public KV<String, String> apply(FileIO.ReadableFile file) {
                                String temp = null;

                                try {
                                    temp = file.readFullyAsUTF8String();

                                } catch (IOException e) {

                                }

                                String sha256hex = org.apache.commons.codec.digest.DigestUtils.sha256Hex(temp);

                                return KV.of(file.getMetadata().resourceId().toString(), sha256hex);

                            }
                        }))
                        .apply("Print", ParDo.of(new DoFn<KV<String, String>, Void>() {
                            @ProcessElement
                            public void processElement(ProcessContext c) throws IOException {

                                FileWriter fileWriter = new FileWriter(output,true);
                                JSONObject obj = new JSONObject();

                                obj.put(c.element().getKey(), c.element().getValue());

                                fileWriter.write(obj.toString());
                                fileWriter.close();

                                log.info(String.format("File: %s, SHA-256 %s", c.element().getKey(), c.element().getValue()));

                            }
                        }));
                }
            p.run().waitUntilFinish();
        }
}
package com.pipeline;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineRunner;
import org.apache.beam.sdk.io.FileIO;
import org.apache.beam.sdk.options.*;
import org.apache.beam.sdk.transforms.*;
import org.apache.beam.sdk.values.KV;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
public class BeamPipeline {
    private static final Logger log = LoggerFactory.getLogger(BeamPipeline.class);
    public static interface MyOptions extends PipelineOptions {
        @Validation.Required
        @Description("Input Path(with gs:// prefix)")
        String getInput();
        void setInput(String value);
        @Validation.Required
        @Description("Output Path (with gs:// prefix)")
        String getOutput();
        void setOutput(String value);
    }
    public static void main(String[] args) {
        MyOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
        Pipeline p = Pipeline.create(options);
        File dir = new File(options.getInput());
        String output = options.getOutput();
        for (File file : dir.listFiles()) {
            String inputString = file.toString();
            p
                .apply("Match Files", FileIO.match().filepattern(inputString))
                .apply("Read Files", FileIO.readMatches())
                .apply(MapElements.via(new SimpleFunction<FileIO.ReadableFile, KV<String, String>>() {
                    public KV<String, String> apply(FileIO.ReadableFile file) {
                        String temp = null;
                        try {
                            temp = file.readFullyAsUTF8String();
                        } catch (IOException e) {
                        }
                        String sha256hex = org.apache.commons.codec.digest.DigestUtils.sha256Hex(temp);
                        return KV.of(file.getMetadata().resourceId().toString(), sha256hex);
                    }
                }))
                .apply("Print", ParDo.of(new DoFn<KV<String, String>, Void>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws IOException {
                        FileWriter fileWriter = new FileWriter(output, true);
                        JSONObject obj = new JSONObject();
                        obj.put(c.element().getKey(), c.element().getValue());
                        fileWriter.write(obj.toString());
                        fileWriter.close();
                        log.info(String.format("File: %s, SHA-256 %s", c.element().getKey(), c.element().getValue()));
                    }
                }));
        }
        p.run().waitUntilFinish();
    }
}

beam-runners-direct-java 是作为运行时(runtime)依赖项添加的,因此不会被打包进 fat jar。
您可以改为把 beam-runners-direct-java 添加为编译时(compile)依赖项来使用它:

// Build script variant that bundles the DirectRunner into the fat jar.
plugins {
    id 'java'
}

group 'com.dustin'
version '1.0-SNAPSHOT'

sourceCompatibility = 1.8

repositories {
    mavenCentral()
}

// Builds a self-contained ("fat") jar: the project's own classes (`with jar`) plus the
// unpacked contents of every artifact in the `compile` configuration.
task fatJar(type: Jar) {
    manifest {
        attributes 'Implementation-Title': 'Gradle Jar File',
                'Implementation-Version': version,
                'Main-Class': 'com.pipeline.BeamPipeline'
    }
    baseName = project.name + '-all'
    from { configurations.compile.collect { it.isDirectory() ? it : zipTree(it) } }
    with jar
}


apply plugin: 'application'
// Must be the fully qualified class name (package + class), not the source path;
// it now matches the Main-Class manifest attribute used by fatJar.
mainClassName = 'com.pipeline.BeamPipeline'

dependencies {
    // Declared with `compile` so that it is part of `configurations.compile`
    // and therefore ends up inside the jar produced by fatJar.
    compile group: 'org.apache.beam', name: 'beam-runners-direct-java', version:'2.8.0'
    compile group: 'org.apache.beam', name: 'beam-sdks-java-core', version:'2.8.0'
    runtime group: 'org.slf4j', name: 'slf4j-jdk14', version:'1.7.25'

    testCompile group: 'junit', name: 'junit', version: '4.12'

    compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.0'
    compile group: 'commons-io', name: 'commons-io', version: '2.6'
    compile group: 'commons-codec', name:'commons-codec', version:'1.12'
    compileOnly 'org.projectlombok:lombok:1.18.6'
    compile group: 'com.google.code.gson', name: 'gson', version: '2.7'
    compile group: 'org.json', name: 'json', version: '20180813'
    annotationProcessor 'org.projectlombok:lombok:1.18.6'
}
或者,如果您不想把 DirectRunner 打包进 fat jar,而只想将其用于测试,则可以创建一个单独的 DirectRunner jar,并在运行管道时将其添加到类路径(classpath)中:

// Build script variant that keeps the DirectRunner out of the fat jar and packages the
// runtime-scoped dependencies into a separate jar instead.
plugins {
    id 'java'
}

group 'com.dustin'
version '1.0-SNAPSHOT'

sourceCompatibility = 1.8

repositories {
    mavenCentral()
}

// Fat jar: the project's own classes (`with jar`) plus the unpacked contents of every
// artifact in the `compile` configuration.
task fatJar(type: Jar) {
    manifest {
        attributes 'Implementation-Title': 'Gradle Jar File',
                'Implementation-Version': version,
                'Main-Class': 'com.pipeline.BeamPipeline'
    }
    baseName = project.name + '-all'
    from { configurations.compile.collect { it.isDirectory() ? it : zipTree(it) } }
    with jar
}

// Same layout as fatJar, but collected from the `runtime` configuration, which is where
// the direct runner and the slf4j binding are declared below.
task directrunnerjar(type: Jar) {
    manifest {
        attributes 'Implementation-Title': 'Gradle Jar File',
                'Implementation-Version': version,
                'Main-Class': 'com.pipeline.BeamPipeline'
    }
    baseName = project.name + '-runtime'
    from { configurations.runtime.collect { it.isDirectory() ? it : zipTree(it) } }
    with jar
}


apply plugin: 'application'
// Must be the fully qualified class name (package + class), not the source path;
// it now matches the Main-Class manifest attribute used by both jar tasks.
mainClassName = 'com.pipeline.BeamPipeline'

dependencies {
    runtime group: 'org.apache.beam', name: 'beam-runners-direct-java', version:'2.8.0'
    compile group: 'org.apache.beam', name: 'beam-sdks-java-core', version:'2.8.0'
    runtime group: 'org.slf4j', name: 'slf4j-jdk14', version:'1.7.25'

    testCompile group: 'junit', name: 'junit', version: '4.12'

    compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.0'
    compile group: 'commons-io', name: 'commons-io', version: '2.6'
    compile group: 'commons-codec', name:'commons-codec', version:'1.12'
    compileOnly 'org.projectlombok:lombok:1.18.6'
    compile group: 'com.google.code.gson', name: 'gson', version: '2.7'
    compile group: 'org.json', name: 'json', version: '20180813'
    annotationProcessor 'org.projectlombok:lombok:1.18.6'
}
java -cp "libs/myartifact-runtime-1.0-SNAPSHOT.jar:libs/filepipeline-all-1.0-SNAPSHOT.jar" com.pipeline.BeamPipeline --input="../testdata" --output="./manifest.json" --runner=DirectRunner