Java: what is the best practice for monitoring the running progress of an AWS EMR job?

I have the following code to run an EMR job, and it runs successfully. I also want to monitor its running status. I was using the DescribeJobFlows API, but according to the documentation this API has been deprecated.

Can anyone advise on the best practice for monitoring the running progress of an EMR job?

import java.util.Arrays;
import java.util.List;

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.regions.Region;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.ec2.model.InstanceType;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;
import com.amazonaws.services.elasticmapreduce.model.*;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;

public class EmrJobRunner {
  public static void main(String[] args) {
    // args is [input_file_path, output_directory], make sure output_directory does not exist
    String inputFilePath = "s3://mybucket/emr/input";
    String outputDirectory = "s3://mybucket/emr/output/" + System.currentTimeMillis();
    String jarName = "WordCount.jar";
    String jarPath = "s3://mybucket/emr/" + jarName;
    String logPath = "s3://mybucket/emr/logs";

    String TERMINATE_JOB_FLOW = "TERMINATE_JOB_FLOW";
    String CONTINUE = "CONTINUE";

    // Hard-coded credentials are shown for brevity; prefer a credentials provider in practice.
    AWSCredentials credentials = new BasicAWSCredentials("pub_key", "sec_key");
    StepFactory stepFactory = new StepFactory();

    AmazonElasticMapReduce emr = new AmazonElasticMapReduceClient(credentials);
    emr.setRegion(Region.getRegion(Regions.AP_SOUTHEAST_1));

    StepConfig enableDebugging = new StepConfig()
      .withName("Enable debugging")
      .withActionOnFailure(TERMINATE_JOB_FLOW)
      .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    StepConfig installHive = new StepConfig()
      .withName("Install Hive")
      .withActionOnFailure(TERMINATE_JOB_FLOW)
      .withHadoopJarStep(stepFactory.newInstallHiveStep());

    StepConfig runScript = new StepConfig()
      .withName("Run Script")
      .withActionOnFailure(CONTINUE)
      .withHadoopJarStep(stepFactory.newRunHiveScriptStep("s3://dummy/dummy.hive"));

    List<String> jarArgs = Arrays.asList(inputFilePath, outputDirectory);
    HadoopJarStepConfig jarCfg= new HadoopJarStepConfig()
      .withJar(jarPath)
      .withArgs(jarArgs);
    StepConfig runJar = new StepConfig()
      .withName(jarName)
      .withActionOnFailure(TERMINATE_JOB_FLOW)
      .withHadoopJarStep(jarCfg);

    JobFlowInstancesConfig instanceCfg = new JobFlowInstancesConfig()
      .withKeepJobFlowAliveWhenNoSteps(false)
      .withTerminationProtected(true)
      .withInstanceCount(3)
      .withMasterInstanceType(InstanceType.C1Medium.toString())
      .withSlaveInstanceType(InstanceType.C1Medium.toString())
      .withHadoopVersion("2.4.0");

    List<StepConfig> steps = Arrays.asList(enableDebugging, installHive, runScript, runJar);

    RunJobFlowRequest request = new RunJobFlowRequest()
      .withName("My EMR Job Flow")
      .withAmiVersion("3.3.2")
      .withInstances(instanceCfg)
      .withLogUri(logPath)
      .withSteps(steps);

    RunJobFlowResult result = emr.runJobFlow(request);
    // DescribeJobFlows is deprecated, so the call below is no longer recommended:
    // DescribeJobFlowsResult jobFlowDescResult = emr.describeJobFlows(new DescribeJobFlowsRequest());
  }

}

Since DescribeJobFlows is deprecated, monitoring the cluster status is an alternative way to track the job's running progress:

    RunJobFlowResult runJobResult = emr.runJobFlow(runJobFlowRequest);
    System.out.printf("Run JobFlowId is: %s\n", runJobResult.getJobFlowId());

    while(true) {
      DescribeClusterRequest desc = new DescribeClusterRequest()
        .withClusterId(runJobResult.getJobFlowId());
      DescribeClusterResult clusterResult = emr.describeCluster(desc);
      Cluster cluster = clusterResult.getCluster();
      String status = cluster.getStatus().getState();
      System.out.printf("Status: %s\n", status);
      if (status.equals(ClusterState.TERMINATED.toString())
          || status.equals(ClusterState.TERMINATED_WITH_ERRORS.toString())) {
        break;
      }
      try {
        TimeUnit.SECONDS.sleep(30);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
      // other handling (e.g. an overall timeout) could go here
    }
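
Cluster state only tells you whether the cluster as a whole is still alive. For per-step progress, the same SDK also offers ListSteps and DescribeStep, which you can poll the same way. Below is a minimal sketch under that assumption; the StepMonitor class and the waitForSteps helper are names I made up, not part of the SDK:

import java.util.concurrent.TimeUnit;

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.model.*;

public class StepMonitor {
  // Polls each step on the cluster until it reaches a terminal state.
  public static void waitForSteps(AmazonElasticMapReduce emr, String clusterId)
      throws InterruptedException {
    // First page of steps only; use the result's marker to page through more.
    ListStepsResult listResult = emr.listSteps(new ListStepsRequest().withClusterId(clusterId));
    for (StepSummary summary : listResult.getSteps()) {
      while (true) {
        DescribeStepResult stepResult = emr.describeStep(new DescribeStepRequest()
          .withClusterId(clusterId)
          .withStepId(summary.getId()));
        String state = stepResult.getStep().getStatus().getState();
        System.out.printf("Step %s: %s%n", summary.getName(), state);
        if (state.equals(StepState.COMPLETED.toString())
            || state.equals(StepState.FAILED.toString())
            || state.equals(StepState.CANCELLED.toString())
            || state.equals(StepState.INTERRUPTED.toString())) {
          break;
        }
        TimeUnit.SECONDS.sleep(30);
      }
    }
  }
}

Calling waitForSteps(emr, runJobResult.getJobFlowId()) right after runJobFlow would then report each step as it moves from PENDING through RUNNING to a terminal state.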