4
我用以下代碼來運行 EMR 作業，並且它可以成功運行。我還想監控作業的運行狀態。我原本使用 DescribeJobFlows API，但根據官方文檔（http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/elasticmapreduce/AmazonElasticMapReduceClient.html），此 API 已被棄用。
監控 AWS EMR 作業運行進度的最佳做法是什麼？
有沒有人可以幫助監控EMR運行進度的最佳實踐?
public class EmrJobRunner {
public static void main(String[] args) {
// args is [input_file_path, output_directory], make sure output_directory does not exist
String inputFilePath = "s3://mybucket/emr/input";
String outputDirectory = "s3://mybucket/emr/output/" + System.currentTimeMillis();
String jarName = "WordCount.jar";
String jarPath = "s3://mybucket/emr/" + jarName;
String logPath = "s3://mybucket/emr/logs";
String TERMINATE_JOB_FLOW = "TERMINATE_JOB_FLOW";
String CONTINUE = "CONTINUE";
AWSCredentials credentials = new BasicAWSCredentials("pub_key", "sec_key");
StepFactory stepFactory = new StepFactory();
AmazonElasticMapReduce emr = new AmazonElasticMapReduceClient(credentials);
emr.setRegion(Region.getRegion(Regions.AP_SOUTHEAST_1));
StepConfig enableDebugging = new StepConfig()
.withName("Enable debugging")
.withActionOnFailure(TERMINATE_JOB_FLOW)
.withHadoopJarStep(stepFactory.newEnableDebuggingStep());
StepConfig installHive = new StepConfig()
.withName("Install Hive")
.withActionOnFailure(TERMINATE_JOB_FLOW)
.withHadoopJarStep(stepFactory.newInstallHiveStep());
StepConfig runScript = new StepConfig()
.withName("Run Script")
.withActionOnFailure(CONTINUE)
.withHadoopJarStep(stepFactory.newRunHiveScriptStep("s3://dummy/dummy.hive"));
List<String> jarArgs = Arrays.asList(inputFilePath, outputDirectory);
HadoopJarStepConfig jarCfg= new HadoopJarStepConfig()
.withJar(jarPath)
.withArgs(jarArgs);
StepConfig runJar = new StepConfig()
.withName(jarName)
.withActionOnFailure(TERMINATE_JOB_FLOW)
.withHadoopJarStep(jarCfg);
JobFlowInstancesConfig instanceCfg = new JobFlowInstancesConfig()
.withKeepJobFlowAliveWhenNoSteps(false)
.withTerminationProtected(true)
.withInstanceCount(3)
.withMasterInstanceType(InstanceType.C1Medium.toString())
.withSlaveInstanceType(InstanceType.C1Medium.toString())
.withHadoopVersion("2.4.0");
List<StepConfig> steps = Arrays.asList(enableDebugging, installHive, runScript, runJar);
RunJobFlowRequest request = new RunJobFlowRequest()
.withName("My EMR Job Flow")
.withAmiVersion("3.3.2")
.withInstances(instanceCfg)
.withLogUri(logPath);
.withSteps(steps);
RunJobFlowResult result = emr.runJobFlow(request);
// saying DescribeJobFlows is deprecated
// DescribeJobFlowsResult jobFlowDescResult = emr.DescribeJobFlows(DescribeJobFlowsRequest describeJobFlowsRequest);
}
}