0
我正在嘗試讀取並分析 Hadoop 中 MapReduce 作業的最終輸出。以下是我的「作業」(Job)文件中的部分代碼。我想使用 FileSystem(Hadoop API)來讀取輸出文件,但是我不確定應該把以粗體標出(夾在兩對星號之間)的代碼放在哪裏。如果我把它放在 System.exit 之後,恐怕這段代碼會被跳過而不會執行。Java:讀取 Hadoop Reducer 的輸出文件
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length != 3) {
System.err.println("Usage: format is <in> <out> <keyword>");
System.exit(2);
}
**Path distCache = new Path("/");
String fileSys = conf.get("fs.default.name");
HashMap<String, Integer> jobCountMap = new HashMap<String, Integer>();**
conf.set("jobTest", otherArgs[2]);
Job job = new Job(conf, "job count");
job.setJarByClass(JobResults.class);
job.setMapperClass(JobMapper.class);
job.setCombinerClass(JobReducer.class);
job.setReducerClass(JobReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
distCache = new Path(args[2]);
// FileSystem fs = distCache.getFileSystem(conf); // for Amazon AWS
if (fileSys.split(":")[0].trim().equalsIgnoreCase("s3n")) distCache = new Path("s3n:/" + distCache);
FileSystem fs = FileSystem.get(conf); // for local cluster
Path pathPattern = new Path(distCache, "part-r-[0-9]*");
FileStatus[] list = fs.globStatus(pathPattern);
for (FileStatus status : list)
{
// DistributedCache.addCacheFile(status.getPath().toUri(), conf);
try {
BufferedReader brr = new BufferedReader(new FileReader(status.getPath().toString()));
String line;
while ((line = brr.readLine()) != null)
{
String[] resultsCount = line.split("\\|");
jobCountMap.put(resultsCount[0], Integer.parseInt(resultsCount[1].trim()));
}
} catch (FileNotFoundException e)
{
e.printStackTrace();
} catch (IOException e)
{
e.printStackTrace();
}
}
System.out.println("the size of Hashmap is: " + jobCountMap.size());
System.exit(job.waitForCompletion(true) ? 0 : 1);
}