2015-04-07 89 views
0

我已經安裝hadoop集羣環境(master & slave)。工作順利。在hadoop中運行不帶reducer的WordCount

我試過wordcount和grep使用(hadoop.example.jar)文件,也工作正常。

現在,我想修改(hadoop.example.jar),使其只運行 mapper 而不運行 reducer。有沒有辦法做到這一點?

我讀了一些文章,說必須調用 setNumReduceTasks(0) 把 reducer 的數量設置爲零,但我不知道在使用(hadoop.example.jar)文件的情況下該怎麼做。

回答

0

您無法更改hadoop.example.jar文件。

您需要創建自己的自定義代碼並將其導出爲jar文件。

修改後的單詞計數代碼應該是:

package org.myorg; 

import java.io.IOException; 
import java.util.*; 

import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.conf.*; 
import org.apache.hadoop.io.*; 
import org.apache.hadoop.mapreduce.*; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 

public class WordCount { 

public static class Map extends Mapper<LongWritable, Text, Text, IntWritable { 
private final static IntWritable one = new IntWritable(1); 
private Text word = new Text(); 

public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 
    String line = value.toString(); 
    StringTokenizer tokenizer = new StringTokenizer(line); 
    while (tokenizer.hasMoreTokens()) { 
     word.set(tokenizer.nextToken()); 
     context.write(word, one); 
    } 
} 
} 

public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> { 

public void reduce(Text key, Iterable<IntWritable> values, Context context) 
    throws IOException, InterruptedException { 
    int sum = 0; 
    for (IntWritable val : values) { 
     sum += val.get(); 
    } 
    context.write(key, new IntWritable(sum)); 
} 
} 

public static void main(String[] args) throws Exception { 
Configuration conf = new Configuration(); 
Job job = new Job(conf, "wordcount"); 

job.setOutputKeyClass(Text.class); 
job.setOutputValueClass(IntWritable.class); 

job.setMapperClass(Map.class); 
job.setReducerClass(Reduce.class); 
**job.setNumReduceTasks(0); ** 

job.setInputFormatClass(TextInputFormat.class); 
job.setOutputFormatClass(TextOutputFormat.class); 

FileInputFormat.addInputPath(job, new Path(args[0])); 
FileOutputFormat.setOutputPath(job, new Path(args[1])); 

job.waitForCompletion(true); 
} 

} 

The original source code