MapReduce Hadoop word length frequency not working

Referring to this page, I have run into a similar problem: I need to provide map and reduce methods that count the frequency of word lengths (1 to n). reference links I have tried the approach from the answer there, which gives the following implementation.

import java.io.IOException; 
import java.util.StringTokenizer; 

import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 

public class WordCount {

    // Mapper which implements the map() function
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
    //public static class TokenizerMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                //check whether word starts with a or b
                String wordToCheck = itr.nextToken();
                word.set(String.valueOf(wordToCheck.length()));
                context.write(word, one);
                //if (wordToCheck.startsWith("a")||wordToCheck.startsWith("b")){
                //    word.set(wordToCheck);
                //    context.write(word, one);
                //}
                //check for word length
                //if (wordToCheck.length() > 8) {
                //}
            }
        }
    }

    // Reducer which implements the reduce() function
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    // Driver class to specify the Mapper and Reducer
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

I get the following exception:

17/02/25 17:02:34 INFO mapreduce.Job: map 0% reduce 0% 
17/02/25 17:02:36 INFO mapreduce.Job: map 100% reduce 0% 
17/02/25 17:02:36 INFO mapreduce.Job: Task Id : attempt_1488013180963_0001_m_000000_2, Status : FAILED 
Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable 
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1069) 
    at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:712) 
    at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89) 
    at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112) 
    at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124) 
    at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145) 
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:784) 
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341) 
    at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at javax.security.auth.Subject.doAs(Subject.java:415) 
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1656) 
    at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) 

I developed this class in Eclipse Kepler and ran it as a jar file with Hadoop 2.6.3 from LXTerminal on Ubuntu. What is the problem? I also tried using IntWritable as suggested in that answer, but it failed in a similar way.

Answer


I'm not 100% sure, but when you use a file as input, the mapper should have keys of type LongWritable (corresponding to the line's position in the file) and values of type Text (the line of the file as text).

So a possible fix is to use

public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> { 
instead of

public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

Great, this answers the question. Thank you very much. –
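For reference, here is a minimal sketch (my own illustration, not code from the question or the answer) of what the corrected mapper could look like. With Object as the declared input key type, map(LongWritable key, ...) does not override the base Mapper.map(Object, ...), so the default identity map runs and emits the LongWritable input key; that would explain the "expected Text, received LongWritable" error and the Mapper.map(Mapper.java:124) frame in the stack trace. Declaring the key type as LongWritable, and adding @Override so the compiler verifies the signature, avoids this:

// Inside the WordCount class, replacing the original TokenizerMapper:
public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private final Text word = new Text();

    @Override  // compilation fails here if the signature does not match the base class
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            // emit the token's length as the key, e.g. ("5", 1) for "hello"
            word.set(String.valueOf(itr.nextToken().length()));
            context.write(word, one);
        }
    }
}

The reducer and driver from the question should then work unchanged, since the map output types (Text, IntWritable) stay the same.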