2016-04-02 51 views
1

我遇到以下 Hadoop IO 錯誤:map 輸出的鍵類型不匹配(Type mismatch in key from map):預期 org.apache.hadoop.io.Text,實際收到 RegexMatcher.CustomKey

java.lang.Exception: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received RegexMatcher.CustomKey 
    at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462) 
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522) 
Caused by: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received RegexMatcher.CustomKey 

首先,我為 MapReduce 定義了一個名為 CustomKey 的自定義數據類型:

/**
 * Composite Hadoop value holding a (userId, friendId) pair.
 *
 * <p>Serialization order is {@code userId} followed by {@code friendId};
 * {@link #write(DataOutput)} and {@link #readFields(DataInput)} must stay
 * symmetric or deserialized instances will be corrupted.
 *
 * <p>{@code equals}/{@code hashCode} are value-based so that equal pairs hash
 * identically regardless of which JVM produced them.
 *
 * <p>NOTE(review): Hadoop's default sort phase expects map output keys to
 * implement {@code WritableComparable} (or a comparator to be registered on
 * the job). This class only implements {@code Writable}; confirm how the job
 * that uses it as a key provides ordering.
 */
public class CustomKey implements Writable {

    public Text userId;
    public Text friendId;

    /** Creates an empty key; required so Hadoop can instantiate it reflectively. */
    public CustomKey() {
        this.userId = new Text();
        this.friendId = new Text();
    }

    /**
     * Creates a key for the given pair.
     *
     * @param userId   identifier stored in {@link #userId}
     * @param friendId identifier stored in {@link #friendId}
     */
    public CustomKey(String userId, String friendId) {
        this.userId = new Text(userId);
        this.friendId = new Text(friendId);
    }

    /**
     * Serializes both fields, {@code userId} first.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        userId.write(out);
        // The original wrote userId twice here, so friendId was never serialized.
        friendId.write(out);
    }

    /**
     * Restores both fields in the order written by {@link #write(DataOutput)}.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        userId.readFields(in);
        friendId.readFields(in);
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof CustomKey)) {
            return false;
        }
        CustomKey other = (CustomKey) obj;
        return userId.equals(other.userId) && friendId.equals(other.friendId);
    }

    @Override
    public int hashCode() {
        return 31 * userId.hashCode() + friendId.hashCode();
    }

    @Override
    public String toString() {
        return userId + " " + friendId;
    }
}

然後,我創建了一個 Mapper:SingleClassv2LogMapper

/**
 * Mapper that emits one (CustomKey, message count) pair for every input line
 * whose tag equals {@code RegularExpressionBundle.updateMultipleMessageStatus}.
 * Lines with no tag or a different tag produce no output.
 */
public static class SingleClassv2LogMapper extends Mapper<Object, Text, CustomKey, IntWritable> {

    // Reused for every record; context.write() serializes the value immediately.
    private final IntWritable messageCount = new IntWritable();

    /**
     * @param key     input key, not used
     * @param value   one line of the input log
     * @param context sink for the emitted pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Convert once; the original re-ran value.toString() for each parser call.
        String currentLine = value.toString();
        String tag = RingIdLogParser.parseHashTag(currentLine);
        // Parsed before the tag check, exactly as before, so the call order of the
        // (opaque) RingIdLogParser helpers is unchanged.
        String body = RingIdLogParser.parseBody(currentLine);

        if (tag == null || !tag.equals(RegularExpressionBundle.updateMultipleMessageStatus)) {
            return;
        }

        CustomKey customKey = RingIdLogParser.parseUserFrinedInfo(body);
        messageCount.set(RingIdLogParser.getMessageCount(body));
        context.write(customKey, messageCount);
    }

}

以及 Reducer:

/**
 * Reducer that adds up all counts received for one CustomKey and writes the
 * total under the text key "userId friendId".
 */
public static class SingleClassv2LogReducer extends Reducer<CustomKey, IntWritable, Text, IntWritable> {

    // Not used by reduce().
    TextArrayWritable sum = new TextArrayWritable();

    /**
     * @param key     the pair whose counts are being aggregated
     * @param values  every count emitted for {@code key}
     * @param context sink for the (pair text, total) record
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(CustomKey key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable count : values) {
            total += count.get();
        }

        String user = key.userId.toString();
        String friend = key.friendId.toString();
        Text pair = new Text(user + " " + friend);
        context.write(pair, new IntWritable(total));
    }

}

我現在應該怎麼辦?請大家幫幫我。

驅動程序(Driver)相關的代碼如下:

// Create the job.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "Regex Matcher");
job.setJarByClass(SingleClassLogDriverv2.class);

// Processing stages: mapper -> combiner -> reducer.
job.setMapperClass(SingleClassv2LogMapper.class);
job.setCombinerClass(SingleClassv2LogCombiner.class);
job.setReducerClass(SingleClassv2LogReducer.class);

// Intermediate (map output) types differ from the final output types,
// so both pairs are declared explicitly.
job.setMapOutputKeyClass(CustomKey.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

// Input and output locations come from the command line.
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

// Block until the job finishes; exit status 0 on success, 1 on failure.
boolean succeeded = job.waitForCompletion(true);
System.exit(succeeded ? 0 : 1);
+0

你應該分享你的驅動程序 –

回答

0

在使用 Eclipse 創建 JAR 並運行類似的 map-reduce 程序時,我也遇到過類似的問題。我的程序與傳統的 WordCount 程序類似,不同之處在於它要把約 39 萬個數字以文字形式輸出。這些數字分佈在我的 12 個輸入文件中,其中也包含重複的數字。當時我得到的錯誤是:

java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable 

糾正這個問題之後,我得到了如下的匯總結果:

716900482 Seventy One Crore Sixty Nine Lac Four Hundred Eighty Two only. 

我用 Maven 構建了一個把數字轉換成文字的工具,並把該 JAR 顯式加入到了我的項目中。

Project - CountInWords View

下面是我的程序,它類似於 WordCount 程序,但目的不同:

package com.whodesire.count; 

import java.io.IOException; 
import java.util.StringTokenizer; 

import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.util.GenericOptionsParser; 

import com.whodesire.numstats.AmtInWords; 

public class CountInWords { 


    public static class NumberTokenizerMapper 
        extends Mapper <Object, Text, LongWritable, Text> { 

     private static final Text theOne = new Text("1"); 
     private LongWritable longWord = new LongWritable(); 

     public void map(Object key, Text value, Context context) { 

      try{ 
       StringTokenizer itr = new StringTokenizer(value.toString()); 
       while (itr.hasMoreTokens()) { 
        longWord.set(Long.parseLong(itr.nextToken())); 
        context.write(longWord, theOne); 
       } 
      }catch(ClassCastException cce){ 
       System.out.println("ClassCastException raiseddd..."); 
       System.exit(0); 
      }catch(IOException | InterruptedException ioe){ 
       ioe.printStackTrace(); 
       System.out.println("IOException | InterruptedException raiseddd..."); 
       System.exit(0); 
      } 
     } 
    } 

    public static class ModeReducerCumInWordsCounter 
      extends Reducer <LongWritable, Text, LongWritable, Text>{ 
     private Text result = new Text(); 

     //This is the user defined reducer function which is invoked for each unique key 
     public void reduce(LongWritable key, Iterable<Text> values, 
       Context context) throws IOException, InterruptedException { 

      /*** Putting the key, which is a LongWritable value, 
         putting in AmtInWords constructor as String***/ 
      AmtInWords aiw = new AmtInWords(key.toString()); 
      result.set(aiw.getInWords()); 

      //Finally the word and counting is sent to Hadoop MR and thus to target 
      context.write(key, result); 
     } 
    } 

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { 

     /**** 
     *** all random numbers generated inside input files has been 
     *** generated using url https://andrew.hedges.name/experiments/random/ 
     ****/ 

     //Load the configuration files and add them to the the conf object 
     Configuration conf = new Configuration();  

     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); 

     Job job = new Job(conf, "CountInWords"); 

     //Specify the jar which contains the required classes for the job to run. 
     job.setJarByClass(CountInWords.class); 

     job.setMapperClass(NumberTokenizerMapper.class); 
     job.setCombinerClass(ModeReducerCumInWordsCounter.class); 
     job.setReducerClass(ModeReducerCumInWordsCounter.class); 

     //Set the output key and the value class for the entire job 
     job.setMapOutputKeyClass(LongWritable.class); 
     job.setMapOutputValueClass(Text.class); 

     //Set the Input (format and location) and similarly for the output also 
     FileInputFormat.addInputPath(job, new Path(otherArgs[0])); 
     FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); 

     //Setting the Results to Single Target File 
     job.setNumReduceTasks(1); 

     //Submit the job and wait for it to complete 
     System.exit(job.waitForCompletion(true) ? 0 : 1); 

    } 

} 

我知道這個回答來得太晚了,但希望它也能幫助其他人找到解決方法,謝謝。

相關問題