2014-09-21 23 views
0

我使用舊 API 編寫了一個 MapReduce 作業,用於在存儲於 HDFS 的輸入文件中查找搜索字符串(通過命令行參數傳遞)出現的次數。(標題:MapReduce 舊 API —— 將命令行參數傳遞給 Mapper)

下面是我的Driver類 -

/**
 * Driver for the old-API (org.apache.hadoop.mapred) string-search job.
 *
 * Usage: StringSearchDriver &lt;input dir&gt; &lt;output dir&gt; &lt;search word&gt;
 *
 * NOTE(review): the ClassCastException in the question was caused by
 * auto-importing javax.xml.soap.Text instead of org.apache.hadoop.io.Text.
 * Make sure the import block references the Hadoop Text class.
 */
public class StringSearchDriver
{

    public static void main(String[] args) throws IOException
    {
        // Fail fast with a usage message instead of an
        // ArrayIndexOutOfBoundsException when args[2] is missing.
        if (args.length != 3)
        {
            System.err.println(
                "Usage: StringSearchDriver <input dir> <output dir> <search word>");
            System.exit(-1);
        }
        JobConf jc = new JobConf(StringSearchDriver.class);
        // Hand the search word to the mappers through the job configuration;
        // StringSearchMap reads it back in configure().
        jc.set("SearchWord", args[2]);
        jc.setJobName("String Search");
        FileInputFormat.addInputPath(jc, new Path(args[0]));
        FileOutputFormat.setOutputPath(jc, new Path(args[1]));
        jc.setMapperClass(StringSearchMap.class);
        jc.setReducerClass(StringSearchReduce.class);
        jc.setOutputKeyClass(Text.class);      // must be org.apache.hadoop.io.Text
        jc.setOutputValueClass(IntWritable.class);
        JobClient.runJob(jc);
    }
}

下面是我的映射類 -

/**
 * Old-API mapper that emits (word, 1) for every whitespace-separated token
 * equal (case-insensitively) to the configured search word.
 */
public class StringSearchMap extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, IntWritable>
{
    // Search word handed over by the driver via jc.set("SearchWord", ...).
    private String searchWord;

    @Override
    public void configure(JobConf jc)
    {
        searchWord = jc.get("SearchWord");
    }

    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> out, Reporter reporter)
            throws IOException
    {
        // BUG FIX: the original split("") broke the line into SINGLE
        // CHARACTERS, so a multi-character search word could never match and
        // the job produced empty output. Split on whitespace instead.
        String[] tokens = value.toString().split("\\s+");

        for (String word : tokens)
        {
            if (word.equalsIgnoreCase(searchWord))
            {
                out.collect(new Text(word), new IntWritable(1));
            }
        }
    }
}

在運行作業(通過命令行傳入字符串「HI」)時,我得到如下錯誤——

14/09/21 22:35:41 INFO mapred.JobClient: Task Id : attempt_201409212134_0005_m_000001_2, Status : FAILED 
java.lang.ClassCastException: interface javax.xml.soap.Text 
    at java.lang.Class.asSubclass(Class.java:3129) 
    at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:795) 
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:964) 
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:422) 
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:366) 
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at javax.security.auth.Subject.doAs(Subject.java:416) 
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190) 
    at org.apache.hadoop.mapred.Child.main(Child.java:249) 

請建議。

+0

'ClassCastException:interface javax.xml.soap.Text'看起來像在源代碼中自動導入了錯誤的Text類? – jkovacs 2014-09-21 21:49:32

回答

1

你自動導入了錯誤的類:你導入的是 javax.xml.soap.Text,而應該導入 org.apache.hadoop.io.Text。

你可以在這篇 blog 中找到類似錯誤導入的示例。

另外,建議最好採用新 API。

編輯

我使用新的API

import java.io.IOException; 
import java.util.StringTokenizer; 

import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.fs.FileSystem; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 

/**
 * New-API (org.apache.hadoop.mapreduce) driver that counts occurrences of a
 * search word passed on the command line.
 *
 * Usage: StringSearchDriver &lt;input dir&gt; &lt;output dir&gt; &lt;search word&gt;
 *
 * @author Unmesha sreeveni
 * @Date 23 sep 2014
 */
public class StringSearchDriver extends Configured implements Tool {

    /** Mapper: emits (token, 1) for every token equal to the search word. */
    public static class Map extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // The driver stored the search word under the "word" key.
            String searchString = context.getConfiguration().get("word");
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                // Exact (case-sensitive) match, as in the original answer;
                // switch to equalsIgnoreCase for a case-insensitive search.
                if (token.equals(searchString)) {
                    word.set(token);
                    context.write(word, one);
                }
            }
        }
    }

    /** Reducer: sums the 1s emitted for each matching word. */
    public static class Reduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int res = ToolRunner.run(conf, new StringSearchDriver(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 3) {
            System.out.printf(
                "Usage: Search String <input dir> <output dir> <search word> \n");
            // Return the error code instead of calling System.exit() so
            // ToolRunner's exit-code contract is honored.
            return -1;
        }

        String source = args[0];
        String dest = args[1];
        String searchWord = args[2];

        // BUG FIX: use the Configuration injected by ToolRunner (getConf())
        // rather than constructing a fresh one, so generic options such as
        // -D key=value passed on the command line are not silently discarded.
        Configuration conf = getConf();
        conf.set("word", searchWord);

        // Job.getInstance replaces the deprecated new Job(conf, name) ctor.
        Job job = Job.getInstance(conf, "Search String");
        job.setJarByClass(StringSearchDriver.class);

        // Delete a stale output directory so reruns do not fail with
        // "output directory already exists".
        FileSystem fs = FileSystem.get(conf);
        Path in = new Path(source);
        Path out = new Path(dest);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }
}

這工作。

+0

謝謝SreeVeni。我沒有意識到錯誤的軟件包在自動導入時已導入。我更正了軟件包並且成功運行,沒有發生任何錯誤。現在,問題是我沒有得到期望的輸出。我通過「hi」作爲搜索詞,我的HDFS示例文件包含這個詞。運行作業後生成的輸出文件仍爲空。邏輯看起來不正確? – Hadooper 2014-09-22 19:13:54

+0

請參閱我的編輯。希望這可以幫助你 – 2014-09-23 05:01:39

+0

這對於新的API來說是很好的。但我應該使用舊的API對其進行編碼。 – Hadooper 2014-09-23 07:11:33

0

對於 Text,需要的是 Hadoop 的 org.apache.hadoop.io 包。請檢查你的導入:

import java.io.IOException; 
import java.util.*; 

import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.conf.*; 
import org.apache.hadoop.io.*; 
import org.apache.hadoop.mapred.*; 
import org.apache.hadoop.util.*;