2015-01-02 47 views
0
package lab.dummy; 

import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapred.Mapper; 
import org.apache.hadoop.mapred.Reducer; 
import org.apache.hadoop.mapred.JobConf; 
import org.apache.hadoop.mapred.MapReduceBase; 
import org.apache.hadoop.mapred.OutputCollector; 
import org.apache.hadoop.mapred.Reporter; 
import org.apache.hadoop.mapred.KeyValueTextInputFormat; 
import org.apache.hadoop.mapred.TextOutputFormat; 
import org.apache.hadoop.mapred.JobClient; 
import org.apache.hadoop.mapred.FileInputFormat; 
import org.apache.hadoop.mapred.FileOutputFormat; 
import java.io.IOException; 
import java.util.Iterator; 
import java.util.StringTokenizer; 
import java.lang.*; 

public class PatCitedCount extends Configured implements Tool 
{ 
public static class MapClass extends MapReduceBase implements Mapper<Text, Text, IntWritable, IntWritable> 
{ 

    private final static IntWritable uno = new IntWritable(1); 

    private IntWritable citationCount= new IntWritable(); 

    public void map(Text key, Text value, 

    OutputCollector<IntWritable, IntWritable> output, 
    Reporter reporter) throws IOException 
    { 

    citationCount.set(Integer.parseInt(value.toString())); 

    output.collect(citationCount,uno); 
    } 
} 
public static class Reduce extends MapReduceBase 

implements Reducer<IntWritable, IntWritable, IntWritable, IntWritable> 
{ 
public void reduce(IntWritable key, Iterator<IntWritable> values, 
OutputCollector<IntWritable, IntWritable> output, Reporter reporter) 
throws IOException 
    { 

    int count= 0; 

    while(values.hasNext()) 
     { 
     count+=values.next().get(); 
     } 
    output.collect(key, new IntWritable(count)); 
     } 
} 
public int run(String[] args) throws Exception 
    { 

    Configuration conf = getConf(); 

    JobConf job = new JobConf(conf, PatCitedCount.class); 

    job.setJarByClass(getClass()); 

    Path in = new Path(args[0]); 

    Path out = new Path(args[1]); 

    FileInputFormat.setInputPaths(job, in); 

    FileOutputFormat.setOutputPath(job, out); 

    job.setJobName("PatCitedCount"); 

    job.setMapperClass(MapClass.class); 

    job.setReducerClass(Reduce.class); 

    job.setInputFormat(KeyValueTextInputFormat.class); 

    job.setOutputFormat(TextOutputFormat.class); 

    job.setOutputKeyClass(IntWritable.class); 

    job.setOutputValueClass(IntWritable.class); 

    JobClient.runJob(job); 

    return 0; 

     } 

    public static void main(String[] args) throws Exception 
{ 

int res= ToolRunner.run(new Configuration(), new PatCitedCount(), args); 

System.exit(res); 
} 
} 

我收到以下錯誤:錯誤的MapReduce(Hadoop的)計劃 - citationHistogram

java.lang.NumberFormatException: For input string: "" 
-at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) 
-at java.lang.Integer.parseInt(Integer.java:504) 
-at java.lang.Integer.parseInt(Integer.java:527) 
-at lab.dummy.PatCitedCount$MapClass.map(PatCitedCount.java:60) 
-at lab.dummy.PatCitedCount$MapClass.map(PatCitedCount.java:1) 
-at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) 
-at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:450) 
-at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) 
-at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168) 
-at java.security.AccessController.doPrivileged(Native Method) 
-at javax.security.auth.Subject.doAs(Subject.java:415) 
-at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614) 
-at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163) 
+0

檢查你的輸入文件,值是「」,你試圖將空字符串轉換爲整數。 – SMA

+0

謝謝...我已經徹底檢查過輸入的txt文件,並發現在雙引號「」內有一些值。我刪除了那些和程序成功完成。 – gbs74

+0

我已經添加了如下答案,以便將來可以幫助其他人面對同樣的問題。請檢查一下,看看是否有幫助,並通過接受答案來解決這個問題,如果它有幫助。 – SMA

回答

0

你有,你的映射器從HDFS讀取文件中的一些空值。您試圖將該空值轉換爲無效的數字,因此您將得到NumberFormatException。

您需要檢查並擺脫那些線或在你的映射器,你將需要手動檢查類似:

try { 
    citationCount.set(Integer.parseInt(value.toString())); 
    output.collect(citationCount,uno); 
} catch (NumberFormatException nfe) { 
    //handle exception if you want. 
} 
0

使用的最後一個工作的(citation_count)導致輸入的文件,而不是「cite75_99.txt」。一切都會好起來的。 :D