
Simple MapReduce code fails with NullPointerException

I am trying to run this simple MapReduce job, which counts the appearances of each word in a text file (the code was given in class):

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    // Emits (word, 1) for every token in the input line.
    public static class MapClass extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Sums the counts collected for each word.
    public static class ReduceClass extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    // Routes keys to partitions by script: words whose first character is a
    // Hebrew letter go to partition 1, everything else to partition 0.
    public static class PartitionerClass extends Partitioner<Text, IntWritable> {
        @Override
        public int getPartition(Text key, IntWritable value, int numPartitions) {
            return getLanguage(key) % numPartitions;
        }

        private int getLanguage(Text key) {
            if (key.getLength() > 0) {
                int c = key.charAt(0);
                if (c >= 0x05D0 && c <= 0x05EA)   // Hebrew letters Alef..Tav
                    return 1;
            }
            return 0;
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        //conf.set("mapred.map.tasks","10");
        //conf.set("mapred.reduce.tasks","2");
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(MapClass.class);
        job.setPartitionerClass(PartitionerClass.class);
        job.setCombinerClass(ReduceClass.class);
        job.setReducerClass(ReduceClass.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

I get this NullPointerException and cannot tell where it is coming from. I have included hadoop-common, hadoop-mapreduce-client-core, and hadoop-hdfs in my pom.xml dependencies.

log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory). 
log4j:WARN Please initialize the log4j system properly. 
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. 
Exception in thread "main" java.lang.NullPointerException 
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012) 
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:404) 
    at org.apache.hadoop.util.Shell.run(Shell.java:379) 
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589) 
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:678) 
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:661) 
    at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:639) 
    at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:435) 
    at org.apache.hadoop.fs.FilterFileSystem.mkdirs(FilterFileSystem.java:277) 
    at org.apache.hadoop.mapreduce.JobSubmissionFiles.getStagingDir(JobSubmissionFiles.java:125) 
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:344) 
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268) 
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at javax.security.auth.Subject.doAs(Subject.java:422) 
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491) 
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265) 
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286) 
    at WordCount.main(WordCount.java:74) 

Answers


Look, the only problem I can see is that you never set the number of reduce tasks, while your partitioner expects there to be two. You will be using the default of 1.

You can try setting it in your driver with:

job.setNumReduceTasks(2); 
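
For reference, a minimal sketch of the driver from the question with that call added; the exact position is a free choice, as long as it runs before the job is submitted:

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(MapClass.class);
    job.setPartitionerClass(PartitionerClass.class);
    job.setCombinerClass(ReduceClass.class);
    job.setReducerClass(ReduceClass.class);
    // Two reduce tasks, one per partition produced by PartitionerClass
    // (0 = non-Hebrew words, 1 = Hebrew words).
    job.setNumReduceTasks(2);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}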

Before running the code, we should first understand what we are asking for when we try to split the data into a certain number of partitions.

Partitioning the data requires the number of reducers to be set to the number of partitions being generated. (Since we do not know in advance how many distinct values will come out of the file, you can set the reducer count to a higher number and then use LazyOutputFormat so that files with 0 records are not generated; a short sketch follows below.)

By default the number of reducers is set to 1, so setting this value should help:

job.setNumReduceTasks(integer_value);
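
As mentioned above, here is a short sketch of the LazyOutputFormat idea. TextOutputFormat is assumed because the question's job never sets an output format explicitly (it is Hadoop's default); the reducer count of 10 is just an illustrative over-provisioned value:

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// With LazyOutputFormat, a reducer's output file is created only when the
// first record is actually written, so over-provisioned reduce tasks that
// receive no records do not leave empty part-r-NNNNN files behind.
job.setNumReduceTasks(10);   // deliberately higher than the expected partition count
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);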