Hadoop - mapper not emitting anything

I am running the code below, and no output is being generated (the output folder and the reducer output file get created, of course, but they are empty). From the logs, I suspect the mapper is not emitting anything.

Code:

package com.telefonica.iot.tidoop.mrlib; 

import com.telefonica.iot.tidoop.mrlib.utils.Constants; 
import java.io.IOException; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 
import org.apache.log4j.Logger; 

public class Count extends Configured implements Tool { 

    private static final Logger LOGGER = Logger.getLogger(Count.class); 

    public static class UnitEmitter extends Mapper<Object, Text, Text, LongWritable> { 

     private final Text commonKey = new Text("common-key"); 

     @Override 
     public void map(Object key, Text value, Context context) throws IOException, InterruptedException { 
      context.write(commonKey, new LongWritable(1)); 
     } // map 

    } // UnitEmitter 

    public static class Adder extends Reducer<Text, LongWritable, Text, LongWritable> { 

     @Override 
     public void reduce(Text key, Iterable<LongWritable> values, Context context) 
      throws IOException, InterruptedException { 
      long sum = 0; 

      for (LongWritable value : values) { 
       sum += value.get(); 
      } // for 

      context.write(key, new LongWritable(sum)); 
     } // reduce 

    } // Adder 

    public static class AdderWithTag extends Reducer<Text, LongWritable, Text, LongWritable> { 

     private String tag; 

     @Override 
     public void setup(Context context) throws IOException, InterruptedException { 
      tag = context.getConfiguration().get(Constants.PARAM_TAG, ""); 
     } // setup 

     @Override 
     public void reduce(Text key, Iterable<LongWritable> values, Context context) 
      throws IOException, InterruptedException { 
      long sum = 0; 

      for (LongWritable value : values) { 
       sum += value.get(); 
      } // for 

      context.write(new Text(tag), new LongWritable(sum)); 
     } // reduce 

    } // AdderWithTag 

    public static void main(String[] args) throws Exception { 
     int res = ToolRunner.run(new Configuration(), new Filter(), args); // NB: this instantiates Filter, not Count 
     System.exit(res); 
    } // main 

    @Override 
    public int run(String[] args) throws Exception { 
     // check the number of arguments, show the usage if it is wrong 
     if (args.length != 3) { 
      showUsage(); 
      return -1; 
     } // if 

     // get the arguments 
     String input = args[0]; 
     String output = args[1]; 
     String tag = args[2]; 

     // create and configure a MapReduce job 
     Configuration conf = this.getConf(); 
     conf.set(Constants.PARAM_TAG, tag); 
     Job job = Job.getInstance(conf, "tidoop-mr-lib-count"); 
     job.setNumReduceTasks(1); 
     job.setJarByClass(Count.class); 
     job.setMapperClass(UnitEmitter.class); 
     job.setCombinerClass(Adder.class); 
     job.setReducerClass(AdderWithTag.class); 
     job.setMapOutputKeyClass(Text.class); 
     job.setMapOutputValueClass(LongWritable.class); 
     job.setOutputKeyClass(Text.class); 
     job.setOutputValueClass(LongWritable.class); 
     FileInputFormat.addInputPath(job, new Path(input)); 
     FileOutputFormat.setOutputPath(job, new Path(output)); 

     // run the MapReduce job 
     return job.waitForCompletion(true) ? 0 : 1; 
    } // run 

    private void showUsage() { 
     System.out.println("..."); 
    } // showUsage 

} // Count 

Command executed, and output logs:

$ hadoop jar target/tidoop-mr-lib-0.0.0-SNAPSHOT-jar-with-dependencies.jar com.telefonica.iot.tidoop.mrlib.Count -libjars target/tidoop-mr-lib-0.0.0-SNAPSHOT-jar-with-dependencies.jar tidoop/numbers tidoop/numbers_count onetag 
15/11/05 17:24:52 INFO input.FileInputFormat: Total input paths to process : 1 
15/11/05 17:24:52 WARN snappy.LoadSnappy: Snappy native library is available 
15/11/05 17:24:53 INFO util.NativeCodeLoader: Loaded the native-hadoop library 
15/11/05 17:24:53 INFO snappy.LoadSnappy: Snappy native library loaded 
15/11/05 17:24:53 INFO mapred.JobClient: Running job: job_201507101501_23002 
15/11/05 17:24:54 INFO mapred.JobClient: map 0% reduce 0% 
15/11/05 17:25:00 INFO mapred.JobClient: map 100% reduce 0% 
15/11/05 17:25:07 INFO mapred.JobClient: map 100% reduce 33% 
15/11/05 17:25:08 INFO mapred.JobClient: map 100% reduce 100% 
15/11/05 17:25:09 INFO mapred.JobClient: Job complete: job_201507101501_23002 
15/11/05 17:25:09 INFO mapred.JobClient: Counters: 25 
15/11/05 17:25:09 INFO mapred.JobClient: Job Counters 
15/11/05 17:25:09 INFO mapred.JobClient:  Launched reduce tasks=1 
15/11/05 17:25:09 INFO mapred.JobClient:  SLOTS_MILLIS_MAPS=5350 
15/11/05 17:25:09 INFO mapred.JobClient:  Total time spent by all reduces waiting after reserving slots (ms)=0 
15/11/05 17:25:09 INFO mapred.JobClient:  Total time spent by all maps waiting after reserving slots (ms)=0 
15/11/05 17:25:09 INFO mapred.JobClient:  Rack-local map tasks=1 
15/11/05 17:25:09 INFO mapred.JobClient:  Launched map tasks=1 
15/11/05 17:25:09 INFO mapred.JobClient:  SLOTS_MILLIS_REDUCES=8702 
15/11/05 17:25:09 INFO mapred.JobClient: FileSystemCounters 
15/11/05 17:25:09 INFO mapred.JobClient:  FILE_BYTES_READ=6 
15/11/05 17:25:09 INFO mapred.JobClient:  HDFS_BYTES_READ=1968928 
15/11/05 17:25:09 INFO mapred.JobClient:  FILE_BYTES_WRITTEN=108226 
15/11/05 17:25:09 INFO mapred.JobClient: Map-Reduce Framework 
15/11/05 17:25:09 INFO mapred.JobClient:  Map input records=598001 
15/11/05 17:25:09 INFO mapred.JobClient:  Reduce shuffle bytes=6 
15/11/05 17:25:09 INFO mapred.JobClient:  Spilled Records=0 
15/11/05 17:25:09 INFO mapred.JobClient:  Map output bytes=0 
15/11/05 17:25:09 INFO mapred.JobClient:  CPU time spent (ms)=2920 
15/11/05 17:25:09 INFO mapred.JobClient:  Total committed heap usage (bytes)=355663872 
15/11/05 17:25:09 INFO mapred.JobClient:  Combine input records=0 
15/11/05 17:25:09 INFO mapred.JobClient:  SPLIT_RAW_BYTES=124 
15/11/05 17:25:09 INFO mapred.JobClient:  Reduce input records=0 
15/11/05 17:25:09 INFO mapred.JobClient:  Reduce input groups=0 
15/11/05 17:25:09 INFO mapred.JobClient:  Combine output records=0 
15/11/05 17:25:09 INFO mapred.JobClient:  Physical memory (bytes) snapshot=328683520 
15/11/05 17:25:09 INFO mapred.JobClient:  Reduce output records=0 
15/11/05 17:25:09 INFO mapred.JobClient:  Virtual memory (bytes) snapshot=1466642432 
15/11/05 17:25:09 INFO mapred.JobClient:  Map output records=0 

Content of the output files:

$ hadoop fs -cat /user/frb/tidoop/numbers_count/part-r-00000 
$ hadoop fs -ls /user/frb/tidoop/numbers_count/ 
Found 3 items 
-rw-r--r-- 3 frb frb   0 2015-11-05 17:25 /user/frb/tidoop/numbers_count/_SUCCESS 
drwxr----- - frb frb   0 2015-11-05 17:24 /user/frb/tidoop/numbers_count/_logs 
-rw-r--r-- 3 frb frb   0 2015-11-05 17:25 /user/frb/tidoop/numbers_count/part-r-00000 

Any hints on what is happening?

What exactly is your requirement? From the code, it looks like your mapper only ever emits a single value. And you are adding an input path, but not actually using any data from it? – madhu

From the logs, your map output records are zero – madhu

The code is meant to count the number of lines of the files within the input folder. And yes, I have already noticed the mapper is not outputting anything. – frb
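As a side note (a hypothetical cross-check, not from the thread): the expected line count can be obtained directly from the shell, assuming the input sits under tidoop/numbers as in the command above, and it should match the Map input records counter (598001) reported in the logs:

$ hadoop fs -cat tidoop/numbers/* | wc -l 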

Answer

Strange. I tried your job using the stock Mapper class (the identity mapper).

If the map does not output anything, there must be something strange about your Hadoop installation or the job configuration.
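For reference, a minimal sketch of what such a test could look like (my reconstruction, not the answerer's actual code; it reuses the conf, input and output variables and the imports from the driver above). The stock Mapper is the identity mapper, and making the job map-only writes whatever the mapper emits straight to the output files. With TextInputFormat the identity mapper emits (LongWritable offset, Text line) pairs, so the output classes must match those types:

     // hypothetical test driver: identity mapper, no combiner, no reducer 
     Job job = Job.getInstance(conf, "identity-mapper-test"); 
     job.setJarByClass(Count.class); 
     job.setMapperClass(Mapper.class);          // the stock Mapper passes every pair through unchanged 
     job.setNumReduceTasks(0);                  // map-only: map output goes directly to HDFS 
     job.setOutputKeyClass(LongWritable.class); // TextInputFormat keys: byte offsets 
     job.setOutputValueClass(Text.class);       // TextInputFormat values: the lines themselves 
     FileInputFormat.addInputPath(job, new Path(input)); 
     FileOutputFormat.setOutputPath(job, new Path(output)); 
     return job.waitForCompletion(true) ? 0 : 1; 

If the test output is non-empty, the input data and the cluster are fine, and the problem lies in the job actually being submitted.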

The strange thing is that other MapReduce jobs work fine. So it is something with this particular code... and I cannot figure out what the problem is :S – frb

What are you trying to do with the mapper code? You always emit the same key and the same value regardless of the input, and you have replaced the LongWritable offset key with Object. –
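For illustration, this is how the mapper would conventionally be declared with TextInputFormat, whose keys are LongWritable byte offsets (a sketch of mine; note that declaring the key as Object also works, since the framework passes the concrete type at runtime, so the signature alone does not explain the empty output):

    public static class UnitEmitter extends Mapper<LongWritable, Text, Text, LongWritable> { 

     private final Text commonKey = new Text("common-key"); 
     private final LongWritable one = new LongWritable(1); // reused instead of allocated per record 

     @Override 
     public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 
      // key is the byte offset of the line and value is the line itself; 
      // both are ignored: one (common-key, 1) pair is emitted per input line 
      context.write(commonKey, one); 
     } // map 

    } // UnitEmitter 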