2016-12-30 65 views
1

我正在使用Cloudera來實現mapreduce作業。我的輸入是JSON,看起來像這樣:Reducer中的MapReduce值始終爲1

{"reviewerID": "A2PUSR7ROG0Z6T", "asin": "9742356831", "reviewerName": "Terry Bisgrove \"Mr.E.Man\"", "helpful": [2, 2], "reviewText": "I like other styles of Mae Ploy curry paste, but the green just doesn't work for me. Overwhelming garlic, no heat, and very bland. I would not purchase this product again.", "overall": 3.0, "summary": "OK Product", "unixReviewTime": 1344297600, "reviewTime": "08 7, 2012"} 
{"reviewerID": "A2ANBEX40KLY4O", "asin": "9742356831", "reviewerName": "TrishS \"TrishS\"", "helpful": [3, 4], "reviewText": "I have both the red and green curry paste. The green is milder. I use both of them in variety of dishes and often spice up soups and stews that need a little zing. It is so convient to have them in the frig.", "overall": 5.0, "summary": "Tasty and fast", "unixReviewTime": 1310601600, "reviewTime": "07 14, 2011"} 
{"reviewerID": "A1C8NAHYR6Z10F", "asin": "B00004S1C5", "reviewerName": "A. Horikawa", "helpful": [1, 2], "reviewText": "These dyes create awesome colors for kids crafts. I have used them to make finger paint, paint, play dough, and salt dough.Another reviewer stated that they are not natural - this is CORRECT. They are definitely artificial dyes. I tried making my own dyes, and when that fell through, these worked great in a pinch. You only need a couple drops for really vibrant color. And they are pretty easy to clean - don't stain after they've been made into whatever craft.Good product for the price!", "overall": 5.0, "summary": "Great for kids crafts!", "unixReviewTime": 1344297600, "reviewTime": "08 7, 2012"} 
{"reviewerID": "A14YSMLYLJEMET", "asin": "B00004S1C5", "reviewerName": "Amazon Customer", "helpful": [8, 11], "reviewText": "This product is no where near natural/organic-I only wish I had seen the other reviews before purchasing! It contains all the things I did not want-which is why I was looking for a natural alternative. They need to have an ingredient list on here to avoid this...I am "returning" item. I am trying to avoid my children's exposure to yellow 5, red 40 and so on...I do not understand how they can still make these things knowing what they can cause. This may be fine for someone that doesn't read labels or care what their kids eat-but not for my family.", "overall": 1.0, "summary": "Not natural/organic at all", "unixReviewTime": 1364515200, "reviewTime": "03 29, 2013"} 
... 

我的映射器從「ASIN」,這JSON的「reviewText」選擇值:

import java.io.IOException; 
import java.util.StringTokenizer; 

import org.apache.hadoop.io.DoubleWritable; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.json.JSONObject; 

public class SentimentMapper extends Mapper<LongWritable, Text, Text, Text> { 

    @Override 
    public void map(LongWritable key, Text value, Context context) 
     throws IOException, InterruptedException { 

     JSONObject obj = new JSONObject(value.toString()); 
     context.write(new Text(obj.getString("asin")), new Text(obj.getString("reviewText"))); 

    } 

} 

最後,我減速經過的所有值並寫入值列表的大小爲每個鍵:

import java.io.IOException; 
import java.util.ArrayList; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Reducer; 

public class SentimentReducer extends Reducer<Text, Text, Text, Text> { 

    @Override 
    public void reduce(Text key, Iterable<Text> values, Context context) 
     throws IOException, InterruptedException { 

     ArrayList<String> list = new ArrayList<String>(); 
     for(Text val : values) { 
      list.add(new String(val.toString())); 
     } 

     context.write(key, new Text(String.valueOf(list.size()))); 

    } 
} 

不幸的是我結束了這樣的結果:

616719923X 1 
9742356831 1 
B00004S1C5 1 
B0000531B7 1 
B00005344V 1 
B0000537AF 1 
B00005C2M2 1 
B00006IUTN 1 
B0000CCZYY 1 
B0000CD06J 1 
B0000CDBQN 1 
B0000CDEPD 1 
B0000CETGM 1 
B0000CFLCT 1 
B0000CFLIL 1 

這意味着所有鍵的大小始終爲1.正如您在我的輸入json中看到的,對於某些鍵(例如, B00004S1C5),應該有多個值。有誰能幫我解決這個問題嗎?

更新:這裏是要求驅動程序類:如果

import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.DoubleWritable; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.conf.Configuration; 

public class SentimentDriver { 

    public static void main(String[] args) throws Exception { 

    /* 
    * Validate that two arguments were passed from the command line. 
    */ 
    if (args.length != 2) { 
     System.out.printf("Usage: StubDriver <input dir> <output dir>\n"); 
     System.exit(-1); 
    } 

    /* 
    * Instantiate a Job object for your job's configuration. 
    */ 
    Configuration conf = new Configuration(); 
    Job job = Job.getInstance(conf, "job_13"); 

    /* 
    * Specify an easily-decipherable name for the job. 
    * This job name will appear in reports and logs. 
    */ 
    job.setJobName("job_13"); 

    job.setJarByClass(SentimentDriver.class); 
    job.setMapperClass(SentimentMapper.class); 
    job.setCombinerClass(SentimentReducer.class); 
    job.setReducerClass(SentimentReducer.class); 
    job.setOutputKeyClass(Text.class); 
    job.setOutputValueClass(Text.class); 
    FileInputFormat.addInputPath(job, new Path(args[0])); 
    FileOutputFormat.setOutputPath(job, new Path(args[1])); 

    /* 
    * Start the MapReduce job and wait for it to finish. 
    * If it finishes successfully, return 0. If not, return 1. 
    */ 
    boolean success = job.waitForCompletion(true); 
    System.exit(success ? 0 : 1); 
    } 
} 

不知道這是相關的,但我出口它作爲一個可運行的JAR文件,並在命令行中調用它。

+0

郵政全映射器和減速機類。 –

+0

發佈驅動程序類的詳細信息.... –

回答

1

更新: 你並不需要在合您的程序,請在驅動程序類刪除或註釋組合,並且應該解決您的程序....

組合將輸入到減速機:

9742356831 ----- 2 
B00004S1C5 ----- 2 

因此減速器輸出:

9742356831 ----- 1 
B00004S1C5 ----- 1 

我測試代碼帶出組合,給了我預期的結果,但我會重新寫你的程序爲:


輸出:

9742356831 2 
B00004S1C5 2 

public static class jsonDataMapper extends Mapper<LongWritable, Text, Text, IntWritable> { 

     public void map(LongWritable key, Text value, Context context) 
       throws IOException, InterruptedException { 

      JSONObject obj; 
      try { 
       obj = new JSONObject(value.toString()); 
       //context.write(new Text(obj.getString("asin")), new Text(obj.getString("reviewText"))); 
       context.write(new Text(obj.getString("asin")), new IntWritable(1)); 
      } catch (JSONException e) {     
       e.printStackTrace(); 
      }  
      } 
    } 

    public static class jsonDataReducer extends Reducer<Text, IntWritable, Text, Text> { 

     public void reduce(Text key, Iterable<IntWritable> values, Context context) 
        throws IOException, InterruptedException { 
//    ArrayList<String> list = new ArrayList<String>(); 
//    for(Text val : values) { 
//     list.add(new String(val.toString())); 
//    } 
// 
//    context.write(key, new Text(String.valueOf(list.size()))); 

      int sum=0; 
      for(IntWritable i: values) 
        sum+=i.get(); 
      context.write(key, new Text(String.valueOf(sum))); 
      } 
    } 
+0

謝謝,你的重構程序也適用於我,但是因爲我需要在reducer中做一些字符串比較,所以我希望能讓我的原始程序工作。 – d8Qo3mH4x

+0

刪除/評論組合器配置將修復您的代碼! –

+0

查看更新的答案 –