2015-11-23 27 views
0

我正在嘗試編寫一個 MapReduce 程序,它需要讀取兩個輸入文件:一個包含職業的詳細信息和所在的州,另一個包含職業及其就業增長百分比的詳細信息。我使用兩個 mapper 並把它們的輸出合併起來,然後在 reducer 中找出增長百分比超過 30 的職業。我期望的輸出是職業名稱,後面跟著州的列表。但是,我只得到了職業名稱,而沒有得到州。我在下面貼出了代碼和示例輸入文件,請指出我哪裡做錯了。謝謝。(請注意,我提供的輸入文件樣本只是實際文件的一小部分。)無法獲得這個簡單的 Hadoop MapReduce 程序所需的輸出。

package com; 

import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

//import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class GrowthState extends Configured implements Tool { 

    //Parser for Mapper1 
    public static class StateParser{ 

     private String State,Occupation; 

     public void parse(String record){ 

      String str[] = record.split("\t"); 
      if(str[4].length() != 0) 
       setOccupation(str[4]); 
      else 
       setOccupation("Default Occupation"); 

      if(str[2].length() != 0) 
       setState(str[2]); 
      else 
       setState("Default State"); 

     } 

     public void parse(Text record){ 
      parse(record.toString()); 
     } 

     public String getState() { 
      return State; 
     } 

     public void setState(String state) { 
      State = state; 
     } 

     public String getOccupation() { 
      return Occupation; 
     } 

     public void setOccupation(String occupation) { 
      Occupation = occupation; 
     } 
    } 

    //Mapper1 - Processing state.txt 
    public static class GrowthMap1 extends Mapper<LongWritable,Text,Text,Text>{ 
     StateParser sp = new StateParser(); 
     Text outkey = new Text(); 
     Text outvalue = new Text(); 
     public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{ 
      sp.parse(value); 
      outkey.set(sp.getOccupation()); 
      outvalue.set("m1\t"+sp.getState()); 
      context.write(outkey,outvalue); 
      //String str[] = value.toString().split("\t"); 
      //context.write(new Text(str[2]), new Text("m1\t"+str[4])); 
     } 
    } 

    public static class ProjParser{ 
     private String Occupation,percent; 

     public void parse(String record){ 
      String str[] = record.split("\t"); 
      if(str[0].length() != 0) 
       setOccupation(str[0]); 
      else 
       setOccupation("Default Occupation"); 

      if(str[5].length() != 0) 
       setPercent(str[5]); 
      else 
       setPercent("0"); 
     } 

     public void parse(Text record){ 
      parse(record.toString()); 
     } 

     public String getOccupation() { 
      return Occupation; 
     } 

     public void setOccupation(String occupation) { 
      Occupation = occupation; 
     } 

     public String getPercent() { 
      return percent; 
     } 

     public void setPercent(String percent) { 
      this.percent = percent; 
     } 
    } 

    //Mapper2 - processing projection.txt 
    public static class GrowthMap2 extends Mapper<LongWritable,Text,Text,Text> { 
     ProjParser pp = new ProjParser(); 
     Text outkey = new Text(); 
     Text outvalue = new Text(); 
     public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{ 
      pp.parse(value); 
      outkey.set(pp.getOccupation()); 
      outvalue.set("m2\t"+pp.getPercent()); 
      context.write(outkey, outvalue); 
      //String str[] = value.toString().split("\t"); 
      //context.write(new Text(str[0]), new Text("m2\t"+str[5])); 
     } 
    } 

    //Reducer 
    public static class GrowthReduce extends Reducer<Text,Text,Text,Text>{ 
     Text outvalue = new Text(); 
     public void reduce(Text key,Iterable<Text> value,Context context)throws IOException, InterruptedException{ 
      float cent = 0; 
      String state = ""; 
      for(Text values : value){ 
       String[] str = values.toString().split("\t"); 
       if(str[0].equals("m1")){ 
         state = state + " " + str[1]; 
       }else if(str[0].equals("m2")){ 
        try{ 
         cent = Float.parseFloat(str[1]); 
        }catch(Exception nf){ 
         cent = 0; 
        } 
       } 
      } 
      if(cent>=30){ 
       outvalue.set(state); 
       context.write(key,outvalue); 
      } 
     } 
    } 

    //Driver 

    @Override 
    public int run(String[] args) throws Exception { 

     Job job = new Job(getConf(), "States of Growth"); 

     job.setJarByClass(GrowthState.class); 
     job.setReducerClass(GrowthReduce.class); 

     MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, GrowthMap1.class); 
     MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, GrowthMap2.class); 

     FileOutputFormat.setOutputPath(job,new Path(args[2])); 

     job.setOutputKeyClass(Text.class); 
     job.setOutputValueClass(Text.class); 

     return job.waitForCompletion(true)?0:1; 
    } 

    public static void main(String args[]) throws Exception{ 

     int exitcode = ToolRunner.run(new GrowthState(), args); 
     System.exit(exitcode); 
    } 

} 

樣本輸入文件1:

01 AL Alabama 00-0000 All Occupations total "1,857,530" 0.4 1000.000 1.00 19.66 "40,890" 0.5 8.30 9.72 14.83 23.95 36.04 "17,260" "20,220" "30,850" "49,810" "74,950"   
01 AL Alabama 11-0000 Management Occupations major "67,500" 1.1 36.338 0.73 51.48 "107,080" 0.6 24.54 33.09 44.98 62.09 88.43 "51,050" "68,830" "93,550" "129,150" "183,940"  
01 AL Alabama 11-1011 Chief Executives detailed "1,080" 4.8 0.580 0.32 97.67 "203,150" 2.5 52.05 67.58 # # # "108,270" "140,570" # # #  
01 AL Alabama 11-1021 General and Operations Managers detailed "26,480" 1.5 14.258 0.94 58.00 "120,640" 0.9 27.65 35.76 49.00 71.44 # "57,510" "74,390" "101,930" "148,590" #  
01 AL Alabama 11-1031 Legislators detailed "1,470" 8.7 0.790 1.94 * "21,920" 3.5 * * * * * "16,120" "17,000" "18,450" "20,670" "32,820" TRUE  
01 AL Alabama 11-2011 Advertising and Promotions Managers detailed 80 16.3 0.042 0.19 44.88 "93,350" 9.5 21.59 30.28 38.92 52.22 74.07 "44,900" "62,980" "80,960" "108,620" "154,060"  
01 AL Alabama 11-2021 Marketing Managers detailed 610 11.5 0.329 0.24 61.28 "127,460" 7.4 31.96 37.63 53.39 73.17 # "66,480" "78,280" "111,040" "152,200" #  
01 AL Alabama 11-2022 Sales Managers detailed "2,330" 5.4 1.253 0.47 54.63 "113,620" 2.2 27.28 35.42 48.92 67.62 89.42 "56,740" "73,660" "101,750" "140,640" "186,000"  
05 AR Arkansas 43-4161 "Human Resources Assistants, Except Payroll and Timekeeping" detailed "1,470" 6.6 1.265 1.26 17.25 "35,870" 1.5 11.09 13.54 17.11 20.74 23.30 "23,060" "28,170" "35,590" "43,150" "48,450"   
05 AR Arkansas 43-4171 Receptionists and Information Clerks detailed "7,080" 3.3 6.109 0.84 11.26 "23,420" 0.8 8.14 9.19 10.87 13.09 14.94 "16,940" "19,110" "22,600" "27,230" "31,070"   
05 AR Arkansas 43-4181 Reservation and Transportation Ticket Agents and Travel Clerks detailed 590 23.6 0.510 0.50 12.61 "26,220" 6.1 8.99 9.81 10.88 14.82 20.59 "18,710" "20,400" "22,630" "30,830" "42,830"   
05 AR Arkansas 43-4199 "Information and Record Clerks, All Other" detailed 920 4.7 0.795 0.61 18.45 "38,370" 1.8 13.59 15.33 18.49 21.35 23.86 "28,270" "31,880" "38,470" "44,410" "49,630"   
05 AR Arkansas 43-5011 Cargo and Freight Agents detailed 480 16.5 0.418 0.73 * * * * * * * * * * * * *  
05 AR Arkansas 43-5021 Couriers and Messengers detailed 510 12.4 0.444 0.84 11.92 "24,790" 2.1 8.73 9.91 11.26 13.49 16.03 "18,160" "20,620" "23,420" "28,060" "33,350"  

樣本輸入文件2:

Management occupations 11-0000 "8,861.5" "9,498.0" 636.6 7.2 22.2 "2,586.7" "$93,910" — — — 
Top executives 11-1000 "2,361.5" "2,626.8" 265.2 11.2 3.3 717.4 "$99,550" — — — 
Chief executives 11-1011 330.5 347.9 17.4 5.3 17.7 87.8 "$168,140" Bachelor's degree 5 years or more None 
General and operations managers 11-1021 "1,972.7" "2,216.8" 244.1 12.4 1.0 613.1 "$95,440" Bachelor's degree Less than 5 years None 
Legislators 11-1031 58.4 62.1 3.7 6.4 — 16.5 "$19,780" Bachelor's degree Less than 5 years None 
"Advertising, marketing, promotions, public relations, and sales managers" 11-2000 637.4 700.5 63.1 9.9 3.4 203.3 "$107,950" — — — 
Advertising and promotions managers 11-2011 35.5 38.0 2.4 6.9 17.8 13.4 "$88,590" Bachelor's degree Less than 5 years None 
Marketing and sales managers 11-2020 539.8 592.5 52.7 9.8 2.6 168.6 "$110,340" — — — 
Marketing managers 11-2021 180.5 203.4 22.9 12.7 2.6 61.7 "$119,480" Bachelor's degree 5 years or more None 
Sales managers 11-2022 359.3 389.0 29.8 8.3 2.7 106.9 "$105,260" Bachelor's degree Less than 5 years None 
Public relations and fundraising managers 11-2031 62.1 70.1 8.0 12.9 1.6 21.3 "$95,450" Bachelor's degree 5 years or more None 
Operations specialties managers 11-3000 "1,647.5" "1,799.7" 152.1 9.2 3.3 459.1 "$100,720" — — — 
Administrative services managers 11-3011 280.8 315.0 34.2 12.2 0.1 79.9 "$81,080" Bachelor's degree Less than 5 years None 
Computer and information systems managers 11-3021 332.7 383.6 50.9 15.3 3.1 97.1 "$120,950" Bachelor's degree 5 years or more None 
Financial managers 11-3031 532.1 579.2 47.1 8.9 5.1 146.9 "$109,740" Bachelor's degree 5 years or more None 
Industrial production managers 11-3051 172.7 168.6 -4.1 -2.4 6.1 31.4 "$89,190" Bachelor's degree 5 years or more None 
Purchasing managers 11-3061 71.9 73.4 1.5 2.1 0.3 17.3 "$100,170" Bachelor's degree 5 years or more None 
"Transportation, storage, and distribution managers" 11-3071 105.2 110.3 5.1 4.9 4.8 29.1 "$81,830" High school diploma or equivalent 5 years or more None 
Compensation and benefits managers 11-3111 20.7 21.4 0.6 3.1 — 6.1 "$95,250" Bachelor's degree 5 years or more None 
Human resources managers 11-3121 102.7 116.3 13.6 13.2 1.0 40.6 "$99,720" Bachelor's degree 5 years or more None 
Training and development managers 11-3131 28.6 31.8 3.2 11.2 — 10.7 "$95,400" Bachelor's degree 5 years or more None 
Other management occupations 11-9000 "4,215.0" "4,371.0" 156.1 3.7 43.1 "1,207.0" "$81,940" — — — 

回答

0

您的 reducer 有問題。

有問題的代碼如下所示。下面的循環會針對某個鍵的所有值執行(例如,對於「Advertising and promotions managers」,循環體會執行兩次:一次的值是「Alabama」,另一次的值是「6.9」)。問題在於,你把 if(cent >= 30) 語句放在了 for 循環之外。它應該放在循環裏面,針對匹配的鍵進行判斷。

for(Text values : value){ 
     String[] str = values.toString().split("\t"); 
     if(str[0].equals("m1")){ 
       state = state + " " + str[1]; 
     }else if(str[0].equals("m2")){ 
      try{ 
       cent = Float.parseFloat(str[1]); 
      }catch(Exception nf){ 
       cent = 0; 
      } 
     } 
    } 
    if(cent>=30){ 
     outvalue.set(state); 
     context.write(key,outvalue); 
    } 

下面這段代碼工作正常。

//Reducer 
public static class GrowthReduce extends Reducer<Text,Text,Text,Text>{ 
    Text outvalue = new Text(); 
    HashMap<String, String> stateMap = new HashMap<String, String>(); 


public void reduce(Text key,Iterable<Text> value,Context context)throws IOException, InterruptedException{ 
    float cent = 0; 

    for(Text values : value){ 
     String[] str = values.toString().split("\t"); 

     if(str[0].equals("m1")){ 
      stateMap.put(key.toString().toLowerCase(), str[1]); 
     } 
     else if(str[0].equals("m2")){ 
      try{ 
       cent = Float.parseFloat(str[1]); 
       if(stateMap.containsKey(key.toString().toLowerCase())) 
       { 
        if(cent>30) { 
         outvalue.set(stateMap.get(key.toString().toLowerCase())); 
         context.write(key, outvalue); 
        } 
        stateMap.remove(key.toString()); 
       } 
      }catch(Exception nf){ 
       cent = 0; 
      } 
     } 
    } 
} 
} 

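其邏輯是:

  1. 每當遇到州(帶有「m1」標記的值)時,就把它放進 stateMap,並以轉成小寫的職業名稱作為鍵。之所以轉成小寫,是因為兩個輸入文件中同一職業名稱的大小寫並不一致(例如「Chief Executives」與「Chief executives」)。
  2. 之後,當遇到同一職業的百分比(帶有「m2」標記的值)時,檢查該職業的州是否已經在 stateMap 中。如果是,並且百分比大於 30,就輸出鍵/值。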