0


我需要MapReduce作業方面的幫助:我的自定義分區器(Partitioner)永遠不會被調用。我查了無數次,但沒有結果。它曾經正常工作過一段時間,我不知道爲什麼現在不行了。任何幫助將不勝感激。
我正在添加代碼(對於非常簡單的情況,它不適用於自定義鍵作爲輸入)。
映射器100%正確地輸出了值,但之後分區器被跳過了。(問題標題:Hadoop MapReduce 的自定義分區器沒有被調用)

//import of libs 
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.Partitioner; 
import org.apache.hadoop.hbase.mapreduce.TableMapper; 
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; 
... 

public class hbaseCountTest extends Configured implements Tool { 
.... 

static class myMapper extends TableMapper<Text,Text> { 
    @Override 
    public void map(ImmutableBytesWritable rowKey,Result result, Context context) throws IOException { 
     ...... //dropping some calculations 
     context.write(new Text(gender), new Text(capsStr)); // everything is right here, checked. 
    } 

} 

    public static class myPartitioner extends Partitioner<Text, Text> { 
    @Override 
    public int getPartition(Text key, Text value, int NumReduceTasks) { 
//getPartitioner IS NEVER INVOKED 
     System.out.println("partitioner started"); 
     String heur = value.toString().split(":")[0]; 
     int h = Integer.parseInt(heur); 
     if (h<10) { 
      ... return... //dropping some calculations 
     } else if (h>9 && h<19) { 
      ... 
     } else 
      { 
      ... 
      } 
    } 

} 

@Override 
public int run(String[] arg0) throws Exception { 
    Job job = Job.getInstance(getConf(), "jobName1"); 
    job.setNumReduceTasks(3); 
    job.setJarByClass(getClass()); 
    Configuration conf = job.getConfiguration(); 
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf)); 
    conf.addResource("/home/hadoop/Training/CDH4/hadoop-2.0.0-cdh4.0.0/conf/hadoop-local.xml"); 
    conf.addResource("/home/hadoop/Training/CDH4/hadoop-2.0.0-cdh4.0.0/conf/mapred-site.xml"); 
    FileSystem fs = FileSystem.get(getConf()); 
    if (fs.exists(new Path(arg0[0]))) { 
     fs.delete(new Path(arg0[0])); 
    } 
    Scan scan = new Scan(); 
    scan.addColumn(toBytes(famName), toBytes(colNamePage)); 
    scan.addColumn(toBytes(famName), toBytes(colNameTime)); 
    scan.addColumn(toBytes(famName1), toBytes(colNameRegion)); 
    scan.addColumn(toBytes(famName1), toBytes(colNameGender)); 
    TableMapReduceUtil.initTableMapperJob(tableName, scan, myMapper.class, Text.class, Text.class, job); 

    job.setPartitionerClass(myPartitioner.class); 
    job.setReducerClass(myReducer.class); 
    job.setOutputFormatClass(TextOutputFormat.class); 
    TextOutputFormat.setOutputPath(job, new Path(arg0[0])); 
    job.setOutputKeyClass(TextOutputFormat.class); 
    job.setOutputValueClass(TextOutputFormat.class); 
      job.setNumReduceTasks(3); 
    return job.waitForCompletion(true)?0:1; 
} 

} 

非常感謝提前,
亞歷

+2

你在看什麼,並說你的分區器沒有被調用? – rVr

+0

我嘗試在本地模式下調試,並在僞分佈式模式下觀察 System.out.println 的輸出——在 Eclipse 的控制檯窗口裏沒有看到那條消息,而且結果也表明分區沒有發生。順便說一句,如果從命令行用 -partitioner 選項啓動 jar,它就能正常工作。我想不通爲什麼在 Hadoop 裏它不起作用。 –

+0

嗯……這可能是因爲配置(conf)沒有載入自定義分區器……讓我們多看看驅動程序類的內容。 – rVr

回答

0

嘗試將 reducer 的數量設置爲大於 1 且不超過唯一鍵數量的任意數值——當只有 0 或 1 個 reduce 任務時,分區器根本不會被調用。