0
我的 MapReduce 作業需要幫助:我的自定義分區程序(Partitioner)從來沒有被調用。我已經檢查了無數次,但沒有結果。它曾經正常工作過一段時間,我不知道爲什麼現在不行了。任何幫助我都將非常感謝。
我正在添加代碼(對於非常簡單的情況,它不適用於自定義鍵作爲輸入)。
映射器輸出的值百分之百正確,但之後分區程序被跳過了。(標題:Hadoop MapReduce 的分區程序沒有被調用)
//import of libs
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
...
public class hbaseCountTest extends Configured implements Tool {
....
/**
 * HBase table mapper that emits one (gender, capsStr) pair of {@code Text}
 * values per call to {@link #map}.
 *
 * <p>The code that derives {@code gender} and {@code capsStr} from the row is
 * elided in this snippet.
 */
static class myMapper extends TableMapper<Text,Text> {
/**
 * Processes one HBase row and writes a single Text/Text pair to the context.
 *
 * @param rowKey  key of the row being processed
 * @param result  the row's data
 * @param context map context that receives the output pair
 * @throws IOException declared by this method
 */
// NOTE(review): in the Hadoop API Context.write also declares InterruptedException;
// confirm the elided code handles it, otherwise add it to the throws clause.
@Override
public void map(ImmutableBytesWritable rowKey,Result result, Context context) throws IOException {
...... //dropping some calculations
// Emit the pair; the author reports having verified that these values are correct.
context.write(new Text(gender), new Text(capsStr)); // everything is right here, checked.
}
}
/**
 * Custom partitioner that picks a partition from the integer prefix of the
 * map output value (the text before the first ':').
 *
 * <p>The partition numbers returned by each branch are elided in this snippet.
 */
public static class myPartitioner extends Partitioner<Text, Text> {
/**
 * Chooses the partition for one map output record.
 *
 * @param key             map output key (not read in the visible code)
 * @param value           map output value; its prefix before ':' must parse as an int,
 *                        otherwise {@code Integer.parseInt} throws NumberFormatException
 * @param NumReduceTasks  number of reduce tasks configured for the job
 * @return the partition index (computation elided)
 */
@Override
public int getPartition(Text key, Text value, int NumReduceTasks) {
// Author's observation: this method is never invoked when the job runs.
// The println below is the debugging probe used to detect an invocation.
System.out.println("partitioner started");
// Take the part of the value before the first ':' and parse it as an integer.
String heur = value.toString().split(":")[0];
int h = Integer.parseInt(heur);
// Three ranges are distinguished: h < 10, 10 <= h <= 18, and everything else (h >= 19).
if (h<10) {
... return... //dropping some calculations
} else if (h>9 && h<19) {
...
} else
{
...
}
}
}
/**
 * Configures and submits the HBase-backed MapReduce job, then waits for it.
 *
 * <p>The job scans four columns of the HBase table, maps rows with
 * {@code myMapper}, partitions with {@code myPartitioner}, reduces with
 * {@code myReducer} into three reduce tasks and writes text output.
 *
 * @param arg0 command-line arguments; {@code arg0[0]} is the output directory,
 *             which is deleted first if it already exists
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if configuration, filesystem access or job submission fails
 */
@Override
public int run(String[] arg0) throws Exception {
    Job job = Job.getInstance(getConf(), "jobName1");
    job.setJarByClass(getClass());

    // Job works on its own copy of the configuration, so every setting below
    // must be applied to job.getConfiguration(), not to getConf().
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));

    // addResource(String) resolves the name on the CLASSPATH; an absolute file
    // path given as a String is therefore not loaded from disk. Wrapping it in
    // a Path makes Hadoop read the file from the local filesystem.
    conf.addResource(new Path("/home/hadoop/Training/CDH4/hadoop-2.0.0-cdh4.0.0/conf/hadoop-local.xml"));
    conf.addResource(new Path("/home/hadoop/Training/CDH4/hadoop-2.0.0-cdh4.0.0/conf/mapred-site.xml"));

    // Resolve the filesystem from the job's configuration so the directory that
    // is cleaned up is the same one the output format will write to.
    Path outputPath = new Path(arg0[0]);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
        // Explicit recursive delete; the single-argument form is deprecated.
        fs.delete(outputPath, true);
    }

    Scan scan = new Scan();
    scan.addColumn(toBytes(famName), toBytes(colNamePage));
    scan.addColumn(toBytes(famName), toBytes(colNameTime));
    scan.addColumn(toBytes(famName1), toBytes(colNameRegion));
    scan.addColumn(toBytes(famName1), toBytes(colNameGender));
    TableMapReduceUtil.initTableMapperJob(tableName, scan, myMapper.class, Text.class, Text.class, job);

    // Hadoop only instantiates a custom Partitioner when the job runs with more
    // than one reduce task; with a single reducer getPartition is never called.
    job.setNumReduceTasks(3);
    job.setPartitionerClass(myPartitioner.class);
    job.setReducerClass(myReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputPath);
    // Output key/value classes are the record types, not the OutputFormat.
    // NOTE(review): assumes myReducer emits Text/Text — adjust if it emits other types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
}
先在此致謝,
亞歷
你是根據什麼現象判斷你的分區器沒有被調用的? – rVr
我先在本地模式下調試,然後在僞分佈式模式下查看 System.out.println 是否會在 Eclipse 的控制檯窗口中顯示消息,而結果表明分區並沒有發生。順便說一句,從命令行啓動 jar 並加上 -partitioner 選項時它是可以工作的 =/ 我想不通爲什麼它在 Hadoop 中不起作用 –
嗯……這可能是因爲配置(conf)沒有載入自定義分區程序(custom partitioner)……讓我們再仔細看看驅動程序類 – rVr