0
我有一個類PorterStemmer,想在我的Mapper中使用它。我的驅動程序類同時包含Mapper和Reducer。我嘗試將PorterStemmer類放在Driver類中,但Hadoop在運行時拋出ClassNotFoundException。我也嘗試將PorterStemmer打包成JAR並將其添加到分佈式緩存中,但由於Driver類在編譯時找不到PorterStemmer,所以出現編譯時錯誤。有什麼辦法可以解決這個問題嗎?如何在Hadoop的Mapper中導入和使用外部類?
這裏是我的Driver類
/**
 * MapReduce driver that builds an inverted index: for every stemmed token found in the
 * input it outputs the names of the files the token was read from.
 *
 * <p>The mapper uses {@code PorterStemmer}, which is expected to live in a separate jar.
 * That jar has to be on the compile classpath when this class is built, and the driver
 * adds it to the task classpath at submission time so map tasks can load it.
 */
public class InvertedIndex {

    /**
     * Splits each input line on whitespace, stems every token and emits a
     * (stemmed token, source file name) pair per token.
     */
    public static class IndexMapper extends Mapper<Object, Text, Text, Text> {
        private final Text word = new Text();
        private final Text filename = new Text();
        private boolean caseSensitive = false;
        // Static, so the same stemmer object is shared by every IndexMapper instance
        // loaded by the same class loader.
        public static PorterStemmer stemmer = new PorterStemmer();

        /**
         * Emits one (stemmed token, file name) pair for every whitespace-delimited token
         * of the given line.
         *
         * @param key     input key supplied by the input format; not used
         * @param value   one line of input text
         * @param context task context used to look up the input split and to emit pairs
         * @throws IOException          if emitting a pair fails
         * @throws InterruptedException if the task is interrupted while emitting
         */
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Reuse the Text holder instead of allocating a new one for every record.
            filename.set(((FileSplit) context.getInputSplit()).getPath().getName());

            String line = value.toString();
            if (!caseSensitive) {
                // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
                line = line.toLowerCase(java.util.Locale.ROOT);
            }

            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                stemmer.add(token.toCharArray(), token.length());
                stemmer.stem();
                word.set(stemmer.toString());
                context.write(word, filename);
            }
        }
    }

    /**
     * Joins all values received for a key into a single {@code " -> "}-separated string.
     * The driver registers this class as both combiner and reducer.
     */
    public static class IndexReducer extends Reducer<Text, Text, Text, Text> {

        /**
         * Concatenates the values of {@code key} in iteration order, separated by
         * {@code " -> "}, and emits the result under the same key.
         *
         * @param key     the stemmed token
         * @param values  values emitted for the token
         * @param context task context used to emit the joined string
         * @throws IOException          if emitting the result fails
         * @throws InterruptedException if the task is interrupted while emitting
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            StringBuilder joined = new StringBuilder();
            // Track the first element explicitly so the separator logic does not depend
            // on what a second call to values.iterator() returns.
            boolean first = true;
            for (Text value : values) {
                if (!first) {
                    joined.append(" -> ");
                }
                joined.append(value.toString());
                first = false;
            }
            context.write(key, new Text(joined.toString()));
        }
    }

    /**
     * Configures and submits the job, then exits with 0 on success and 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if job setup, file system access or job execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: InvertedIndex <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "inverted index");

        // addCacheFile() only ships the jar to the task nodes; addFileToClassPath() also
        // puts it on the task classpath, which is what lets the mapper load PorterStemmer.
        // NOTE(review): the jar is assumed to already exist at this path on the job's
        // file system - confirm before running.
        job.addFileToClassPath(new Path("/invertedindex/lib/stemmer.jar"));
        job.setJarByClass(InvertedIndex.class);

        /* Field separator for reducer output */
        job.getConfiguration().set("mapreduce.output.textoutputformat.separator", " | ");

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(IndexMapper.class);
        job.setCombinerClass(IndexReducer.class);
        job.setReducerClass(IndexReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        Path inputFilePath = new Path(args[0]);
        Path outputFilePath = new Path(args[1]);
        FileInputFormat.addInputPath(job, inputFilePath);
        FileOutputFormat.setOutputPath(job, outputFilePath);

        /* Delete output filepath if already exists */
        // newInstance() hands back a FileSystem owned by this caller, so close it here.
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            if (fs.exists(outputFilePath)) {
                fs.delete(outputFilePath, true);
            }
        }

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}