我的代碼會依照一組規則處理輸入,並根據這些規則產生新的值(字符串)。我把規則執行後產生的所有輸出保存在一個列表中,現在需要把這些輸出(Mapper 的值)傳遞給 Reducer,但我找不到辦法做到這一點。
package develop;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import utility.RulesExtractionUtility;
public class CustomMap{
public static class CustomerMapper extends Mapper<Object, Text, Text, Text> {
private Map<String, String> rules;
public void setup(Context context)
URI[] cacheFiles = context.getCacheFiles();
catch (IOException ioe)
System.err.println("Error reading state file.");
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
// Map<String, String> rules = new LinkedHashMap<String, String>();
// rules.put("targetcolumn[1]", "ASSIGN(source[0])");
// rules.put("targetcolumn[2]", "INCOME(source[2]+source[3])");
// rules.put("targetcolumn[3]", "ASSIGN(source[1]");
// Above is the "rules", which would basically create some list values from source file
String [] splitSource = value.toString().split(" ");
// lists would have values like (name, age) for each line from a huge text file, which is what i want to write in context and pass it to the reducer.
// As of now i havent implemented the reducer code, as m stuck with passing the value from mapper.
// context.write(new Text(), lists);---- I do not have a way of doing this
private void setupRulesMap(String filename) throws IOException
Map<String, String> rule = new LinkedHashMap<String, String>();
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line = reader.readLine();
while (line != null)
String[] split = line.split("=");
rule.put(split[0], split[1]);
line = reader.readLine();
// rules logic
rules = rule;
public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
Configuration conf = new Configuration();
if (args.length != 2) {
System.err.println("Usage: customerMapper <in> <out>");
Job job = Job.getInstance(conf);
job.addCacheFile(new URI("Some HDFS location"));
URI[] cacheFiles= job.getCacheFiles();
if(cacheFiles != null) {
for (URI cacheFile : cacheFiles) {
System.out.println("Cache file ->" + cacheFile);
// job.setReducerClass(Reducer.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
謝謝你的建議。難道不能改用自定義的寫入類型(custom Writable)嗎? – user72
是的,很可能可以。但我相信自定義變量最終也會按照我在答案中所述的方式實現。所以,如果你只是想找一個能解決問題的方案,我的做法肯定可行。 – Yann
在嘗試實現您的建議時,是否有一些示例或鏈接可供我參考?謝謝 – user72