
Hadoop/MapReduce reducer not working properly

I downloaded a K-means implementation (for Hadoop) from GitHub, but only the mapper seems to run: the output file is named "part-m-00000". I want the output to come from the reducer.

The command I ran: ./bin/hadoop jar Kmeans.jar Main input output

Please, can somebody help me?

Here is the Main class:

import java.io.IOException; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.FileSystem; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 


public class Main{ 
    /** 
    * @param args 
    * @throws IOException 
    * @throws ClassNotFoundException 
    * @throws InterruptedException 
    */ 

    static enum Counter{ 
     CONVERGED 
    } 

    public static final String CENTROIDS = "centroids"; 

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { 

     int iteration = 1; 
     long changes = 0; 
     Path dataPath = new Path(args[0]); 

     //read in the initial cluster centroids. 
     Configuration centroidConf = new Configuration(); 
     Job centroidInputJob = new Job(centroidConf); 
     centroidInputJob.setJobName("KMeans Centroid Input"); 
     centroidInputJob.setJarByClass(Main.class); 

     Path centroidsPath = new Path("centroids_0"); 

     centroidInputJob.setMapperClass(KmeansCentroidInputMapper.class); 

     // No Combiner, no Reducer. 

     centroidInputJob.setMapOutputKeyClass(Text.class); 
     centroidInputJob.setMapOutputValueClass(Text.class); 
     centroidInputJob.setOutputKeyClass(Text.class); 
     centroidInputJob.setOutputValueClass(Text.class); 

     FileInputFormat.addInputPath(centroidInputJob,new Path(args[1])); 
     FileOutputFormat.setOutputPath(centroidInputJob, centroidsPath); 
     centroidInputJob.setNumReduceTasks(0); 

     if (!centroidInputJob.waitForCompletion(true)) { 
      System.err.println("Centroid input job failed!"); 
      System.exit(1); 
     } 

     while(true){ 
      Configuration conf = new Configuration(); 
      Path nextIter = new Path(String.format("centroids_%s", iteration)); 
      Path prevIter = new Path(String.format("centroids_%s", iteration - 1)); 
      conf.set(Main.CENTROIDS, prevIter.toString()); 

      Job job = new Job(conf); 
      job.setJobName("KMeans " + iteration); 
      job.setJarByClass(Main.class); 

      //Set Mapper, Combiner, and Reducer 
      job.setMapperClass(MapClass.class); 
      job.setReducerClass(ReduceClass.class); 
      job.setMapOutputKeyClass(Text.class); 
      job.setMapOutputValueClass(Text.class); 
      job.setNumReduceTasks(1); 
      job.setCombinerClass(CombineClass.class); 
      job.setOutputKeyClass(Text.class); 
      job.setOutputValueClass(Text.class); 

      //Set input/output paths 
      FileInputFormat.addInputPath(job, dataPath); 
      FileOutputFormat.setOutputPath(job, nextIter); 

      job.waitForCompletion(true); 
      iteration++; 
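      // Read the number of changes reported via the CONVERGED counter; the loop stops once no changes remain.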
      changes = job.getCounters().findCounter(Main.Counter.CONVERGED).getValue(); 
      job.getCounters().findCounter(Main.Counter.CONVERGED).setValue(0); 
      if(changes<=0){ 
       break; 
      }  
     } 
    } 

} 
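For reference, any reducer wired into this driver has to match the Text/Text key and value types set above, and it is the reducer that produces part-r-* output files. The actual ReduceClass is in the linked repository; the skeleton below is only a sketch of that shape (the pass-through body and the reuse of the ReduceClass name are assumptions, not the repository's code):

import java.io.IOException; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Reducer; 

// Sketch only: keys are assumed to be centroid IDs, values the points assigned to them. 
public class ReduceClass extends Reducer<Text, Text, Text, Text> { 
    @Override 
    protected void reduce(Text centroidId, Iterable<Text> points, Context context) 
            throws IOException, InterruptedException { 
        // A real implementation would average the points into a new centroid and 
        // update Main.Counter.CONVERGED; here the values are simply passed through. 
        for (Text point : points) { 
            context.write(centroidId, point); 
        } 
    } 
} 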

The source code: https://github.com/yezhang1989/K-Means-Clustering-on-MapReduce


In your main class you have set the number of reduce tasks to zero. That is why no reduce task is executed for that job. – donut

Answer


Please add the line

job.setNumReduceTasks(1); 

and check whether it works.

The default number of reduce tasks for a MapReduce job is 1, so you do not actually have to set it to 1.

With job.setNumReduceTasks(0); the reduce task does not run at all, and the output files depend only on the number of map tasks (part-m-00000).
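To make the effect concrete, a minimal sketch (assuming job is the org.apache.hadoop.mapreduce.Job configured in the driver above):

// At least one reduce task: reducer output is written as part-r-NNNNN files. 
job.setNumReduceTasks(1);   // e.g. output/part-r-00000 

// Zero reduce tasks: the job is map-only and each mapper writes its own file. 
job.setNumReduceTasks(0);   // e.g. output/part-m-00000, part-m-00001, ... 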