2013-07-24 49 views
0

如何在 Weka 中爲 k-means 聚類給 .arff 文件的每一行添加「ID」?這是我的 ARFF 文件(links.arff):

@relation links 

@attribute isLink1Present numeric 
@attribute isLink2Present numeric 
@attribute isLink3Present numeric 
@attribute isLink4Present numeric 
@attribute isLink5Present numeric 
@attribute isLink6Present numeric 
@attribute isLink7Present numeric 
@attribute isLink8Present numeric 
@attribute isLink9Present numeric 

@data 
0,0,0,0,0,0,0,0,0 
1,0,0,0,0,0,0,0,0 
1,0,0,0,0,0,0,0,0 
1,1,0,0,0,0,0,0,0 
1,0,0,0,0,0,0,0,0 
1,0,1,0,0,0,0,0,0 
1,1,0,0,0,0,0,0,0 
1,1,1,0,0,0,0,0,0 
1,0,0,0,0,0,0,0,0 
1,0,0,1,0,0,0,0,0 
1,0,1,0,0,0,0,0,0 
1,0,1,1,0,0,0,0,0 
1,1,0,0,0,0,0,0,0 
1,1,0,1,0,0,0,0,0 
1,1,1,0,0,0,0,0,0 
1,1,1,1,0,0,0,0,0 
1,0,0,0,0,0,0,0,0 
1,0,0,0,1,0,0,0,0 
1,0,0,1,0,0,0,0,0 
1,0,0,1,1,0,0,0,0 
1,0,1,0,0,0,0,0,0 
1,0,1,0,1,0,0,0,0 
1,0,1,1,0,0,0,0,0 
1,0,1,1,1,0,0,0,0 
1,1,0,0,0,0,0,0,0 
1,1,0,0,1,0,0,0,0 
1,1,0,1,0,0,0,0,0 
1,1,0,1,1,0,0,0,0 
1,1,1,0,0,0,0,0,0 
1,1,1,0,1,0,0,0,0 
1,1,1,1,0,0,0,0,0 
1,1,1,1,1,0,0,0,0 
1,0,0,0,0,0,0,0,0 
1,0,0,0,0,1,0,0,0 
1,0,0,0,1,0,0,0,0 
1,0,0,0,1,1,0,0,0 
1,0,0,1,0,0,0,0,0 
1,0,0,1,0,1,0,0,0 
1,0,0,1,1,0,0,0,0 
1,0,0,1,1,1,0,0,0 
1,0,1,0,0,0,0,0,0 
1,0,1,0,0,1,0,0,0 
1,0,1,0,1,0,0,0,0 
1,0,1,0,1,1,0,0,0 
1,0,1,1,0,0,0,0,0 
1,0,1,1,0,1,0,0,0 
1,0,1,1,1,0,0,0,0 
1,0,1,1,1,1,0,0,0 
1,1,0,0,0,0,0,0,0 
1,1,0,0,0,1,0,0,0 
1,1,0,0,1,0,0,0,0 
1,1,0,0,1,1,0,0,0 
1,1,0,1,0,0,0,0,0 
1,1,0,1,0,1,0,0,0 
1,1,0,1,1,0,0,0,0 
1,1,0,1,1,1,0,0,0 
1,1,1,0,0,0,0,0,0 
1,1,1,0,0,1,0,0,0 
1,1,1,0,1,0,0,0,0 
1,1,1,0,1,1,0,0,0 
1,1,1,1,0,0,0,0,0 
1,1,1,1,0,1,0,0,0 
1,1,1,1,1,0,0,0,0 
1,1,1,1,1,1,0,0,0 
1,0,0,0,0,0,0,0,0 
1,0,0,0,0,0,1,0,0 
1,0,0,0,0,1,0,0,0 
1,0,0,0,0,1,1,0,0 
1,0,0,0,1,0,0,0,0 
1,0,0,0,1,0,1,0,0 
1,0,0,0,1,1,0,0,0 
1,0,0,0,1,1,1,0,0 
1,0,0,1,0,0,0,0,0 
1,0,0,1,0,0,1,0,0 
1,0,0,1,0,1,0,0,0 
1,0,0,1,0,1,1,0,0 
1,0,0,1,1,0,0,0,0 
1,0,0,1,1,0,1,0,0 
1,0,0,1,1,1,0,0,0 
1,0,0,1,1,1,1,0,0 
1,0,1,0,0,0,0,0,0 
1,0,1,0,0,0,1,0,0 
1,0,1,0,0,1,0,0,0 
1,0,1,0,0,1,1,0,0 
1,0,1,0,1,0,0,0,0 
1,0,1,0,1,0,1,0,0 
1,0,1,0,1,1,0,0,0 
1,0,1,0,1,1,1,0,0 
1,0,1,1,0,0,0,0,0 
1,0,1,1,0,0,1,0,0 
1,0,1,1,0,1,0,0,0 
1,0,1,1,0,1,1,0,0 
1,0,1,1,1,0,0,0,0 
1,0,1,1,1,0,1,0,0 
1,0,1,1,1,1,0,0,0 
1,0,1,1,1,1,1,0,0 
1,1,0,0,0,0,0,0,0 
1,1,0,0,0,0,1,0,0 
1,1,0,0,0,1,0,0,0 
1,1,0,0,0,1,1,0,0 

下面是我執行 k-means 的代碼:

/**
 * Clusters the instances of {@code links.arff} with Weka's SimpleKMeans
 * (Euclidean distance, seed 10) and prints the cluster assigned to every
 * instance, sorted according to {@code Cluster}'s natural ordering.
 *
 * The ARFF file is looked up in the directory named by the
 * {@code datafiles-home} property. Any exception is caught and its stack
 * trace printed; nothing is propagated to the caller.
 *
 * @param numClusters number of clusters (k) passed to SimpleKMeans
 */
public void runKMeans(int numClusters){
    try {
        SimpleKMeans kmeans = new SimpleKMeans();

        // Alternative metric: new weka.core.ManhattanDistance()
        DistanceFunction df = new weka.core.EuclideanDistance();
        kmeans.setDistanceFunction(df);
        kmeans.setSeed(10);

        // getAssignments() below is indexed by instance position, so the
        // input order is kept (see the Weka SimpleKMeans documentation).
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numClusters);

        // Use the platform separator instead of a hard-coded "\\" so the
        // path also resolves on non-Windows systems.
        String arffFile = new PropertyUtils().getProperty("datafiles-home")
                + java.io.File.separator + "links.arff";
        DataSource source = new DataSource(arffFile);
        Instances instances = source.getDataSet();

        kmeans.buildClusterer(instances);
        // NOTE(review): this prints the description of the displayStdDevs
        // option, not any clustering result -- confirm this is intended.
        System.out.println(kmeans.displayStdDevsTipText());

        // One entry per instance: the 0-based cluster number it was assigned to.
        int[] assignments = kmeans.getAssignments();

        List<Cluster> lc = new ArrayList<Cluster>(assignments.length);
        for (int idx = 0; idx < assignments.length; idx++) {
            // Instances are reported 1-based.
            lc.add(new Cluster(idx + 1, assignments[idx]));
        }
        Collections.sort(lc);

        for (Cluster c : lc) {
            PrintUtils.println("Instance : "+c.getInstance()+" Cluster "+c.getCluster());
        }
    }
    catch (Exception e) {
        e.printStackTrace();
    }
}

我想將每一行數據與一個「名稱」屬性關聯起來,這樣我就可以識別每一行。我怎樣才能做到這一點?我不認爲我可以在 @data 中添加一個 String 屬性,因爲它會被納入 k-means 算法的計算中,對嗎?有沒有另一種方法?

回答

0

是的,你可以添加一個額外的屬性來命名實例。

然後,對於 EuclideanDistance,您可以使用 -R 選項或 setAttributeIndices 來指定計算距離時使用的屬性範圍。把 name 屬性排除在該範圍之外就可以了!

相關問題