2015-09-19 100 views
2

如何對 aggregate 的輸出結果進行排序?以下是我的代碼:

library("C50") 

# Load the student performance data (semicolon-separated, with a header row).
portuguese_scores <- read.table(
  "https://raw.githubusercontent.com/JimGorman17/Datasets/master/student-por.csv",
  sep = ";",
  header = TRUE
)

# Remove the columns that are not used as predictors.
dropped_cols <- c("school", "age", "G1", "G2")
portuguese_scores <-
  portuguese_scores[, !names(portuguese_scores) %in% dropped_cols]

# Binary target: TRUE when the final grade G3 is at or above the median.
# median() is called directly rather than picking the "Median" entry out of
# summary(); na.rm = TRUE keeps summary()'s behaviour of ignoring NAs.
median_score <- median(portuguese_scores$G3, na.rm = TRUE)
portuguese_scores$score_gte_than_median <-
  as.factor(median_score <= portuguese_scores$G3)
portuguese_scores <-
  portuguese_scores[, !names(portuguese_scores) %in% c("G3")]

set.seed(123)

# Keep a random 90% of the rows as the training pool.
train_sample <- sample(nrow(portuguese_scores), .9 * nrow(portuguese_scores))
port_train <- portuguese_scores[train_sample, ]

# Fit a C5.0 tree on random subsets of `train` of increasing size and record
# the training error of each fit.
#
# train      Data frame containing the factor column `score_gte_than_median`.
# algorithm  Label stored in the `algorithm` column of the result.
# cf         Pruning confidence factor handed to C5.0Control(CF = ...);
#            NULL (the default) uses C5.0Control()'s own defaults.
# pcts       Percentages of `train` to sample for each fit.
#
# Returns a data frame with one row per percentage and the columns
# pct_of_training_set, err_pct, type and algorithm.
training_error_curve <- function(train, algorithm, cf = NULL,
                                 pcts = seq(15, 100, by = 1)) {
  rows <- lapply(pcts, function(pct) {
    picked <- sample(nrow(train), pct / 100 * nrow(train))
    train_pct <- train[picked, ]

    ctrl <- if (is.null(cf)) C5.0Control() else C5.0Control(CF = cf)
    # The control object has to be supplied as `control`. C5.0() has no
    # `ctrl` argument, so a misspelled name is silently absorbed by `...`
    # and the requested confidence factor would never be applied.
    fit <- C5.0(score_gte_than_median ~ ., data = train_pct, control = ctrl)

    wrong <- predict(fit, train_pct) != train_pct$score_gte_than_median
    data.frame(
      pct_of_training_set = pct,
      err_pct = sum(wrong) / nrow(train_pct),
      type = "train",
      algorithm = algorithm
    )
  })
  # Bind once at the end instead of growing a data frame inside the loop.
  do.call(rbind, rows)
}

# Baseline tree with default settings, followed by one curve per pruning
# confidence level, in the same order as the sampling is performed.
baseline_curve <- training_error_curve(port_train, "C5.0 Decision Tree")
pruned_curves <- lapply(seq(.1, .9, by = .1), function(h) {
  training_error_curve(
    port_train,
    paste("Pruning with confidence (", h, ")"),
    cf = h
  )
})
learn_DF <- do.call(rbind, c(list(baseline_curve), pruned_curves))

# Mean training error per algorithm (rows come back ordered by algorithm).
aggregate(err_pct ~ algorithm, data = learn_DF, mean)

生成以下的輸出:

 algorithm err_pct 
1    C5.0 Decision Tree 0.09895810 
2 Pruning with confidence (0.1) 0.09288930 
3 Pruning with confidence (0.2) 0.09935209 
4 Pruning with confidence (0.3) 0.09496267 
5 Pruning with confidence (0.4) 0.09724305 
6 Pruning with confidence (0.5) 0.09721156 
7 Pruning with confidence (0.6) 0.09695104 
8 Pruning with confidence (0.7) 0.10041991 
9 Pruning with confidence (0.8) 0.09881957 
10 Pruning with confidence (0.9) 0.09611947 

我的問題:

  • 我如何讓這個表格依 err_pct 排序,而不是依 algorithm 排序?

回答

2

您可以先將彙總結果存入一個 data.frame,然後再依 err_pct 排序:

# Average the training error per algorithm, then list the rows from the
# lowest to the highest mean error.
res <- aggregate(err_pct ~ algorithm, data = learn_DF, FUN = mean)
res[order(res[["err_pct"]]), ]
         algorithm err_pct 
2 Pruning with confidence (0.1) 0.09288930 
4 Pruning with confidence (0.3) 0.09496267 
10 Pruning with confidence (0.9) 0.09611947 
7 Pruning with confidence (0.6) 0.09695104 
6 Pruning with confidence (0.5) 0.09721156 
5 Pruning with confidence (0.4) 0.09724305 
9 Pruning with confidence (0.8) 0.09881957 
1    C5.0 Decision Tree 0.09895810 
3 Pruning with confidence (0.2) 0.09935209 
8 Pruning with confidence (0.7) 0.10041991 
0

您可以使用「plyr」套件中的 arrange 函數。

library(plyr) 
# Mean training error per algorithm, sorted from the highest to the lowest
# error; rows with equal error are ordered by algorithm.
a <- aggregate(err_pct ~ algorithm, data = learn_DF, FUN = mean)
arrange(a, desc(err_pct), algorithm)

這是一個建議...祝你好運!