2016-11-23 91 views
1

我有一個多分類問題,我試圖運行KNN算法來查找每個數據點周圍的50個最近鄰。我在R中使用了FNN軟件包,但是由於我的數據集有大約2900萬行,所以需要很長時間。我想知道R中是否有可以並行運行KNN的軟件包。你有什麼建議,以及它的用法示例嗎?我該如何在R中並行運行KNN算法來進行多分類?

回答

0
You can adapt the following code for KNN. If needed, I will provide the exact code. The example below is for SVC (support vector classification):





# Packages that must be attached: the script calls foreach(), %do%, %dopar%
# and registerDoParallel() without a namespace prefix.
pkgs <- c("foreach", "doParallel")

# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
for (pkg in pkgs) {
  library(pkg, character.only = TRUE)
}

# The rest of the script reaches into these packages via `::`. Check for them
# up front so a missing package is reported before any data is processed.
namespaced_pkgs <- c("caret", "e1071", "pROC")
is_installed <- vapply(
  namespaced_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  stop(
    "Missing required package(s): ",
    paste(namespaced_pkgs[!is_installed], collapse = ", "),
    call. = FALSE
  )
}

# Register the parallel backend that %dopar% will dispatch to (4 workers).
registerDoParallel(cores = 4)

### PREPARE FOR THE DATA ###

# Location of the CSV file to analyse. This is a placeholder: replace it with
# the real path to your dataset before running the script.
data_path <- "path/to/your/dataset.csv"

df1 <- read.csv(data_path)

# Fail early on an empty file; the fold assignment below needs rows to split.
stopifnot(is.data.frame(df1), nrow(df1) > 0)

## do normalization if needed ##


### SPLIT DATA INTO K FOLDS ###
# Fix the RNG seed so the fold assignment is reproducible between runs.
set.seed(2016)

# Tag every row with a fold id; the cross-validation loop later selects rows
# by comparing this column against the current fold number.
# seq_len() is used instead of 1:nrow() so a zero-row frame cannot yield c(1, 0).
df1$fold <- caret::createFolds(seq_len(nrow(df1)), k = 10, list = FALSE)


### PARAMETER LIST ###
# Candidate cost values: powers of ten from 10^-1 up to 10^4.
cost <- 10^seq(from = -1, to = 4, by = 1)

# Candidate gamma values: powers of two from 2^-4 up to 2^-1.
gamma <- 2^seq(from = -4, to = -1, by = 1)

# Every (cost, gamma) combination, one per row; `cost` varies fastest.
parms <- expand.grid(cost = cost, gamma = gamma)

### LOOP THROUGH PARAMETER VALUES ###
# Grid search: for every row of `parms`, run a k-fold cross-validation of a
# radial-kernel SVM classifier (folds evaluated in parallel via %dopar%) and
# record the AUC computed on the pooled out-of-fold predictions.
# `result` ends up with one row per parameter combination: cost, gamma, roc.

# Name of the response (class label) column in `df1`. Change this to match
# your data. The same name is used for both the model formula and the scored
# labels, so the two cannot drift apart.
target_col <- "DEFAULT"

stopifnot(target_col %in% names(df1), "fold" %in% names(df1))

# `fold` is cross-validation bookkeeping, not a feature: keep it out of the
# frames handed to svm() so that `~ .` does not use it as a predictor.
feature_cols <- setdiff(names(df1), "fold")
svm_formula <- stats::reformulate(".", response = target_col)

# Class whose probability is scored. pROC::roc() treats the second factor
# level as the "case" class, so use the same one here. The probability column
# is then selected by NAME: e1071 orders the probability columns by the order
# in which classes appear in each training fold, so a positional `[, 2]` may
# refer to different classes in different folds.
positive_class <- levels(as.factor(df1[[target_col]]))[2]

result <- foreach(i = seq_len(nrow(parms)), .combine = rbind) %do% {

  # Hyperparameters for this grid point (named to avoid masking base::c).
  svm_cost <- parms$cost[i]
  svm_gamma <- parms$gamma[i]

  ### K-FOLD VALIDATION ###
  # Each fold is held out once; row order of the combined result does not
  # matter for the AUC, hence .inorder = FALSE.
  out <- foreach(
    j = seq_len(max(df1$fold)),
    .combine = rbind,
    .inorder = FALSE
  ) %dopar% {

    deve <- df1[df1$fold != j, feature_cols, drop = FALSE]
    test <- df1[df1$fold == j, feature_cols, drop = FALSE]

    mdl <- e1071::svm(
      svm_formula,
      data = deve,
      type = "C-classification",
      kernel = "radial",
      cost = svm_cost,
      gamma = svm_gamma,
      probability = TRUE
    )

    pred <- predict(mdl, test, decision.values = TRUE, probability = TRUE)

    # Observed label and predicted probability of the positive class.
    data.frame(
      y = test[[target_col]],
      prob = attr(pred, "probabilities")[, positive_class]
    )
  }

  ### CALCULATE SVM PERFORMANCE ###
  roc <- pROC::roc(as.factor(out$y), out$prob)

  data.frame(parms[i, ], roc = roc$auc[1])
}