1
我有一個多分類問題,我試圖運行KNN算法來查找每個數據點周圍50個最近的鄰居。我在R中使用了FNN軟件包,但是由於我的數據集有大約2900萬行,所以需要很長時間。我想知道R中是否有可以並行運行KNN的軟件包。你有什麼建議與它的用途的例子?我該如何運行knn算法並行使用r進行多分類
我有一個多分類問題,我試圖運行KNN算法來查找每個數據點周圍50個最近的鄰居。我在R中使用了FNN軟件包,但是由於我的數據集有大約2900萬行,所以需要很長時間。我想知道R中是否有可以並行運行KNN的軟件包。你有什麼建議嗎?能否附上一個用法示例?我該如何在R中並行運行KNN算法來進行多分類?
You can use the following code by modifying it for KNN. If needed, I will provide you with the exact code. The following code is for SVC (support vector classification).
# Packages needed for the parallel loops below: `foreach` supplies the
# foreach() / %do% / %dopar% constructs and `doParallel` supplies the
# parallel backend registered on the last line.
pkgs <- c("foreach", "doParallel")
# library() stops immediately with a clear error when a package is missing;
# require() would only return FALSE and let the script fail later on.
for (pkg in pkgs) {
  library(pkg, character.only = TRUE)
}
# Register a parallel backend with 4 workers for %dopar%.
registerDoParallel(cores = 4)
### PREPARE FOR THE DATA ###
# Path to the input CSV file. This is a placeholder: replace it with the
# location of your own dataset before running the script.
data_path <- "path/to/your/dataset.csv"
df1 <- read.csv(data_path)
## do normalization if needed ##
### SPLIT DATA INTO K FOLDS ###
# Fixed seed so that the fold assignment is reproducible between runs.
set.seed(2016)
# Give every row a fold id (k = 10); the cross-validation loop later holds
# out one fold id at a time. seq_len() is used instead of 1:nrow() so an
# empty data frame does not produce the sequence c(1, 0).
df1$fold <- caret::createFolds(seq_len(nrow(df1)), k = 10, list = FALSE)
### PARAMETER LIST ###
# Candidate SVM hyper-parameters: six cost values (10^-1 .. 10^4) and
# four gamma values (2^-4 .. 2^-1).
cost <- 10^seq(from = -1, to = 4)
gamma <- 2^seq(from = -4, to = -1)
# Cartesian product of both vectors: one row per (cost, gamma) pair.
parms <- expand.grid(cost = cost, gamma = gamma)
### LOOP THROUGH PARAMETER VALUES ###
# Name of the class-label column. The model formula and the scoring step
# both use this single name, so they cannot disagree.
# Replace "DEFAULT" with the name of your own label column if it differs.
target <- "DEFAULT"
stopifnot(target %in% names(df1))
# Model formula `<target> ~ .`: every remaining column is a predictor.
svm_formula <- stats::reformulate(".", response = target)
# The fold id is bookkeeping for cross-validation only; it must not be
# offered to the model as a feature, so it is dropped from the model data.
model_cols <- setdiff(names(df1), "fold")
# Number of folds, computed once instead of once per parameter row.
n_folds <- max(df1$fold)

# Outer loop (sequential): one iteration per (cost, gamma) row of `parms`.
# The per-row results are stacked with rbind into `result`.
result <- foreach(i = seq_len(nrow(parms)), .combine = rbind) %do% {
  # Named cost_i / gamma_i so that base::c and base::gamma are not shadowed.
  cost_i <- parms$cost[i]
  gamma_i <- parms$gamma[i]
  ### K-FOLD VALIDATION ###
  # Inner loop (parallel): fold j is held out for testing and the model is
  # trained on all other folds. Result order is irrelevant because the
  # pooled predictions are only used to compute a single AUC.
  out <- foreach(
    j = seq_len(n_folds), .combine = rbind, .inorder = FALSE
  ) %dopar% {
    deve <- df1[df1$fold != j, model_cols, drop = FALSE]
    test <- df1[df1$fold == j, model_cols, drop = FALSE]
    mdl <- e1071::svm(
      svm_formula,
      data = deve,
      type = "C-classification",
      kernel = "radial",
      cost = cost_i,
      gamma = gamma_i,
      probability = TRUE
    )
    pred <- predict(mdl, test, decision.values = TRUE, probability = TRUE)
    # Observed label and the probability in the second column of the
    # "probabilities" attribute.
    # NOTE(review): which class column 2 refers to depends on how e1071
    # orders the probability columns - confirm it is the positive class.
    data.frame(
      y = test[[target]],
      prob = attr(pred, "probabilities", exact = TRUE)[, 2]
    )
  }
  ### CALCULATE SVM PERFORMANCE ###
  # AUC over the pooled out-of-fold predictions for this parameter pair.
  roc <- pROC::roc(as.factor(out$y), out$prob)
  data.frame(parms[i, ], roc = roc$auc[1])
}