交叉驗證得到的預測值存儲在 model$predicted[[1]] 中,觀測值是 y。
如果您想分別查看每個摺疊的預測值,則需要獲取有關摺疊劃分的信息。要做到這一點,您可以:
1)手動劃分摺疊,並自己做交叉驗證
2)使用 caret 包
3)稍微修改 rfcv,讓它輸出這些信息 —— 把 idx 添加到輸出列表中
#' Cross-validated random-forest variable selection that also returns the
#' fold assignment.
#'
#' Fits a random forest on all predictors within each cross-validation fold,
#' ranks the predictors by the first column of the importance matrix, then
#' refits on successively smaller sets of top-ranked predictors. Compared with
#' the function it was adapted from, the only addition to the result is `idx`,
#' the fold index of every observation.
#'
#' @param trainx Matrix or data frame of predictors (rows are observations).
#' @param trainy Response vector; a factor selects classification, anything
#'   else is treated as regression.
#' @param cv.fold Number of cross-validation folds.
#' @param scale If `"log"`, the number of predictors is reduced geometrically
#'   by the factor `step`; otherwise it is reduced arithmetically.
#' @param step Fraction of predictors kept at each step when `scale` is
#'   `"log"`; otherwise the number of predictors removed per step (the sign is
#'   ignored, so both `2` and `-2` remove two predictors at a time).
#' @param mtry Function mapping a number of predictors to the `mtry` value
#'   passed to `randomForest()`.
#' @param recursive If `TRUE`, predictor importance is re-estimated after each
#'   reduction step instead of being fixed by the full model.
#' @param ... Further arguments forwarded to every `randomForest()` call.
#' @return A list with components `n.var` (predictor counts tried),
#'   `error.cv` (cross-validated error per count: misclassification rate for
#'   classification, mean squared error for regression), `predicted` (list of
#'   cross-validated predictions, one element per count) and `idx` (fold index
#'   of each observation).
rfcv2 <- function(trainx, trainy, cv.fold = 5, scale = "log", step = 0.5,
                  mtry = function(p) max(1, floor(sqrt(p))),
                  recursive = FALSE, ...) {
  classRF <- is.factor(trainy)
  n <- nrow(trainx)
  p <- ncol(trainx)

  # Sequence of predictor counts to evaluate, from p down to 1.
  if (scale == "log") {
    k <- floor(log(p, base = 1 / step))
    # seq_len() keeps this empty when k == 0 (single predictor); `0:(k - 1)`
    # would count backwards and produce a bogus entry.
    n.var <- round(p * step^(seq_len(k) - 1))
    same <- diff(n.var) == 0
    if (any(same)) {
      n.var <- n.var[-which(same)]
    }
    if (!1 %in% n.var) {
      n.var <- c(n.var, 1)
    }
  } else {
    # The sequence runs downwards, so the increment has to be negative.
    n.var <- seq(from = p, to = 1, by = -abs(step))
  }
  k <- length(n.var)

  # One prediction vector per predictor count, filled in fold by fold.
  cv.pred <- rep(list(trainy), k)

  # Stratification groups: the classes themselves for classification; for
  # regression, five groups assigned cyclically along the ranks of trainy.
  if (classRF) {
    f <- trainy
  } else {
    f <- factor(rep(1:5, length.out = length(trainy))[order(order(trainy))])
  }
  nlvl <- table(f)

  # Spread the folds evenly within each stratification group.
  idx <- numeric(n)
  for (i in seq_along(nlvl)) {
    idx[which(f == levels(f)[i])] <- sample(rep(1:cv.fold,
                                                length.out = nlvl[i]))
  }

  for (i in 1:cv.fold) {
    in.train <- idx != i
    in.test <- idx == i

    # Full model: gives the predictions for n.var[1] and the initial ranking.
    all.rf <- randomForest(trainx[in.train, , drop = FALSE],
                           trainy[in.train],
                           trainx[in.test, , drop = FALSE],
                           trainy[in.test],
                           mtry = mtry(p), importance = TRUE, ...)
    cv.pred[[1]][in.test] <- all.rf$test$predicted
    impvar <- (1:p)[order(all.rf$importance[, 1], decreasing = TRUE)]

    # seq_len(k)[-1] is empty when only one predictor count is evaluated.
    for (j in seq_len(k)[-1]) {
      imp.idx <- impvar[1:n.var[j]]
      sub.rf <- randomForest(trainx[in.train, imp.idx, drop = FALSE],
                             trainy[in.train],
                             trainx[in.test, imp.idx, drop = FALSE],
                             trainy[in.test],
                             mtry = mtry(n.var[j]), importance = recursive,
                             ...)
      cv.pred[[j]][in.test] <- sub.rf$test$predicted
      if (recursive) {
        # Rows of sub.rf$importance follow the columns selected by imp.idx,
        # so map the new ordering back to column indices of trainx.
        impvar <- imp.idx[order(sub.rf$importance[, 1], decreasing = TRUE)]
      }
    }
  }

  if (classRF) {
    error.cv <- vapply(cv.pred, function(x) mean(trainy != x), numeric(1))
  } else {
    error.cv <- vapply(cv.pred, function(x) mean((trainy - x)^2), numeric(1))
  }
  names(error.cv) <- names(cv.pred) <- n.var

  list(n.var = n.var, error.cv = error.cv, predicted = cv.pred, idx = idx)
}
現在,您可以這樣調用:
# Run the modified cross-validation with 10 folds, then display the fold
# number assigned to each observation.
model <- rfcv2(x, y, cv.fold = 10)
model[["idx"]]
請注意,rfcv 函數並非爲純粹的交叉驗證而設計,而是用於變量選擇,因此這樣做會執行大量冗餘計算。
我全都照做了,但不知爲何,執行 `model$idx` 時只得到 NULL – user2062207
你需要調用修改後的函數 `rfcv2`,而不是原來的函數。請參閱答案末尾的修改。 – DrDom
哦,我明白了。有什麼方法可以讓返回的結果把預測值與實際值一一對應起來嗎?我現在得到的返回值是這樣的: '[1] 6 2 1 5 2 3 7 10 9 6 1 7 7 9 5 2 3 7 4 10 7 1 8 2 4 [26] 1 6 4 5 8 4 5 9 4 4 6 10 3 6 3 5 9 9 1 3 5 2 3 2 10' – user2062207