-1
我正在構建用於使用R.執行gcrma標準化後乳腺癌數據的預測模型,我產生的電位預測變量。現在,當我運行RF算法我遇到了以下錯誤隨機森林,使用R
rf_output=randomForest(x=pred.data, y=target, importance = TRUE, ntree = 25001, proximity=TRUE, sampsize=sampsizes)
Error: Error in randomForest.default(x = pred.data, y = target, importance = TRUE, : Can not handle categorical predictors with more than 53 categories.
代碼:
library(randomForest)
library(ROCR)
library(Hmisc)
library(genefilter)
setwd("E:/kavya's project_work/final")
datafile<-"trainset_gcrma.txt"
clindatafile<-read.csv("mod clinical_details.csv")
outfile="trainset_RFoutput.txt"
varimp_pdffile="trainset_varImps.pdf"
MDS_pdffile="trainset_MDS.pdf"
ROC_pdffile="trainset_ROC.pdf"
case_pred_outfile="trainset_CasePredictions.txt"
vote_dist_pdffile="trainset_vote_dist.pdf"
data_import=read.table(datafile, header = TRUE, na.strings = "NA", sep="\t")
clin_data_import=clindatafile
clincaldata_order=order(clin_data_import[,"GEO.asscession.number"])
clindata=clin_data_import[clincaldata_order,]
data_order=order(colnames(data_import)[4:length(colnames(data_import))])+3 #Order data without first three columns, then add 3 to get correct index in original file
rawdata=data_import[,c(1:3,data_order)] #grab first three columns, and then remaining columns in order determined above
header=colnames(rawdata)
X=rawdata[,4:length(header)]
ffun=filterfun(pOverA(p = 0.2, A = 100), cv(a = 0.7, b = 10))
filt=genefilter(2^X,ffun)
filt_Data=rawdata[filt,]
#Get potential predictor variables
predictor_data=t(filt_Data[,4:length(header)])
predictor_names=c(as.vector(filt_Data[,3])) #gene symbol
colnames(predictor_data)=predictor_names
target= clindata[,"relapse"]
target[target==0]="NoRelapse"
target[target==1]="Relapse"
target=as.factor(target)
tmp = as.vector(table(target))
num_classes = length(tmp)
min_size = tmp[order(tmp,decreasing=FALSE)[1]]
sampsizes = rep(min_size,num_classes)
rf_output=randomForest(x=pred.data, y=target, importance = TRUE, ntree = 25001, proximity=TRUE, sampsize=sampsizes)
error:"Error in randomForest.default(x = pred.data, y = target, importance = TRUE, : Can not handle categorical predictors with more than 53 categories."
,因爲我是新來的機器學習我無法繼續。請做好必要的事情。 提前。
的[同時使用,使用R隨機森林來錯誤]可能的複製(http://stackoverflow.com/questions/33098731/error-coming-while-using-random-forest-using-r) – phiver