2016-06-27 62 views
1

所以我對R很新。我在用Mathematica導入數據時遇到了一些麻煩,所以我決定做一個開關,因爲R更適合分析。我正在構建一些機器學習技術來分析我現在可以導入的數據。這是一個遺傳編程實現,完成時應該對一些數據進行符號迴歸。除了錯誤之外,腳本應該幾乎完成(我需要編寫組合操作符,使分區受到保護,並完成基本函數列表)。編寫已解決的腳本(R Error Genetic Programming Implementation)時,我遇到了以前的問題。我已經調試了大約一天的腳本,並且我完全沒有想法。R GP執行錯誤

我的錯誤信息是:

Error in makeStrName(nextGen) : object 'nextGen' not found 
> 
> #Print the string versions of the five functions with the lowest RMSE evolved. 
> byRMSEList<-sortByRMSE(populationsBestTenStr) 
Error: object 'totalTwo' not found 
> for(i in 1:5) 
+ { 
+ byRMSEList[[i]] 
+ } 
Error: object 'byRMSEList' not found 

這裏是我的腳本。我目前正在使用RStudio。感謝您抽出寶貴時間來幫助:

library("datasets") 

operators<-list("+","*","-","/","o") 
funcs<-list("x","log(x)","sin(x)","cos(x)","tan(x)") 

#Allows me to map a name to each element in a numerical list. 
makeStrName<-function(listOfItems) 
{ 
    for(i in 1:length(listOfItems)) 
    { 
    names(listOfItems)[i]=paste("x",i,sep="") 
    } 
    return(listOfItems) 
} 

#Allows me to replace each random number in a vector with the corresponding 
#function in a list of functions. 

mapFuncList<-function(funcList,rndNumVector) 
{ 
    for(i in 1:length(funcList)) 
    { 
    rndNumVector[rndNumVector==i]<-funcList[i] 
    } 
    return(rndNumVector) 
} 

#Will generate a random function from the list of functions and a random sample. 
generateOrganism<-function(inputLen,inputSeed, funcList) 
{ 
    set.seed(inputSeed) 
    rnd<-sample(1:length(funcList),inputLen,replace=T) 
    Org<-mapFuncList(funcList,rnd) 
    return(Org) 
} 

#Will generate a series of "Organisms" 
genPopulation<-function(popSize,initialSeed,initialSize,functions) 
{ 
    population<-list() 
    for(i in 1:popSize) 
    { 
    population <- c(population,generateOrganism(initialSize,initialSeed+i,functions)) 
    } 
    populationWithNames<-makeStrName(population) 
    return(populationWithNames) 
} 

#Turns the population of functions (which are actually strings in "") into 
#actual functions. (i.e. changes the mode of the list from string to function). 

funCreator<-function(snippet) 
{ 
    txt=snippet 
    function(x) 
    { 
    exprs <- parse(text = txt) 
    eval(exprs) 
    } 
} 

#Applies a fitness function to the population. Puts the best organism in 
#the hallOfFame. 
evalPopulation<-function(populationFuncList, inputData, outputData, populationStringList) 
{ 
    #rmse <- sqrt(mean((sim - obs)^2)) 
    for(i in 1:length(populationStringList)) 
    { 
    stringFunc<-populationStringList[[i]] 
    total<-list(mode="numeric",length=length(inputData)) 
    topTenPercentFunctionList<-list() 
    topTenPercentRMSEList<-list() 
    topTenPercentStringFunctionList<-list() 
    tempFunc<-function(x){x} 
    for(z in 1:length(inputData)) 
    { 
     total<-c(total,(abs(populationFuncList[[i]](inputData[[z]])-outputData[[z]]))) 
     tempFunc<-populationFuncList[[i]] 
    } 
    rmse<-sqrt(mean(total*total)) 
    topTenPercentVal<-length(populationFuncList)*0.1 
    if(length(topTenPercentFunctionList)<topTenPercentVal||RMSE<min(topTenPercentRMSEList)) 
    { 
     topTenPercentStringFunctionList<-c(topTenPercentStringFunctionList,stringFunc) 
     topTenPercentRMSEList<-c(topTenPercentRMSEList, rmse) 
     topTenPercentFunctionList<-c(topTenPercentFunctionList, tempFunc) 
    } 
    } 
    return(topTenPercentStringFunctionList) 
} 
#Get random operator 
getRndOp<-function(seed) 
{ 
    set.seed(seed) 
    rndOpNum<-sample(1:length(operators),1,replace=T) 
    operation<-operators[[rndOpNum]] 
    return(operation) 
} 

#Mutation Operators 

#This attaches a new appendage to an organism 
endNodeMutation<-function(strFunc,seed) 
{ 
    op<-getRndOp(seed) 
    strFunc<-c(strFunc,op) 
    newAppendage<-generateOrganism(1,seed+2,funcs) 
    strFunc<-c(strFunc,newAppendage) 
    return(strFunc) 
} 

#This is a mutation that occurs at a random locaiton in an organism 
rndNodeMutation<-function(strFunc,seed,secondSeed) 
{ 
    op<-getRndOp(seed) 
    halfStrFunc<-((length(strFunc))/2) 
    set.seed(seed) 
    randomStart<-sample(1:halfStrFunc,1,replace=T) 
    set.seed(secondSeed) 
    randomEnd<-2*(sample(1:length(halfStrFunc),1,replace=T)) 
    strFuncUpdate<-substr(strFunc,randomStart,randomEnd) 
    strFuncUpdate<-c(strFuncUpdate,op) 
    newAppendage<-generateOrganism(1,seed+2,funcs) 
    strFuncUpdate<-c(strFuncUpdate,newAppendage) 
    return(strFuncUpdate) 
} 

#Crossover Operators 

#Crossover operator that attaches otherStrFunc to strFunc at the endpoint of strFunc 
crossoverConcatenationOperator<-function(strFunc,otherStrFunc) 
{ 
    newStrFunc<-c(strFunc,otherStrFunc) 
    return(newStrFunc) 
} 

#Crossover Operation that starts and ends at random points in the concatenation 
randomCrossoverOperator<-function(strFunc,otherStrFunc,seed,secondSeed) 
{ 
    set.seed(seed) 
    wholeLength<-(length(strFunc)+length(otherStrFunc)) 
    startRndNum<-sample(1:length(strFunc),1,replace=T) 
    set.seed(secondSeed) 
    endRndNum<-sample(length(strFunc):wholeLength,1,replace=T) 
    concatenatedFunc<-c(strFunc,otherStrFunc) 
    newFunc<-substr(concatenatedFunc,startRndNum,endRndNum) 
    return(newFunc) 
} 
evolve<-function(strFuncList,tenPercentStrFuncList) 
{ 
    #Detach the bottom ninety percent to the top ten percent 
    evolveList<-substr(strFuncList,length(tenPercentStrFuncList),length(strFuncList)) 
    #Get sizes. Will use a random mutation, then random crossover, then 
    #random mutation, then random crossover at percentages with 0.05,0.45,0.05,0.45 
    #respectively 
    size<-length(evolveList) 
    mutateNum<-0.1*size 
    crossoverNum<-0.9*size 
    halfMutateNum<-0.05*size 
    halfCrossoverNum<-0.45*size 
    roundedMutateNum<-floor(mutateNum) 
    roundedCrossoverNum<-floor(crossoverNum) 
    roundedHalfMutateNum<-floor(halfMutateNum) 
    roundedHalfCrossoverNum<-floor(halfCrossoverNum) 

    #Calls the functions for those percentage of organisms in that order 
    for(i in 1:roundedHalfMutateNum) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndNodeMutation(evolveList[[i]],rndOne,rndTWo) 
    evolveList[[i]]<-newFunc 
    } 
    for (i in roundedHalfMutateNum:(roundedHalfCrossoverNum+roundedHalfMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndCrossoverOperation(evolveList[[i]],evolveList[[i+1]],rndOne,rndTwo) 
    firstSubstr<-substr(evolveList,1,i-1) 
    secondSubstr<-substr(evolveLIst,i+2,length(evolveList)) 
    halfSubstr<-c(firstSubstr,newFunc) 
    evolveList<-c(halfSubstr,secondSubstr) 
    } 
    for(i in (roundedHalfCrossoverNum+roundedHalfMutateNum):(roundedHalfCrossoverNum+roundedMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndNodeMutation(evolveList[[i]],rndOne,rndTWo) 
    evolveList[[i]]<-newFunc 
    } 
    for(i in (roundedHalfCrossoverNum+roundedMutateNum):(roundedCrossoverNum+roundedHalfMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndCrossoverOperation(evolveList[[i]],evolveList[[i+1]],rndOne,rndTwo) 
    firstSubstr<-substr(evolveList,1,i-1) 
    secondSubstr<-substr(evolveLIst,i+2,length(evolveList)) 
    halfSubstr<-c(firstSubstr,newFunc) 
    evolveList<-c(halfSubstr,secondSubstr) 
    } 
} 

#Calculates the root mean squared of the functions in a string list. 
#Then sorts the list by RMSE. 
sortByRMSE<-function(strL) 
{ 
    for (z in 1:length(strL)) 
    { 
    for(i in 1:length(strL)) 
    { 
     nonStrFuncList<-lapply(strL,function(x){funCreator(x)}) 
     totalTwo<-c(totalTwo,(abs(nonStrFuncList[[z]](inputData[[i]])-outputData[[i]]))) 
    } 
    rmse<-sqrt(mean(totalTwo*totalTwo)) 
    strFuncsLists<-strL[order(sapply(strL, '[[', rmse))] 
    } 
    return(strFuncsLists) 
} 

#Data, Output Goal 
desiredFuncOutput<-list(1,4,9,16,25) 
dataForInput<-list(1,2,3,4,5) 

#Generate Initial Population 
POpulation<-genPopulation(4,1,1,funcs) 
POpulationFuncList <- lapply(setNames(POpulation,names(POpulation)),function(x){funCreator(x)}) 

#Get and save top ten percent in bestDudes 
bestDudes<-evalPopulation(POpulationFuncList,dataForInput,desiredFuncOutput,POpulation) 
#Evolve the rest 
NewBottomNinetyPercent<-evolve(POpulation,bestDudes) 
#Concatenate the two to make a new generation 
nextGen<-c(bestDudes,NewBottomNinetyPercent) 

#Declare lists, 
populationsBestTenStr<-list() 
populationsFuncList<-list() 

#Run ten generations. 
for(i in 1:10) 
{ 
    nextGen<-makeStrName(nextGen) 
    populationsFuncList<-lapply(setNames(nextGen,names(nextGen)),function(x){funCreator(x)}) 
    populationsBestTenStr<-evalPopulation(populationsFuncList,dataForInput,desiredFuncOutput,nextGen) 
    nextGen<-evolve(populations,populationsBestTenStr) 
} 

#Print the string versions of the five functions with the lowest RMSE evolved. 
byRMSEList<-sortByRMSE(populationsBestTenStr) 
for(i in 1:5) 
{ 
    byRMSEList[[i]] 
} 

回答

0
library("datasets") 

operators<-list("+","*","-","/","o") 
funcs<-list("x","log(x)","sin(x)","cos(x)","tan(x)") 

# Fixed: 
# evolveLIst inconsistently typed as evolveList 
# rndCrossoverOperation inconsistently typed as randomCrossoverOperator 
# rndTWo inconsistently typed as rndTwo 
# broken substr 
# broken condition leading to for(i in 1:0) 
# misc. others 

#Allows me to map a name to each element in a numerical list. 
makeStrName<-function(listOfItems) 
{ 
    for(i in 1:length(listOfItems)) 
    { 
    names(listOfItems)[i]=paste("x",i,sep="") 
    } 
    return(listOfItems) 
} 

#Allows me to replace each random number in a vector with the corresponding 
#function in a list of functions. 

mapFuncList<-function(funcList,rndNumVector) 
{ 
    for(i in 1:length(funcList)) 
    { 
    rndNumVector[rndNumVector==i]<-funcList[i] 
    } 
    return(rndNumVector) 
} 

#Will generate a random function from the list of functions and a random sample. 
generateOrganism<-function(inputLen,inputSeed, funcList) 
{ 
    set.seed(inputSeed) 
    rnd<-sample(1:length(funcList),inputLen,replace=T) 
    Org<-mapFuncList(funcList,rnd) 
    return(Org) 
} 

#Will generate a series of "Organisms" 
genPopulation<-function(popSize,initialSeed,initialSize,functions) 
{ 
    population<-list() 
    for(i in 1:popSize) 
    { 
    population <- c(population,generateOrganism(initialSize,initialSeed+i,functions)) 
    } 
    populationWithNames<-makeStrName(population) 
    return(populationWithNames) 
} 

#Turns the population of functions (which are actually strings in "") into 
#actual functions. (i.e. changes the mode of the list from string to function). 

funCreator<-function(snippet) 
{ 
    txt=snippet 
    function(x) 
    { 
    exprs <- parse(text = txt) 
    eval(exprs) 
    } 
} 

#Applies a fitness function to the population. Puts the best organism in 
#the hallOfFame. 
evalPopulation<-function(populationFuncList=POpulationFuncList, inputData=dataForInput, outputData=desiredFuncOutput, 
         populationStringList=POpulation) 
{ 
    #rmse <- sqrt(mean((sim - obs)^2)) 
    for(i in 1:length(populationStringList)) 
    { 
    stringFunc<-populationStringList[[i]] 
    total<-as.numeric(length(inputData)) 
    topTenPercentFunctionList<-list() 
    topTenPercentRMSEList<-list() 
    topTenPercentStringFunctionList<-list() 
    tempFunc<-function(x){x} 
    for(z in 1:length(inputData)) 
    { 
     total<-c(total,(abs(populationFuncList[[i]](inputData[[z]])-outputData[[z]]))) 
     tempFunc<-populationFuncList[[i]] 
    } 
    rmse<-sqrt(mean(total^2)) 
    topTenPercentVal<-length(populationFuncList)*0.1 
    if(length(topTenPercentFunctionList)<topTenPercentVal||RMSE<min(topTenPercentRMSEList)) 
    { 
     topTenPercentStringFunctionList<-c(topTenPercentStringFunctionList,stringFunc) 
     topTenPercentRMSEList<-c(topTenPercentRMSEList, rmse) 
     topTenPercentFunctionList<-c(topTenPercentFunctionList, tempFunc) 
    } 
    } 
    return(topTenPercentStringFunctionList) 
} 
#Get random operator 
getRndOp<-function(seed) 
{ 
    set.seed(seed) 
    rndOpNum<-sample(1:length(operators),1,replace=T) 
    operation<-operators[[rndOpNum]] 
    return(operation) 
} 

#Mutation Operators 

#This attaches a new appendage to an organism 
endNodeMutation<-function(strFunc,seed) 
{ 
    op<-getRndOp(seed) 
    strFunc<-c(strFunc,op) 
    newAppendage<-generateOrganism(1,seed+2,funcs) 
    strFunc<-c(strFunc,newAppendage) 
    return(strFunc) 
} 

#This is a mutation that occurs at a random locaiton in an organism 
rndNodeMutation<-function(strFunc,seed,secondSeed) 
{ 
    op<-getRndOp(seed) 
    halfStrFunc<-((length(strFunc))/2) 
    set.seed(seed) 
    randomStart<-sample(1:halfStrFunc,1,replace=T) 
    set.seed(secondSeed) 
    randomEnd<-2*(sample(1:length(halfStrFunc),1,replace=T)) 
    strFuncUpdate<-substr(strFunc,randomStart,randomEnd) 
    strFuncUpdate<-c(strFuncUpdate,op) 
    newAppendage<-generateOrganism(1,seed+2,funcs) 
    strFuncUpdate<-c(strFuncUpdate,newAppendage) 
    return(strFuncUpdate) 
} 

#Crossover Operators 

#Crossover operator that attaches otherStrFunc to strFunc at the endpoint of strFunc 
crossoverConcatenationOperator<-function(strFunc,otherStrFunc) 
{ 
    newStrFunc<-c(strFunc,otherStrFunc) 
    return(newStrFunc) 
} 

#Crossover Operation that starts and ends at random points in the concatenation 
rndCrossoverOperation<-function(strFunc,otherStrFunc,seed,secondSeed) # fixed function name 
{ 
    set.seed(seed) 
    wholeLength<-(length(strFunc)+length(otherStrFunc)) 
    startRndNum<-sample(1:length(strFunc),1,replace=T) 
    set.seed(secondSeed) 
    endRndNum<-sample(length(strFunc):wholeLength,1,replace=T) 
    concatenatedFunc<-c(strFunc,otherStrFunc) 
    newFunc<-substr(concatenatedFunc,startRndNum,endRndNum) 
    return(newFunc) 
} 
evolve<-function(strFuncList=POpulation,tenPercentStrFuncList=bestDudes) 
{ 
    #Detach the bottom ninety percent to the top ten percent 
    evolveList<-strFuncList[!strFuncList %in% tenPercentStrFuncList] # fixed broken substring 
    #Get sizes. Will use a random mutation, then random crossover, then 
    #random mutation, then random crossover at percentages with 0.05,0.45,0.05,0.45 
    #respectively 
    size<-length(evolveList) 
    mutateNum<-0.1*size 
    crossoverNum<-0.9*size 
    halfMutateNum<-0.05*size 
    halfCrossoverNum<-0.45*size 
    roundedMutateNum<-floor(mutateNum) 
    roundedCrossoverNum<-floor(crossoverNum) 
    roundedHalfMutateNum<-floor(halfMutateNum) 
    roundedHalfCrossoverNum<-floor(halfCrossoverNum) 

    #Calls the functions for those percentage of organisms in that order 
    if(roundedHalfMutateNum < 1) roundedHalfMutateNum <- 1 
    for(i in 1:roundedHalfMutateNum) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndNodeMutation(evolveList[[i]],rndOne,rndTwo) # fixed case 
    evolveList[[i]]<-newFunc 
    } 
    for (i in roundedHalfMutateNum:(roundedHalfCrossoverNum+roundedHalfMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndCrossoverOperation(evolveList[[i]],evolveList[[i+1]],rndOne,rndTwo) 
    firstSubstr<-substr(evolveList,1,i-1) 
    secondSubstr<-substr(evolveList,i+2,length(evolveList)) 
    halfSubstr<-c(firstSubstr,newFunc) 
    evolveList<-c(halfSubstr,secondSubstr) 
    } 
    for(i in (roundedHalfCrossoverNum+roundedHalfMutateNum):(roundedHalfCrossoverNum+roundedMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndNodeMutation(evolveList[[i]],rndOne,rndTwo) 
    evolveList[[i]]<-newFunc 
    } 
    for(i in (roundedHalfCrossoverNum+roundedMutateNum):(roundedCrossoverNum+roundedHalfMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndCrossoverOperation(evolveList[[i]],evolveList[[i+1]],rndOne,rndTwo) 
    firstSubstr<-substr(evolveList,1,i-1) 
    secondSubstr<-substr(evolveList,i+2,length(evolveList)) 
    halfSubstr<-c(firstSubstr,newFunc) 
    evolveList<-c(halfSubstr,secondSubstr) 
    } 
} 

#Calculates the root mean squared of the functions in a string list. 
#Then sorts the list by RMSE. 
sortByRMSE<-function(strL) 
{ 
    for (z in 1:length(strL)) 
    { 
    for(i in 1:length(strL)) 
    { 
     nonStrFuncList<-lapply(strL,function(x){funCreator(x)}) 
     totalTwo<-c(totalTwo,(abs(nonStrFuncList[[z]](inputData[[i]])-outputData[[i]]))) 
    } 
    rmse<-sqrt(mean(totalTwo*totalTwo)) 
    strFuncsLists<-strL[order(sapply(strL, '[[', rmse))] 
    } 
    return(strFuncsLists) 
} 

#Data, Output Goal 
desiredFuncOutput<-list(1,4,9,16,25) 
dataForInput<-list(1,2,3,4,5) 

#Generate Initial Population 
POpulation<-genPopulation(4,1,1,funcs) 
POpulationFuncList <- lapply(setNames(POpulation,names(POpulation)),function(x){funCreator(x)}) 

#Get and save top ten percent in bestDudes 
bestDudes<-evalPopulation(POpulationFuncList,dataForInput,desiredFuncOutput,POpulation) 
#Evolve the rest 
NewBottomNinetyPercent<-evolve(POpulation,bestDudes) 
#Concatenate the two to make a new generation 
nextGen<-c(bestDudes,NewBottomNinetyPercent) 

#Declare lists, 
populationsBestTenStr<-list() 
populationsFuncList<-list() 

#Run ten generations. 
for(i in 1:10) 
{ 
    nextGen<-makeStrName(nextGen) 
    populationsFuncList<-lapply(setNames(nextGen,names(nextGen)),function(x){funCreator(x)}) 
    populationsBestTenStr<-evalPopulation(populationsFuncList,dataForInput,desiredFuncOutput,nextGen) 
    nextGen<-evolve(populations,populationsBestTenStr) 
} 

#Print the string versions of the five functions with the lowest RMSE evolved. 
byRMSEList<-sortByRMSE(populationsBestTenStr) 
for(i in 1:5) 
{ 
    byRMSEList[[i]] 
} 
+0

我有,在我的代碼。請參閱「#Data,輸出目標 desiredFuncOutput <-list(1,4,9,16,25) dataForInput <-list(1,2,3,4,5)」由於某些原因,它未複製到您的。固定mapStrFunc,感謝您的提示 –

+0

它剛好在我發佈的代碼中調用evalPopulation。 –

+0

@ novice-polymath好的我修復了另一個bug,現在它通過'evolve'工作,但我不知道你想用'substr'做什麼。它似乎在任意地截斷「調度」值。我看到了那條線的評論,但我真的不明白。你能幫我嗎?我必須去開會,但我會在今天晚些時候回來。 –