1
我編寫了一些代碼,使用iris
數據集手工製作樸素貝葉斯分類器。我做了以下:R中的樸素貝葉斯分類 - 從頭開始
- 將數據分成3類
- 計算均值和方差爲每一類
使用dnorm
set.seed(1) #reproducibility
training_rows <- sort(c(sample(1:50, 40), sample(51:100, 40), sample(101:150, 40)))
training_x <- as.data.frame(iris[training_rows, 1:4])
training_y <- iris[training_rows, 5]
iris_nb <- function(x, trainx, trainy){
train <- cbind(trainx, trainy)
class_virginica <- train[which(train$trainy == 'virginica'),]
class_setosa <- train[which(train$trainy == 'setosa'),]
class_versicolor <- train[which(train$trainy == 'versicolor'),]
posterior <- function(x, classtype){
p_Sepal.Length <- dnorm(x, mean(classtype[,1]), sd(classtype[,1]))
p_Sepal.Width <- dnorm(x, mean(classtype[,2]), sd(classtype[,2]))
p_Petal.Length <- dnorm(x, mean(classtype[,3]), sd(classtype[,3]))
p_Petal.Width <- dnorm(x, mean(classtype[,4]), sd(classtype[,4]))
vec <- 0.33* p_Sepal.Length * p_Sepal.Width * p_Petal.Length * p_Petal.Width #for each species
return(vec)
}
return(list(virginica = sum(posterior(x, class_virginica)),
setosa = sum(posterior(x, class_setosa)),
versicolor = sum(posterior(x, class_versicolor))))
}
這裏是輸出:
test_case_1 <- as.matrix(iris[1, 1:4])
iris_nb(test_case_1, training_x, training_y)
## $virginica
## [1] 1.167108e-16
## $setosa
## [1] 2.136291e-54
## $versicolor
## [1] 1.636154e-32
我感謝幫助!