我正嘗試使用 R 中的 RTextTools 庫創建文本分類器。訓練和測試數據框的格式相同:它們都由兩列組成,第一列是文本,第二列是標籤。

R - 如何調整使用 RTextTools 創建的文本分類器
# Packages
## Install
# install.packages() expects all package names in ONE character vector; a
# second positional argument is `lib` (the directory to install into), so the
# names must be wrapped in c().
install.packages(c("e1071", "RTextTools"))
## Import
library(e1071)       # tune.svm()
library(RTextTools)  # create_matrix(), create_container(), train_model(), classify_model()
# Toy training set: three documents, each with a "yes"/"no" label.
data.train <- data.frame(
  content = c(
    "Lorem Ipsum is simply dummy text of the printing and typesetting industry.",
    "Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book.",
    "It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged."
  ),
  label = c("yes", "yes", "no")
)
# Toy testing set with the same two-column layout (text, label).
data.test <- data.frame(
  content = c(
    "It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout.",
    "The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English.",
    "Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy."
  ),
  label = c("no", "yes", "yes")
)
# Process training dataset
# Build the document-term matrix from the training texts with TF-IDF
# weighting and the listed cleaning steps (punctuation/number/stopword
# removal, stemming, whitespace stripping, lower-casing).
data.train.dtm <- create_matrix(
  data.train$content,
  language = "english",
  weighting = tm::weightTfIdf,
  removePunctuation = TRUE,
  removeNumbers = TRUE,
  removeSparseTerms = 0,
  removeStopwords = TRUE,
  stemWords = TRUE,
  stripWhitespace = TRUE,
  toLower = TRUE
)
# Wrap the matrix and its labels in a container; every row is used for
# training. seq_len() is used instead of 1:nrow() so that an empty data frame
# yields an empty index rather than c(1, 0).
data.train.container <- create_container(
  data.train.dtm,
  data.train$label,
  trainSize = seq_len(nrow(data.train)),
  virgin = FALSE
)
# Create linear SVM model
# The original `gamma = 1^-2` evaluates to 1; 10^-2 (0.01) is presumably what
# was meant, matching the 10^(...) grid used for tuning in this script.
# NOTE(review): per the e1071 svm() documentation gamma is not used by the
# linear kernel, so this value should not affect the fitted model — confirm
# whether train_model() forwards it at all.
model.linear <- train_model(
  data.train.container,
  "SVM",
  kernel = "linear",
  cost = 10,
  gamma = 10^-2
)
# Process testing dataset
# The test texts must go through the SAME preprocessing and weighting as the
# training texts; otherwise (e.g. unstemmed vs. stemmed terms, TF vs. TF-IDF)
# the test features do not line up with what the model was trained on.
# `originalMatrix` aligns the test columns with the training vocabulary.
# NOTE(review): some RTextTools releases reportedly fail in create_matrix()
# with "argument is of length zero" when originalMatrix is supplied — verify
# against the installed version.
data.test.dtm <- create_matrix(
  data.test$content,
  language = "english",
  originalMatrix = data.train.dtm,
  weighting = tm::weightTfIdf,
  removePunctuation = TRUE,
  removeNumbers = TRUE,
  removeSparseTerms = 0,
  removeStopwords = TRUE,
  stemWords = TRUE,
  stripWhitespace = TRUE,
  toLower = TRUE
)
# The test set is labelled (virgin = FALSE), so pass its real labels instead
# of placeholder zeros. seq_len() avoids the 1:0 pitfall on empty input.
data.test.container <- create_container(
  data.test.dtm,
  labels = data.test$label,
  testSize = seq_len(nrow(data.test)),
  virgin = FALSE
)
# Classify testing dataset
# Run the trained linear SVM over the test container.
model.linear.results <- classify_model(
  data.test.container,
  model.linear
)
# Cross-tabulate predicted labels against the true test labels.
model.linear.results.table <- table(
  Predicted = model.linear.results$SVM_LABEL,
  Actual = data.test$label
)
# Tune SVM hyper-parameters (gamma, cost) by grid search.
# - The document-term matrix is converted with as.matrix() and passed through
#   the x/y interface: the matrix itself holds no `label` column, so the
#   formula `label ~ .` had nothing to resolve against, and term names are not
#   guaranteed to be valid formula variables.
# - The response is an explicit factor so a classification SVM is tuned.
# - The number of cross-validation folds is capped at the number of training
#   rows; the default of 10 folds cannot be used on a smaller sample.
# - Functions are namespace-qualified so this call does not depend on e1071
#   being attached.
# NOTE(review): with only a handful of rows some folds may contain a single
# class; use a realistically sized training set for meaningful tuning.
model.tuned <- e1071::tune.svm(
  x = as.matrix(data.train.dtm),
  y = factor(data.train$label),
  gamma = 10^(-6:-1),
  cost = 10^(-1:1),
  tunecontrol = e1071::tune.control(cross = min(10, nrow(data.train)))
)