2014-01-12 26 views
0

我有一個函數,用它根據鍵/值字典來翻譯詞元(token)。在 tm_map 的自定義函數中保留(maintain)用戶定義的元數據。

# Lookup table: each name is an English token, each value its Spanish
# translation.
dictionary <- c(
    home = "casa",
    boat = "barco",
    car  = "carro",
    tree = "arbol"
)

# Translate the whitespace-separated tokens of a text document using a
# name -> value lookup table.
#
# Arguments:
#   text        Character-like document; each element is split on single
#               whitespace characters ("\\s") and translated independently.
#   dictionary  Named character vector: names are source tokens, values are
#               their translations.
#
# Returns:
#   The result of PlainTextDocument() built from one translated string per
#   element of `text`, with `id` and `author` taken from ID(text) and
#   Author(text). Tokens that have no entry in `dictionary` appear as "NA"
#   in the output string.
translate2 <- function (text, dictionary) {
    # seq_along() rather than 1:length(): for an empty `text` the latter
    # iterates over c(1, 0) and fabricates the elements "NA" and "".
    text_out <- vapply(seq_along(text), function(i) {
        tokens <- unlist(strsplit(text[i], "\\s"))
        # Look tokens up by name; unmatched tokens yield NA.
        paste(dictionary[tokens], collapse = " ")
    }, character(1), USE.NAMES = FALSE)
    PlainTextDocument(text_out, id = ID(text), author = Author(text))
}

此函數對於元數據 `Author`(作者)可以正常工作:

# Demo: run translate2 over a one-document corpus and check that the
# "Author" tag is still readable afterwards. Lines starting with "# [1]" or
# "# NA" are the outputs reported for the preceding statement.
library(tm) 

text <- "My car is on the tree next to my home under the boat" 
corpus <- Corpus(VectorSource(text)) 
# Set two metadata tags with type="local".
meta(corpus, "Author", type="local") <- "Kant" 
meta(corpus, "TextID", type="local") <- "121212" 
meta(corpus[[1]], "Author") 
# [1] "Kant" 

# `dictionary` is handed to tm_map as an extra argument for translate2.
corpus <- tm_map(corpus, translate2, dictionary) 
meta(corpus[[1]], "Author") 
# [1] "Kant" 
# Tokens without a dictionary entry show up as NA in the translated text.
corpus[[1]] 
# NA carro NA NA NA arbol NA NA NA casa NA NA barco 

但是,當我試圖用略微修改後的函數版本來傳遞用戶定義的元數據(例如 TextID)時:

# Variant of translate2 that additionally tries to forward a user-defined
# "TextID" tag to the new document.
# NOTE: as written this fails when called through tm_map -- the run further
# down reports "unused argument (TextID = TextID(text))" from
# PlainTextDocument().
translate1 <- function (text, dictionary) { 
    text_out <- character(0) 
    for (i in 1:length(text)) { 
    # Split element i on whitespace and look each token up by name in
    # `dictionary`; the translated tokens are re-joined with single spaces.
    text.split <- strsplit(text[i], "\\s") 
    translation <- dictionary[unlist(text.split)] 
    text_out <- append(text_out, paste(translation, sep="", collapse=" ")) 
    } 
    # The TextID = ... argument below is the one rejected as "unused".
    PlainTextDocument(text_out, id = ID(text), author = Author(text), 
        TextID = TextID(text)) 
} 

我得到

# Demo: same setup as before, but mapping translate1 over the corpus.
# Comment lines after a statement are the outputs reported for it.
text <- "My car is on the tree next to my home under the boat" 
corpus <- Corpus(VectorSource(text)) 
# Set two metadata tags with type="local".
meta(corpus, "Author", type="local") <- "Kant" 
meta(corpus, "TextID", type="local") <- "121212" 
meta(corpus[[1]], "Author") 
# [1] "Kant" 
meta(corpus[[1]], "TextID") 
# [1] "121212" 

# This call fails with the error shown below.
corpus <- tm_map(corpus, translate1, dictionary) 
# Error in PlainTextDocument(text_out, id = ID(text), author = Author(text), : 
#        unused argument (TextID = TextID(text)) 

回答

0

有你的方法的幾個問題:

  1. PlainTextDocument 沒有名爲 TextID 的參數(這就是導致你出錯的原因)
  2. 沒有名爲 TextID 的函數

根據 ?PlainTextDocument 的幫助文檔,你要找的參數似乎叫做 localmetadata。

這裏有一個版本的translate1似乎按預期方式工作:

# Translate the whitespace-separated tokens of a text document using a
# name -> value lookup table, carrying the user-defined "TextID" tag over
# to the new document.
#
# Arguments:
#   text        Character-like document; each element is split on single
#               whitespace characters ("\\s") and translated independently.
#   dictionary  Named character vector: names are source tokens, values are
#               their translations.
#
# Returns:
#   The result of PlainTextDocument() built from one translated string per
#   element of `text`, with `id` and `author` taken from ID(text) and
#   Author(text), and the TextID tag passed via `localmetadata`. Tokens that
#   have no entry in `dictionary` appear as "NA" in the output string.
translate1 <- function (text, dictionary) {
    # seq_along() rather than 1:length(): for an empty `text` the latter
    # iterates over c(1, 0) and fabricates the elements "NA" and "".
    text_out <- vapply(seq_along(text), function(i) {
        tokens <- unlist(strsplit(text[i], "\\s"))
        # Look tokens up by name; unmatched tokens yield NA.
        paste(dictionary[tokens], collapse = " ")
    }, character(1), USE.NAMES = FALSE)
    # User-defined tags go through `localmetadata`; PlainTextDocument() has
    # no TextID argument of its own.
    PlainTextDocument(text_out, id = ID(text), author = Author(text),
        localmetadata = list(TextID = meta(text, "TextID")))
}

# Demo: map the localmetadata-based translate1 over a one-document corpus
# and check that both "Author" and "TextID" are still readable afterwards.
# Comment lines after a statement are the outputs reported for it.
text <- "My car is on the tree next to my home under the boat" 
corpus <- Corpus(VectorSource(text)) 
# Set two metadata tags with type="local".
meta(corpus, "Author", type="local") <- "Kant" 
meta(corpus, "TextID", type="local") <- "121212" 
meta(corpus[[1]], "Author") 
# [1] "Kant" 
meta(corpus[[1]], "TextID") 
# [1] "121212" 

# `dictionary` is handed to tm_map as an extra argument for translate1.
corpus <- tm_map(corpus, translate1, dictionary) 
meta(corpus[[1]], "Author") 
# [1] "Kant" 
meta(corpus[[1]], "TextID") 
# [1] "121212" 
# Tokens without a dictionary entry show up as NA in the translated text.
corpus[[1]] 
# NA carro NA NA NA arbol NA NA NA casa NA NA barco