2017-08-01 22 views
0

我想從我的系統上傳文件夾Shiny App以得到Corpus的Document Term Matrix以應用K-means
我嘗試了各種方法來做到這一點,但我無法建立所有上傳文件之間建立連接以創建語料庫。
我能夠通過在全球環境中創建語料庫來應用K-means,但是我想通過ShinyApp上傳文件夾或選擇多個文件來完成此操作。如何上傳ShinyApp中的文本文件夾以獲取R中文件語料庫中的文檔術語矩陣?

下面是我做了什麼至今代碼:我可以上傳多個文件

library(shiny) 
library(shinydashboard) 
library(shinythemes) 
library(shinyFiles) 
library(tm) 

ui <- dashboardPage(
    dashboardHeader(title = "Document_Clustering"), 
    dashboardSidebar( 
    sidebarMenu(
     menuItem("Data Processing", tabName = "DP", icon = icon("info-circle")), 
     menuItem("K-Means", tabName = "KMeans", icon = icon("th")) 
)), 
    dashboardBody(
    tabItems(
     tabItem(tabName = "DP", 
     fluidRow(
      box(fileInput('file1', 'Choose Files', 
         accept=c('text/csv', 
           'text/comma-separated-values,text/plain', 
           '.csv'), multiple = TRUE) 
      , solidHeader = TRUE)) 
    ,fluidRow(
    box(title = "Pre-processing", width = 15 ,tableOutput('proc')) 
) 

), 


    tabItem(tabName = "KMeans", 
      fluidRow(
      box(
       title = "Enter Number of Clusters:", 
       selectInput("C", choices =c(seq(1 , 15, 1)),label = NULL ,selected = 1), solidHeader = TRUE 
      )), 
      fluidRow(box(title = "Cluster", width = 9, textOutput("cluster1"))), 
      fluidRow(box(title = "Cluster Size", width = 9, textOutput("size1"))), 
      fluidRow(box(title= "Between Cluster Hetrogeneity" , width=9, textOutput("hetro1"))) 

) 
))) 

server <- shinyServer(function(input, output, session){ 
    myData <- reactive({ 
    inFile <- input$file1 
    if (is.null(inFile)) return(NULL) 

con<- file(inFile$datapath, open="rt", encoding = "UTF-8") 
text<-readLines(con) 
msg<- paste(text, collapse = "\n") 
close(con) 
msg<- msg 


myCorpus <- Corpus(VectorSource(msg)) 
myCorpus <- tm_map(myCorpus, tolower) 
myCorpus <- tm_map(myCorpus, PlainTextDocument) 
myCorpus<- tm_map(myCorpus,removePunctuation) 
myCorpus <- tm_map(myCorpus, removeNumbers) 
myCorpus <- tm_map(myCorpus, removeWords,stopwords("english")) 
myCorpus <- tm_map(myCorpus, stripWhitespace) 
dtm <- DocumentTermMatrix(myCorpus,control = list(minWordLength = 1)) 
dtm_tfxidf <- weightTfIdf(dtm) 
m11 <- as.matrix(dtm_tfxidf) 
ri <- m11 


set.seed(1234) 
### Only kmeans 
n2 <- input$C 
clusk <- kmeans(as.data.frame(ri), n2) #, nstart = 9) 

T3<- list(Name= m11, Cluster_K=clusk$cluster, Size_K= clusk$size, Hetro_K=clusk$betweenss/clusk$totss*100) 
    }) 

    output$proc <- renderTable({ 
    myData()$Name 
    }) 

    output$cluster1 <- renderText({ 
    myData()$Cluster_K 

    }) 

    output$size1 <- renderText({ 
    myData()$Size_K 

    }) 

    output$hetro1 <- renderText({ 
    myData()$Hetro_K 
    }) 

    }) 

shinyApp(ui= ui, server = server) 

使用上面的代碼,但我在它進一步加工得到錯誤。 錯誤:我無法解決無效的'description'參數
此外,當我只上傳單個文件,然後一切似乎工作,但我沒有得到爲什麼羣集大小爲2的kmeans爲單個文件。

任何形式的幫助,非常感謝。
在此先感謝!

回答

0

我們無法使用某些功能連接所有文件,並且該代碼中缺少該功能。

爲了使ShinyApp做工精細,更改低於服務器部分

替換此

con<- file(inFile$datapath, open="rt", encoding = "UTF-8") 
text<-readLines(con) 
msg<- paste(text, collapse = "\n") 
close(con) 
msg<- msg 

myCorpus <- Corpus(VectorSource(msg)) 

有了這個

get.msg <- function(path) 
{ 
    con <- file(path, open = "rt", encoding = "latin1") 
    text <- readLines(con) 
    msg <- text[seq(which(text == "")[1] + 1, length(text), 1)] 
    close(con) 
    return(paste(msg, collapse = "\n")) 
} 

data.docs <- inFile$datapath 
data.docs <- data.docs[which(data.docs != "cmds")] 
all.data <- sapply(data.docs, 
        function(p) get.msg(file.path(p))) 

myCorpus <- Corpus(VectorSource(all.data)) 
相關問題