2017-04-24 52 views
0

這是我用R編寫的網頁代碼,效果很好。網絡掃描後保存.txt文件沒有空白

require(httr) 
require(rvest) 
library(KoNLP) 
useSejongDic() 


total = c() 
site = "http://movie.daum.net/moviedb/grade? 
movieId=92107&type=netizen&page=" 

for(i in 1:10){ 
    url = paste0(site, i); 
    mention = GET(url) %>% read_html() %>% 
    html_nodes("p.desc_review") %>% 
    html_text() %>% 
    repair_encoding(); 
    total = rbind(total, mention); 
} 


total = gsub("\t","",total) 
total = gsub("\n","",total) 

text1=extractNoun(total) 
noun= sapply(total,extractNoun,USE.NAMES = FALSE) 
undata= unlist(noun) 
write(undata,"C:\\Users\\dlgof\\DATA.txt") 

但是,在txt文件中有太多的空格,我該如何刪除它們?

enter image description here

回答

0

看起來你undata有很多的 「」。添加

undata = undata[undata != " "] 

在寫之前。

+0

謝謝!!!!!我爲這兩天掙扎着,終於有效 – LEE