2014-04-10 122 views
2

我正在使用 jvmr 在 Scala 中運行 R。我不知道如何從 Scala 傳遞參數來調用 R 函數。(標題:使用 JVMR 從 Scala 調用 R 函數)

Scala 文件附在下面:

package org.scala.rtest 

import org.ddahl.jvmr.RInScala 

/** Scores text sentiment in R from a Scala entry point via jvmr's `RInScala`.
  *
  * Everything R-related (the `score.sentiment` definition, reading the word
  * lists and the tweet file, and the scoring call) lives in one R script that
  * is handed to the embedded interpreter as a single string.
  */
object RIntegration {

  /** Evaluates the sentiment script in R and prints what R captures for `analysis`.
    *
    * @param args command-line arguments; not read by this program
    */
  def main(args: Array[String]): Unit = {
    val R = RInScala()

    // Triple-quoted Scala strings do no escape processing, so sequences such as
    // '\\d+' and "C:\\Users\\..." reach R exactly as typed here and are
    // unescaped once, by R itself.
    //
    // The script binds four R variables: pos_words, neg_words, tweetsdata and
    // analysis (the data frame returned by score.sentiment).
    //
    // NOTE(review): scan() is called on the tweet file without a `sep`
    // argument; presumably this splits on whitespace, so each "sentence" may be
    // a single token rather than a whole tweet -- confirm this is intended.
    val sentimentScript = """ 
      score.sentiment = function (sentences, pos.words,neg.words, .progress='none') 
       { 
        require(plyr) 
        require(stringr) 

        scores = laply(sentences, function(sentence,pos.words,neg.words){ 

       sentence = gsub('[[:punct:]]','',sentence) 
       sentence = gsub('[[:cntrl:]]','',sentence) 
       sentence = gsub('\\d+','',sentence) 
       sentence = tolower(sentence) 

       word.list = str_split(sentence, '\\s+') 
       words = unlist(word.list) 

       pos.matches = match(words, pos.words) 

       neg.matches = match(words, neg.words) 

       pos.matches = !is.na(pos.matches) 

       neg.matches = !is.na(neg.matches) 

       score = sum(pos.matches) - sum (neg.matches) 

       return(score) 

       },pos.words, neg.words, .progress = .progress) 

       scores.df = data.frame(score=scores, text=sentences) 

       return(scores.df) 

      } 

     pos_words = scan("C:\\Users\\servicelyadmin\\Downloads\\opinion-lexicon-English\\positive-words.txt",what="character",comment.char=";") 
     neg_words = scan("C:\\Users\\servicelyadmin\\Downloads\\opinion-lexicon-English\\negative-words.txt",what="character",comment.char=";") 
     tweetsdata = scan("C:\\Users\\servicelyadmin\\Downloads\\opinion-lexicon-English\\twitterstream1.txt",what="character") 
     analysis = score.sentiment(tweetsdata,pos_words,neg_words) 

     """

    // Hand the whole script to the R interpreter in one call.
    R > sentimentScript

    // `analysis` was assigned inside the script above, so it can be looked up by
    // name here; presumably capture() returns R's printed form of it.
    println(R.capture("analysis"))
  }
}

上面的代碼運行完全正常,並能打印輸出。但我需要把下面的函數調用移到 R 代碼塊之外,並從 Scala 把值傳遞給該函數。我不知道如何實現這一點。

// Attempt to move the data loading and the scoring call out of the R script.
// Relies on an `R` value (an RInScala instance) and an R-side `score.sentiment`
// function that are defined outside this fragment.
//
// NOTE(review): each scan(...) string below contains no R assignment, so these
// calls only produce Scala-side vals; no R variables named pos_words,
// neg_words or tweetsdata are created by them.
val pos_words = R.apply("scan('positive-words.txt',what='character',comment.char=';')") 
     val neg_words = R.apply("scan('negative-words.txt',what='character',comment.char=';')") 
     val tweetsdata = R.apply("scan('twitterstream1.txt',what='character')") 
     // NOTE(review): the three arguments are quoted inside the R expression, so R
     // receives the string literals 'tweetsdata', 'pos_words' and 'neg_words'
     // rather than any data. The expression also has no `analysis <- ...`
     // assignment, so the R name `analysis` is not bound by this line.
     val analysis = R.eval("score.sentiment('tweetsdata','pos_words','neg_words')") 
// Looks up the R-side name "analysis", which nothing in this fragment assigns.
println(R.capture("analysis")) 

任何幫助將不勝感激。

感謝, 爬完Venugopal

回答

0

我已經讓它在 jvmr 下正常運行了。

package org.scala.rtest 

import org.ddahl.jvmr.RInScala 

/** Scores text sentiment in R from a Scala entry point via jvmr's `RInScala`.
  *
  * The R script only defines `score.sentiment`. The input data is loaded and
  * the function is invoked through separate calls issued from Scala, each of
  * which assigns its result to an R-side variable.
  */
object RIntegration {

  /** Defines the scorer in R, loads the inputs, scores them and prints the result.
    *
    * @param args command-line arguments; not read by this program
    */
  def main(args: Array[String]): Unit = {
    val R = RInScala()

    // Step 1: define score.sentiment inside the R session. Triple-quoted Scala
    // strings do no escape processing, so '\\d+' and '\\s+' reach R as typed.
    //
    // NOTE(review): the function body only uses plyr (laply), stringr
    // (str_split) and base R; the require(sparkR) line looks unneeded -- confirm.
    R > """ 
      require(sparkR) 

      score.sentiment = function(sentences, pos.words, neg.words, .progress='none') 
       { 
        require(plyr) 
        require(stringr) 


        scores = laply(sentences, function(sentence, pos.words, neg.words) { 

        # clean up sentences with R's regex-driven global substitute, gsub(): 

        sentence = gsub('[[:punct:]]', '', sentence, ignore.case=T) 

        sentence = gsub('[[:cntrl:]]', '', sentence, ignore.case=T) 

        sentence = gsub('\\d+', '', sentence, ignore.case=T) 

        # and convert to lower case: 

        sentence = tolower(sentence) 

        # split into words. str_split is in the stringr package 

        word.list = str_split(sentence, '\\s+') 

        # sometimes a list() is one level of hierarchy too much 

        words = unlist(word.list) 

        # compare our words to the dictionaries of positive & negative terms 

        pos.matches = match(words, pos.words) 
        neg.matches = match(words, neg.words) 

        # match() returns the position of the matched term or NA 
        # we just want a TRUE/FALSE: 

        pos.matches = !is.na(pos.matches) 

        neg.matches = !is.na(neg.matches) 

        # and conveniently enough, TRUE/FALSE will be treated as 1/0 by sum(): 

        score = sum(pos.matches) - sum(neg.matches) 

        return(score) 

        }, pos.words, neg.words, .progress=.progress) 
        scores.df = data.frame(score=scores, text=sentences) 
        return(scores.df) 
       } 


     """

    // Step 2: load the inputs into the R variables x, y and z. Each expression
    // carries its own `<-`, so the data stays on the R side; the values that
    // R(...) hands back to Scala are deliberately ignored. The file names are
    // relative paths, so they depend on the directory R resolves them against.
    R(" x <- scan('positive-words.txt',what='character',comment.char=';')")
    R(" y <- scan('negative-words.txt',what='character',comment.char=';')")
    R(" z <- scan('twitterstream1.txt', what='character')")

    // Step 3: call the function on the R-side variables (unquoted names, so R
    // passes the data itself) and keep the result in the R variable `df`.
    R.eval("df <- score.sentiment(z,x,y)")

    // `df` was assigned in R just above, so it can be looked up by name.
    println(R.capture("df"))
  }
}