2015-11-03 57 views
1
require(plyr) 
library(reshape) 
library(iterators) 
library(parallel) 
library(foreach) 
library(doParallel)  

# Cosine similarity of x and y: their dot product divided by the product of
# their Euclidean norms. Inputs are combined element-wise, so they must have
# compatible lengths. The result is NaN when either input is all zeros (0 / 0).
getCosine <- function(x, y) {
    dot_xy <- sum(x * y)
    norm_x <- sqrt(sum(x * x))
    norm_y <- sqrt(sum(y * y))
    dot_xy / (norm_x * norm_y)
}

# Read the visitor/product view counts (strings stay character, not factor)
# and keep only the rows whose Product.Views is greater than zero.
visitordata <- subset(
    read.csv(file = "~/Hotels.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE),
    Product.Views > 0
)
# Preview the first rows.
head(visitordata)

    Visitor_ID  Products Product.Views 
2 1001863689_3519696751 CZ1XQZ    2 
3 1001863689_3519696751 CZR3CN    1 
4 1001863689_3519696751 CZTNKN    3 
5 121021834007_98749174 CZ2LB0    1 
6 11029477426_678878300 CZTNKN    1 
7 21029477426_678878300 CZVDHR    1 

# Pivot the long (Visitor_ID, Products, Product.Views) rows into wide form:
# one row per Visitor_ID and one Product.Views.<product> column per product.
ColumnBasedData <- reshape(visitordata, idvar = "Visitor_ID", timevar = "Products", direction = "wide")

# Visitor/product combinations absent from the input come out as NA after the
# pivot; replace them with 0.
ColumnBasedData[is.na(ColumnBasedData)] <- 0

# Keep every column except the identifier. drop = FALSE keeps x a data frame
# even when only a single product column remains; without it x would collapse
# to a bare vector and ncol(x) / colnames(x) would be NULL further down.
x <- ColumnBasedData[, !(names(ColumnBasedData) %in% c("Visitor_ID")), drop = FALSE]
head(x)

    Product.Views.CZ1XQZ Product.Views.CZR3CN Product.Views.CZTNKN Product.Views.CZVDHR Product.Views.CZ36D3 Product.Views.CZE0EN 
2      1     1     1     0     0     0 
6      0     0     1     1     0     0 
9      0     0     0     0     1     1 
24     0     0     0     0     0     0 
37     0     0     0     0     0     0 
40     0     0     0     0     0     0 

# Placeholder: an all-NA square matrix with one row and one column per column
# of x, labelled on both axes with x's column names.
holder <- matrix(
    data = NA,
    nrow = ncol(x),
    ncol = ncol(x),
    dimnames = list(colnames(x), colnames(x))
)

# Data-frame version of the placeholder.
dataframe_y <<- as.data.frame(holder)



# Create a cluster sized detectCores() - 1 and register it as the backend
# that %dopar% dispatches to.
cl<-makeCluster(detectCores() -1) 
doParallel::registerDoParallel(cl) 

# One foreach task per column index i; each task loops over every column
# index j and assigns getCosine(x[i], x[j]) into dataframe_y[i, j].
# NOTE(review): the assignment happens inside the %dopar% task, so it
# presumably only changes the worker's copy of dataframe_y and never reaches
# this session -- confirm against the foreach/doParallel documentation.
# The last expression of the task body is the `for` loop, whose value is
# NULL, so nothing useful is collected into `ls` either.
ls <- foreach(i = 1:ncol(x)) %dopar% { 
for(j in 1:ncol(x)) { 
    dataframe_y[i,j] <- getCosine(x[i],x[j]) 
} 
} 
# Shut the worker processes down.
stopCluster(cl) 

# Write dataframe_y to ~/cosine.csv.
write.csv(dataframe_y,file="~/cosine.csv") 

使用 %do% 時可以正常運作,但換成 %dopar% 後沒有返回任何變量:用 %dopar% 時,dataframe_y 返回 NULL。有什麼想法嗎?(標題:R 中使用 dopar 的 foreach 循環無法運作)

編輯:補充了所載入的函式庫、函數和數據示例。我要處理的是大數據,所以嘗試使用並行處理。腳本需要超過一天才能完成並行處理。

+1

提供[最小可重現示例](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example)。你在用'parallel'嗎?什麼是getCosine?什麼是'x'? – m0nhawk

+0

@m0nhawk 我已經編輯了這個問題,希望這個版本可以。感謝您的評論。 –

+1

檢查這個問題..讓我們知道它是否解決了你的問題:http://stackoverflow.com/questions/33081342/foreach-parallel-loop-returns-nas – 2015-11-03 09:06:54

回答

0

非常感謝大家。嵌套的 foreach 對我有效。請看下面我所做的修改。

# Nested foreach over every (i, j) pair of columns of x. The inner loop
# cbind()s the values for one i into a row; the outer loop rbind()s those rows,
# so the combined result is an ncol(x) x ncol(x) matrix of cosine values.
# Requires a registered parallel backend (e.g. doParallel::registerDoParallel).
ls <-
    foreach(i = seq_len(ncol(x)), .combine = rbind) %:%
    foreach(j = seq_len(ncol(x)), .combine = cbind) %dopar% {
        # Return the value instead of assigning into dataframe_y: the
        # combined return values are what reach this session, and referencing
        # dataframe_y here would only ship an extra copy of it to each worker.
        getCosine(x[[i]], x[[j]])
    }

# Re-wrap the combined values as a matrix labelled with x's column names on
# both axes, then convert it to a data frame.
holder <- matrix(ls, nrow = ncol(x), ncol = ncol(x), dimnames = list(colnames(x), colnames(x)))
dataframe_y <- as.data.frame(holder)