2016-03-26 169 views
0

----大家好,我的問題是我有下一個獲獎組合和3張「門票」。R - 矢量化功能

winner <- c("L","L",rep("X",12)) 

[1] "L" "L" "X" "X" "X" "X" "X" "X" "X" "X" "X" "X" "X" "X" 

combinations 

    Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9 Var10 Var11 Var12 Var13 Var14 
1 L L L L L L L L L  L  L  L  L  L 
2 X L L L L L L L L  L  L  L  L  L 
3 V L L L L L L L L  L  L  L  L  L 

dput(combinations) 

structure(list(Var1 = structure(1:3, .Label = c("L", "X", "V" 
), class = "factor"), Var2 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var3 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var4 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var5 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var6 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var7 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var8 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var9 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var10 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var11 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var12 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var13 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor"), Var14 = structure(c(1L, 1L, 1L), .Label = c("L", 
"X", "V"), class = "factor")), .Names = c("Var1", "Var2", "Var3", 
"Var4", "Var5", "Var6", "Var7", "Var8", "Var9", "Var10", "Var11", 
"Var12", "Var13", "Var14"), out.attrs = structure(list(dim = c(3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), dimnames = structure(list(
    Var1 = c("Var1=L", "Var1=X", "Var1=V"), Var2 = c("Var2=L", 
    "Var2=X", "Var2=V"), Var3 = c("Var3=L", "Var3=X", "Var3=V" 
    ), Var4 = c("Var4=L", "Var4=X", "Var4=V"), Var5 = c("Var5=L", 
    "Var5=X", "Var5=V"), Var6 = c("Var6=L", "Var6=X", "Var6=V" 
    ), Var7 = c("Var7=L", "Var7=X", "Var7=V"), Var8 = c("Var8=L", 
    "Var8=X", "Var8=V"), Var9 = c("Var9=L", "Var9=X", "Var9=V" 
    ), Var10 = c("Var10=L", "Var10=X", "Var10=V"), Var11 = c("Var11=L", 
    "Var11=X", "Var11=V"), Var12 = c("Var12=L", "Var12=X", "Var12=V" 
    ), Var13 = c("Var13=L", "Var13=X", "Var13=V"), Var14 = c("Var14=L", 
    "Var14=X", "Var14=V")), .Names = c("Var1", "Var2", "Var3", 
"Var4", "Var5", "Var6", "Var7", "Var8", "Var9", "Var10", "Var11", 
"Var12", "Var13", "Var14"))), .Names = c("dim", "dimnames")), row.names = c(NA, 
3L), class = "data.frame") 

門票價格是showen未來

price 

    Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9 Var10 Var11 Var12 Var13 Var14 
1 2.48 1.79 2.99 2.31 4.03 2.1 2.71 2.22 2.7 2.94 2.01 2.16 3.41 2.16 
2 3.28 1.79 2.99 2.31 4.03 2.1 2.71 2.22 2.7 2.94 2.01 2.16 3.41 2.16 
3 3.16 1.79 2.99 2.31 4.03 2.1 2.71 2.22 2.7 2.94 2.01 2.16 3.41 2.16 

dput(price) 

structure(list(Var1 = c(2.48, 3.28, 3.16), Var2 = c(1.79, 1.79, 
1.79), Var3 = c(2.99, 2.99, 2.99), Var4 = c(2.31, 2.31, 2.31), 
    Var5 = c(4.03, 4.03, 4.03), Var6 = c(2.1, 2.1, 2.1), Var7 = c(2.71, 
    2.71, 2.71), Var8 = c(2.22, 2.22, 2.22), Var9 = c(2.7, 2.7, 
    2.7), Var10 = c(2.94, 2.94, 2.94), Var11 = c(2.01, 2.01, 
    2.01), Var12 = c(2.16, 2.16, 2.16), Var13 = c(3.41, 3.41, 
    3.41), Var14 = c(2.16, 2.16, 2.16)), .Names = c("Var1", "Var2", 
"Var3", "Var4", "Var5", "Var6", "Var7", "Var8", "Var9", "Var10", 
"Var11", "Var12", "Var13", "Var14"), out.attrs = structure(list(
    dim = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L), dimnames = structure(list(Var1 = c("Var1=2.48", "Var1=3.28", 
    "Var1=3.16"), Var2 = c("Var2=1.79", "Var2=4.04", "Var2=4.55" 
    ), Var3 = c("Var3=2.99", "Var3=3.38", "Var3=2.54"), Var4 = c("Var4=2.31", 
    "Var4=3.39", "Var4=3.36"), Var5 = c("Var5=4.03", "Var5=3.22", 
    "Var5=2.14"), Var6 = c("Var6=2.10", "Var6=3.77", "Var6=3.60" 
    ), Var7 = c("Var7=2.71", "Var7=3.22", "Var7=2.93"), Var8 = c("Var8=2.22", 
    "Var8=3.56", "Var8=3.47"), Var9 = c("Var9=2.70", "Var9=3.66", 
    "Var9=2.65"), Var10 = c("Var10=2.94", "Var10=3.30", "Var10=2.65" 
    ), Var11 = c("Var11=2.01", "Var11=3.61", "Var11=4.09"), Var12 = c("Var12=2.16", 
    "Var12=3.15", "Var12=4.19"), Var13 = c("Var13=3.41", "Var13=3.43", 
    "Var13=2.27"), Var14 = c("Var14=2.16", "Var14=3.78", "Var14=3.43" 
    )), .Names = c("Var1", "Var2", "Var3", "Var4", "Var5", "Var6", 
    "Var7", "Var8", "Var9", "Var10", "Var11", "Var12", "Var13", 
    "Var14"))), .Names = c("dim", "dimnames")), row.names = c(NA, 
3L), class = "data.frame") 

我做了下一個公式來計算某一票的價格(這是一個不切實際的價格,但我的書上說這一點)

myfunction2 <- function(x,y){ 
ifelse(sum((x==winner))>=2,prod(((x==winner)*y)+((x==winner)<=0)*1),return(1)) 
          } 

    myfunction2(combinations[1,],price[1,]) 
    [1] 4.4392 

    myfunction2(combinations[2,],price[2,]) 
    [1] 1.0000 

    myfunction2(combinations[3,],price[3,]) 
    [1] 1.0000 

公式是正確的,在第一種情況下價格是(2.48 * 1.79)= 4.4392,在第二和第三種情況下答案是1,因爲我需要至少兩個正確的答案來支付更多的費用1.

當我嘗試「矢量化」的公式,答案是不正確

R3 <- apply(combinations,1,myfunction2,y=price) 

R3 
    1  2  3 
    595.0378 1.00000 1.00000 

我能解決一個for循環的問題,但我有3^14組合,這是非常緩慢的,我嘗試用mapply,但我有同樣的問題(不正確的答案)

歡迎任何幫助,非常感謝你

+0

您的代碼與您的結果不符。因爲第二行和第三行沒有兩個匹配,所以它們應該是0,而不是你列出的那一個。請編輯您的問題,修正您的代碼,更改您的數據以使其更易於使用(例如,使用'dput(price)'以及其他變量)。 (順便說一下,'prod(...)'中的所有內容都可以通過'ifelse(...)'處理得更容易。) – r2evans

+0

@ r2evans是的,對不起,我錯了。我不知道函數'pdut()',謝謝你的幫助 – user63192

回答

1

一個你可以做弄清楚爲什麼apply方法是行不通的事情是手動插入browser()在你的函數中,看看它實際上看到的是什麼參數。使用你的數據從上面,這裏是你的功能與附加線:

myfunction2 <- function(x,y){ 
    browser() 
    if (sum((x==winner))>=2) { 
    return(prod(((x == winner) * price) + ((x == winner) == 0) * 1)) 
    } else { 
    return(1) 
    } 
} 

apply(combinations,1,myfunction2,price) 
# Called from: FUN(newX[, i], ...) 
# debug at #3: if (sum((x == winner)) >= 2) { 
#  return(prod(((x == winner) * price) + ((x == winner) == 0) * 
#   1)) 
# } else { 
#  return(1) 
# } 
# Browse[2]> 
x 
# Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9 Var10 Var11 Var12 Var13 Var14 
# "L" "L" "L" "L" "L" "L" "L" "L" "L" "L" "L" "L" "L" "L" 

到目前爲止好。

# Browse[2]> 
y 
# Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9 Var10 Var11 Var12 Var13 Var14 
# 1 2.48 1.79 2.99 2.31 4.03 2.1 2.71 2.22 2.7 2.94 2.01 2.16 3.41 2.16 
# 2 3.28 1.79 2.99 2.31 4.03 2.1 2.71 2.22 2.7 2.94 2.01 2.16 3.41 2.16 
# 3 3.16 1.79 2.99 2.31 4.03 2.1 2.71 2.22 2.7 2.94 2.01 2.16 3.41 2.16 

有你的問題的一部分:它是在每次調用看到price所有,和你的功能不知道要檢查這個。

順便說一句:你誤讀我的使用ifelse的建議。雖然你使用它的方式正在工作,但它是不正確的,並會在稍後咬你。我建議你谷歌ifelse和傳統if ... else之間的差異。長話短說:ifelse需要假定它的三個參數的長度是相同的(或容易回收),所以當你測試一件事情時(sum(...)>=2,它的第一個參數),那麼你應該使用if ... else

此外,由於combinations[1,]在技術上返回data.frame,因此您應該將其取消並與其他事物保持一致。 (同爲price)認爲這是一種替代的功能:

myfunction3 <- function(x,y){ 
    i <- (unlist(x) == winner) 
    if (sum(i) >= 2) prod(ifelse(i, unlist(y), 1)) else 1 
} 
myfunction3(combinations[1,], price[1,]) 
# [1] 4.4392 
myfunction3(combinations[2,], price[2,]) 
# [1] 1 
myfunction3(combinations[3,], price[3,]) 
# [1] 1 

最後,我避免side-effectswiki)的超級粉絲。類似於此的東西超出了函數的作用域(wiki),以獲得父環境或名稱空間中的變量。雖然它有效,但它可能有問題。我將通過在函數參數中傳遞值winner來解決此問題。

myfunction4 <- function(x,y,w){ 
    i <- (unlist(x) == w) 
    if (sum(i) >= 2) prod(ifelse(i, unlist(y), 1)) else 1 
} 

回到原來的問題一次獲得所有price。儘管肯定有可能通過使用mapply來解決這個問題,但我認爲這是*apply函數中唯一可以很容易地應用於這個問題的函數,即使這樣,它也需要對數據框架進行一些按摩和轉換。相反,我建議使用這樣的事情:

myfunction5 <- function(comb, pr, win) { 
    i <- (unlist(comb) == win) 
    if (sum(i) >= 2) prod(ifelse(i, unlist(pr), 1)) else 1 
} 

最後,我們能夠做這樣的事:

sapply(1:nrow(combinations), 
     function(i, comb, pr, win) myfunction5(comb[i,], pr[i,], win), 
     combinations, price, winner) 
# [1] 4.4392 1.0000 1.0000 

在這種情況下,i僅僅是內combinations和索引的行price。雖然這看起來很迂腐,但完全自給自足的寫作功能(不會到達其空間之外的未提供給它的變量)通常會產生更強大,更「防禦性」的程序。

注意:這一切仍然在這裏躲着一個巨大的潛在問題:真的是有風險的做法是使用apply(x, 1, ...)x是一個data.frame。雖然它在這裏工作(只是因爲你的每個data.frames是完全同類的),但它會在你最不期待的時候咬你。問題是data.frames允許你添加一個與其他類不同的類,所以如果你爲任何原因在你的price變量中添加了一列字符串,這些都不會起作用。

另外,如果你沒有一些理智檢查,這往往會分解一點。例如,強制執行combinationsprice必須具有相同尺寸的隱式要求,而winner必須與其他兩個列的長度相同。

+0

哇,你的回答非常好,謝謝你的一切。 – user63192