2015-08-21 53 views
3

我想在R中編寫基本的Vanilla TrueSkill (3.1)算法,但我得到了一些奇怪的結果。在R的TrueSkill實現

我的代碼如下:被稱爲

# A simple test between two players repeatedly laying one another 
betaSq = 0.1 
obs = 10000 

p1_skills = 0.333 
p2_skills = 0 

p1_draws = rnorm(obs, p1_skills, sqrt(betaSq)) 
p2_draws = rnorm(obs, p2_skills, sqrt(betaSq)) 

p1_pred_mu = rep(NA, obs+1) 
p1_pred_sigmaSq = rep(NA, obs+1) 
p2_pred_mu = rep(NA, obs+1) 
p2_pred_sigmaSq = rep(NA, obs+1) 

# Initial values 
p1_pred_mu[1] = 0 
p1_pred_sigmaSq[1] = 1 
p2_pred_mu[1] = 0 
p2_pred_sigmaSq[1] = 1 

results = p1_draws > p2_draws 
probs = rep(NA, obs) 

# Run TrueSkill 
for (i in seq(2,obs+1)) { 
    probs[i-1] = predictProb(p1_pred_mu[i-1], p1_pred_sigmaSq[i-1], p2_pred_mu[i-1], p2_pred_sigmaSq[i-1], betaSq) 
    out = updateSkill(p1_pred_mu[i-1], p1_pred_sigmaSq[i-1], p2_pred_mu[i-1], p2_pred_sigmaSq[i-1], betaSq, results[i-1]) 

    # Now update based on the out 
    p1_pred_mu[i] = out$mu1 
    p1_pred_sigmaSq[i] = out$sigmaSq1 
    p2_pred_mu[i] = out$mu2 
    p2_pred_sigmaSq[i] = out$sigmaSq2 
} 

# Output results 
dev.new() 
mu = p1_pred_mu 
lower = qnorm(0.05, p1_pred_mu, p1_pred_sigmaSq) 
upper = qnorm(0.95, p1_pred_mu, p1_pred_sigmaSq) 
plot(mu, ylim = c(min(lower), max(upper)), main = "p1") 
lines(lower) 
lines(upper) 

dev.new() 
mu = p2_pred_mu 
lower = qnorm(0.05, p2_pred_mu, p2_pred_sigmaSq) 
upper = qnorm(0.95, p2_pred_mu, p2_pred_sigmaSq) 
plot(mu, ylim = c(min(lower), max(upper)), main = "p2") 
lines(lower) 
lines(upper) 

a = filter(probs, rep(1, 20))/20 
dev.new() 
plot(a) 

print(sprintf("Mean p1: %g", mean(p1_pred_mu))) 
print(sprintf("Mean p2: %g", mean(p2_pred_mu))) 
print(sprintf("Mean results: %g", mean(results))) 
print(sprintf("Mean predicted results: %g", mean(probs))) 

的功能有:

# Functions 
updateSkill <- function(mu1, sigmaSq1, mu2, sigmaSq2, betaSq, result) { 
    # http://papers.nips.cc/paper/3331-trueskill-through-time-revisiting-the-history-of-chess.pdf 
    c = 2*betaSq + sigmaSq1 + sigmaSq2 

    if (result == 1) { 
    # Player 1 wins 
    v = dnorm((mu1-mu2)/c)/pnorm((mu1-mu2)/c) 
    w = v*(v+(mu1-mu2)/c) 

    mu1 = mu1 + (sigmaSq1/c)*v 
    mu2 = mu2 - (sigmaSq2/c)*v 

    sigmaSq1 = sigmaSq1 * sqrt(1 - (sigmaSq1/c^2)*w) 
    sigmaSq2 = sigmaSq2 * sqrt(1 - (sigmaSq2/c^2)*w) 
    } else if (result == 0) { 
    # Player 2 wins 
    v = dnorm((mu2-mu1)/c)/pnorm((mu2-mu1)/c) 
    w = v*(v+(mu2-mu1)/c) 

    mu1 = mu1 - (sigmaSq1/c)*v 
    mu2 = mu2 + (sigmaSq2/c)*v 

    sigmaSq1 = sigmaSq1 * sqrt(1 - (sigmaSq1/c^2)*w) 
    sigmaSq2 = sigmaSq2 * sqrt(1 - (sigmaSq2/c^2)*w)  
    } 

    return(list(mu1=mu1, mu2=mu2, sigmaSq1=sigmaSq1, sigmaSq2=sigmaSq2)) 
} 

predictProb <- function(mu1, sigmaSq1, mu2, sigmaSq2, betaSq) { 
    # Try to predict the probability of player 1 beating player 2 using Trueskill model 
    mean1 = mu1 
    mean2 = mu2 
    var1 = sigmaSq1 + betaSq 
    var2 = sigmaSq2 + betaSq 

    # Now the dist of player1 - player2 is N(mean1 - mean2, sqrt(var1 + var2)) 
    prob1Wins = pnorm(0, mean2 - mean1, sqrt(var1 + var2)) 

    return(prob1Wins) 
} 

我恨張貼了大量的代碼,斑點,但我真的想不通,事情會出錯。

該程序運行並且預測技能(分佈到N(mu,sigma))會聚。但是他們所預測的概率並沒有收斂到結果的真實概率!

樣本輸出是:

[1] "Mean p1: 0.0762161" 
[1] "Mean p2: -0.0762161" 
[1] "Mean results: 0.7733" 
[1] "Mean predicted results: 0.631424" 

任何想法是怎麼回事?

+0

我懷疑這是它,但如果你共享代碼請注意,您不能在定義之前使用函數。重新組織代碼,以便在調用之前定義的函數在被調用之前被定義 – Dason

+0

是的,它們實際上是在另一個文件中並且我來源。我會做得更清楚。 – rwolst

回答

0

這不工作的原因是因爲在updateSkills 3號線的功能應該讀

c = sqrt(2*betaSq + sigmaSq1 + sigmaSq2) 

c = 2*betaSq + sigmaSq1 + sigmaSq2