將Gamma分佈擬合到R中的數據我有一個數據集,它也包括家庭收入數據,我必須使用最大似然估計來擬合這個數據的Gamma分佈。具體來說,我們需要使用包優化,而不是fitdistr。所以這是我的代碼:使用optim,ML
t1 <- sum(log(newdata$faminc))
t2 <- sum(newdata$faminc)
obs <- nrow(newdata)
lh.gamma <- function(par) {
-((par[1]-1)*t1 - par[2]*t2 - obs*par[1]*log(par[2]) - obs*lgamma(par[1]))
}
#initial guess for a = mean^2(x)/var(x) and b = mean(x)/var(x)
a1 <- (mean(newdata$faminc))^2/var(newdata$faminc)
b1 <- mean(newdata$faminc)/var(newdata$faminc)
init <- c(a1,b1)
q <- optim(init, lh.gamma, method = "BFGS")
q
也嘗試填充init向量中的值,幷包括這段代碼;
dlh.gamma <- function(par){
cbind(obs*digamma(par[1])+obs*log(par[2])-t2,
obs*par[1]/par[2]-1/par[2]^2*t1)
}
,然後在Optim會是什麼樣子:
q <- optim(init, lh.gamma, dhl.gamma, method="BFGS")
它沒有 '作品'。首先,當我在學校電腦上試用這些代碼時,它給了我非常多的形狀和速率參數,這是不可能的。現在,在家嘗試,我得到這個:
> q <- optim(init, lh.gamma, method = "BFGS")
Error in optim(init, lh.gamma, method = "BFGS") :
non-finite finite-difference value [2]
In addition: There were 50 or more warnings (use warnings() to see the first 50)
> q
function (save = "default", status = 0, runLast = TRUE)
.Internal(quit(save, status, runLast))
<bytecode: 0x000000000eaac960>
<environment: namespace:base>
q甚至不是'創建'。除了上面包含dlh.gamma部分的時候,我只是再次獲得大量數字而沒有收斂。
誰知道哪裏出了問題/該怎麼辦?
編輯:
> dput(sample(newdata$faminc, 500))
c(42.5, 87.5, 22.5, 17.5, 12.5, 30, 30, 17.5, 42.5, 62.5, 62.5,
30, 30, 150, 22.5, 30, 42.5, 30, 17.5, 8.75, 42.5, 42.5, 42.5,
62.5, 42.5, 30, 17.5, 87.5, 62.5, 150, 42.5, 150, 42.5, 42.5,
42.5, 6.25, 62.5, 87.5, 6.25, 87.5, 30, 150, 22.5, 62.5, 42.5,
150, 17.5, 42.5, 42.5, 42.5, 62.5, 22.5, 42.5, 42.5, 30, 62.5,
30, 62.5, 87.5, 87.5, 42.5, 22.5, 62.5, 22.5, 8.75, 30, 30, 17.5,
87.5, 8.75, 62.5, 30, 17.5, 22.5, 62.5, 42.5, 30, 17.5, 62.5,
8.75, 62.5, 42.5, 150, 30, 62.5, 87.5, 17.5, 62.5, 30, 62.5,
87.5, 42.5, 62.5, 30, 62.5, 42.5, 87.5, 150, 12.5, 42.5, 62.5,
42.5, 62.5, 62.5, 150, 30, 87.5, 12.5, 17.5, 42.5, 62.5, 30,
6.25, 62.5, 42.5, 12.5, 62.5, 8.75, 17.5, 42.5, 62.5, 87.5, 8.75,
62.5, 30, 62.5, 87.5, 42.5, 62.5, 62.5, 12.5, 150, 42.5, 62.5,
12.5, 62.5, 42.5, 62.5, 62.5, 87.5, 42.5, 62.5, 30, 42.5, 150,
42.5, 30, 62.5, 62.5, 87.5, 42.5, 30, 62.5, 62.5, 42.5, 42.5,
30, 62.5, 42.5, 42.5, 62.5, 62.5, 150, 42.5, 30, 42.5, 62.5,
17.5, 62.5, 17.5, 150, 8.75, 62.5, 30, 62.5, 42.5, 42.5, 22.5,
150, 62.5, 42.5, 62.5, 62.5, 22.5, 30, 62.5, 30, 150, 42.5, 42.5,
42.5, 62.5, 30, 12.5, 30, 150, 12.5, 8.75, 22.5, 30, 22.5, 30,
42.5, 42.5, 42.5, 30, 12.5, 62.5, 42.5, 30, 22.5, 42.5, 87.5,
22.5, 12.5, 42.5, 62.5, 62.5, 62.5, 30, 42.5, 30, 62.5, 30, 62.5,
12.5, 22.5, 42.5, 22.5, 87.5, 30, 22.5, 17.5, 42.5, 62.5, 17.5,
250, 150, 42.5, 30, 42.5, 30, 62.5, 17.5, 87.5, 22.5, 150, 62.5,
42.5, 6.25, 87.5, 62.5, 42.5, 30, 42.5, 62.5, 42.5, 87.5, 62.5,
150, 42.5, 30, 6.25, 22.5, 30, 42.5, 42.5, 62.5, 250, 8.75, 150,
42.5, 30, 42.5, 30, 42.5, 42.5, 30, 30, 150, 22.5, 62.5, 30,
8.75, 150, 62.5, 87.5, 150, 42.5, 30, 42.5, 42.5, 42.5, 30, 8.75,
42.5, 42.5, 30, 22.5, 62.5, 17.5, 62.5, 62.5, 42.5, 8.75, 42.5,
12.5, 12.5, 150, 42.5, 42.5, 17.5, 42.5, 62.5, 62.5, 42.5, 42.5,
30, 42.5, 62.5, 30, 62.5, 42.5, 42.5, 42.5, 22.5, 62.5, 62.5,
62.5, 22.5, 150, 62.5, 42.5, 62.5, 42.5, 30, 30, 62.5, 22.5,
62.5, 87.5, 62.5, 42.5, 42.5, 22.5, 62.5, 62.5, 30, 42.5, 42.5,
8.75, 87.5, 42.5, 42.5, 87.5, 30, 62.5, 17.5, 62.5, 42.5, 17.5,
22.5, 62.5, 8.75, 62.5, 22.5, 22.5, 22.5, 42.5, 17.5, 22.5, 62.5,
42.5, 42.5, 42.5, 42.5, 42.5, 30, 30, 8.75, 30, 42.5, 62.5, 22.5,
6.25, 30, 42.5, 62.5, 17.5, 62.5, 42.5, 8.75, 22.5, 30, 17.5,
22.5, 62.5, 42.5, 150, 87.5, 22.5, 12.5, 62.5, 62.5, 62.5, 30,
42.5, 22.5, 62.5, 87.5, 30, 42.5, 62.5, 22.5, 87.5, 30, 30, 22.5,
87.5, 87.5, 250, 30, 62.5, 250, 62.5, 42.5, 42.5, 62.5, 62.5,
42.5, 6.25, 62.5, 62.5, 62.5, 42.5, 42.5, 150, 62.5, 62.5, 30,
150, 22.5, 87.5, 30, 150, 17.5, 8.75, 62.5, 42.5, 62.5, 150,
42.5, 22.5, 42.5, 42.5, 17.5, 62.5, 17.5, 62.5, 42.5, 150, 250,
22.5, 42.5, 30, 62.5, 62.5, 42.5, 42.5, 30, 150, 150, 42.5, 17.5,
17.5, 42.5, 8.75, 62.5, 42.5, 42.5, 22.5, 150, 62.5, 30, 250,
62.5, 87.5, 62.5, 8.75, 62.5, 30, 30, 8.75, 17.5, 17.5, 150,
22.5, 62.5, 62.5, 42.5)
的faminc變量是在1000
EDIT2:
好,代碼是好的,但現在我嘗試使用,以適應在直方圖中的分佈如下:
x <- rgamma(500,shape=q$par[1],scale=q$par[2])
hist(newdata$faminc, prob = TRUE)
curve(dgamma(x, shape=q$par[1], scale=q$par[2]), add=TRUE, col='blue')
它只是在x軸上產生一條扁平的藍線。
請在你的問題中加入'dput(newdata $ faminc)'。 – nrussell
有6547個觀測值.. –
如果你暗示知道只有觀測值的數量足以估計你的參數,那麼我認爲是時候再次破解你的統計學教科書了...... – nrussell