我有一個像下面這樣的列表數據。我想執行非線性迴歸高斯曲線中頻和計數爲計算列表中的高斯曲線擬合

mylist<- structure(list(A = structure(list(breaks = c(-10, -9, 
-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4), counts = c(1L, 
0L, 1L, 5L, 9L, 38L, 56L, 105L, 529L, 2858L, 17L, 2L, 0L, 2L), 
    density = c(0.000276014352746343, 0, 0.000276014352746343, 
    0.00138007176373171, 0.00248412917471709, 0.010488545404361, 
    0.0154568037537952, 0.028981507038366, 0.146011592602815, 
    0.788849020149048, 0.00469224399668783, 0.000552028705492686, 
    0, 0.000552028705492686), mids = c(-9.5, -8.5, -7.5, -6.5, 
    -5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5), 
    xname = "x", equidist = TRUE), .Names = c("breaks", "counts", 
"density", "mids", "xname", "equidist"), class = "histogram"), 
    B = structure(list(breaks = c(-7, -6, -5, 
    -4, -3, -2, -1, 0), counts = c(2L, 0L, 6L, 2L, 2L, 1L, 3L 
    ), density = c(0.125, 0, 0.375, 0.125, 0.125, 0.0625, 0.1875 
    ), mids = c(-6.5, -5.5, -4.5, -3.5, -2.5, -1.5, -0.5), xname = "x", 
     equidist = TRUE), .Names = c("breaks", "counts", "density", 
    "mids", "xname", "equidist"), class = "histogram"), C = structure(list(
     breaks = c(-7, -6, -5, -4, -3, -2, -1, 0, 1), counts = c(2L, 
     2L, 4L, 5L, 14L, 22L, 110L, 3L), density = c(0., 
     0., 0.0246913580246914, 0.0308641975308642, 
     0.0864197530864197, 0.135802469135802, 0.679, 
     0.0185185185185185), mids = c(-6.5, -5.5, -4.5, -3.5, 
     -2.5, -1.5, -0.5, 0.5), xname = "x", equidist = TRUE), .Names = c("breaks", 
    "counts", "density", "mids", "xname", "equidist"), class = "histogram")), .Names = c("A", 
"B", "C"))

我已閱讀本 Fitting a density curve to a histogram in R 我的列表，並報告平均值和標準偏差的每一個元素，但是這是如何適應的嵌合曲線轉換爲直方圖。我要的是最佳擬合值」

‘中庸’ ‘SD’

如果我使用PRISM做到這一點，我應該得到以下結果對於A

Mids Counts 
-9.5 1 
-8.5 0 
-7.5 1 
-6.5 5 
-5.5 9 
-4.5 38 
-3.5 56 
-2.5 105 
-1.5 529 
-0.5 2858 
0.5  17 
1.5  2 
2.5  0 
3.5  2

進行非線性迴歸高斯曲線擬合，我得到

"Best-fit values" 
"  Amplitude" 3537 
"  Mean"  -0.751 
"  SD"   0.3842

第二組乙

Mids Counts 
-6.5 2 
-5.5 0 
-4.5 6 
-3.5 2 
-2.5 2 
-1.5 1 
-0.5 3 



"Best-fit values" 
"  Amplitude" 7.672 
"  Mean"   -4.2 
"  SD"   0.4275

和第三個

Mids Counts 
-6.5 2 
-5.5 2 
-4.5 4 
-3.5 5 
-2.5 14 
-1.5 22 
-0.5 110 
0.5  3

我得到這個

"Best-fit values" 
"  Amplitude" 120.7 
"  Mean"  -0.6893 
"  SD"  0.4397

來源

2016-06-28 nik

如果您正在尋找估計的平均值和標準差/方差，我認爲這可以通過最大似然程序來完成。在R中有'mle'函數以及'maxLik'包。在這種情況下，您應該使用原始數據，而不是中間數和計數。「mle」中的第一個例子應該與您想要的類似。 – lmo

我目前無法觀看視頻，但在可以的情況下，我會在幾個小時內查看視頻。似乎從分箱數據估計失去了有用的信息。考慮到你有這麼小的樣本量，這尤其值得關注：16我認爲。 – lmo

@lmo好吧，不是真的樣本大小是像1000這樣高得多。所以這不會是一個問題，在這種情況下，我認爲 – nik

爲了直方圖轉換回平均值和標準偏差的估計。首先將bin計數的結果轉換爲bin。這將是原始數據的近似值。

根據你上面的例子：

#extract the mid points and create list of simulated data 
simdata<-lapply(mylist, function(x){rep(x$mids, x$counts)}) 
#if the original data were integers then this may give a better estimate 
#simdata<-lapply(mylist, function(x){rep(x$breaks[-1], x$counts)}) 

#find the mean and sd of simulated data 
means<-lapply(simdata, mean) 
sds<-lapply(simdata, sd) 
#or use sapply in the above 2 lines depending on future process needs

如果你的數據是整數，然後利用休息時間爲箱將提供更好的估計。取決於直方圖的函數（即right = TRUE/FALSE）可能會將結果移動一個。

編輯

我認爲這將是一件容易的事情。我回顧了視頻，顯示的示例數據爲：

mids<-seq(-7, 7) 
counts<-c(7, 1, 2, 2, 2, 5, 217, 70, 18, 0, 2, 1, 2, 0, 1) 
simdata<-rep(mids, counts)

視頻結果爲mean = -0.7359和sd = 0.4571。提供我發現溶液中的最接近的結果是使用「fitdistrplus」包：

fitdist(simdata, "norm", "mge")

使用「最大化擬合優度估計」導致平均= -0.7597280和SD = 0.8320465。
在這一點上，上述方法提供了一個接近的估計，但不完全匹配。我不知道用什麼技術來計算視頻的適合度。

編輯＃2

將上述溶液涉案重新創建原始數據和擬合，使用任一的平均/ SD或使用fitdistrplus包。這種嘗試是嘗試使用高斯分佈執行最小二乘擬合。

simdata<-lapply(mylist, function(x){rep(x$mids, x$counts)}) 
means<-sapply(simdata, mean) 
sds<-sapply(simdata, sd) 

#Data from video 
#mids<-seq(-7, 7) 
#counts<-c(7, 1, 2, 2, 2, 5, 217, 70, 18, 0, 2, 1, 2, 0, 1) 

#make list of the bins and distribution in each bin 
mids<-lapply(mylist, function(x){x$mids}) 
dis<-lapply(mylist, function(x) {x$counts/sum(x$counts)}) 

#function to perform the least square fit 
nnorm<-function(values, mids, dis) { 
    means<-values[1] 
    sds<-values[2] 
    #print(paste(means, sds)) 
    #calculate out the Gaussian distribution for each bin 
    modeld<-dnorm(mids, means, sds) 
    #sum of the squares 
    diff<-sum((modeld-dis)^2) 
    diff 
} 

#use optim function with the mean and sd as initial guesses 
#find the mininium with the mean and SD as fit parameters 
lapply(1:3, function(i) {optim(c(means[[i]], sds[[i]]), nnorm, mids=mids[[i]], dis=dis[[i]])})

該解決方案對PRISM結果提供了一個更接近的答案，但仍然不盡相同。以下是所有4種解決方案的比較。

從表中，最小二乘擬合（上面的那個）提供了最接近的近似值。也許調整中點dnorm函數可能會有所幫助。但情況B的數據距離正態分佈最遠，但PRISM軟件仍然產生一個小的標準偏差，而其他方法是相似的。 PRISM軟件有可能執行某種類型的數據過濾，以在合適之前移除異常值。

來源

2016-06-28 15:42:56 Dave2e

你確定這樣做，一個適用於非線性迴歸高斯曲線擬合？ – nik

嗨，我檢查了上面的數據，我用PRISM建立了非線性迴歸高斯曲線擬合，我得到了平均值和標準差。你能看看它是否一樣嗎？ – nik

這些值不匹配。我不知道PRISM軟件是如何執行這個功能的。它可能會剪裁或平滑適合的尾巴。你的情況B不是很正常，但PRISM產生的標準偏差<0.5 – Dave2e

計算列表中的高斯曲線擬合

回答

編輯

編輯＃2

相關問題