2016-01-07 176 views
4

我有一個關於線性混合模型運行事後檢驗問題:事後檢驗線性混合模型LSMEANS錯誤

我運行在lme4線性混合模型3組,每組5種蛇,每組在一個不同的通風率(Vent),採取在不同時間點(Time)測量,以指定作爲隨機效應的下方(ID

數據子集蛇:

subset1 <- structure(list(ID = structure(c(5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 
6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 
9L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 10L, 10L, 
10L, 10L, 10L, 20L, 20L, 20L, 20L, 20L, 4L, 4L, 4L, 4L, 4L, 11L, 
11L, 11L, 11L, 11L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 12L, 
13L, 14L, 15L, 16L, 17L, 17L, 17L, 17L, 17L), .Label = c("", 
"1_1_2", "10", "10_1_1", "13_1_4", "14_2_4", "15_3_4", "16_1_4", 
"17_2_4", "2_2_1", "5", "5_2_2", "5_2_3", "5_2_4", "5_2_5", "5_2_6", 
"7_1_2", "8", "9", "9_3_1"), class = "factor"), Vent = c(30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 125L, 
125L, 125L, 125L, 125L, 125L, 125L, 125L, 125L, 125L, 125L, 125L, 
125L, 125L, 125L, 125L, 125L, 125L, 125L, 125L, 125L, 125L, 125L, 
125L, 125L, 250L, 250L, 250L, 250L, 250L, 250L, 250L, 250L, 250L, 
250L, 250L, 250L, 250L, 250L, 250L, 250L, 250L, 250L, 250L, 250L, 
250L, 250L, 250L, 250L, 250L), Time = c(60L, 80L, 180L, 720L, 
1440L, 60L, 80L, 180L, 720L, 1440L, 60L, 80L, 180L, 720L, 1440L, 
60L, 80L, 180L, 720L, 1440L, 60L, 80L, 180L, 720L, 1440L, 60L, 
80L, 180L, 720L, 1440L, 60L, 80L, 180L, 720L, 1440L, 60L, 80L, 
180L, 720L, 1440L, 60L, 80L, 180L, 720L, 1440L, 60L, 80L, 180L, 
720L, 1440L, 60L, 80L, 180L, 720L, 1440L, 60L, 80L, 180L, 720L, 
1440L, 60L, 80L, 180L, 720L, 1440L, 60L, 80L, 180L, 720L, 1440L, 
60L, 80L, 180L, 720L, 1440L), corr.pO2 = c(224.1388673, 233.9456874, 
239.1553778, 107.2373336, 76.71835625, 164.6293748, 243.8501858, 
234.8205544, 71.74240501, 62.23789874, 69.69478654, 62.23789874, 
152.1142885, 79.61325688, 63.33285001, 240.8713061, 231.304842, 
222.7743953, 95.7912966, 64.41744793, 241.7255035, 238.2936023, 
138.1188987, 43.00663696, 50.64392111, 265.4973967, 274.0599252, 
285.0144919, 83.37647392, NA, 292.3660214, 281.6533627, 275.9747984, 
63.33285001, 56.59660394, 254.2521631, 222.3180596, 208.736288, 
88.83223104, 114.1782867, 208.255285, 232.1878564, 193.3861802, 
72.75355024, 60.01517133, 209.6956308, 245.9596884, 200.4342522, 
75.73874562, 67.61194011, 240.0146049, 261.1278627, 166.9318704, 
74.75152919, 73.75652657, 270.9724687, 251.7882317, 245.9596884, 
147.1396383, 50.64392111, 294.179467, 296.3431178, 284.6426934, 
73.75652657, 75.73874562, 233.0681297, 234.3834557, 143.3247511, 
73.75652657, 66.55672391, 245.9596884, 249.3041163, 223.6847954, 
92.35383362, 78.65544784)), .Names = c("ID", "Vent", "Time", 
"corr.pO2"), row.names = c(NA, 75L), class = "data.frame") 

代碼:

attach(subset1) 

require(lme4) 

with.vent = lmer(corr.pO2 ~ Vent * Time + (1|ID),REML = FALSE, data = subset1) 

with.vent.no.int = lmer(corr.pO2 ~ Vent + Time + (1|ID),REML = FALSE, data = subset1) 

anova(with.vent, with.vent.no.int) 
#no significant interaction 

試驗發泄的效果:

without.vent = lmer(corr.pO2 ~ Time + (1|ID), REML = FALSE, data = subset1) 

比較與發泄:

anova(with.vent.no.int, without.vent) 
#no significant effect of ventilation treatment p=0.09199 

測試的時間效應:

without.time = lmer(corr.pO2 ~ Vent + (1|ID), data = subset1) 

anova(with.vent.no.int, without.time) 
# highly significant effect of time on pO2 < 2.2e-16 *** 

因此,嘗試事後測試:

require(lsmeans) 
lsmeans(with.vent.no.int, pairwise ~ Time, adjust = "tukey", data = subset1) 

這是我得到它讀取錯誤:

Error in solve.default(L %*% V0 %*% t(L), L) : 
    Lapack routine dgesv: system is exactly singular: U[1,1] = 0 

我可以使用運行具有校正成對檢驗:

pairwise.t.test(corr.pO2, Time, p.adj = "BH", paired = T) 

,但知道這是行不通的,而其他變量有一個互動(就像我的其他數據一樣),並且我希望在每個時間點和通氣狀態之間進行配對比較。這可能與lsmeans()

感謝您的意見,我知道似然比測試本身是有爭議的。我已經考慮了混合效應ANOVA,但是有一些缺失的數據點使得這是不可能的。數據以前由另一名學生分析爲雙向anova,沒有重複的措施,但我的感覺是,這是不恰當的,因爲每個蛇在重複的時間點測量

+4

我投票結束這個問題作爲題外話題,因爲它是關於R錯誤消息沒有一個可重複的例子。 – gung

+1

感謝@gung的鏈接,我試圖以可複製的方式添加數據。 –

+0

「lmer」中的「l」代表「線性」,如果將時間建模爲連續預測器,則線性假設通常並不真正有效。 – Roland

回答

4

答案結果相當簡單:您需要以確保您的VentTime預測因子是因子。否則lsmeans會對成對測試的含義感到困惑。 (關於你是否真的想用連續預測器來分析這個模型,比如作爲響應曲面設計而不是雙向方差分析,這裏有一個稍微長一點的對話。)下面是一個稍微更緊湊的分析版本:

subset1 <- transform(subset1,Vent=factor(Vent), Time=factor(Time)) 
require(lme4) 
with.vent = lmer(corr.pO2 ~ Vent * Time + (1|ID), 
     REML = FALSE, data = subset1) 
drop1(with.vent,test="Chisq") ## test interaction 
with.vent.no.int = update(with.vent, . ~ . - Vent:Time) 
drop1(with.vent.no.int,test="Chisq") ## test main effects 
require(lsmeans) 
lsmeans(with.vent.no.int, pairwise ~ Time) 

子集輸出:

$contrasts 
contrast estimate  SE df t.ratio p.value 
60 - 80  -6.99222 12.76886 63.45 -0.548 0.9819 
60 - 180 14.74281 12.76886 63.45 1.155 0.7768 
60 - 720 147.27139 12.76886 63.45 11.534 <.0001 
... 

我不同意,該錯誤信息是高深莫測。可能值得一提的是lsmeans維護人員看看是否有可能檢測並標記這個(非常常見的)錯誤。

+2

我會看看它,看看我能否陷入困境。注意'lsmeans'調用中的'adjust'和'data'參數是不必要的。 – rvl

+0

我想我至少會讓'Time'成爲一個排序因子,可能也是'Vent'(儘管這不會影響'drop1'結果)。 – Roland

+0

非常感謝@Ben Bolker,這真的很有幫助。關於我是否以正確的方式進行分析的問題對我來說也是有用的,我曾認爲雙向方差分析是不恰當的,因爲從相同的蛇取得重複的樣本,而且重複測量的anova不會像那裏那樣工作是少量缺失的數據點。我很樂意聽到你的意見,因爲我正在努力提高數據識讀能力。 –

3

的LSMEANS(大概在2016年2月1日)的下一次更新將捕獲這種類型的錯誤:

> lsmeans(with.vent.no.int, pairwise ~ Vent) 

$lsmeans 
    Vent lsmean  SE df lower.CL upper.CL 
135.1351 167.4871 6.859275 18.63 153.1111 181.863 

Confidence level used: 0.95 

$contrasts 


Warning message: 
In contrast.ref.grid(result, method = contr, by, ...) : 
    No contrasts were generated! Perhaps only one lsmean is involved. 
    This can happen, for example, when your predictors are not factors. 

ref.grid功能是非常方便的瞭解你有什麼:

> ref.grid(with.vent.no.int) 
'ref.grid' object with variables: 
    Vent = 135.14 
    Time = 483.24 

VentTime都是協變量,所以默認使用它們的平均值。要改變這一點,你不一定要改變數據集;你可以只強制模型中的因素的預測變量:

> repaired = lmer(corr.pO2 ~ factor(Vent) + factor(Time) + (1|ID), 
        REML = FALSE, data = subset1) 
> ref.grid(repaired) 
'ref.grid' object with variables: 
    Vent = 30, 125, 250 
    Time = 60, 80, 180, 720, 1440 

> lsmeans(repaired, pairwise ~ Vent) 
$lsmeans 
Vent lsmean  SE df lower.CL upper.CL 
    30 146.0967 12.19373 18.16 120.4952 171.6981 
    125 177.0917 12.29417 18.66 151.3274 202.8559 
    250 173.2568 11.12879 26.72 150.4111 196.1024 

Results are averaged over the levels of: Time 
Confidence level used: 0.95 

$contrasts 
contrast estimate  SE df t.ratio p.value 
30 - 125 -30.994975 17.31570 18.41 -1.790 0.2005 
30 - 250 -27.160077 16.50870 21.52 -1.645 0.2490 
125 - 250 3.834898 16.58302 21.81 0.231 0.9710 

Results are averaged over the levels of: Time 
P value adjustment: tukey method for comparing a family of 3 estimates 
+0

謝謝@rvl,這真的很有幫助,並讓我知道即將到來的更新 –