0
我正在學習 tidyr,並遇到了一個 spread() 的問題。
tidyr:使用 spread() 展開時不要擴展所有列
下面是一個虛構的實驗數據集:
library(tidyr)
df <- structure(list(mood = c(0.855, -0.103, 0.421, -0.222, 0.772, -0.027, -1.088, 0.923, -1.516, -1.503, -0.358, -0.357, -0.344, 0.294, 0.348, -0.174, 0.872, -1.188, 0.842, -0.246, -0.758, 0.674, 0.045, 0.72, -1.253, 0.00599999999999995, -0.0749999999999999,1.623, -1.754, -0.44, -0.607, -0.083, -0.827, -0.337, -0.6, 0.429, -0.383, -1.755, 0.894, 0.146, -0.658, -0.409, -0.531, 1.388, -0.688, 0.521, -0.662, 0.852, -1.363, 0.18, -0.775, 0.393, -0.926, 0.809, -0.857, 0.889, 0.0969999999999999, -1.553, -0.21,1.769, -0.114, -0.203, 0.805, 0.186, 0.286, -0.076, 0.137, 1.208, 0.33, 0.34, 0.832, 0.815, -0.427, 0.444, -0.838, 1.45, 1.701, -2.265, 0.531, 0.808),
subj = structure(c(1L, 12L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,11L, 13L, 1L, 12L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 1L, 12L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 1L, 12L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 2L, 3L, 4L, 5L,6L, 7L, 8L, 9L, 10L, 11L, 13L), .Label = c("s1", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s2", "s20", "s3", "s4", "s5", "s6", "s7", "s8", "s9"), class = "factor"),
depressed = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
activity = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("exercize", "relaxation"), class = "factor"),
drug = structure(c(1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("placebo", "SSRI"), class = "factor")), .Names = c("mood", "subj", "depressed", "activity", "drug"), row.names = c(NA, -80L), class = "data.frame")
查看一下 df,我們可以看到 activity 和 drug 是被試內(within-subject)因素,而 depressed 是被試間(between-subject)因素:
head(arrange(df, subj))
mood subj depressed activity drug
1 0.855 s1 yes relaxation placebo
2 -0.758 s1 yes relaxation SSRI
3 -0.658 s1 yes exercize placebo
4 -0.114 s1 yes exercize SSRI
5 -1.503 s10 no relaxation placebo
6 -0.440 s10 no relaxation SSRI
我想將 df 轉換爲寬格式(wide format),其中每個被試(subj)的 mood 用單獨的一列表示,每一行對應 activity 和 drug 的一種組合。不幸的是,我不知道如何避免爲 depressed 的每個水平都創建一組行。這是我目前的嘗試:
df %>% spread(subj, mood) %>% `[`(1:5)
depressed activity drug s1 s10
1 no exercize placebo NA 0.18
2 no exercize SSRI NA 0.34
3 no relaxation placebo NA -1.50
4 no relaxation SSRI NA -0.44
5 yes exercize placebo -0.658 NA
6 yes exercize SSRI -0.114 NA
7 yes relaxation placebo 0.855 NA
8 yes relaxation SSRI -0.758 NA
我想避免按 depressed 展開,這樣產生的輸出就只有四行。
我很困惑。你說你想要四列,但你是在對有 20 個水平的 subj 列做 spread,所以你會得到 20 列各被試的 mood,再加上其他列。你能以前一兩個被試爲例,展示你想要的結果嗎? – Gregor 2015-02-11 02:15:42
我也很困惑。4 列?它們的名字是什麼?你預期有多少行?你想做任何聚合嗎? – 2015-02-11 02:16:25
也許你想要的是類似'df%>%select(-depressed)%>%spread(subj,mood)%>%select(1:4)'這樣的結果? – 2015-02-11 02:19:34