2016-11-11 82 views
3

我有一個如下所示的數據幀,想通過 melt / reshape 對它進行重塑:

id gender group Student_Math_1 Student_Math_2 Student_Read_1 Student_Read_2 
46 M  Red 23    45    37    56 
46 M  Red 34    36    33    78 
46 M  Red 56    63    58  
62 F  Blue 59            68 
62 F  Blue     68    87    73 
38 M  Red 78    57        65 
38 M  Red     75    54 
17 F  Blue 74        56    72 
17 F  Blue 75    61        79 
17 F  Blue     74    43    81 

    # Example data: ten score records with identifier columns (id, gender,
    # group) and four wide score columns; NA marks a missing score.
    df <- data.frame(
      id = c(46, 46, 46, 62, 62, 38, 38, 17, 17, 17),
      gender = factor(
        c("M", "M", "M", "F", "F", "M", "M", "F", "F", "F"),
        levels = c("F", "M")
      ),
      group = factor(
        c("Red", "Red", "Red", "Blue", "Blue", "Red", "Red", "Blue", "Blue",
          "Blue"),
        levels = c("Blue", "Red")
      ),
      Student_Math_1 = c(23, 34, 56, 59, NA, 78, NA, 74, 75, NA),
      Student_Math_2 = c(45, 36, 63, NA, 68, 57, 75, NA, 61, 74),
      Student_Read_1 = c(37, 33, 58, NA, 87, NA, 54, 56, NA, 43),
      Student_Read_2 = c(56, 78, NA, 68, 73, 65, NA, 72, 79, 81)
    )

我想重塑該數據集,把具有相同詞根的列堆疊在一起:使 `Student_Math_1` 和 `Student_Math_2` 兩列上下堆疊成單一的 `Math` 列;類似地,使 `Student_Read_1` 和 `Student_Read_2` 兩列上下堆疊成單一的 `Reading` 列,如下所示:

id gender group Math Index1   Reading Index2 

46 M  Red 23 Student_Math_1 45  Student_Read_1   
46 M  Red 34 Student_Math_1 36  Student_Read_1 
46 M  Red 56 Student_Math_1 63  Student_Read_1 
62 F  Blue 59 Student_Math_1   Student_Read_1     
62 F  Blue  Student_Math_1 68  Student_Read_1  
38 M  Red 78 Student_Math_1 57  Student_Read_1  
38 M  Red  Student_Math_1 75  Student_Read_1 
17 F  Blue 74 Student_Math_1   Student_Read_1     
17 F  Blue 75 Student_Math_1 61  Student_Read_1   
17 F  Blue  Student_Math_1 74  Student_Read_1  

46 M  Red 45 Student_Math_2 56  Student_Read_2 
46 M  Red 36 Student_Math_2 78  Student_Read_2 
46 M  Red 63 Student_Math_2   Student_Read_2 
62 F  Blue  Student_Math_2 68  Student_Read_2 
62 F  Blue 68 Student_Math_2 73  Student_Read_2 
38 M  Red 57 Student_Math_2 65  Student_Read_2 
38 M  Red 75 Student_Math_2   Student_Read_2  
17 F  Blue  Student_Math_2 72  Student_Read_2 
17 F  Blue 61 Student_Math_2 79  Student_Read_2 
17 F  Blue 74 Student_Math_2 81  Student_Read_2 

我只知道這可以通過 reshape 或 melt 把寬格式轉換爲長格式來實現,但不知道該如何進行。非常感謝任何有助於實現這一轉換的幫助。

+0

你可以在 base R 中做到這一點,即 `pat <- c("Student_Math", "Student_Read"); cbind(df[rep(1:nrow(df), 2), 1:3], do.call(cbind, lapply(pat, function(nm) melt(df[grep(nm, names(df))]))))`,然後更改列名 – akrun

+0

或者另一個選項是使用 `data.table`:`melt(setDT(df), measure = patterns("Math", "Read"), value.name = c("Math", "Read"))[, Index1 := names(df)[4:5][variable]][, Index2 := names(df)[5:6][variable]][]` – akrun

回答

1

使用 `reshape2` 的 `melt`,並爲 `id` 和 `measure.vars` 傳入適當的參數:

library(reshape2)

# Identifier columns that are repeated for every stacked score.
id_cols <- c("id", "gender", "group")

# Stack the two Math columns: `Math` holds the score and `Index1` records
# which wide column each score came from. The data frame is `df` (R is
# case-sensitive), and `id.vars` is spelled out instead of relying on partial
# argument matching.
MathDF <- melt(
  data = df, id.vars = id_cols,
  measure.vars = c("Student_Math_1", "Student_Math_2"),
  value.name = "Math", variable.name = "Index1"
)

# Same for the two Read columns.
ReadDF <- melt(
  data = df, id.vars = id_cols,
  measure.vars = c("Student_Read_1", "Student_Read_2"),
  value.name = "Read", variable.name = "Index2"
)

# Both melts stack the rows of `df` in the same order, so row i of MathDF and
# row i of ReadDF describe the same original record. The id columns are not
# unique, so merge(..., by = id_cols) would be a many-to-many join and return
# every Math/Read combination per id; bind the columns row-wise instead.
stopifnot(identical(MathDF[id_cols], ReadDF[id_cols]))
mergeDF <- cbind(MathDF, ReadDF[c("Index2", "Read")])

head(mergeDF)
#   id gender group         Index1 Math         Index2 Read
# 1 46      M   Red Student_Math_1   23 Student_Read_1   37
# 2 46      M   Red Student_Math_1   34 Student_Read_1   33
# 3 46      M   Red Student_Math_1   56 Student_Read_1   58
# 4 62      F  Blue Student_Math_1   59 Student_Read_1   NA
# 5 62      F  Blue Student_Math_1   NA Student_Read_1   87
# 6 38      M   Red Student_Math_1   78 Student_Read_1   NA
1

使用 tidyverse,你可以對每一組列分別 `gather`,然後用 `filter` 去掉索引編號不匹配的行(假設你不想要 `Student_*_1` 與 `Student_*_2` 的組合):

library(tidyverse) 

# Stack the Math columns first: `Index1` keeps the source column name and
# `Math` the score.
math_long <- gather(df, Index1, Math, contains('Math'))

# Stack the Read columns of that result the same way, which pairs every Math
# row with every Read column.
both_long <- gather(math_long, Index2, Reading, contains('Read'))

# Keep only the pairings whose trailing numbers agree (_1 with _1, _2 with _2).
filter(both_long, parse_number(Index1) == parse_number(Index2))

## id gender group   Index1 Math   Index2 Reading 
## 1 46  M Red Student_Math_1 23 Student_Read_1  37 
## 2 46  M Red Student_Math_1 34 Student_Read_1  33 
## 3 46  M Red Student_Math_1 56 Student_Read_1  58 
## 4 62  F Blue Student_Math_1 59 Student_Read_1  NA 
## 5 62  F Blue Student_Math_1 NA Student_Read_1  87 
## 6 38  M Red Student_Math_1 78 Student_Read_1  NA 
## 7 38  M Red Student_Math_1 NA Student_Read_1  54 
## 8 17  F Blue Student_Math_1 74 Student_Read_1  56 
## 9 17  F Blue Student_Math_1 75 Student_Read_1  NA 
## 10 17  F Blue Student_Math_1 NA Student_Read_1  43 
## 11 46  M Red Student_Math_2 45 Student_Read_2  56 
## 12 46  M Red Student_Math_2 36 Student_Read_2  78 
## 13 46  M Red Student_Math_2 63 Student_Read_2  NA 
## 14 62  F Blue Student_Math_2 NA Student_Read_2  68 
## 15 62  F Blue Student_Math_2 68 Student_Read_2  73 
## 16 38  M Red Student_Math_2 57 Student_Read_2  65 
## 17 38  M Red Student_Math_2 75 Student_Read_2  NA 
## 18 17  F Blue Student_Math_2 NA Student_Read_2  72 
## 19 17  F Blue Student_Math_2 61 Student_Read_2  79 
## 20 17  F Blue Student_Math_2 74 Student_Read_2  81 
2

我們可以使用 `data.table` 的 `melt`:

library(data.table) 
# Melt the Math and Read column groups in a single pass. setDT() converts `df`
# to a data.table by reference; its column names are unchanged, so names(df)
# can still be used to look up the original wide column names.
#
# `variable` numbers the position within each pattern group and is used here
# to index the matching name vector. The names are looked up by pattern rather
# than by position: the original names(df)[5:6] picked
# Student_Math_2 / Student_Read_1 instead of the two Read columns.
melt(setDT(df), measure.vars = patterns("Math", "Read"),
     value.name = c("Math", "Read"))[
  , Index1 := grep("Math", names(df), value = TRUE)[variable]
][
  , Index2 := grep("Read", names(df), value = TRUE)[variable]
][]

或者另一種選擇是

# Alternative: melt each column group separately and bind the pieces
# side by side.
# NOTE(review): df[grep(...)] selects columns only when `df` is a plain
# data.frame, and melt() must be available for data frames (e.g. reshape2)
# -- confirm before running.
pat <- c("Student_Math", "Student_Read")

# Repeat the three identifier columns once per stacked column (two per
# group). seq_len() is used instead of 1:nrow(df) so that an empty `df`
# yields zero rows rather than indexing rows 1 and 0.
id_part <- df[rep(seq_len(nrow(df)), 2), 1:3]

# One melted (variable, value) pair per name prefix in `pat`.
stacked <- lapply(pat, function(nm) melt(df[grep(nm, names(df))]))

# The melted pieces all carry the same column names; rename them afterwards.
cbind(id_part, do.call(cbind, stacked))