2013-11-21 154 views
2

我有一個數據集,附在文章底部。該數據有四列,分別爲 SIC、AT95Group、AT95Mean、AT95Med。AT95Group 列有四個取值:「00」、「01」、「11」和「10」。目前每個 SIC 有(最多)四行,AT95Group 的每個取值各佔一行。我想重塑這個數據框,使每個 SIC 只有一行。之前每個 (SIC, AT95Group) 對只有平均值和中位數兩列,現在我們希望創建 4 × 2 = 8 列:4 個組(「00」、「11」、「01」、「10」)乘以 2 個統計量(「Mean」和「Med」)。這八列的名稱類似「00Mean」、「11Mean」、「00Med」、「11Med」等,存放每個 SIC 的相應值。 重塑 R 中的數據

我覺得這很難做到。請提供任何建議。謝謝。

> dput(head(pp,20)) 
structure(list(SIC = c(1L, 1L, 1L, 10L, 10L, 10L, 10L, 12L, 12L, 
12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L), AT95Group = c("11", 
"10", "00", "11", "01", "00", "10", "01", "11", "10", "00", "11", 
"01", "00", "10", "11", "01", "10", "00", "01"), AT95Med = c(0.0691039117115276, 
0.0608649722972575, 0.0609974198491522, 0.215571816296268, 0.305308985848382, 
0.351312558091798, 0.352704719896703, 0.0459887720804718, 0.0304466181779069, 
0.0513875431555943, 0.0541431932578377, 0.0650920855876547, 0.143724642017362, 
0.156092793582293, 0.0976059673595903, 0.0116620184564053, 0.0188895210677074, 
0.0356836223212195, 0.0513040852859517, 0.0982448708035204), 
    AT95Mean = c(0.0691039117115276, 0.0608649722972575, 0.0609974198491522, 
    0.215571816296268, 0.305308985848382, 0.351312558091798, 
    0.352704719896703, 0.0459887720804718, 0.0304466181779069, 
    0.0513875431555943, 0.0541431932578377, 0.0650920855876547, 
    0.143724642017362, 0.156092793582293, 0.0976059673595903, 
    0.0116620184564053, 0.0188895210677074, 0.0356836223212195, 
    0.0513040852859517, 0.0982448708035204)), .Names = c("SIC", 
"AT95Group", "AT95Med", "AT95Mean"), row.names = c(241L, 236L, 
27L, 1126L, 1035L, 1030L, 664L, 1269L, 1259L, 1245L, 1244L, 3919L, 
4722L, 3329L, 3222L, 4886L, 4889L, 4951L, 4860L, 5108L), class = "data.frame") 

下面是我粗略且失敗的嘗試代碼,不知道該如何繼續下去。

# Keep one row per distinct (SIC, AT95Group, AT95Med, AT95Mean) combination.
pp <- unique(
  dacc1[, c("SIC", "AT95Group", "AT95Med", "AT95Mean")]
)
# Distinct SIC codes, in order of first appearance.
xsic <- unique(pp[["SIC"]])
# Pair the SIC codes with an equally long vector of the "AT95" prefix.
xlist <- list(xsic, rep("AT95", times = length(xsic)))

編輯:

運行 Troy 的代碼後,我得到的結果如下:

> pp1 <- head(pp,20) 
    SIC AT95Group AT95Med AT95Mean 
241 1  11 0.06910391 0.06910391 
236 1  10 0.06086497 0.06086497 
27  1  00 0.06099742 0.06099742 
1126 10  11 0.21557182 0.21557182 
1035 10  01 0.30530899 0.30530899 
1030 10  00 0.35131256 0.35131256 
664 10  10 0.35270472 0.35270472 
1269 12  01 0.04598877 0.04598877 
1259 12  11 0.03044662 0.03044662 
1245 12  10 0.05138754 0.05138754 
1244 12  00 0.05414319 0.05414319 
3919 13  11 0.06509209 0.06509209 
4722 13  01 0.14372464 0.14372464 
3329 13  00 0.15609279 0.15609279 
3222 13  10 0.09760597 0.09760597 
4886 14  11 0.01166202 0.01166202 
4889 14  01 0.01888952 0.01888952 
4951 14  10 0.03568362 0.03568362 
4860 14  00 0.05130409 0.05130409 
5108 15  01 0.09824487 0.09824487 

> molten<-melt(pp); 
Using AT95Group as id variables 

molten$variable<-paste(gsub("[AT95]","",molten$variable),molten$AT95Group," "); 
cast(molten[,c(1,3,4)], SIC ~ variable); 

> cast(molten[,c(1,3,4)], SIC ~ variable); 
Error in `[.data.frame`(molten, , c(1, 3, 4)) : 
    undefined columns selected 
+0

嗯……不確定。有一點是,你是在 pp 上運行的,而不是 pp1——這是有意的嗎?還是說你可能已經重塑過 pp 了?因爲我看到 melt 默認沒有把 SIC 當作 id 變量,所以可以把 molten <- melt(pp1) 換成 molten <- melt(pp1, id = c("SIC", "AT95Group"))——這樣應該能強制它得到正確的結構。 – Troy

回答

1

順便一提,base R(準確地說是 stats 包)中也有一個 reshape 函數可以做同樣的事:

# Base R (stats) alternative: one row per SIC, with each AT95Group value
# spread into its own AT95Med.<group> / AT95Mean.<group> column pair.
reshape(
  data = pp,
  idvar = "SIC",
  timevar = "AT95Group",
  v.names = c("AT95Med", "AT95Mean"),
  direction = "wide"
)
#  SIC AT95Med.11 AT95Mean.11 AT95Med.10 AT95Mean.10 AT95Med.00 AT95Mean.00 AT95Med.01 AT95Mean.01 
#241 1 0.06910391 0.06910391 0.06086497 0.06086497 0.06099742 0.06099742   NA   NA 
#1126 10 0.21557182 0.21557182 0.35270472 0.35270472 0.35131256 0.35131256 0.30530899 0.30530899 
#1269 12 0.03044662 0.03044662 0.05138754 0.05138754 0.05414319 0.05414319 0.04598877 0.04598877 
#3919 13 0.06509209 0.06509209 0.09760597 0.09760597 0.15609279 0.15609279 0.14372464 0.14372464 
#4886 14 0.01166202 0.01166202 0.03568362 0.03568362 0.05130409 0.05130409 0.01888952 0.01888952 
#5108 15   NA   NA   NA   NA   NA   NA 0.09824487 0.09824487 
+0

好點.... – Troy

1

我希望這個解決方案不是太神祕:

# Build a wide table `d` with one row per SIC and, for every AT95Group value,
# a "<group>AT95Med" and a "<group>AT95Mean" column.
xsic <- unique(pp[, "SIC"])
AT <- c("00", "01", "10", "11")
d <- data.frame(xsic = xsic)
# Iterate over the group labels themselves so the loop can never get out of
# step with the length of AT.
for (grp in AT) {
  subgroup <- pp[pp$AT95Group == grp, ]
  # Position of each SIC inside this group's rows; NA when the SIC has no row
  # for this group, which makes both new columns NA for that SIC.
  idx <- match(xsic, subgroup$SIC)
  d[[paste0(grp, "AT95Med")]] <- subgroup$AT95Med[idx]
  d[[paste0(grp, "AT95Mean")]] <- subgroup$AT95Mean[idx]
}

結果:

xsic 00AT95Med 00AT95Mean 01AT95Med 01AT95Mean 10AT95Med 10AT95Mean 11AT95Med 11AT95Mean 
    1 0.06099742 0.06099742   NA   NA 0.06086497 0.06086497 0.06910391 0.06910391 
10 0.35131256 0.35131256 0.30530899 0.30530899 0.35270472 0.35270472 0.21557182 0.21557182 
12 0.05414319 0.05414319 0.04598877 0.04598877 0.05138754 0.05138754 0.03044662 0.03044662 
13 0.15609279 0.15609279 0.14372464 0.14372464 0.09760597 0.09760597 0.06509209 0.06509209 
14 0.05130409 0.05130409 0.01888952 0.01888952 0.03568362 0.03568362 0.01166202 0.01166202 
15   NA   NA 0.09824487 0.09824487   NA   NA   NA   NA 
1

或者你可以使用 reshape 包:

install.packages("reshape") # only run this once if you don't have it
library(reshape)
pp # this is what I called your table

# Stretch the table out into variable/value pairs. SIC and AT95Group are
# named as id variables explicitly: SIC is numeric, so with the default
# guess melt() keeps only AT95Group as an id and melts SIC as if it were a
# measurement, leaving no SIC column to cast on.
molten <- melt(pp, id.vars = c("SIC", "AT95Group"))

# Then modify the "variable" values so they reflect the group: strip the
# leading "AT95" prefix and append the group label, separated by one space
# (e.g. "Mean 00", "Med 11").
molten$variable <- paste(
  sub("^AT95", "", molten$variable),
  molten$AT95Group,
  sep = " "
)

# Then use cast (you can look up the documentation in ?reshape); basically
# this gives you a crosstab of the SICs against the new variables.
# Only the columns needed for the cast are kept, selected by name so the
# call does not depend on column positions.
cast(molten[, c("SIC", "variable", "value")], SIC ~ variable)

它給你:

SIC Mean 00 Mean 01 Mean 10 Mean 11  Med 00  Med 01  Med 10  Med 11 
1 1 0.06099742   NA 0.06086497 0.06910391 0.06099742   NA 0.06086497 0.06910391 
2 10 0.35131256 0.30530899 0.35270472 0.21557182 0.35131256 0.30530899 0.35270472 0.21557182 
3 12 0.05414319 0.04598877 0.05138754 0.03044662 0.05414319 0.04598877 0.05138754 0.03044662 
4 13 0.15609279 0.14372464 0.09760597 0.06509209 0.15609279 0.14372464 0.09760597 0.06509209 
5 14 0.05130409 0.01888952 0.03568362 0.01166202 0.05130409 0.01888952 0.03568362 0.01166202 
6 15   NA 0.09824487   NA   NA   NA 0.09824487   NA   NA 
+0

請運行您提到的命令後查看結果。我在編輯中發佈了我的結果。我收到一個錯誤。 – Sumit