2012-07-23 48 views
12

我有以下數據框,我想使用轉換創建一個「數據透視表」與兩個值(值和百分比)的列。 這裏是數據幀:重塑多個值列以寬格式

expensesByMonth <- structure(list(month = c("2012-02-01", "2012-02-01", "2012-02-01", 
"2012-02-01", "2012-02-01", "2012-02-01", "2012-02-01", "2012-02-01", 
"2012-02-01", "2012-02-01", "2012-02-01", "2012-02-01", "2012-03-01", 
"2012-03-01", "2012-03-01", "2012-03-01", "2012-03-01", "2012-03-01", 
"2012-03-01", "2012-03-01", "2012-03-01", "2012-03-01", "2012-03-01", 
"2012-03-01", "2012-03-01", "2012-03-01", "2012-03-01", "2012-04-01", 
"2012-04-01", "2012-04-01", "2012-04-01", "2012-04-01", "2012-04-01", 
"2012-04-01", "2012-04-01", "2012-04-01", "2012-04-01", "2012-04-01", 
"2012-04-01", "2012-04-01", "2012-04-01", "2012-04-01", "2012-04-01", 
"2012-04-01", "2012-04-01", "2012-05-01", "2012-05-01", "2012-05-01", 
"2012-05-01", "2012-05-01", "2012-05-01", "2012-05-01", "2012-05-01", 
"2012-05-01", "2012-05-01", "2012-05-01", "2012-05-01", "2012-05-01", 
"2012-05-01", "2012-05-01", "2012-05-01", "2012-05-01", "2012-05-01", 
"2012-06-01", "2012-06-01", "2012-06-01", "2012-06-01", "2012-06-01", 
"2012-06-01", "2012-06-01", "2012-06-01", "2012-06-01", "2012-06-01", 
"2012-06-01", "2012-06-01", "2012-06-01", "2012-06-01", "2012-06-01", 
"2012-06-01", "2012-06-01", "2012-06-01", "2012-06-01", "2012-06-01", 
"2012-07-01", "2012-07-01", "2012-07-01", "2012-07-01", "2012-07-01", 
"2012-07-01", "2012-07-01", "2012-07-01", "2012-07-01", "2012-07-01", 
"2012-07-01", "2012-07-01", "2012-07-01"), 
expense_type = c("Adjustment", "Bank Service Charge", "Cable", "Clubbing", "Dining", "Education", 
"Gifts", "Groceries", "Lunch", "Personal Care", "Rent", "Transportation", 
"Adjustment", "Bank Service Charge", "Cable", "Clubbing", "Dining", 
"Gifts", "Groceries", "Lunch", "Medical Expenses", "Miscellaneous", 
"Personal Care", "Phone", "Recreation", "Rent", "Transportation", 
"Adjustment", "Bank Service Charge", "Clothes", "Clubbing", "Computer", 
"Dining", "Gifts", "Groceries", "Lunch", "Maintenance", "Medical Expenses", 
"Miscellaneous", "Personal Care", "Phone", "Recreation", "Rent", 
"Transportation", "Travel", "Bank Service Charge", "Cable", "Clothes", 
"Clubbing", "Computer", "Dining", "Electric", "Gifts", "Groceries", 
"Lunch", "Maintenance", "Medical Expenses", "Miscellaneous", 
"Personal Care", "Phone", "Recreation", "Rent", "Transportation", 
"Adjustment", "Bank Service Charge", "Cable", "Charity", "Clothes", 
"Computer", "Dining", "Education", "Electric", "Gifts", "Groceries", 
"Lunch", "Maintenance", "Medical Expenses", "Miscellaneous", 
"Personal Care", "Phone", "Recreation", "Rent", "Transportation", 
"Computer", "Gifts", "Groceries", "Lunch", "Maintenance", "Medical Expenses", 
"Miscellaneous", "Personal Care", "Phone", "Recreation", "Rent", 
"Repair and Maintenance", "Transportation"), 
value = c(442.37, 200, 21.33, 75, 22.5, 1800, 10, 233.33, 154.75, 30, 545, 32.5, 
2, 200, 36.33, 206.55, 74.5, 89, 372.68, 383.75, 144.19, 508.11, 
30, 38.4, 81.75, 1746.7, 35, 16.37, 200, 806.9, 324.81, 756, 
80.5, 100, 398.37, 326.25, 151, 29.95, 101, 90, 38.45, 61, 743.75, 
129, 228.53, 200, 39.05, 237, 40, 283.83, 141.32, 32.88, 30, 
424.4, 412, 142.75, 86.55, 1051.5, 30, 38.9, 51.5, 749.7, 35, 
10, 200, 16, 32.59, 149.81, 100, 80, 60, 31.91, 55, 397.25, 486.4, 
115.6, 47.08, 1000, 120, 41.11, 256, 761.6, 55, 10.54, 10, 342.11, 
291, 76.5, 66.8, 1008, 30, 41.11, 316, 765, 65, 62), 
percent = c(0.124025030980324, 0.0560729845967511, 0.00598018380724351, 0.0210273692237817, 
0.0063082107671345, 0.50465686137076, 0.00280364922983756, 0.0654175474797997, 
0.0433864718317362, 0.00841094768951267, 0.152798883026147, 0.00911185999697206, 
0.000506462461002391, 0.0506462461002391, 0.00919989060410842, 
0.0523049106600219, 0.018865726672339, 0.0225375795146064, 0.0943742149831854, 
0.0971774847048337, 0.0365134111259673, 0.128669320529962, 0.00759693691503586, 
0.0097240792512459, 0.0207016530934727, 0.442318990316438, 0.00886309306754183, 
0.00357276925628781, 0.0436502047194601, 0.176106750940662, 0.0708901149746392, 
0.164997773839559, 0.0175692073995827, 0.0218251023597301, 0.0869446602704567, 
0.0712043964486193, 0.0329559045631924, 0.00653661815673915, 
0.0220433533833274, 0.0196425921237571, 0.00839175185731621, 
0.0133133124394353, 0.162324198800492, 0.0281543820440518, 0.0498769064226911, 
0.0496724104530621, 0.00969853814096037, 0.0588618063868785, 
0.00993448209061241, 0.070492601294463, 0.0350985252261336, 0.0081661442784834, 
0.00745086156795931, 0.105404854981398, 0.102325165533308, 0.035453682960873, 
0.0214957356235626, 0.261152697956974, 0.00745086156795931, 0.00966128383312057, 
0.0127906456916635, 0.186197030583303, 0.00869267182928586, 0.00249044292527426, 
0.0498088585054852, 0.00398470868043882, 0.00811635349346881, 
0.0373093254635337, 0.0249044292527426, 0.0199235434021941, 0.0149426575516456, 
0.00794700337455016, 0.0136974360890084, 0.09893284520652, 0.12113514388534, 
0.0287895202161704, 0.0117250052921912, 0.249044292527426, 0.0298853151032911, 
0.0102382108658025, 0.0637553388870211, 0.189672133188888, 0.0136974360890084, 
0.00341757293956667, 0.0032424790697976, 0.110928451456846, 0.0943561409311103, 
0.0248049648839517, 0.021659760186248, 0.326841890235599, 0.00972743720939281, 
0.013329831455938, 0.102462338605604, 0.248049648839517, 0.0210761139536844, 
0.0201033702327451)), 
.Names = c("month", "expense_type", "value", "percent"), 
row.names = c(NA, -96L), 
class = "data.frame" 
) 

這是我想創建(當然,用不同的標題名稱等:[月] _value,[月] _percent):什麼

expenses value  percent value.1 percent.1 value.2 percent.2 value.3 percent.3 value.4 percent.4 value.5 percent.5 
1    Adjustment 442.37 0.124025031 2.00 0.000506462 16.37 0.003572769 0.00 0.000000000 10.00 0.002490443 0.00 0.000000000 
2  Bank Service Charge 200.00 0.056072985 200.00 0.050646246 200.00 0.043650205 200.00 0.049672410 200.00 0.049808859 0.00 0.000000000 
3     Cable 21.33 0.005980184 36.33 0.009199891 0.00 0.000000000 39.05 0.009698538 16.00 0.003984709 0.00 0.000000000 
4     Charity 0.00 0.000000000 0.00 0.000000000 0.00 0.000000000 0.00 0.000000000 32.59 0.008116353 0.00 0.000000000 
5     Clothes 0.00 0.000000000 0.00 0.000000000 806.90 0.176106751 237.00 0.058861806 149.81 0.037309325 0.00 0.000000000 
6    Clubbing 75.00 0.021027369 206.55 0.052304911 324.81 0.070890115 40.00 0.009934482 0.00 0.000000000 0.00 0.000000000 
7    Computer 0.00 0.000000000 0.00 0.000000000 756.00 0.164997774 283.83 0.070492601 100.00 0.024904429 10.54 0.003417573 
8     Dining 22.50 0.006308211 74.50 0.018865727 80.50 0.017569207 141.32 0.035098525 80.00 0.019923543 0.00 0.000000000 
9    Education 1800.00 0.504656861 0.00 0.000000000 0.00 0.000000000 0.00 0.000000000 60.00 0.014942658 0.00 0.000000000 
10    Electric 0.00 0.000000000 0.00 0.000000000 0.00 0.000000000 32.88 0.008166144 31.91 0.007947003 0.00 0.000000000 
11     Gifts 10.00 0.002803649 89.00 0.022537580 100.00 0.021825102 30.00 0.007450862 55.00 0.013697436 10.00 0.003242479 
12    Groceries 233.33 0.065417547 372.68 0.094374215 398.37 0.086944660 424.40 0.105404855 397.25 0.098932845 342.11 0.110928451 
13     Lunch 154.75 0.043386472 383.75 0.097177485 326.25 0.071204396 412.00 0.102325166 486.40 0.121135144 291.00 0.094356141 
14   Maintenance 0.00 0.000000000 0.00 0.000000000 151.00 0.032955905 142.75 0.035453683 115.60 0.028789520 76.50 0.024804965 
15  Medical Expenses 0.00 0.000000000 144.19 0.036513411 29.95 0.006536618 86.55 0.021495736 47.08 0.011725005 66.80 0.021659760 
16   Miscellaneous 0.00 0.000000000 508.11 0.128669321 101.00 0.022043353 1051.50 0.261152698 1000.00 0.249044293 1008.00 0.326841890 
17   Personal Care 30.00 0.008410948 30.00 0.007596937 90.00 0.019642592 30.00 0.007450862 120.00 0.029885315 30.00 0.009727437 
18     Phone 0.00 0.000000000 38.40 0.009724079 38.45 0.008391752 38.90 0.009661284 41.11 0.010238211 41.11 0.013329831 
19    Recreation 0.00 0.000000000 81.75 0.020701653 61.00 0.013313312 51.50 0.012790646 256.00 0.063755339 316.00 0.102462339 
20     Rent 545.00 0.152798883 1746.70 0.442318990 743.75 0.162324199 749.70 0.186197031 761.60 0.189672133 765.00 0.248049649 
21 Repair and Maintenance 0.00 0.000000000 0.00 0.000000000 0.00 0.000000000 0.00 0.000000000 0.00 0.000000000 65.00 0.021076114 
22   Transportation 32.50 0.009111860 35.00 0.008863093 129.00 0.028154382 35.00 0.008692672 55.00 0.013697436 62.00 0.020103370 
23     Travel 0.00 0.000000000 0.00 0.000000000 228.53 0.049876906 0.00 0.000000000 0.00 0.000000000 0.00 0.000000000 

在單個值列上使用強制轉換時,我也遇到了以下錯誤:它沒有考慮到「值」參數。所以,即使我指定了value =「percent」,它仍然會顯示「value」列中的值。

cast(expensesByMonth, expense_type ~ month, fun.aggregate = sum, value = "percent") 
+0

下面的答案是非常有幫助的;謝謝。不過,我想知道:這項任務是否可以通過tidyr完成? – fredtal 2016-02-07 19:21:00

回答

18

你最好的選擇是你的數據重塑長格式,使用melt,然後dcast

library(reshape2) 

meltExpensesByMonth <- melt(expensesByMonth, id.vars=1:2) 
dcast(meltExpensesByMonth, expense_type ~ month + variable, fun.aggregate = sum) 

輸出的前幾行:

   expense_type 2012-02-01_value 2012-02-01_percent 2012-03-01_value 2012-03-01_percent 
1    Adjustment   442.37  0.124025031    2.00  0.0005064625 
2  Bank Service Charge   200.00  0.056072985   200.00  0.0506462461 
3     Cable   21.33  0.005980184   36.33  0.0091998906 
4     Charity    0.00  0.000000000    0.00  0.0000000000 
+0

謝謝,這是我想要的。另外,你有什麼想法,爲什麼演員沒有按照預期工作? cast(expensesByMonth,expense_type〜month,fun.aggregate = sum,value =「percent」) – 2012-07-23 08:25:13

+0

'cast'是(現已停用的)軟件包'reshape'中的函數。它在'reshape2'中被'dcast'和'acast'取代。我不再安裝舊版本。 – Andrie 2012-07-23 08:27:28

3

我喜歡爲此,包tables中的tabulate函數。它需要一些因素,但對於您擁有的數據類型而言,這是一個好主意。

library(tables) 
expensesByMonth$month= as.factor(expensesByMonth$month) 
expensesByMonth$expense_type= as.factor(expensesByMonth$expense_type) 
tabular(expense_type~(month)*(value+percent)*(sum),data=expensesByMonth) 
# Optional formatting 
tabular(expense_type~month* 
    ((Format(digits=1))*value+(Format(digits=3))*percent)*sum, 
    data=expensesByMonth) 

的部分輸出:

     value  percent value  percent value  percent 
expense_type   sum  sum  sum  sum  sum  sum  
Adjustment    442  0.124025 2  0.000506 16  0.003573 
Bank Service Charge  200  0.056073 200  0.050646 200  0.043650 
Cable     21  0.005980 36  0.009200 0  0.000000 
+0

一些()是多餘的,但我有時會保留這些以防萬一我想添加更多的術語。 – 2012-07-23 08:48:00

+0

我以前從未和這個班合作過。你能否提供一個如何從中提取數據的例子,如果這不是太多要問的話? – 2012-07-23 08:50:10

+0

這不是一條線。該軟件包的作者Duncan Murdoch已經爲桌面軟件包寫了一個很好的小插曲。請參閱包裝文檔/指南和插圖概述。 – 2012-07-23 08:52:35

18

data.table v1.9.5+可以投多個value.var變量...這是相當直接的(和有效),因此:

require(data.table) # v1.9.5+ 
dcast(setDT(expensesByMonth), expense_type ~ month, value.var=c("value", "percent")) 
0

由於這個問題是經常訪問在我看來,它也值得一個完整的基礎R答案。從基R中的reshape -function是相當通用的,可以很容易地被應用到這個問題,以及:

expenses <- reshape(expensesByMonth, idvar = 'expense_type', direction = 'wide', timevar = 'month', sep = '_') 

NA - 值可以將細胞置換以0

expenses[is.na(expenses)] <- 0 

其給出(由expense_type下令,使其更容易與期望的輸出比較):

> expenses[order(expenses$expense_type),] 
      expense_type value_2012-02-01 percent_2012-02-01 value_2012-03-01 percent_2012-03-01 value_2012-04-01 percent_2012-04-01 value_2012-05-01 percent_2012-05-01 value_2012-06-01 percent_2012-06-01 value_2012-07-01 percent_2012-07-01 
1    Adjustment   442.37  0.124025031    2.00  0.0005064625   16.37  0.003572769    0.00  0.000000000   10.00  0.002490443    0.00  0.000000000 
2  Bank Service Charge   200.00  0.056072985   200.00  0.0506462461   200.00  0.043650205   200.00  0.049672410   200.00  0.049808859    0.00  0.000000000 
3     Cable   21.33  0.005980184   36.33  0.0091998906    0.00  0.000000000   39.05  0.009698538   16.00  0.003984709    0.00  0.000000000 
67    Charity    0.00  0.000000000    0.00  0.0000000000    0.00  0.000000000    0.00  0.000000000   32.59  0.008116353    0.00  0.000000000 
30    Clothes    0.00  0.000000000    0.00  0.0000000000   806.90  0.176106751   237.00  0.058861806   149.81  0.037309325    0.00  0.000000000 
4    Clubbing   75.00  0.021027369   206.55  0.0523049107   324.81  0.070890115   40.00  0.009934482    0.00  0.000000000    0.00  0.000000000 
32    Computer    0.00  0.000000000    0.00  0.0000000000   756.00  0.164997774   283.83  0.070492601   100.00  0.024904429   10.54  0.003417573 
5     Dining   22.50  0.006308211   74.50  0.0188657267   80.50  0.017569207   141.32  0.035098525   80.00  0.019923543    0.00  0.000000000 
6    Education   1800.00  0.504656861    0.00  0.0000000000    0.00  0.000000000    0.00  0.000000000   60.00  0.014942658    0.00  0.000000000 
52    Electric    0.00  0.000000000    0.00  0.0000000000    0.00  0.000000000   32.88  0.008166144   31.91  0.007947003    0.00  0.000000000 
7     Gifts   10.00  0.002803649   89.00  0.0225375795   100.00  0.021825102   30.00  0.007450862   55.00  0.013697436   10.00  0.003242479 
8    Groceries   233.33  0.065417547   372.68  0.0943742150   398.37  0.086944660   424.40  0.105404855   397.25  0.098932845   342.11  0.110928451 
9     Lunch   154.75  0.043386472   383.75  0.0971774847   326.25  0.071204396   412.00  0.102325166   486.40  0.121135144   291.00  0.094356141 
37   Maintenance    0.00  0.000000000    0.00  0.0000000000   151.00  0.032955905   142.75  0.035453683   115.60  0.028789520   76.50  0.024804965 
21  Medical Expenses    0.00  0.000000000   144.19  0.0365134111   29.95  0.006536618   86.55  0.021495736   47.08  0.011725005   66.80  0.021659760 
22   Miscellaneous    0.00  0.000000000   508.11  0.1286693205   101.00  0.022043353   1051.50  0.261152698   1000.00  0.249044293   1008.00  0.326841890 
10   Personal Care   30.00  0.008410948   30.00  0.0075969369   90.00  0.019642592   30.00  0.007450862   120.00  0.029885315   30.00  0.009727437 
24     Phone    0.00  0.000000000   38.40  0.0097240793   38.45  0.008391752   38.90  0.009661284   41.11  0.010238211   41.11  0.013329831 
25    Recreation    0.00  0.000000000   81.75  0.0207016531   61.00  0.013313312   51.50  0.012790646   256.00  0.063755339   316.00  0.102462339 
11     Rent   545.00  0.152798883   1746.70  0.4423189903   743.75  0.162324199   749.70  0.186197031   761.60  0.189672133   765.00  0.248049649 
95 Repair and Maintenance    0.00  0.000000000    0.00  0.0000000000    0.00  0.000000000    0.00  0.000000000    0.00  0.000000000   65.00  0.021076114 
12   Transportation   32.50  0.009111860   35.00  0.0088630931   129.00  0.028154382   35.00  0.008692672   55.00  0.013697436   62.00  0.020103370 
45     Travel    0.00  0.000000000    0.00  0.0000000000   228.53  0.049876906    0.00  0.000000000    0.00  0.000000000    0.00  0.000000000 

你也可以與實現這一目標的tidyverse

library(dplyr) 
library(tidyr) 

expensesByMonth %>% 
    gather(k, v, 3:4) %>% 
    unite(km, k, month) %>% 
    spread(km, v, fill = 0)