2012-11-09 52 views
3

我無法在這個 data.frame 上應用 rle 函數,而同樣的函數在另一組數據上運行良好(標題:R 數據結構和函數):

fgroup <- aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]), function(x) rle(x)$values) 

錯誤給出:

Error in rle(x) : 'x' must be an atomic vector 

示例數據:

> dput(fevents2[1:20,]) 
structure(list(weeks = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1", 
"2", "3", "4", "5", "6", "7"), class = "factor"), A1M.Date = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("2012-05-09", "2012-05-10", "2012-05-11", 
"2012-05-14", "2012-05-15", "2012-05-17", "2012-05-18", "2012-05-21", 
"2012-05-22", "2012-05-24", "2012-05-25", "2012-05-28", "2012-05-29", 
"2012-05-30", "2012-05-31", "2012-06-04", "2012-06-05", "2012-06-07", 
"2012-06-08", "2012-06-11", "2012-06-12", "2012-06-14", "2012-06-15", 
"2012-06-18", "2012-06-19", "2012-06-21", "2012-06-22"), class = "factor"), 
    vv = structure(c(8L, 8L, 8L, 20L, 24L, 24L, 24L, 1L, 13L, 
    13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 24L), .Label = c("C AA", 
    "C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", 
    "C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", 
    "G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "nil"), class = "factor"), 
    rv = structure(c(25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
    10L, 10L, 22L, 22L, 22L, 25L, 10L, 22L, 22L, 22L, 22L, 25L 
    ), .Label = c("C AA", "C AJ", "C BB", "C BV", "C JA", "C JR", 
    "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", "G AA", "G AJ", 
    "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", "G RV", "G VB", 
    "G VR", "G VV", "nil"), class = "factor"), ja = structure(c(12L, 
    12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 25L, 25L, 
    12L, 24L, 24L, 24L, 24L, 24L, 24L), .Label = c("C AA", "C AJ", 
    "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", 
    "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", 
    "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"), 
    aa = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 25L, 25L, 
    25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L), .Label = c("C AA", 
    "C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", 
    "C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", 
    "G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil" 
    ), class = "factor"), bv = structure(c(25L, 11L, 11L, 11L, 
    23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 
    23L, 23L, 23L, 23L), .Label = c("C AA", "C AJ", "C BB", "C BV", 
    "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", 
    "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", 
    "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"), 
    aj = structure(c(7L, 7L, 7L, 25L, 25L, 25L, 25L, 25L, 9L, 
    9L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 25L, 25L), .Label = c("C AA", 
    "C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", 
    "C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", 
    "G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil" 
    ), class = "factor"), vb = structure(c(1L, 1L, 1L, 25L, 25L, 
    25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 2L, 
    25L, 2L, 2L), .Label = c("C AA", "C AJ", "C BB", "C BV", 
    "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", 
    "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", 
    "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"), 
    rj = structure(c(5L, 5L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
    16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("C AA", 
    "C AJ", "C BB", "C BV", "C JR", "C RJ", "C RR", "C RV", "C VB", 
    "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JR", "G RJ", 
    "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"), 
    rr = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("C AA", 
    "C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", 
    "C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", 
    "G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil" 
    ), class = "factor"), vr = structure(c(5L, 5L, 5L, 25L, 25L, 
    7L, 7L, 7L, 7L, 7L, 25L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
    7L), .Label = c("C AA", "C AJ", "C BB", "C BV", "C JA", "C JR", 
    "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", "G AA", "G AJ", 
    "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", "G RV", "G VB", 
    "G VR", "G VV", "nil"), class = "factor"), bb = structure(c(4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L), .Label = c("C AA", "C AJ", "C BB", "C BV", 
    "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", 
    "G AA", "G AJ", "G BB", "G BV", "G JA", "G RJ", "G RR", "G RV", 
    "G VB", "G VR", "G VV", "nil"), class = "factor"), jr = structure(c(25L, 
    25L, 10L, 10L, 22L, 22L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
    25L, 25L, 25L, 5L, 5L, 5L, 5L), .Label = c("C AA", "C AJ", 
    "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", 
    "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", 
    "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor")), 
.Names = c("weeks", 
"A1M.Date", "vv", "rv", "ja", "aa", "bv", "aj", "vb", "rj", "rr", 
"vr", "bb", "jr"), row.names = c(NA, 20L), class = "data.frame") 

數據的結構:

str(fevents2) 
'data.frame': 1430 obs. of 14 variables: 
$ weeks : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ... 
$ A1M.Date: Factor w/ 27 levels "2012-05-09","2012-05-10",..: 1 1 1 1 1 1 1 1 1 1 ... 
$ vv  : Factor w/ 24 levels "C AA","C AJ",..: 8 8 8 20 24 24 24 1 13 13 .. 
$ rv  : Factor w/ 25 levels "C AA","C AJ",..: 25 25 25 25 25 25 25 25 10 10 ... 
$ ja  : Factor w/ 25 levels "C AA","C AJ",..: 12 12 12 12 12 12 12 12 12 12 ... 
$ aa  : Factor w/ 25 levels "C AA","C AJ",..: 2 2 2 2 2 2 2 2 25 25 ... 
$ bv  : Factor w/ 25 levels "C AA","C AJ",..: 25 11 11 11 23 23 23 23 23 23 ... 
$ aj  : Factor w/ 25 levels "C AA","C AJ",..: 7 7 7 25 25 25 25 25 9 9 ... 
$ vb  : Factor w/ 25 levels "C AA","C AJ",..: 1 1 1 25 25 25 25 25 25 25 ... 
$ rj  : Factor w/ 23 levels "C AA","C AJ",..: 5 5 16 16 16 16 16 16 16 16 ... 
$ rr  : Factor w/ 25 levels "C AA","C AJ",..: 3 3 3 3 3 3 3 3 3 3 ... 
$ vr  : Factor w/ 25 levels "C AA","C AJ",..: 5 5 5 25 25 7 7 7 7 7 ... 
$ bb  : Factor w/ 24 levels "C AA","C AJ",..: 4 4 4 4 4 4 4 4 4 4 ... 
$ jr  : Factor w/ 25 levels "C AA","C AJ",..: 25 25 10 10 22 22 25 25 25 25 ... 
NULL 

我明白我的列是因子(factor),但用下面的方式把因子轉換爲數字:

as.numeric(as.character(fevents2)) 

或:

sapply(fevents2, function(x) as.numeric(as.character(x))) 

並沒有解決我的問題:

Error in fevents3[, 3:14] : incorrect number of dimensions 
In addition: Warning message: 
In eval.with.vis(expr, envir, enclos) : NAs introduced by coercion 

下面是一個 rle 函數可以正常工作的示例 data.frame:

dput(fevents[1:20,] 
structure(list(weeks = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1), A1M.Date = c("2012-05-09", "2012-05-09", 
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", 
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", 
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", 
"2012-05-09", "2012-05-09", "2012-05-09"), vv = c("C RR", "C RR", 
"C RR", "G RR", "nil", "nil", "nil", "C AA", "G AA", "G AA", 
"G AA", "G AA", "G AA", "G AA", "G AA", "G AA", "G AA", "G AA", 
"G AA", "nil"), rv = c("nil", "nil", "nil", "nil", "nil", "nil", 
"nil", "nil", "C VB", "C VB", "G VB", "G VB", "G VB", "nil", 
"G VB", "G VB", "G VB", "G VB", "G VB", "nil"), ja = c("C VV", 
"C VV", "C VV", "C VV", "C VV", "C VV", "C VV", "C VV", "C VV", 
"C VV", "C VV", "nil", "nil", "G VV", "G VV", "G VV", "G VV", 
"G VV", "G VV", "G VV"), aa = c("C AJ", "C AJ", "C AJ", "C AJ", 
"C AJ", "C AJ", "C AJ", "C AJ", "nil", "nil", "nil", "nil", "nil", 
"nil", "nil", "nil", "nil", "nil", "nil", "nil"), bv = c("nil", 
"C VR", "C VR", "C VR", "G VR", "G VR", "G VR", "G VR", "G VR", 
"G VR", "G VR", "G VR", "G VR", "G VR", "G VR", "G VR", "G VR", 
"G VR", "G VR", "G VR"), aj = c("C RJ", "C RJ", "C RJ", "nil", 
"nil", "nil", "nil", "nil", "C RV", "C RV", "G RV", "G RV", "G RV", 
"G RV", "G RV", "G RV", "G RV", "G RV", "nil", "nil"), vb = c("C AA", 
"C AA", "C AA", "nil", "nil", "nil", "nil", "nil", "nil", "nil", 
"nil", "nil", "nil", "nil", "nil", "nil", "C AJ", "nil", "C AJ", 
"C AJ"), rj = c("C JR", "C JR", "G JR", "G JR", "G JR", "G JR", 
"G JR", "G JR", "G JR", "G JR", "G JR", "G JR", "G JR", "G JR", 
"G JR", "G JR", "G JR", "G JR", "G JR", "G JR"), rr = c("C BB", 
"C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", 
"C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", 
"C BB", "C BB", "C BB"), vr = c("C JA", "C JA", "C JA", "nil", 
"nil", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "nil", "C RJ", 
"C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ" 
), bb = c("C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV", 
"C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV", 
"C BV", "C BV", "C BV", "C BV", "C BV"), jr = c("nil", "nil", 
"C VB", "C VB", "G VB", "G VB", "nil", "nil", "nil", "nil", "nil", 
"nil", "nil", "nil", "nil", "nil", "C JA", "C JA", "C JA", "C JA" 
)), .Names = c("weeks", "A1M.Date", "vv", "rv", "ja", "aa", "bv", 
"aj", "vb", "rj", "rr", "vr", "bb", "jr"), row.names = c(NA, 
20L), class = "data.frame") 

str(fevents) 
'data.frame': 1430 obs. of 14 variables: 
$ weeks : num 1 1 1 1 1 1 1 1 1 1 ... 
$ A1M.Date: chr "2012-05-09" "2012-05-09" "2012-05-09" "2012-05-09" ... 
$ vv  : chr "C RR" "C RR" "C RR" "G RR" ... 
$ rv  : chr "nil" "nil" "nil" "nil" ... 
$ ja  : chr "C VV" "C VV" "C VV" "C VV" ... 
$ aa  : chr "C AJ" "C AJ" "C AJ" "C AJ" ... 
$ bv  : chr "nil" "C VR" "C VR" "C VR" ... 
$ aj  : chr "C RJ" "C RJ" "C RJ" "nil" ... 
$ vb  : chr "C AA" "C AA" "C AA" "nil" ... 
$ rj  : chr "C JR" "C JR" "G JR" "G JR" ... 
$ rr  : chr "C BB" "C BB" "C BB" "C BB" ... 
$ vr  : chr "C JA" "C JA" "C JA" "nil" ... 
$ bb  : chr "C BV" "C BV" "C BV" "C BV" ... 
$ jr  : chr "nil" "nil" "C VB" "C VB" ... 

我找到了一個實在「不優雅」的解決方法:將 data.frame 作爲 csv 寫入文件,然後使用 stringsAsFactors = FALSE 重新導入。這不是我想在代碼中寫的東西……一定有更簡單的方法來重新整理 data.frame 的結構,讓 rle 能夠接受吧?

感謝您的幫助!

+0

你可以用 `dput(fevents[1:20,])` 的輸出替換 `str(fevents)` 的輸出嗎?這樣我們就可以將您的文本直接複製到 R 中並使用這些數據。 –

+0

已編輯問題,加入了 dput 樣本數據 – Chargaff

+0

'fevents'和'fevents2'是同一件事嗎?編輯:我現在看到你的編輯,沒關係。 –

回答

3

問題在於因子(factor)*不是*原子向量,正如錯誤信息清楚表明的那樣。可以先將所有因子轉換爲字符(而不是將它們強制轉換爲數字!),或者在你所應用的匿名函數內部進行轉換。

所以下面這段實現了第二個想法的代碼是可行的:

# Group columns 3:14 of fevents2 by the values in column 1 (weeks).
# Each column is converted to character inside the anonymous function
# before being passed to rle(); only the run values ($values) are kept.
aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]), 
      function(x) rle(as.character(x))$values) 

勉強算是可行:

> aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]), 
+   function(x) rle(as.character(x))$values) 
    weeks vv.1 vv.2 vv.3 vv.4 vv.5 vv.6 rv.1 rv.2 rv.3 rv.4 rv.5 rv.6 rv.7 ja.1 
1  1 C RR G RR nil C AA G AA nil nil C VB G VB nil C VB G VB nil C VV 
    ja.2 ja.3 ja.4 aa.1 aa.2 bv.1 bv.2 bv.3 aj.1 aj.2 aj.3 aj.4 aj.5 vb.1 vb.2 
1 nil C VV G VV C AJ nil nil C VR G VR C RJ nil C RV G RV nil C AA nil 
    vb.3 vb.4 vb.5 rj.1 rj.2 rr vr.1 vr.2 vr.3 vr.4 vr.5 bb jr.1 jr.2 jr.3 
1 C AJ nil C AJ C JR G JR C BB C JA nil C RJ nil C RJ C BV nil C VB G VB 
    jr.4 jr.5 
1 nil C JA 

不過我不確定你希望得到什麼——這裏只有一個星期,而且 aggregate() 已把 rle() 的所有值粘在了一起。對於 fevents2 中被聚合的每個變量,你是否需要各自獨立的 $values?

另一件事:

as.numeric(as.character(fevents2)) 不可能起作用,因爲數據根本不是數字!而且你不能把這些函數直接應用到整個數據框上並得到你想要的結果——即使它們真的能運行。

sapply() 的做法應該是可行的。下面這個版本會檢查每個變量是否爲因子,如果是,就將其強制轉換爲字符:

# For every column of fevents2: convert it to character if it is a factor,
# otherwise leave it unchanged.
# NOTE: sapply() may simplify its result to a matrix rather than a data frame.
fevents3 <- sapply(fevents2, 
        function(x) if(is.factor(x)) { as.character(x) } else { x }) 

但需要注意,sapply() 會把結果簡化爲一個矩陣,這將改變 aggregate() 的方法分派:

> class(fevents3) 
[1] "matrix" 

或許應該改用:

# Same per-column conversion (factor -> character, other columns unchanged),
# but using lapply(), which returns a list.
fevents3 <- lapply(fevents2, 
        function(x) if(is.factor(x)) { as.character(x) } else { x }) 
# Rebuild a data frame from the list; stringsAsFactors = FALSE keeps the
# character columns from being converted back to factors.
fevents3 <- data.frame(fevents3, stringsAsFactors = FALSE) 

現在,如果你想把 rle() 應用到拆分後數據的每一列,並讓各列的結果保持分開,可以這樣做:

# Split fevents3 into one data frame per value of its first column (weeks).
spl <- split(fevents3, list(weeks = fevents3[, 1])) 
# For each group, run rle() on each of columns 3:14 separately and keep the
# run values, producing a nested list indexed by group and then by column.
res <- lapply(spl, function(x) lapply(x[, 3:14], function(y) rle(y)$values)) 

其給出

> res 
$`1` 
$`1`$vv 
[1] "C RR" "G RR" "nil" "C AA" "G AA" "nil" 

$`1`$rv 
[1] "nil" "C VB" "G VB" "nil" "C VB" "G VB" "nil" 

$`1`$ja 
[1] "C VV" "nil" "C VV" "G VV" 

$`1`$aa 
[1] "C AJ" "nil" 

$`1`$bv 
[1] "nil" "C VR" "G VR" 

$`1`$aj 
[1] "C RJ" "nil" "C RV" "G RV" "nil" 

$`1`$vb 
[1] "C AA" "nil" "C AJ" "nil" "C AJ" 

$`1`$rj 
[1] "C JR" "G JR" 

$`1`$rr 
[1] "C BB" 

$`1`$vr 
[1] "C JA" "nil" "C RJ" "nil" "C RJ" 

$`1`$bb 
[1] "C BV" 

$`1`$jr 
[1] "nil" "C VB" "G VB" "nil" "C JA" 

這與上面 aggregate() 給出的答案相同,只是每個 rle() 的輸出保持分離:

> unlist(res) 
1.vv1 1.vv2 1.vv3 1.vv4 1.vv5 1.vv6 1.rv1 1.rv2 1.rv3 1.rv4 1.rv5 
"C RR" "G RR" "nil" "C AA" "G AA" "nil" "nil" "C VB" "G VB" "nil" "C VB" 
1.rv6 1.rv7 1.ja1 1.ja2 1.ja3 1.ja4 1.aa1 1.aa2 1.bv1 1.bv2 1.bv3 
"G VB" "nil" "C VV" "nil" "C VV" "G VV" "C AJ" "nil" "nil" "C VR" "G VR" 
1.aj1 1.aj2 1.aj3 1.aj4 1.aj5 1.vb1 1.vb2 1.vb3 1.vb4 1.vb5 1.rj1 
"C RJ" "nil" "C RV" "G RV" "nil" "C AA" "nil" "C AJ" "nil" "C AJ" "C JR" 
1.rj2 1.rr 1.vr1 1.vr2 1.vr3 1.vr4 1.vr5 1.bb 1.jr1 1.jr2 1.jr3 
"G JR" "C BB" "C JA" "nil" "C RJ" "nil" "C RJ" "C BV" "nil" "C VB" "G VB" 
1.jr4 1.jr5 
"nil" "C JA" 
> aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]), 
+   function(x) rle(as.character(x))$values) 
    weeks vv.1 vv.2 vv.3 vv.4 vv.5 vv.6 rv.1 rv.2 rv.3 rv.4 rv.5 rv.6 rv.7 ja.1 
1  1 C RR G RR nil C AA G AA nil nil C VB G VB nil C VB G VB nil C VV 
    ja.2 ja.3 ja.4 aa.1 aa.2 bv.1 bv.2 bv.3 aj.1 aj.2 aj.3 aj.4 aj.5 vb.1 vb.2 
1 nil C VV G VV C AJ nil nil C VR G VR C RJ nil C RV G RV nil C AA nil 
    vb.3 vb.4 vb.5 rj.1 rj.2 rr vr.1 vr.2 vr.3 vr.4 vr.5 bb jr.1 jr.2 jr.3 
1 C AJ nil C AJ C JR G JR C BB C JA nil C RJ nil C RJ C BV nil C VB G VB 
    jr.4 jr.5 
1 nil C JA 

[注:這一點只在這裏成立,因爲你給出的數據片段只有一個星期。我不記得如果有超過一個星期,unlist(res) 會是什麼樣子。]

+0

是的,aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]), function(x) rle(as.character(x))$values) 很好用。我的數據包含好幾個星期,聚合結果符合預期。感謝您的解釋,這個問題讓我頭疼! – Chargaff