2014-01-22 44 views
0

我試圖逐行地將10列的值轉換爲NA。如果每個 [1 row, 10 columns] 範圍內有效(非NA)觀測值的個數小於4,則該範圍內的所有值都必須轉換爲NA。也就是說,要按下面的條件將該行範圍內的所有值轉換爲NA:

sum(!is.na(x)) < 4 

不過,我目前最多只能做到把這些列變成 TRUE/FALSE。我錯過了什麼?

我現在的代碼:

# For each 10-column block (i = 2 and i = 13, i.e. columns 2:11 and 13:22),
# try to blank out the rows that hold fewer than 4 non-NA values.
# NOTE(review): the anonymous function ends with the assignment
# `is.na(x) <- ...`; the value of an assignment is its right-hand side, so the
# function returns a single logical (TRUE/FALSE) instead of the modified `x`.
# apply() therefore yields one logical per row, and that logical vector is
# what gets written into the block of columns.
for (i in seq(2,22,11)) { 
test[,i:(i+9)] <- apply(test[,i:(i+9)], 1, function(x) {is.na(x) <- sum(!is.na(x)) < 4}) 
} 

只針對一個 [1 row, 10 columns] 組合時,代碼看起來像這樣:

# Single-slice version: row 1, columns 13:22. The right-hand side is one
# logical (TRUE when the slice has fewer than 4 non-NA values) that is passed
# to `is.na<-` as the mask of elements to set to NA.
is.na(test[1,13:22]) <- sum(!is.na(test[1,13:22])) < 4 

我的數據(10x22數據幀):

# Sample input (looks like dput() output): a data.frame with 10 rows and
# 22 columns named a..k and `1`..`11`. Among columns `2`..`11` only `3`, `4`
# and `9` hold non-NA values.
structure(list(a = c(23.02522869, 22.75698125, 23.25638988, 23.4618945, 
22.79093387, 23.03077455, 22.79532129, 22.88151619, 22.55282018, 
22.82755365), b = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), c = c(25.1671462, 
24.50974654, 24.37007385, 25.79732045, 25.97977955, 27.75533066, 
25.61258476, 24.68043644, 25.06214083, 24.14456558), d = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), e = c(23.20319674, 23.01461971, 23.44023974, 
23.43422643, 22.74593993, 23.10000038, 22.84586729, 22.76342647, 
22.8174631, 22.90336953), f = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_ 
), g = c(22.21603961, 22.13781649, 21.77009694, 21.97247332, 
20.35237025, 21.3321401, 20.67065374, 20.37071737, 20.35318725, 
21.46554323), h = c(21.41724009, 22.76705639, 21.98989349, 21.87247294, 
20.50963963, 20.83750966, 21.52251395, 20.53171348, 20.54405661, 
21.23782679), i = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), j = c(22.83327369, 
22.67193019, 23.45060031, 23.46845388, 23.08248018, 23.96606992, 
23.07933679, 23.02442296, 22.37349943, 22.90674004), k = c(23.14628986, 
23.61461919, 23.35085341, 23.8013399, 22.76644009, 23.06428044, 
23.3082666, 23.06342696, 23.05868003, 23.20000076), `1` = c(25.45000076, 
25.45000076, 25.45000076, 25.45000076, 25.45000076, 25.45000076, 
25.45000076, 25.45000076, 25.45000076, 25.45000076), `2` = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), `3` = c(26.85134277, 27.53970357, 27.51278, 
28.3514299, 27.78354957, 27.10130081, 26.80624001, 26.21709938, 
26.94341987, 27.314133), `4` = c(31.19462648, 31.15539722, 31.21277968, 
31.05590996, 30.81308378, 31.25366008, 31.49064028, 31.04006683, 
31.10000038, 31.38852975), `5` = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_ 
), `6` = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `7` = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), `8` = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
`9` = c(32.5, 32.23973073, 32.59473101, 32.74366054, 32.57046727, 
31.45634057, 31.95467871, 31.40423339, 31.66785945, 32.32585331 
), `10` = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `11` = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_)), .Names = c("a", "b", "c", 
"d", "e", "f", "g", "h", "i", "j", "k", "1", "2", "3", "4", "5", 
"6", "7", "8", "9", "10", "11"), row.names = c(NA, 10L), class = "data.frame") 

預期的輸出如下所示:

# Expected result: a data.frame with the same 10 rows and 22 column names,
# in which columns `2` through `11` are entirely NA.
structure(list(a = c(23.02522869, 22.75698125, 23.25638988, 23.4618945, 
22.79093387, 23.03077455, 22.79532129, 22.88151619, 22.55282018, 
22.82755365), b = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), c = c(25.1671462, 
24.50974654, 24.37007385, 25.79732045, 25.97977955, 27.75533066, 
25.61258476, 24.68043644, 25.06214083, 24.14456558), d = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), e = c(23.20319674, 23.01461971, 23.44023974, 
23.43422643, 22.74593993, 23.10000038, 22.84586729, 22.76342647, 
22.8174631, 22.90336953), f = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_ 
), g = c(22.21603961, 22.13781649, 21.77009694, 21.97247332, 
20.35237025, 21.3321401, 20.67065374, 20.37071737, 20.35318725, 
21.46554323), h = c(21.41724009, 22.76705639, 21.98989349, 21.87247294, 
20.50963963, 20.83750966, 21.52251395, 20.53171348, 20.54405661, 
21.23782679), i = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), j = c(22.83327369, 
22.67193019, 23.45060031, 23.46845388, 23.08248018, 23.96606992, 
23.07933679, 23.02442296, 22.37349943, 22.90674004), k = c(23.14628986, 
23.61461919, 23.35085341, 23.8013399, 22.76644009, 23.06428044, 
23.3082666, 23.06342696, 23.05868003, 23.20000076), `1` = c(25.45000076, 
25.45000076, 25.45000076, 25.45000076, 25.45000076, 25.45000076, 
25.45000076, 25.45000076, 25.45000076, 25.45000076), `2` = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), `3` = c(NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA), `4` = c(NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA), `5` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
`6` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `7` = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), `8` = c(NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA), `9` = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), `10` = c(NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA), `11` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA)), .Names = c("a", "b", "c", "d", "e", "f", "g", "h", 
"i", "j", "k", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", 
"11"), row.names = c(NA, 10L), class = "data.frame") 
+1

請出示預期的輸出。我不明白你在做什麼。 – Roland

+0

`test[which(rowSums(is.na(test) * 1L) >= 4), 1:10] <- NA` 呢? – lukeA

+0

更新了問題以澄清。 @lukeA你的建議不是我要找的,但謝謝! – amzu

回答

1
# Blank out columns "2".."11" for every row that holds fewer than 4 non-NA
# values inside those columns. Only the target block is counted; NAs in the
# other columns of `test` must not influence the decision.
block_cols <- as.character(2:11)
n_valid <- rowSums(!is.na(test[, block_cols, drop = FALSE]))
test[n_valid < 4, block_cols] <- NA

下一個嘗試:

# Demo on a 10 x 100 numeric matrix with six target blocks of 10 columns each.
test <- matrix(runif(100 * 10), ncol = 100)
test[9, 28:37] <- NA
idx <- c(6:15, 17:26, 28:37, 39:48, 50:59, 61:70)

# Split the target columns into consecutive groups of 10 so that every block
# is judged on its own rather than all 60 columns at once.
block_size <- 10L
blocks <- split(idx, ceiling(seq_along(idx) / block_size))

for (cols in blocks) {
  # Number of non-NA values per row, counted inside this block only.
  n_valid <- rowSums(!is.na(test[, cols, drop = FALSE]))
  # Rows with fewer than 4 valid observations lose the whole block; all other
  # rows keep their values untouched.
  test[n_valid < 4, cols] <- NA
}
test
+0

它在 `for` 循環中不起作用。我需要循環,因爲我的實際數據框有100多個變量。我得到這個錯誤:`Error in *tmp*[[j]] : recursive indexing failed at level 2`(遞歸索引在第2層失敗) – amzu

+0

`for (i in seq(2, 22, 11)) { test3[which(rowSums(is.na(test3) * 1L) >= 4), as.character(i:(i + 9))] <- NA }` – amzu

+0

我不明白爲什麼需要 for 循環。但無論如何,把函數改爲 function(x){ifelse(sum(!is.na(x))<4,x,NA)},至少可以先擺脫 TRUE/FALSE。 – lukeA