2014-02-25 28 views
0

在 R 中,我們如何基於另一列中的唯一值,以編程方式創建新變量?(標題:R 根據列值創建 1/0(T/F)對比矩陣/新變量)

以下是一個作為起點的簡單數據框(data.frame)示例:

# Starting data: a data.frame literal with 11 rows (row.names = c(NA, -11L))
# and two factor columns:
#   obsNum     - labels "obs1" .. "obs11" (stored in sorted-label order, so the
#                integer codes map the rows to obs1, obs2, ..., obs11)
#   charVector - labels "blue", "green", "red"
# NOTE(review): this looks like dput() output; the value is not assigned to a
# name here, while the answers refer to it as `dat` / `df` - confirm.
structure(list(obsNum = structure(c(1L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 2L, 3L), .Label = c("obs1", "obs10", "obs11", "obs2", 
"obs3", "obs4", "obs5", "obs6", "obs7", "obs8", "obs9"), class = "factor"), 
    charVector = structure(c(1L, 2L, 3L, 2L, 2L, 3L, 1L, 1L, 
    2L, 2L, 3L), .Label = c("blue", "green", "red"), class = "factor")), .Names = c("obsNum", 
"charVector"), class = "data.frame", row.names = c(NA, -11L)) 

obsNum charVector 
    obs1  blue 
    obs2  green 
    obs3   red 
    obs4  green 
    obs5  green 
    obs6   red 
    obs7  blue 
    obs8  blue 
    obs9  green 
obs10  green 
obs11   red 

而我希望最終得到的結果是:

# Desired result: the same 11-row data.frame (factor columns obsNum and
# charVector) plus three integer indicator columns named after the charVector
# labels - blue, green, red. Each indicator is 1L on the rows whose charVector
# value equals the column name and 0L elsewhere.
structure(list(obsNum = structure(c(1L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 2L, 3L), .Label = c("obs1", "obs10", "obs11", "obs2", 
"obs3", "obs4", "obs5", "obs6", "obs7", "obs8", "obs9"), class = "factor"), 
    charVector = structure(c(1L, 2L, 3L, 2L, 2L, 3L, 1L, 1L, 
    2L, 2L, 3L), .Label = c("blue", "green", "red"), class = "factor"), 
    blue = c(1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L), green = c(0L, 
    1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L), red = c(0L, 0L, 
    1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L)), .Names = c("obsNum", 
"charVector", "blue", "green", "red"), class = "data.frame", row.names = c(NA, 
-11L)) 

obsNum charVector blue green red 
    obs1  blue 1  0 0 
    obs2  green 0  1 0 
    obs3  red 0  0 1 
    obs4  green 0  1 0 
    obs5  green 0  1 0 
    obs6  red 0  0 1 
    obs7  blue 1  0 0 
    obs8  blue 1  0 0 
    obs9  green 0  1 0 
obs10  green 0  1 0 
obs11  red 0  0 1 

我對多步驟的解決方案持完全開放的態度,例如:首先創建新變量,然後根據 charVector 逐一(每次一個變量)為每個新變量(名稱)賦值。只要觀察的順序得以保留,先創建一個單獨的 data.frame,再用 cbind 合併到起始數據上也完全可以。

在此先感謝和問候!

回答

2
# Build the indicator matrix for charVector first, then bind it onto dat.
# The "- 1" in the formula removes the intercept, so every level of
# charVector gets its own 0/1 column (named "charVector<level>").
indicators <- model.matrix(~ . - 1, data = dat["charVector"])
cbind(dat, indicators)

## obsNum charVector charVectorblue charVectorgreen charVectorred                                           
## 1 obs1  blue    1    0    0                                           
## 2 obs2  green    0    1    0                                           
## 3 obs3  red    0    0    1                                           
## 4 obs4  green    0    1    0                                           
## 5 obs5  green    0    1    0                                           
## 6 obs6  red    0    0    1                                           
## 7 obs7  blue    1    0    0                                           
## 8 obs8  blue    1    0    0                                           
## 9 obs9  green    0    1    0                                           
## 10 obs10  green    0    1    0                                           
## 11 obs11  red    0    0    1 
+0

'@Jake Burkhead' 和 '@Ananda Mahto':兩個都是很棒且可行的解決方案。抱歉這麼晚才回覆。謝謝! – user2621147

2

你可以使用 table(並用 as.data.frame.matrix 保留表格的寬格式):

# Cross-tabulate the two columns of df (obsNum x charVector) and keep the
# result in wide form: one row per obsNum, one count column per charVector.
counts <- as.data.frame.matrix(table(df))

# The table rows follow the factor-level order of obsNum, so look up the row
# belonging to each observation to restore the original row order of df.
row_idx <- match(df$obsNum, rownames(counts))
cbind(df, counts[row_idx, ])
#  obsNum charVector blue green red 
# obs1 obs1  blue 1  0 0 
# obs2 obs2  green 0  1 0 
# obs3 obs3  red 0  0 1 
# obs4 obs4  green 0  1 0 
# obs5 obs5  green 0  1 0 
# obs6 obs6  red 0  0 1 
# obs7 obs7  blue 1  0 0 
# obs8 obs8  blue 1  0 0 
# obs9 obs9  green 0  1 0 
# obs10 obs10  green 0  1 0 
# obs11 obs11  red 0  0 1 
0

下面是一種使用循環來定義 0/1 變量的方式。它利用了 TRUE/FALSE 轉換成數字時會變成 1/0 這一事實。

# Append one 0/1 indicator column to df for every distinct value of
# df$charVector, using a loop over those values.

# Distinct values in order of first appearance; kept as plain strings so they
# can be used both as column names and as comparison targets.
colors <- unique(df$charVector)
color_names <- as.character(colors)

# Pre-allocate a numeric matrix with one row per observation.
# nrow(df) is the scalar row count; dim(df[1]) would be a two-element vector
# (rows, 1), which is not a valid single value for `nrow`.
to.append <- matrix(0, nrow = nrow(df), ncol = length(color_names))
colnames(to.append) <- color_names

for (i in seq_along(color_names)) {
  # The logical comparison is stored into a numeric matrix, so TRUE/FALSE
  # become 1/0.
  to.append[, i] <- df$charVector == color_names[i]
}
df <- cbind(df, to.append)