2016-04-22 73 views
-1

拆分/融化 data.frame 數據:假設我有如下的 data.frame

# Sample input: `letters` holds one or more comma-separated labels per row,
# `value` holds the number attached to that row.
df <- data.frame(
  letters = c("a, b", "a", "b", "a, c"),
  value = c(1, 2, 3, 4)
)
df
#> letters value 
#> 1 a, b  1 
#> 2  a  2 
#> 3  b  3 
#> 4 a, c  4 

我想將它拆分/融化爲如下形式:

#> letters value 
#> 1  a  1 
#> 2  b  1 
#> 3  a  2 
#> 4  b  3 
#> 5  a  4 
#> 6  c  4 

也就是如上所示,讓 letters 中的每個值各佔一行。

回答

3

我們可以用cSplit

library(splitstackshape)

# Split the `letters` column on ", " and stack the pieces into long format,
# giving one output row per label.
cSplit(df, splitCols = "letters", sep = ", ", direction = "long")
# letters value 
#1:  a  1 
#2:  b  1 
#3:  a  2 
#4:  b  3 
#5:  a  4 
#6:  c  4 
3

您可以對每條記錄使用 strsplit:按逗號(,)拆分字符串,然後按拆分出的元素個數重複 value 字段。

# Split each row's comma-separated labels. The pattern also consumes any
# whitespace after the comma, so the pieces come out as "b" rather than " b".
char <- strsplit(as.character(df$letters), ",\\s*")

# Repeat each value once per label found in its row so both columns line up.
# vapply() guarantees an integer vector even when `char` is empty.
data.frame(
  letter = unlist(char),
  value = rep(df$value, vapply(char, length, integer(1)))
)


    letter value 
#1  a  1 
#2  b  1 
#3  a  2 
#4  b  3 
#5  a  4 
#6  c  4 

若想更快得到結果,可以嘗試根據 @docendo discimus 的評論更新後的版本:

# Literal (non-regex) split on the comma; fixed = TRUE avoids regex matching.
# Spelled TRUE rather than T, because T is an ordinary variable that can be
# reassigned.
char <- strsplit(as.character(df$letters), ",", fixed = TRUE)

# trimws() drops the space left after each comma; lengths() gives the number
# of labels per row, i.e. how many times each value has to be repeated.
data.frame(
  letter = trimws(unlist(char)),
  value = rep(df$value, lengths(char))
)
+2

你可以使用 `fixed = TRUE` 來加快字符串分割,並用 `lengths(char)` 代替 `sapply(char, FUN = length)` –

1

你也可以像這樣一步一步地做:

# Step-by-step base R version: expand comma-separated labels to one row each.
df <- data.frame(letters = c("a, b", "a", "b", "a, c"), value = c(1, 2, 3, 4))

# Step 1: split every row's labels on the comma (literal match).
parts <- strsplit(as.character(df$letters), split = ",", fixed = TRUE)

# Step 2: flatten to a single vector; trimws() removes the space that follows
# each comma so the labels are "b"/"c" rather than " b"/" c".
l <- trimws(unlist(parts))

# Step 3: repeat each value once per label in its row. Using the actual piece
# count (instead of a fixed 2) keeps rows with three or more labels aligned.
x <- rep(df$value, times = lengths(parts))

# Step 4: assemble the long-format result (this overwrites `df`).
df <- data.frame(l = l, x = x)
3
library(dplyr)
library(tidyr)

# Sample input: comma-separated labels in `letters`, one number per row.
df <- data.frame(
  letters = c("a, b", "a", "b", "a, c"),
  value = c(1, 2, 3, 4)
)

# Turn `letters` into a list-column of individual labels, then expand that
# list-column so every label gets its own row.
df %>%
  mutate(letters = strsplit(as.character(letters), ", ")) %>%
  unnest(letters)

Source: local data frame [6 x 2] 

    value letters 
    (dbl) (chr) 
1  1  a 
2  1  b 
3  2  a 
4  3  b 
5  4  a 
6  4  c 
+1

好答案 - 你也可以使用 `strsplit(as.character(letters), ", ", fixed = TRUE)` 來更快地分割字符串 –