0
我正在嘗試構建如下的數據幀(如DataProfile
)的彙總表。 這個想法是將每一列轉換成一行,併爲count,nulls,not nulls,unique添加變量,並添加這些變量的額外變量。轉置數據幀變量並在[r]中添加空值,唯一的計數
看起來應該有更好的方法來做到這一點。有沒有這樣的功能?
#trying to write the functions within dplyr & magrittr framework
library(tidyverse)
mtcars[2,2] <- NA # Add a null to test completeness
#
total <- mtcars %>% summarise_all(funs(n())) %>% melt
nulls <- mtcars %>% summarise_all(funs(sum(is.na(.)))) %>% melt
filled <- mtcars %>% summarise_all(funs(sum(!is.na(.)))) %>% melt
uniques <- mtcars %>% summarise_all(funs(length(unique(.)))) %>% melt
mtcars %>% summarise_all(funs(n_distinct(.))) %>% melt
#Build a Data Frame from names of mtcars and add variables with mutate
DataProfile <- as.data.frame(names(mtcars))
DataProfile <- DataProfile %>% mutate(Total = total$value,
Nulls = nulls$value,
Filled = filled $value,
Complete = Filled/Total,
Cardinality = uniques$value,
Uniqueness = Cardinality/Total,
Distinctness = Cardinality/Filled)
DataProfile
#These are other attempts with Base R, but they are harder to read and don't play well with summarise_all
sapply(mtcars, function(x) length(unique(x[!is.na(x)]))) %>% melt
rapply(mtcars,function(x)length(unique(x))) %>% melt