2017-05-26 75 views
0

我正在使用 flights14.csv 數據來測試 data.table 中的一些代碼。問題是:使用 .SD、lapply 和函數做 data.table 轉換時只返回一行。

特別是,我想將所有整數變量轉換爲數字。

> glimpse(flights) 
Observations: 253,316 
Variables: 17 
$ year  <int> 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,... 
$ month  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,... 
$ day  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,... 
$ dep_time <int> 914, 1157, 1902, 722, 1347, 1824, 2133, 1542, 1509, 1848, 1655, 1752, 1253, 1907, 1720, 1733, 1640, 1714, 1611, 553, 62... 
$ dep_delay <int> 14, -3, 2, -8, 2, 4, -2, -3, -1, -2, -5, 7, 3, 142, -5, 18, 25, -1, 191, -7, -7, -8, -2, -3, 44, -1, 3, -5, 26, 52, 55,... 
$ arr_time <int> 1238, 1523, 2224, 1014, 1706, 2145, 37, 1906, 1828, 2206, 2003, 2120, 1351, 2223, 1819, 2024, 2001, 2036, 1910, 739, 81... 
$ arr_delay <int> 13, 13, 9, -26, 1, 0, -18, -14, -17, -14, -17, -5, 1, 133, -26, 69, 36, 1, 185, -6, 0, -17, 15, 1, 42, -2, 12, 24, 46, ... 
$ cancelled <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 
$ carrier <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA",... 
$ tailnum <chr> "N338AA", "N335AA", "N327AA", "N3EHAA", "N319AA", "N3DEAA", "N323AA", "N328AA", "N5FJAA", "N3HYAA", "N5CFAA", "N332AA",... 
$ flight <int> 1, 3, 21, 29, 117, 119, 185, 133, 145, 235, 172, 177, 178, 181, 256, 199, 211, 291, 300, 301, 303, 305, 307, 313, 317, ... 
$ origin <chr> "JFK", "JFK", "JFK", "LGA", "JFK", "EWR", "JFK", "JFK", "JFK", "JFK", "EWR", "JFK", "JFK", "JFK", "JFK", "JFK", "JFK", ... 
$ dest  <chr> "LAX", "LAX", "LAX", "PBI", "LAX", "LAX", "LAX", "LAX", "MIA", "SEA", "MIA", "SFO", "BOS", "LAX", "BOS", "ORD", "IAH", ... 
$ air_time <int> 359, 363, 351, 157, 350, 339, 338, 356, 161, 349, 161, 365, 39, 345, 35, 155, 234, 232, 214, 142, 143, 139, 145, 139, 1... 
$ distance <int> 2475, 2475, 2475, 1035, 2475, 2454, 2475, 2475, 1089, 2422, 1085, 2586, 187, 2475, 187, 740, 1417, 1521, 1372, 733, 733... 
$ hour  <int> 9, 11, 19, 7, 13, 18, 21, 15, 15, 18, 16, 17, 12, 19, 17, 17, 16, 17, 16, 5, 6, 6, 7, 8, 10, 14, 10, 11, 13, 16, 17, 18... 
$ min  <int> 14, 57, 2, 22, 47, 

我的代碼如下:

> data <- flights[, lapply(.SD, function(x){ifelse(is.integer(x), as.numeric(x), x)})] 

轉換成功執行,但只返回一條記錄。

> glimpse(data) 
Observations: 1 
Variables: 18 
$ year  <dbl> 2014 
$ month  <dbl> 1 
$ day  <dbl> 1 
$ dep_time <dbl> 914 
$ dep_delay <dbl> 14 
$ arr_time <dbl> 1238 
$ arr_delay <dbl> 13 
$ cancelled <dbl> 0 
$ carrier <chr> "AA" 
$ tailnum <chr> "N338AA" 
$ flight <dbl> 1 
$ origin <chr> "JFK" 
$ dest  <chr> "LAX" 
$ air_time <dbl> 359 
$ distance <dbl> 2475 
$ hour  <dbl> 9 
$ min  <dbl> 14 
$ speed  <dbl> 6.89415 

你能向我解釋這個結果並告訴我如何修復代碼嗎?

您的建議將不勝感激。

+0

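您對 `ifelse` 的理解可能有偏差:`ifelse(test, yes, no)` 返回的結果與 `test` 的長度相同,而 `is.integer(x)` 對整列只返回一個長度爲 1 的邏輯值,所以每一列只保留了第一個元素。你應該改用 `if`/`else`:'flights[, lapply(.SD, function(x) { if (is.integer(x)) as.numeric(x) else x })]'。 – nicola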

回答

1
library(data.table)

# Download the flights14 sample data set used by the data.table vignettes.
flights <- fread(
  "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"
)

# Names of every integer column. A data.table is a list of columns, so
# is.integer() is applied once per column; vapply() guarantees a logical
# vector regardless of how many columns there are.
needed_names <- names(flights)[vapply(flights, is.integer, logical(1))]

# Convert those columns to double by reference (the table is not copied).
# Wrapping the variable in parentheses makes `:=` use its *value* as the
# target column names; .SDcols restricts .SD to exactly the same columns.
flights[, (needed_names) := lapply(.SD, as.numeric), .SDcols = needed_names]

str(flights)
Classes ‘data.table’ and 'data.frame': 253316 obs. of 11 variables: 
$ year  : num 2014 2014 2014 2014 2014 ... 
$ month : num 1 1 1 1 1 1 1 1 1 1 ... 
$ day  : num 1 1 1 1 1 1 1 1 1 1 ... 
$ dep_delay: num 14 -3 2 -8 2 4 -2 -3 -1 -2 ... 
$ arr_delay: num 13 13 9 -26 1 0 -18 -14 -17 -14 ... 
$ carrier : chr "AA" "AA" "AA" "AA" ... 
$ origin : chr "JFK" "JFK" "JFK" "LGA" ... 
$ dest  : chr "LAX" "LAX" "LAX" "PBI" ... 
$ air_time : num 359 363 351 157 350 339 338 356 161 349 ... 
$ distance : num 2475 2475 2475 1035 2475 ... 
$ hour  : num 9 11 19 7 13 18 21 15 15 18 ... 
- attr(*, ".internal.selfref")=<externalptr> 
+1

選擇整數列名稱的簡短方法:'names(flights)[sapply(flights,is.integer)]' – Jaap