0
我正在使用 flights14.csv 資料來測試 data.table 中的一些代碼。問題是:使用 .SD、lapply 和函數進行的 data.table 轉換只返回一行。
具體來說,我想將所有整數(integer)變量轉換爲數值(numeric)類型。
> glimpse(flights)
Observations: 253,316
Variables: 17
$ year <int> 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,...
$ month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ day <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ dep_time <int> 914, 1157, 1902, 722, 1347, 1824, 2133, 1542, 1509, 1848, 1655, 1752, 1253, 1907, 1720, 1733, 1640, 1714, 1611, 553, 62...
$ dep_delay <int> 14, -3, 2, -8, 2, 4, -2, -3, -1, -2, -5, 7, 3, 142, -5, 18, 25, -1, 191, -7, -7, -8, -2, -3, 44, -1, 3, -5, 26, 52, 55,...
$ arr_time <int> 1238, 1523, 2224, 1014, 1706, 2145, 37, 1906, 1828, 2206, 2003, 2120, 1351, 2223, 1819, 2024, 2001, 2036, 1910, 739, 81...
$ arr_delay <int> 13, 13, 9, -26, 1, 0, -18, -14, -17, -14, -17, -5, 1, 133, -26, 69, 36, 1, 185, -6, 0, -17, 15, 1, 42, -2, 12, 24, 46, ...
$ cancelled <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ carrier <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA",...
$ tailnum <chr> "N338AA", "N335AA", "N327AA", "N3EHAA", "N319AA", "N3DEAA", "N323AA", "N328AA", "N5FJAA", "N3HYAA", "N5CFAA", "N332AA",...
$ flight <int> 1, 3, 21, 29, 117, 119, 185, 133, 145, 235, 172, 177, 178, 181, 256, 199, 211, 291, 300, 301, 303, 305, 307, 313, 317, ...
$ origin <chr> "JFK", "JFK", "JFK", "LGA", "JFK", "EWR", "JFK", "JFK", "JFK", "JFK", "EWR", "JFK", "JFK", "JFK", "JFK", "JFK", "JFK", ...
$ dest <chr> "LAX", "LAX", "LAX", "PBI", "LAX", "LAX", "LAX", "LAX", "MIA", "SEA", "MIA", "SFO", "BOS", "LAX", "BOS", "ORD", "IAH", ...
$ air_time <int> 359, 363, 351, 157, 350, 339, 338, 356, 161, 349, 161, 365, 39, 345, 35, 155, 234, 232, 214, 142, 143, 139, 145, 139, 1...
$ distance <int> 2475, 2475, 2475, 1035, 2475, 2454, 2475, 2475, 1089, 2422, 1085, 2586, 187, 2475, 187, 740, 1417, 1521, 1372, 733, 733...
$ hour <int> 9, 11, 19, 7, 13, 18, 21, 15, 15, 18, 16, 17, 12, 19, 17, 17, 16, 17, 16, 5, 6, 6, 7, 8, 10, 14, 10, 11, 13, 16, 17, 18...
$ min <int> 14, 57, 2, 22, 47,
我的代碼如下:
> data <- flights[, lapply(.SD, function(x){ifelse(is.integer(x), as.numeric(x), x)})]
類型轉換成功執行,但結果只包含一條記錄。
> glimpse(data)
Observations: 1
Variables: 18
$ year <dbl> 2014
$ month <dbl> 1
$ day <dbl> 1
$ dep_time <dbl> 914
$ dep_delay <dbl> 14
$ arr_time <dbl> 1238
$ arr_delay <dbl> 13
$ cancelled <dbl> 0
$ carrier <chr> "AA"
$ tailnum <chr> "N338AA"
$ flight <dbl> 1
$ origin <chr> "JFK"
$ dest <chr> "LAX"
$ air_time <dbl> 359
$ distance <dbl> 2475
$ hour <dbl> 9
$ min <dbl> 14
$ speed <dbl> 6.89415
你能向我解釋這個結果並告訴我如何修復代碼嗎?
您的建議將不勝感激。
您對 `ifelse` 的理解可能有誤:`ifelse` 返回的結果與其 test 參數的長度相同,而 `is.integer(x)` 只返回單個 TRUE/FALSE,因此每一列只保留了第一個元素。你應該改用普通的 if/else:`flights[, lapply(.SD, function(x) {if (is.integer(x)) as.numeric(x) else x})]`。 – nicola