1
我有一些樹的結構模型(如樹木與木材,而不是統計樹木)。這些模型可以用簡單的數據框表示,每行表示一個分支段,並且包含關於該段的長度和父行的信息。例如爲:如何高效地計算樹形拓撲中的路徑長度?
len parent_row
.3 0
.4 1
.2 2
.5 1
...
我需要計算的路徑長度的至少每一個尖端,但可能很容易地在每個「節點」或分支端。
我想知道是否有一個更有效的方式做到這一點比循環通過段?下面的例子足夠快地運行,但是當運行跨越數千個分段和數百棵樹時,需要數小時/天。我嘗試遞歸,雖然優雅,但速度非常緩慢。更好的解決方案比我的下面非常感謝!
感謝, 艾莉
calc_pathlen <- function(tree_structure) {
tree_structure[1,]$path_len = tree_structure[1,]$len
for (this_row in 2:nrow(tree_structure)) {
prev_len = tree_structure[ tree_structure[this_row,]$parent_row, ]$path_len
tree_structure[this_row,]$path_len = tree_structure[this_row,]$len + prev_len
}
return(tree_structure)
}
treestr =
structure(list(len = c(2.3083, 1.632, 1.5825, 1.5592, 1.4847,
1.367, 1.3112, 1.3211, 1.4324, 1.295, 1.1503, 0.989, 0.8525,
0.9726, 0.8982, 0.8836, 0.8605, 0.7188, 0.5755, 0.5589, 0.6966,
0.6531, 0.6741, 0.7582, 0.817, 0.7074, 0.7248, 0.5725, 0.6796,
0.405, 0.5567, 0.6619, 0.473, 0.3485, 0.3946, 0.3593, 0.6572,
0.3869, 0.4623, 3.4597, 0.5878, 0.7538, 0.3745, 2.3949, 0.3239,
2.2521, 0.7563, 6.6197, 1.026, 0.9864, 0.6527, 0.6708, 0.4002,
0.5858, 1.3728, 0.4469, 0.3309, 0.7075, 0.7637, 0.8429, 0.9874,
0.1311, 0.3487, 1.037, 0.9087, 0.8871, 1.0236, 0.4481, 0.5124,
0.4371, 0.4766, 0.4553, 0.5172, 0.4781, 0.5084, 0.4263, 0.3542,
0.3062, 0.2901, 0.2042, 0.3126, 0.2366, 0.2714, 0.2741, 0.3511,
0.1613, 0.3028, 0.1689, 0.0417, 0.2132, 0.1269, 0.2181, 0.1868,
0.1553, 0.1151, 0.1602, 0.1786, 0.1649, 0.1967, 0.1584, 0.1366,
0.13, 0.1722, 0.1725, 0.111, 0.1752, 0.1306, 0.1582, 0.0942,
0.041, 0.356, 0.1958, 0.1212, 0.4129, 0.3499, 0.1706, 0.4647,
0.272, 0.4036, 0.0949, 0.1161, 0.1271, 0.2153, 0.1685, 0.181,
0.1302, 0.1229, 0.1146, 0.1393, 0.1722, 0.2033, 0.099, 0.1988,
0.1264, 0.1377, 0.1634, 0.2223, 0.1466, 0.1376, 0.1788, 0.1946,
0.1146, 0.2983, 0.1176, 0.0951, 0.1279, 0.095, 0.1909, 0.162,
0.1562, 0.1225, 0.1471, 0.1976, 0.2194, 0.1256, 0.2105, 0.1147,
0.1855, 0.1011, 0.1735, 0.1254, 0.1793, 0.8308, 0.2481, 0.2479,
0.1788, 0.2907, 0.2904, 0.3411, 0.224, 0.2298, 0.1942, 0.1872,
0.3311, 0.2295, 0.1466, 4.4547, 0.553, 0.2382, 0.2559, 0.2991,
0.3763, 0.117, 0.2036, 0.1795, 0.1933, 0.1765, 0.3909, 0.4619,
2.5901, 0.2655, 0.5306, 0.6369, 0.478, 2.5033, 0.4954, 0.2455,
0.3417, 0.0605, 0.4264, 0.4569, 0.3266, 0.0666, 0.2642, 0.3017,
0.0154, 0.3844, 0.2846, 0.2823, 0.3533, 0.1624, 0.1551, 0.1903,
0.169, 0.1362, 0.1714, 0.1899, 0.1539, 0.1477, 0.1815, 0.2597,
0.1302, 0.225, 0.3408, 0.2535, 0.521, 0.2328, 0.2115, 0.2387,
0.1519, 0.1885, 0.1678, 0.1398, 0.3438, 0.458, 0.197, 0.2821,
0.3694, 0.2039, 0.1518, 0.2985, 1.2251, 0.3328, 0.1593, 0.2945,
0.1412, 0.2308, 0.2833, 2.6558, 0.3388, 0.3979, 0.2965, 0.2548,
3.4452, 0.1267, 0.1801, 0.1494, 0.4831, 2.552, 0.1391, 0.3995,
0.2753, 0.4019, 0.39, 0.7409, 0.5362, 0.7483, 1.6991, 0.2951,
0.389, 0.7077, 0.7083, 0.6922, 0.5679, 0.2076, 0.2729, 0.3376,
0.2368, 0.1639, 0.1788, 0.1885, 0.2328, 0.2057, 0.2091, 0.2673,
0.1848, 0.1389, 0.1879, 0.1537, 0.3528, 0.2515, 0.2249, 0.259,
0.1838, 0.2231, 0.228, 0.1753, 0.267, 0.2278, 0.257), parent_row = c(0L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L,
41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 60L, 8L, 11L, 63L, 64L, 65L, 66L,
67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L,
80L, 81L, 82L, 83L, 84L, 17L, 86L, 87L, 88L, 89L, 90L, 91L, 92L,
93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L,
105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L,
116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L, 126L,
127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L,
138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L,
149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L,
160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L,
171L, 172L, 173L, 174L, 175L, 176L, 177L, 178L, 179L, 180L, 181L,
182L, 183L, 184L, 185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L,
193L, 194L, 195L, 13L, 197L, 13L, 199L, 16L, 201L, 17L, 203L,
204L, 19L, 206L, 207L, 22L, 209L, 210L, 211L, 212L, 213L, 214L,
215L, 216L, 217L, 218L, 219L, 220L, 221L, 222L, 223L, 224L, 225L,
226L, 227L, 228L, 229L, 230L, 231L, 232L, 233L, 234L, 235L, 236L,
237L, 238L, 239L, 240L, 241L, 242L, 243L, 244L, 245L, 246L, 247L,
248L, 249L, 250L, 251L, 252L, 253L, 254L, 255L, 256L, 257L, 258L,
259L, 260L, 261L, 262L, 263L, 264L, 265L, 266L, 267L, 268L, 269L,
270L, 271L, 272L, 273L, 22L, 275L, 276L, 277L, 278L, 279L, 280L,
281L, 282L, 283L, 284L, 22L, 286L, 287L, 23L, 289L, 290L, 291L,
292L, 293L, 294L, 295L, 296L, 297L, 298L, 299L), path_len = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("len",
"parent_row", "path_len"), row.names = c(NA, 300L), class = "data.frame")
calc_pathlen(treestr)