我是一名學生,試圖分析哪些因素會增加北印度某景觀中捕食者殺死牲畜的可能性。爲了做到這一點,我需要一個用於最終邏輯迴歸模型的協變量列表。我的問題與其中一個協變量有關,即經常性(重複)牲畜捕殺事件的次數。經常性事件的定義是:發生在距前一次捕殺不到500米、且在前一次捕殺之後7天之內的捕殺。(注:強調「前一次」捕殺,是因爲只要相鄰兩次捕殺之間的間隔至多爲7天或500米,一組經常性事件整體上可以延伸超過7天或500米的閾值。)我的數據集包括以下內容:捕殺地點的GPS座標、捕殺日期、捕食者和獵物物種。對於這個問題,只有捕殺的日期和位置是相關的。下面是我的原始數據(本問題的目標:根據共享元素把成對的條目合併成組):
rawq <- structure(list(long = c(79.31957, 79.86758, 79.32283, 79.32253, 79.30502, 79.30047, 79.19935, 79.1984, 79.1984, 79.19318, 79.19247, 79.19243, 79.18512, 79.18333, 79.1541, 79.14438, 79.13998, 79.13538, 79.12522, 79.10168, 79.04163, 79.0405, 79.0364, 79.03505, 79.03473, 79.03428, 79.02517, 79.02458, 79.02428, 79.02265, 79.0197, 79.0197, 79.01967, 79.01965, 79.0193, 79.019, 79.01873, 79.0187, 79.0181, 79.01775, 79.00998, 79.00907, 79.00692, 79.00655, 79.0057, 79.00565, 79.00463, 79.00462, 79.00453, 79.00427, 79.0041, 79.00222, 79.00117, 79.00088, 79.00073, 78.9928, 78.9888, 78.98878, 78.98877, 78.9887, 78.98542, 78.98523, 78.9852, 78.98445, 78.97907, 78.9775, 78.9761, 78.97607, 78.97537, 78.97432, 78.97393, 78.97343, 78.97083, 78.95655, 78.9394, 78.92815, 78.92353, 78.92353, 78.92353, 78.92088, 78.92045, 78.91933, 78.91738, 78.90997, 78.90223, 78.90013, 78.90013, 78.88645, 78.8856, 78.8856, 78.8856, 78.88557, 78.86903, 78.86765, 78.85663, 78.8562, 78.85588, 78.8548, 78.83507, 78.80902),
lat = c(29.3684, 29.35495, 29.39068, 29.3907, 29.34828, 29.30717, 29.26702, 29.25967, 29.25967, 29.2672, 29.25588, 29.25588, 29.46787, 29.34175, 29.42, 29.42098, 29.41918, 29.34048, 29.49228, 29.53947, 29.37597, 29.37652, 29.3591, 29.35055, 29.31765, 29.37003, 29.3125, 29.35305, 29.40163, 29.39007, 29.37155, 29.35098, 29.35515, 29.35517, 29.37277, 29.37068, 29.37345, 29.36962, 29.32252, 29.37657, 29.35432, 29.37653, 29.38792, 29.36958, 29.36803, 29.36808, 29.33892, 29.37277, 29.36908, 29.36773, 29.34068, 29.40667, 29.3076, 29.40875, 29.32183, 29.4093, 29.4075, 29.40742, 29.40738, 29.35965, 29.36952, 29.35965, 29.35967, 29.39118, 29.38528, 29.36535, 29.3598, 29.3598, 29.40777, 29.36418, 29.30988, 29.37605, 29.36813, 29.30137, 29.40247, 29.40767, 29.40455, 29.40455, 29.40455, 29.4092, 29.40532, 29.35217, 29.40328, 29.4023, 29.38242, 29.37243, 29.37243, 29.37205, 29.36975, 29.36975, 29.36975, 29.36972, 29.3721, 29.37023, 29.36867, 29.38953, 29.36808, 29.36865, 29.3841, 29.35162),
Cattle.sp. = structure(c(1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Boffalo(Calf)", "Buffalo", "Buffalo(Calf)", "BufFalo(Calf)", "Buffalo(S/A/)", "Bullack(S/A/)", "Bullock", "Bullock(S,A/)", "Bullock(S/A)", "Bullock(S/A/)", "Cow", "Cow (Calf)", "Cow(calf)", "Cow(Calf)", "Cow(S/A)", "Cow(S/A/)", "Horse"), class "factor"),
Predator = structure(c(1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Leopard", "Tiger"), class = "factor"),
Date = structure(c(16891, 17036, 17141, 17141, 16833, 16898, 16845, 16845, 16845, 17125, 17125, 17125, 17005, 17100, 17015, 16813, 16886, 17064, 17096, 16937, 17070, 17101, 17020, 17079, 17164, 16993, 16958, 17010, 17132, 17075, 17023, 17010, 16975, 16975, 16987, 17121, 17054, 17090, 16938, 16984, 16928, 16967, 16927, 16967, 17084, 16975, 16975, 16927, 16975, 16972, 16977, 16928, 17020, 17030, 16979, 16822, 17087, 17156, 17156, 17114, 16959, 17400, 17035, 17037, 17056, 16845, 16984, 16984, 16961, 16861, 17100, 17010, 17034, 16823, 17039, 16819, 16968, 16968, 16968, 16802, 16942, 17098, 16975, 16975, 16836, 17138, 17138, 16808, 16936, 16941, 16949, 16949, 16986, 16986, 16986, 16914, 16925, 16884, 17130, 17041), class = "Date")),
.Names = c("long", "lat", "Cattle.sp.", "Predator", "Date"), row.names = c(NA, 100L), class = "data.frame")
下面是我所用代碼的一個示例。首先分別生成空間距離和時間間隔的成對矩陣。然後建立表格coloc.distance,其中包含彼此距離不到500米的所有點對。接着我選出時間間隔少於7天的行,得到由距離< 500米且時間間隔< 7天的所有點對組成的最終表格。
# Load the kill records from disk. The rest of the script reads the columns
# long, lat and Date from this data frame.
raw <- read.csv("newraw.csv", header = TRUE)
library(measurements)
library(fossil)
library(data.table)
# Build every pair of kills that lie closer than `sep.km` to each other and
# fewer than `max.days` days apart. The result, `final`, holds one row per
# pair: both row indices (SD.1, SD.2), the separation in space (dist) and
# time (time), and the coordinates and dates of the two kills.
raw$Date <- as.Date(raw$Date) # formatting date

# Pairwise separation matrices; entry [i, j] compares row i with row j of raw.
a <- as.matrix(dist(raw$Date)) # pairwise gap between kill dates
# earth.dist() takes longitude/latitude from the first two columns of its
# input, so select them explicitly instead of relying on raw's column order.
m <- as.matrix(earth.dist(raw[, c("long", "lat")])) # pairwise distances

sep.km <- 0.5 # threshold distance
max.days <- 7 # threshold gap between kill dates

# Choose all pairs for which the distance falls under the threshold.
coloc.distance <- data.table(which(m < sep.km, arr.ind = TRUE))
setnames(coloc.distance, c("row", "col"), c("SD.1", "SD.2")) # adjust names
# Keep each unordered pair once (row index < column index, i.e. the upper
# triangle); this also drops the diagonal where a kill is paired with itself.
coloc.distance <- coloc.distance[SD.1 < SD.2, ]

# A two-column index matrix picks m[SD.1[k], SD.2[k]] for every row k in one
# vectorised lookup, instead of forming one group per pair.
coloc.distance[, dist := m[cbind(SD.1, SD.2)]]
coloc.distance[, time := a[cbind(SD.1, SD.2)]]

# NOTE(review): the joins below match SD.1/SD.2 (positions in m) against the
# row names of raw, so they assume raw has default row names 1..n - confirm
# if raw is ever subset before this point.
killsite <- data.table(id = as.integer(rownames(raw)), raw)

# Attach coordinates and dates of both kills. Each join matches the key of
# coloc.distance against the first column of killsite (id); the i. prefix
# makes explicit that the values are taken from killsite.
setkey(coloc.distance, SD.1)
coloc.distance[killsite, c("long.1", "lat.1") := list(i.long, i.lat)]
setkey(coloc.distance, SD.2)
coloc.distance[killsite, c("long.2", "lat.2") := list(i.long, i.lat)]
setkey(coloc.distance, SD.1)
coloc.distance[killsite, "date.1" := list(i.Date)]
setkey(coloc.distance, SD.2)
coloc.distance[killsite, "date.2" := list(i.Date)]

# Keep only the pairs that are also close in time.
finalrows <- data.table(which(coloc.distance$time < max.days))
final <- coloc.distance[finalrows$V1, ]
print(final)
我的問題是,我想要的不只是成對的經常性事件。在某些情況下,經常性事件不止一兩次。我需要一種方法,把共享同一個點的點對合併成組。例如,如果我的代碼返回以下點對 -
[1,2] [1,3] [4,5] [6,7] [3,8]
我想要分配的組是[1,2,3,8]和[4,5]。有沒有可以自動生成這些組的函數?顯然,由於8與3相關,8也就與1和2相關。
這就是我希望從上面代碼得到的數據框「final」中獲得的輸出。
下面的輸出數據是該代碼針對前50個條目生成的final數據框:
# dput() output of the pair table `final`: 11 rows, one per pair of kills.
#   SD.1, SD.2       indices of the two kills in the pair
#   dist             distance between the two kills (presumably km, matching
#                    the sep.km threshold used to build it - confirm)
#   time             difference between date.1 and date.2 in days
#   long.1/lat.1     coordinates of kill SD.1
#   long.2/lat.2     coordinates of kill SD.2
#   date.1, date.2   kill dates (class "Date")
# The `sorted` attribute marks the table as keyed by SD.2.
final <- structure(list(SD.1 = c(3L, 8L, 11L, 33L, 35L, 46L, 44L, 46L, 49L, 47L, 58L),
SD.2 = c(4L, 9L, 12L, 34L, 40L, 49L, 50L, 50L, 50L, 51L, 59L),
dist = c(0.0291582094908988, 0, 0.00388155718182341, 0.00295090125908757, 0.448566716059333, 0.155426365139648, 0.301966362430361, 0.139316330866369, 0.152255819956674, 0.202390901062769, 0.00455334236486617),
time = c(0, 0, 0, 0, 3, 0, 5, 3, 3, 2, 0),
long.1 = c(79.32283, 79.1984, 79.19247, 79.01967, 79.0193, 79.00565, 79.00655, 79.00565, 79.00453, 79.00463, 78.98878),
lat.1 = c(29.39068, 29.25967, 29.25588, 29.35515, 29.37277, 29.36808, 29.36958, 29.36808, 29.36908, 29.33892, 29.40742),
long.2 = c(79.32253, 79.1984, 79.19243, 79.01965, 79.01775, 79.00453, 79.00427, 79.00427, 79.00427, 79.0041, 78.98877),
lat.2 = c(29.3907, 29.25967, 29.25588, 29.35517, 29.37657, 29.36908, 29.36773, 29.36773, 29.36773, 29.34068, 29.40738),
date.1 = structure(c(17141, 16845, 17125, 16975, 16987, 16975,16967, 16975, 16975, 16975, 17156), class = "Date"),
date.2 = structure(c(17141, 16845, 17125, 16975, 16984, 16975, 16972, 16972, 16972, 16977, 17156), class = "Date")),
.Names = c("SD.1", "SD.2", "dist", "time", "long.1", "lat.1", "long.2", "lat.2", "date.1", "date.2"),
sorted = "SD.2", class = c("data.table", "data.frame"), row.names = c(NA, -11L))
正如你所見,此表中有11對 -
[3,4] [8,9] [11,12 ] [33,34] [35,40] [46,49] [44,50] [46,50] [49,50] [47,51] [58,59]
理想的輸出如下 -
[3,4] [8,9] [11,12] [33,34] [35,40] [44,46,49,50] [58 ,59]
在這個有限的數據集中,只有一個組包含2個以上的事件。當然,在我的完整數據集裏還有更多這樣的組。這只是最容易取得的一個子集。
您可以爲'killsite'對象提供一些數據嗎?我可以將你的示例數據讀入一個名爲'raw'的對象,然後運行你的代碼,但運行到`coloc.distance[killsite, c("long.1","lat.1") := list(long, lat)]`時,因爲找不到對象'killsite'而失敗。 – meenaparam
糟糕。忘了在這裏定義它。現在嘗試運行代碼。對不起, –
太棒了!你是否也可以將`final <- coloc.distance[final$V1,]`改爲`final <- coloc.distance[finalrows$V1,]`,以使這一段正常工作? – meenaparam