我想使用 tmerge() 函數來轉換數據集,以便用於處理重複事件的 Cox 迴歸 Andersen-Gill 擴展模型。參見 Therneau 撰寫的出色的 vignette。本問題的主題是:使用 survival::tmerge() 在 Cox 迴歸的 Andersen-Gill 計數過程公式中指定無風險區間。
我想指定:個體在事件發生後的30天內不處於重複事件的風險之中,也就是說,我希望個體暫時退出風險集;例如,如果某個事件發生在個體不處於風險之中的期間,該事件將被忽略。
一種簡單直接的做法是迭代地添加所有事件,然後直接給 tstart 變量加上30。但是,這可能導致出現 tstart >= tstop 的觀察值,在更大、更複雜的數據集中將會是災難性的。
我嘗試利用 tmerge() 函數配合 for 循環來糾正上面提到的問題。在這個例子中,我將使用 survival 包中的 cgd 數據。
編輯:更正後的 for 循環見下文。
library(survival)
# Bring the cgd0 data set into the workspace under its own name.
cgd0 <- cgd0

# Starting point for the counting-process data: the first 13 columns of cgd0,
# with each subject's interval ending at futime.
newcgd <- tmerge(data1 = cgd0[, 1:13], data2 = cgd0, id = id, tstop = futime)

for (i in seq_len(7)) {
  # Column of cgd0 that holds the i-th event time (etime1 ... etime7).
  x <- paste0("etime", i)

  # Merge the i-th event in as the event indicator `infect`.
  newcgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))

  # Use the rows that end in an event as the second data set, so that
  # `cum_infect` is built from those rows' stop times.
  newcgd <- tmerge(
    newcgd, subset(newcgd, infect == 1),
    id = id, cum_infect = cumtdc(tstop)
  )

  # Rows whose cumulative count equals i get their start time pushed
  # forward by 30 days.
  shifted <- which(newcgd$cum_infect == i)
  newcgd[shifted, "tstart"] <- newcgd[shifted, "tstart"] + 30

  # Discard rows left with tstart >= tstop after the shift.
  still_valid <- which(newcgd$tstart < newcgd$tstop)
  newcgd <- newcgd[still_valid, ]
}

# Show the "tcount" attribute attached to the merged data.
attr(newcgd, "tcount")
# early late gap within boundary leading trailing tied
#infect 0 0 0 44 0 0 0 0
#cum_infect 0 0 0 0 44 0 0 0
#infect 0 0 4 11 0 1 1 0
#cum_infect 0 0 0 0 11 0 45 0
#infect 0 0 2 6 0 0 0 0
#cum_infect 0 0 0 0 6 0 56 0
#infect 0 0 1 2 0 0 0 0
#cum_infect 0 0 0 0 6 0 58 0
#infect 0 0 0 2 0 0 0 0
#cum_infect 0 0 0 0 8 0 58 0
#infect 0 0 0 1 0 0 0 0
#cum_infect 0 0 0 0 9 0 58 0
#infect 0 0 0 1 0 0 0 0
#cum_infect 0 0 0 0 10 0 58 0
我相信這個解決方案是正確的。然而,這是生存分析中的一個常見問題,我擔心的是:
i)我忽略了某些東西,代碼實際上並沒有做到我所認爲的那樣;
ii)我忽略了 R 中已有的、經過驗證的實現方式;
iii)如果 i)和 ii)都不成問題,我認爲這段代碼效率不高,想知道是否有明顯的方法可以提高執行速度。
--------------------------------------------- -------------------------------------------------- ------------------------------------
編輯:帶註釋的進一步錯誤檢查,希望這能澄清我想做的事情。從概念上講,我指定個體在經歷一次事件後的30天內沒有再次發生事件的風險。在 Andersen-Gill 計數過程公式中,每一行代表一個觀察值,包含開始時間 tstart 和停止時間 tstop,以及一個指示變量(本例中爲 infect),用來標明該觀察是以事件(infect == 1)還是以刪失(infect == 0)結束。在這裏,我手動逐步執行上述 for 循環中的各個步驟,並統計在指定30天免疫期時每一輪循環中發生的事件數以及總隨訪時間。爲了完整起見,隨後再把同樣的代碼寫成 for 循環。結果顯示在下面的代碼塊中。
# Manual, step-by-step version of the loop. Two data sets are carried along:
#   immunecgd - start times are pushed forward 30 days after each event
#   newcgd    - reference copy that receives the same merges but no shift
# After every step the event total and the summed interval length
# (tstop - tstart) of both data sets are stored in etime<k> / futime<k>.
newcgd <- tmerge(data1 = cgd0[, 1:13], data2 = cgd0, id = id, tstop = futime)

### 1st event
x <- "etime1"
# Immunity version; this first step branches off from newcgd.
immunecgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))
immunecgd <- tmerge(immunecgd, subset(immunecgd, infect == 1),
                    id = id, cum_infect = cumtdc(tstop))
imm_rows <- which(immunecgd$cum_infect == 1)
immunecgd[imm_rows, "tstart"] <- immunecgd[imm_rows, "tstart"] + 30
immunecgd <- immunecgd[which(immunecgd$tstart < immunecgd$tstop), ]
# Reference version: tstart is written back unchanged.
newcgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))
newcgd <- tmerge(newcgd, subset(newcgd, infect == 1),
                 id = id, cum_infect = cumtdc(tstop))
ref_rows <- which(newcgd$cum_infect == 1)
newcgd[ref_rows, "tstart"] <- newcgd[ref_rows, "tstart"]
newcgd <- newcgd[which(newcgd$tstart < newcgd$tstop), ]
etime1 <- c(sum(immunecgd$infect), sum(newcgd$infect))
futime1 <- c(with(immunecgd, sum(tstop - tstart)),
             with(newcgd, sum(tstop - tstart)))

### 2nd event
x <- "etime2"
immunecgd <- tmerge(immunecgd, cgd0, id = id, infect = event(cgd0[, x]))
immunecgd <- tmerge(immunecgd, subset(immunecgd, infect == 1),
                    id = id, cum_infect = cumtdc(tstop))
imm_rows <- which(immunecgd$cum_infect == 2)
immunecgd[imm_rows, "tstart"] <- immunecgd[imm_rows, "tstart"] + 30
immunecgd <- immunecgd[which(immunecgd$tstart < immunecgd$tstop), ]
newcgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))
newcgd <- tmerge(newcgd, subset(newcgd, infect == 1),
                 id = id, cum_infect = cumtdc(tstop))
ref_rows <- which(newcgd$cum_infect == 2)
newcgd[ref_rows, "tstart"] <- newcgd[ref_rows, "tstart"]
newcgd <- newcgd[which(newcgd$tstart < newcgd$tstop), ]
etime2 <- c(sum(immunecgd$infect), sum(newcgd$infect))
futime2 <- c(with(immunecgd, sum(tstop - tstart)),
             with(newcgd, sum(tstop - tstart)))

### 3rd event
x <- "etime3"
immunecgd <- tmerge(immunecgd, cgd0, id = id, infect = event(cgd0[, x]))
immunecgd <- tmerge(immunecgd, subset(immunecgd, infect == 1),
                    id = id, cum_infect = cumtdc(tstop))
imm_rows <- which(immunecgd$cum_infect == 3)
immunecgd[imm_rows, "tstart"] <- immunecgd[imm_rows, "tstart"] + 30
immunecgd <- immunecgd[which(immunecgd$tstart < immunecgd$tstop), ]
newcgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))
newcgd <- tmerge(newcgd, subset(newcgd, infect == 1),
                 id = id, cum_infect = cumtdc(tstop))
ref_rows <- which(newcgd$cum_infect == 3)
newcgd[ref_rows, "tstart"] <- newcgd[ref_rows, "tstart"]
newcgd <- newcgd[which(newcgd$tstart < newcgd$tstop), ]
etime3 <- c(sum(immunecgd$infect), sum(newcgd$infect))
futime3 <- c(with(immunecgd, sum(tstop - tstart)),
             with(newcgd, sum(tstop - tstart)))

### 4th event
x <- "etime4"
immunecgd <- tmerge(immunecgd, cgd0, id = id, infect = event(cgd0[, x]))
immunecgd <- tmerge(immunecgd, subset(immunecgd, infect == 1),
                    id = id, cum_infect = cumtdc(tstop))
imm_rows <- which(immunecgd$cum_infect == 4)
immunecgd[imm_rows, "tstart"] <- immunecgd[imm_rows, "tstart"] + 30
immunecgd <- immunecgd[which(immunecgd$tstart < immunecgd$tstop), ]
newcgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))
newcgd <- tmerge(newcgd, subset(newcgd, infect == 1),
                 id = id, cum_infect = cumtdc(tstop))
ref_rows <- which(newcgd$cum_infect == 4)
newcgd[ref_rows, "tstart"] <- newcgd[ref_rows, "tstart"]
newcgd <- newcgd[which(newcgd$tstart < newcgd$tstop), ]
etime4 <- c(sum(immunecgd$infect), sum(newcgd$infect))
futime4 <- c(with(immunecgd, sum(tstop - tstart)),
             with(newcgd, sum(tstop - tstart)))

### 5th event
x <- "etime5"
immunecgd <- tmerge(immunecgd, cgd0, id = id, infect = event(cgd0[, x]))
immunecgd <- tmerge(immunecgd, subset(immunecgd, infect == 1),
                    id = id, cum_infect = cumtdc(tstop))
imm_rows <- which(immunecgd$cum_infect == 5)
immunecgd[imm_rows, "tstart"] <- immunecgd[imm_rows, "tstart"] + 30
immunecgd <- immunecgd[which(immunecgd$tstart < immunecgd$tstop), ]
newcgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))
newcgd <- tmerge(newcgd, subset(newcgd, infect == 1),
                 id = id, cum_infect = cumtdc(tstop))
ref_rows <- which(newcgd$cum_infect == 5)
newcgd[ref_rows, "tstart"] <- newcgd[ref_rows, "tstart"]
newcgd <- newcgd[which(newcgd$tstart < newcgd$tstop), ]
etime5 <- c(sum(immunecgd$infect), sum(newcgd$infect))
futime5 <- c(with(immunecgd, sum(tstop - tstart)),
             with(newcgd, sum(tstop - tstart)))

### 6th event
x <- "etime6"
immunecgd <- tmerge(immunecgd, cgd0, id = id, infect = event(cgd0[, x]))
immunecgd <- tmerge(immunecgd, subset(immunecgd, infect == 1),
                    id = id, cum_infect = cumtdc(tstop))
imm_rows <- which(immunecgd$cum_infect == 6)
immunecgd[imm_rows, "tstart"] <- immunecgd[imm_rows, "tstart"] + 30
immunecgd <- immunecgd[which(immunecgd$tstart < immunecgd$tstop), ]
newcgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))
newcgd <- tmerge(newcgd, subset(newcgd, infect == 1),
                 id = id, cum_infect = cumtdc(tstop))
ref_rows <- which(newcgd$cum_infect == 6)
newcgd[ref_rows, "tstart"] <- newcgd[ref_rows, "tstart"]
newcgd <- newcgd[which(newcgd$tstart < newcgd$tstop), ]
etime6 <- c(sum(immunecgd$infect), sum(newcgd$infect))
futime6 <- c(with(immunecgd, sum(tstop - tstart)),
             with(newcgd, sum(tstop - tstart)))

### 7th event
x <- "etime7"
immunecgd <- tmerge(immunecgd, cgd0, id = id, infect = event(cgd0[, x]))
immunecgd <- tmerge(immunecgd, subset(immunecgd, infect == 1),
                    id = id, cum_infect = cumtdc(tstop))
imm_rows <- which(immunecgd$cum_infect == 7)
immunecgd[imm_rows, "tstart"] <- immunecgd[imm_rows, "tstart"] + 30
immunecgd <- immunecgd[which(immunecgd$tstart < immunecgd$tstop), ]
newcgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))
newcgd <- tmerge(newcgd, subset(newcgd, infect == 1),
                 id = id, cum_infect = cumtdc(tstop))
ref_rows <- which(newcgd$cum_infect == 7)
newcgd[ref_rows, "tstart"] <- newcgd[ref_rows, "tstart"]
newcgd <- newcgd[which(newcgd$tstart < newcgd$tstop), ]
etime7 <- c(sum(immunecgd$infect), sum(newcgd$infect))
futime7 <- c(with(immunecgd, sum(tstop - tstart)),
             with(newcgd, sum(tstop - tstart)))

# One row per step; first column = immunity version, second = reference.
df_event <- rbind.data.frame(etime1, etime2, etime3, etime4,
                             etime5, etime6, etime7)
names(df_event) <- c("immunity", "no_immunity")
df_event$diff <- with(df_event, no_immunity - immunity)

df_futime <- rbind.data.frame(futime1, futime2, futime3, futime4,
                              futime5, futime6, futime7)
names(df_futime) <- c("immunity", "no_immunity")
df_futime$diff <- with(df_futime, no_immunity - immunity)
以 for 循環形式實現的相同代碼:
# Loop form of the step-by-step check. `immunecgd` gets the 30-day shift,
# `newcgd` goes through the same merges without it, and the per-iteration
# totals of both are collected in the `event` and `futime` matrices.
newcgd <- tmerge(data1 = cgd0[, 1:13], data2 = cgd0, id = id, tstop = futime)
immunecgd <- tmerge(data1 = cgd0[, 1:13], data2 = cgd0, id = id, tstop = futime)

# Row i holds the totals after iteration i: column 1 = immunity version,
# column 2 = reference version.
event <- matrix(NA, nrow = 7, ncol = 2)
futime <- matrix(NA, nrow = 7, ncol = 2)

for (i in seq_len(7)) {
  # Column of cgd0 with the i-th event time (etime1 ... etime7).
  x <- paste0("etime", i)

  # Merge the i-th event into both data sets.
  immunecgd <- tmerge(immunecgd, cgd0, id = id, infect = event(cgd0[, x]))
  newcgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))

  # Rebuild `cum_infect` from the rows that end in an event.
  immunecgd <- tmerge(immunecgd, subset(immunecgd, infect == 1),
                      id = id, cum_infect = cumtdc(tstop))
  newcgd <- tmerge(newcgd, subset(newcgd, infect == 1),
                   id = id, cum_infect = cumtdc(tstop))

  # Immunity version: rows with cum_infect == i start 30 days later.
  # Reference version: the same rows are written back unchanged.
  imm_rows <- which(immunecgd$cum_infect == i)
  immunecgd[imm_rows, "tstart"] <- immunecgd[imm_rows, "tstart"] + 30
  ref_rows <- which(newcgd$cum_infect == i)
  newcgd[ref_rows, "tstart"] <- newcgd[ref_rows, "tstart"]

  # Keep only rows whose start time is still before the stop time.
  immunecgd <- immunecgd[which(immunecgd$tstart < immunecgd$tstop), ]
  newcgd <- newcgd[which(newcgd$tstart < newcgd$tstop), ]

  # Record event totals and summed interval lengths for this iteration.
  event[i, ] <- c(sum(immunecgd$infect), sum(newcgd$infect))
  futime[i, ] <- c(with(immunecgd, sum(tstop - tstart)),
                   with(newcgd, sum(tstop - tstart)))
}

# Turn the result matrices into labelled data frames with a difference column.
event <- data.frame(event)
names(event) <- c("immunity", "no_immunity")
event$diff <- with(event, no_immunity - immunity)

futime <- data.frame(futime)
names(futime) <- c("immunity", "no_immunity")
futime$diff <- with(futime, no_immunity - immunity)
上述錯誤檢查代碼的結果如下:
df_event
immunity no_immunity diff
1 44 44 0
2 56 61 5
3 62 69 7
4 64 72 8
5 66 74 8
6 67 75 8
7 68 76 8
df_futime
immunity no_immunity diff
1 36202 37477 1275
2 35935 37477 1542
3 35875 37477 1602
4 35875 37477 1602
5 35875 37477 1602
6 35875 37477 1602
7 35875 37477 1602
------------------------- -------------------------------------------------- -------------------------------------------------- ------
通過在 survival 包中的其他數據集、模擬數據集以及我自己的數據集(也就是我希望應用這段代碼的數據集)上進一步測試,我發現了一個「小毛病」(glitch)。在上面版本的代碼中,如果一個新事件 etime[i-1] 落在我們已指定該個體免於發生事件的某個時期內——這正是代碼旨在處理的情形——該事件就不會被計入累積事件計數器 cum_infect。在下一輪處理 etime[i] 時,該個體只有 [i-1] 個累積事件,而控制是否應在開始時間上加30天的那部分代碼
# Quoted from the loop above: only rows whose cum_infect equals the loop index i get 30 days added to tstart.
immunecgd[which(immunecgd$cum_infect == i), "tstart"] <- immunecgd[which(immunecgd$cum_infect == i), "tstart"] + 30
不會識別出該個體已經發生過一次事件。這意味着 for 循環只能在第一次有事件落入這樣的免疫期之前,正確地在事件發生後加上30天的免疫期。我做了一個相當不優雅的解決方案,但它是有效的。
# Corrected loop. Instead of testing cum_infect == i, each iteration stores a
# snapshot of cum_infect in a column cum_infect_<i> and shifts tstart only on
# rows whose count is positive and larger than the previous snapshot.
newcgd <- tmerge(data1 = cgd0[, 1:13], data2 = cgd0, id = id, tstop = futime)
immunecgd <- tmerge(data1 = cgd0[, 1:13], data2 = cgd0, id = id, tstop = futime)

# Snapshot "0" gives the first iteration something to compare against.
newcgd$cum_infect_0 <- 0
immunecgd$cum_infect_0 <- 0

# Row i holds the totals after iteration i: column 1 = immunity version,
# column 2 = reference version.
event <- matrix(NA, nrow = 7, ncol = 2)
futime <- matrix(NA, nrow = 7, ncol = 2)

for (i in seq_len(7)) {
  # Column of cgd0 with the i-th event time (etime1 ... etime7).
  x <- paste0("etime", i)
  # Names of this iteration's snapshot column and of the previous one.
  cur_col <- paste0("cum_infect_", i)
  prev_col <- paste0("cum_infect_", i - 1)

  # Merge the i-th event into both data sets.
  immunecgd <- tmerge(immunecgd, cgd0, id = id, infect = event(cgd0[, x]))
  newcgd <- tmerge(newcgd, cgd0, id = id, infect = event(cgd0[, x]))

  # Rebuild `cum_infect` from the rows that end in an event.
  immunecgd <- tmerge(immunecgd, subset(immunecgd, infect == 1),
                      id = id, cum_infect = cumtdc(tstop))
  newcgd <- tmerge(newcgd, subset(newcgd, infect == 1),
                   id = id, cum_infect = cumtdc(tstop))

  # Store the current counts so the next iteration can compare against them.
  immunecgd[, cur_col] <- immunecgd[, "cum_infect"]
  newcgd[, cur_col] <- newcgd[, "cum_infect"]

  # Immunity version: rows with at least one cumulative event and a count
  # above the previous snapshot start 30 days later.
  imm_rows <- which(
    immunecgd$cum_infect > 0 & immunecgd$cum_infect > immunecgd[, prev_col]
  )
  immunecgd[imm_rows, "tstart"] <- immunecgd[imm_rows, "tstart"] + 30

  # Reference version: the same selection is written back unchanged.
  ref_rows <- which(
    newcgd$cum_infect > 0 & newcgd$cum_infect > newcgd[, prev_col]
  )
  newcgd[ref_rows, "tstart"] <- newcgd[ref_rows, "tstart"]

  # Keep only rows whose start time is still before the stop time.
  immunecgd <- immunecgd[which(immunecgd$tstart < immunecgd$tstop), ]
  newcgd <- newcgd[which(newcgd$tstart < newcgd$tstop), ]

  # Record event totals and summed interval lengths for this iteration.
  event[i, ] <- c(sum(immunecgd$infect), sum(newcgd$infect))
  futime[i, ] <- c(with(immunecgd, sum(tstop - tstart)),
                   with(newcgd, sum(tstop - tstart)))
}

# Remove the cum_infect_<k> snapshot columns; `cum_infect` itself is kept.
imm_is_snapshot <- grepl("cum_infect_", colnames(immunecgd))
immunecgd <- immunecgd[, !imm_is_snapshot]
ref_is_snapshot <- grepl("cum_infect_", colnames(newcgd))
newcgd <- newcgd[, !ref_is_snapshot]

# Turn the result matrices into labelled data frames with a difference column.
event <- data.frame(event)
names(event) <- c("immunity", "no_immunity")
event$diff <- with(event, no_immunity - immunity)

futime <- data.frame(futime)
names(futime) <- c("immunity", "no_immunity")
futime$diff <- with(futime, no_immunity - immunity)
在這裏我們可以看到事件數的差異:
immunity no_immunity diff
1 44 44 0
2 56 61 5
3 62 69 7
4 64 72 8
5 65 74 9
6 66 75 9
7 66 76 10
可以看到,正確指定的 for 循環多發現了2個事件落在免疫期內的情形。
我想我可以在每一步添加7到tstart? – user6571411
我想你可以提供一個[MCVE]。 –
如果我能我會回答我自己的問題。我真的不知道如何去做我所要求的。 – user6571411