2013-01-04 43 views
3

我試圖用畫一些Kaplan-Meier曲線時,空行GGPLOT2代碼中的發現:https://github.com/kmiddleton/rexamples/blob/master/qplot_survival.RGGPLOT2找到生存數據幀繪製Kaplan-Meier曲線

我曾與在這個偉大的代碼的好成績不同的數據庫然而,在這種情況下,它給了我下面的錯誤......我彷彿在我的數據框空行:

Error en if (nrow(layer_data) == 0) return() : argument is of length zero.

關於這種類型的錯誤前面的問題似乎並沒有對我有用的,因爲在我的情況下數據和功能的類型不一樣。

我對使用R的統計分析頗爲陌生,而且我沒有編程背景,所以我認爲這在我的數據中必須是一個「啞巴錯誤」,但我無法找到它的位置......它絕對似乎ggplot2找不到要繪製的行。請你能以任何方式幫助我,提供線索,建議等等。

這裏是我的數據和使用的代碼,按順序,準備好了控制檯 - 我試着用knitr腳本 - 。最後,我已爲我的sessionInfo:

library(splines) 
library(survival) 
library(abind) 
library(ggplot2) 
library(grid) 

我創建了一個名爲acbi30(真實數據)數據幀:

mort28day <- c(1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,0,1) 
daysurv <- c(4,29,24,29,29,29,29,19,29,29,29,3,9,29,15,29,29,11,29,5,13,20,22,29,16,21,9,29,29,15) 
levo <- c(0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0) 
acbi30 <- data.frame(mort28day, daysurv, levo) 
save(acbi30, file="acbi30.rda") 
acbi30 

然後,我粘貼下面的命令來創建一個功能使用GGPLOT2:

t.Surv <- Surv(acbi30$daysurv, acbi30$mort28day) 
t.survfit <- survfit(t.Surv~1, data=acbi30) 


#define custom function to create a survival data.frame# 
createSurvivalFrame <- function(f.survfit){ 

#initialise frame variable# 
f.frame <- NULL 

#check if more then one strata# 
if(length(names(f.survfit$strata)) == 0){ 

#create data.frame with data from survfit# 
f.frame <- data.frame(time=f.survfit$time, n.risk=f.survfit$n.risk, n.event=f.survfit$n.event, n.censor = f.survfit 
$n.censor, surv=f.survfit$surv, upper=f.survfit$upper, lower=f.survfit$lower) 
#create first two rows (start at 1)# 
f.start <- data.frame(time=c(0, f.frame$time[1]), n.risk=c(f.survfit$n, f.survfit$n), n.event=c(0,0), 
n.censor=c(0,0), surv=c(1,1), upper=c(1,1), lower=c(1,1)) 
#add first row to dataset# 
f.frame <- rbind(f.start, f.frame) 
#remove temporary data# 
rm(f.start) 
} 
else { 
#create vector for strata identification# 
f.strata <- NULL 
for(f.i in 1:length(f.survfit$strata)){ 
#add vector for one strata according to number of rows of strata# 
f.strata <- c(f.strata, rep(names(f.survfit$strata)[f.i], f.survfit$strata[f.i])) 
} 
#create data.frame with data from survfit (create column for strata)# 
f.frame <- data.frame(time=f.survfit$time, n.risk=f.survfit$n.risk, n.event=f.survfit$n.event, n.censor = f.survfit 
$n.censor, surv=f.survfit$surv, upper=f.survfit$upper, lower=f.survfit$lower, strata=factor(f.strata)) 
#remove temporary data# 
rm(f.strata) 
#create first two rows (start at 1) for each strata# 
for(f.i in 1:length(f.survfit$strata)){ 
#take only subset for this strata from data# 
f.subset <- subset(f.frame, strata==names(f.survfit$strata)[f.i]) 
#create first two rows (time: 0, time of first event)# 
f.start <- data.frame(time=c(0, f.subset$time[1]), n.risk=rep(f.survfit[f.i]$n, 2), n.event=c(0,0), 
n.censor=c(0,0), surv=c(1,1), upper=c(1,1), lower=c(1,1), strata=rep(names(f.survfit$strata)[f.i], 
2)) 
#add first two rows to dataset# 
f.frame <- rbind(f.start, f.frame) 
#remove temporary data# 
rm(f.start, f.subset) 
} 
#reorder data# 
f.frame <- f.frame[order(f.frame$strata, f.frame$time), ] 
#rename row.names# 
rownames(f.frame) <- NULL 
} 
#return frame# 
return(f.frame) 
} 


#define custom function to draw kaplan-meier curve with ggplot# 
qplot_survival <- function(f.frame, f.CI="default", f.shape=3){ 
#use different plotting commands dependig whether or not strata's are given# 
if("strata" %in% names(f.frame) == FALSE){ 
#confidence intervals are drawn if not specified otherwise# 
if(f.CI=="default" | f.CI==TRUE){ 
#create plot with 4 layers (first 3 layers only events, last layer only censored)# 
#hint: censoring data for multiple censoring events at timepoint are overplotted# 
#(unlike in plot.survfit in survival package)# 
ggplot(data=f.frame) + geom_step(aes(x=time, y=surv), direction="hv") + geom_step(aes(x=time, 
y=upper), directions="hv", linetype=2) + geom_step(aes(x=time,y=lower), direction="hv", linetype=2) + 
geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape) 
} 
else { 
#create plot without confidence intervals# 
ggplot(data=f.frame) + geom_step(aes(x=time, y=surv), direction="hv") + 
geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape) 
} 
} 
else { 
if(f.CI=="default" | f.CI==FALSE){ 
#without CI# 
ggplot(data=f.frame, aes(group=strata, colour=strata)) + geom_step(aes(x=time, y=surv), 
direction="hv") + geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape) 
} 
else { 
#with CI (hint: use alpha for CI)# 
ggplot(data=f.frame, aes(colour=strata, group=strata)) + geom_step(aes(x=time, y=surv), 
direction="hv") + geom_step(aes(x=time, y=upper), directions="hv", linetype=2, alpha=0.5) + 
geom_step(aes(x=time,y=lower), direction="hv", linetype=2, alpha=0.5) + 
geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape) 
} 
} 
} 

繪製全球生存曲線(95%CI):

它不給任何錯誤:

# Kaplan-Meier plot, global survival (with CI) 
t.survfit <- survfit(t.Surv~1, data=acbi30) 
t.survframe <- createSurvivalFrame(t.survfit) 
t.survfit 
qplot_survival(t.survframe, TRUE, 20) 

繪製分層生存曲線:

給出了錯誤上面提到:

# Kaplan-Meier plot, stratified survival 
t.survfit2 <- survfit(t.Surv~levo, data=acbi30) 
t.survframe2 <- createSurvivalFrame(t.survfit2) 
t.survfit2 
qplot_survival(t.survframe2, TRUE, 20) 

繪圖而不GGPLOT2結果:

t.survframe2的結構對我來說似乎沒問題,沒有任何空行,所以它必定是qplot_survival在t.survframe2中讀取我的數據的問題。做一個簡單的陰謀不會返回任何錯誤:

t.survframe2 
plot(t.survfit2) 

問題與我的數據框在哪裏?該功能創建的工作以及與其他數據集,但與這一個...

謝謝你在前進,

Mareviv

會議信息:

sessionInfo() 

[R版2.15 。2(2012年10月26日) 平臺:I386-W64-的mingw32/I386(32位)

locale: 

[1] LC_COLLATE=Spanish_Spain.1252 LC_CTYPE=Spanish_Spain.1252 
[3] LC_MONETARY=Spanish_Spain.1252 LC_NUMERIC=C     
[5] LC_TIME=Spanish_Spain.1252  

attached base packages: 
[1] grid  splines stats  graphics grDevices utils  datasets 
[8] methods base  

other attached packages: 
[1] ggplot2_0.9.3 abind_1.4-0  survival_2.36-14 knitr_0.8  

loaded via a namespace (and not attached): 
[1] colorspace_1.1-1 dichromat_1.2-4 digest_0.5.2  
[4] evaluate_0.4.2  formatR_0.7  gtable_0.1.2  
[7] labeling_0.1  MASS_7.3-22  munsell_0.4  
[10] plyr_1.8   proto_0.3-9.2  RColorBrewer_1.0-5 
[13] reshape2_1.2.1  scales_0.2.3  stringr_0.6.1  
[16] tools_2.15.2  

回答

2

我做你的qplot_survival()的作用有點整容手術。主要問題似乎是data =參數geom_point中的子集條件;在t.survframet.survframe2中,n.censor的表得出值0,3和12.通過將子集條件改變爲n.censor > 0,我設法在所有情況下得到一個圖。我也沒有看到f.CI = "default"的點,所以我將默認設置爲TRUE,並相應地修改if條件。

qplot_survival <- function(f.frame, f.CI= TRUE, f.shape=3) 
{ 
# use different plotting commands depending whether 
# or not strata are given# 
if(!("strata" %in% names(f.frame))) 
{ 
    #confidence intervals are drawn if not specified otherwise# 
    if(isTRUE(f.CI)) 
    { 
     # create plot with 4 layers (first 3 layers only events, 
     # last layer only censored)# 
     # hint: censoring data for multiple censoring events at 
     # timepoint are overplotted# 
     # (unlike in plot.survfit in survival package)# 
    ggplot(data=f.frame) + 
     geom_step(aes(x=time, y=surv), direction="hv") + 
     geom_step(aes(x=time, y=upper), direction ="hv", linetype=2) + 
     geom_step(aes(x=time,y=lower), direction="hv", linetype=2) + 
     geom_point(data=subset(f.frame, n.censor > 0), 
       aes(x=time, y=surv), shape=f.shape) 
    } else { 
    #create plot without confidence intervals# 
    ggplot(data=f.frame) + 
     geom_step(aes(x=time, y=surv), direction="hv") + 
     geom_point(data=subset(f.frame, n.censor > 0), 
       aes(x=time, y=surv), shape=f.shape) 
      } 
} else { 
    if(!(isTRUE(f.CI))){ 
#without CI# 
    ggplot(data=f.frame, aes(group=strata, colour=strata)) + 
    geom_step(aes(x=time, y=surv), direction="hv") + 
    geom_point(data=subset(f.frame, n.censor > 0), 
       aes(x=time, y=surv), shape=f.shape) 
} else { 

#with CI (hint: use alpha for CI)# 
    ggplot(data=f.frame, aes(x = time, colour=strata, group=strata)) + 
     geom_step(aes(y=surv), direction="hv") + 
     geom_step(aes(y=upper), direction="hv", 
        linetype=2, alpha=0.5) + 
     geom_step(aes(y=lower), direction="hv", 
        linetype=2, alpha=0.5) + 
     geom_point(data=subset(f.frame, n.censor > 0), 
       aes(y=surv), shape=f.shape) 
     } 
    } 
} 

下面的情節做了這些改變後,所有的工作對我來說:

qplot_survival(t.survframe2, TRUE, 20) 
qplot_survival(t.survframe2, FALSE, 20) 
qplot_survival(t.survframe, TRUE, 20) 
qplot_survival(t.survframe, FALSE, 20) 

一對夫婦的意見:

  1. 在函數內部子集可能是危險的,因爲有時,在本情況下,滿足條件返回一個零行數據幀。我會考慮geom_point()層是否真的有必要。
  2. 在幾個地方,您在geom_step()呼叫中有directions = "hv"。這個論點不是多元化的,並且已經在上面改變了。
  3. 這可以有效地完成多一點我想,而是從一個survfit對象提取所關注列的一種方式,說t.survfit,是這樣的:

(展開譜曲時階層都存在)

comps <- c(2:6, 8, 10); 
t.fit <- as.data.frame(do.call(cbind, lapply(comps, function(j) t.survfit[[j]]))) 
names(t.fit) <- names(t.survfit)[comps] 
+0

Omg!那'n.censor == 1'一直在我眼前...現在它完美地工作。沒有'geom_point()'圖層也沒問題,但是(不是這種情況下)有時我需要在整個圖中顯示截尾標記。該功能現在對所有這些調整都非常有用,並且我注意到了提取列的代碼。我學到了更多的ggplot2。謝謝! – Mareviv

1

這裏是另一個版本時出現在你的數據不刪失分(@丹尼斯的版本,在這種情況下仍然無法)也佔的情況。這可以變得更高效,可能是通過創建一個變量來存儲整個數據框中的檢查點數量,然後重新使用它,而不是像我在每種情況下那樣進行測試。

# define custom function to draw kaplan-meier curve with ggplot 
qplot_survival <- function(f.frame, f.CI="default", f.shape=3){ 
    # use different plotting commands dependig whether or not strata's are given 
    if("strata" %in% colnames(f.frame) == FALSE){ 
    # confidence intervals are drawn if not specified otherwise 
    if(f.CI=="default" | f.CI==TRUE){ 
     # create plot with 4 layers (first 3 layers only events, last layer only censored) 
     # hint: censoring data for multiple censoring events at timepoint are overplotted 



     # (unlike in plot.survfit in survival package) 
     p <- ggplot(data=f.frame) + geom_step(aes(x=time, y=surv), direction="hv") + geom_step(aes(x=time, 
                          y=upper), directions="hv", linetype=2) + geom_step(aes(x=time,y=lower), direction="hv", linetype=2) 
     if(nrow(subset(f.frame, n.censor > 0)) > 0){ 
     p+geom_point(data=subset(f.frame, n.censor > 0), aes(x=time, y=surv), shape=f.shape) 
     }else{ 
     p 
     } 
    } 
    else { 
     # create plot without confidence intervalls 
     p <- ggplot(data=f.frame) + geom_step(aes(x=time, y=surv), direction="hv") 
     if(nrow(subset(f.frame, n.censor > 0)) > 0){ 
     p + geom_point(data=subset(f.frame, n.censor > 0), aes(x=time, y=surv), shape=f.shape) 
     }else{ 
     p 
     } 
    } 
    } 
    else { 
    if(f.CI=="default" | f.CI==FALSE){ 
     # without CI 
     p <- ggplot(data=f.frame, aes(group=strata, colour=strata)) + geom_step(aes(x=time, y=surv), 
                     direction="hv") 
     if(nrow(subset(f.frame, n.censor > 0)) > 0){ 
     p +geom_point(data=subset(f.frame, n.censor > 0), aes(x=time, y=surv), shape=f.shape) 
     }else{ 
     p 
     } 
    } 
    else { 
     # with CI (hint: use alpha for CI) 
     p <- ggplot(data=f.frame, aes(colour=strata, group=strata)) + geom_step(aes(x=time, y=surv), 
                     direction="hv") + geom_step(aes(x=time, y=upper), directions="hv", linetype=2, alpha=0.5) + 
     geom_step(aes(x=time,y=lower), direction="hv", linetype=2, alpha=0.5) 
     if(nrow(subset(f.frame, n.censor > 0)) > 0){ 
     p + geom_point(data=subset(f.frame, n.censor > 0), aes(x=time, y=surv), shape=f.shape) 
     }else{ 
     p 
     } 
    } 
    } 
}