2014-04-07 92 views
6

我一直在尋找一個使用ggplot2繪製生存曲線的解決方案。我發現了一些很好的例子,但它們並沒有遵循整個ggplot2美學(主要關於陰影置信區間等)。所以最後我寫我自己的函數:使用ggplot2繪製R中的生存曲線

ggsurvplot<-function(s, conf.int=T, events=T, shape="|", xlab="Time", 
        ylab="Survival probability", zeroy=F, col=T, linetype=F){ 

#s: a survfit object. 
#conf.int: TRUE or FALSE to plot confidence intervals. 
#events: TRUE or FALSE to draw points when censoring events occur 
#shape: the shape of these points 
#zeroy: Force the y axis to reach 0 
#col: TRUE, FALSE or a vector with colours. Colour or B/W 
#linetype: TRUE, FALSE or a vector with line types. 

require(ggplot2) 
require(survival) 

if(class(s)!="survfit") stop("Survfit object required") 

#Build a data frame with all the data 
sdata<-data.frame(time=s$time, surv=s$surv, lower=s$lower, upper=s$upper) 
sdata$strata<-rep(names(s$strata), s$strata) 

#Create a blank canvas 
kmplot<-ggplot(sdata, aes(x=time, y=surv))+ 
    geom_blank()+ 
    xlab(xlab)+ 
    ylab(ylab)+ 
    theme_bw() 

#Set color palette 
if(is.logical(col)) ifelse(col, 
         kmplot<-kmplot+scale_colour_brewer(type="qual", palette=6)+scale_fill_brewer(type="qual", palette=6), 
         kmplot<-kmplot+scale_colour_manual(values=rep("black",length(s$strata)))+scale_fill_manual(values=rep("black",length(s$strata))) 
         ) 
else kmplot<-kmplot+scale_fill_manual(values=col)+scale_colour_manual(values=col) 

#Set line types 
if(is.logical(linetype)) ifelse(linetype, 
           kmplot<-kmplot+scale_linetype_manual(values=1:length(s$strata)), 
           kmplot<-kmplot+scale_linetype_manual(values=rep(1, length(s$strata))) 
          ) 
else kmplot<-kmplot+scale_linetype_manual(values=linetype) 

#Force y axis to zero 
if(zeroy) { 
    kmplot<-kmplot+ylim(0,1) 
} 

#Confidence intervals 
if(conf.int) { 

    #Create a data frame with stepped lines 
    n <- nrow(sdata) 
    ys <- rep(1:n, each = 2)[-2*n] #duplicate row numbers and remove the last one 
    xs <- c(1, rep(2:n, each=2)) #first row 1, and then duplicate row numbers 
    scurve.step<-data.frame(time=sdata$time[xs], lower=sdata$lower[ys], upper=sdata$upper[ys], surv=sdata$surv[ys], strata=sdata$strata[ys]) 

    kmplot<-kmplot+ 
     geom_ribbon(data=scurve.step, aes(x=time,ymin=lower, ymax=upper, fill=strata), alpha=0.2) 
} 

#Events 
if(events) { 
    kmplot<-kmplot+ 
     geom_point(aes(x=time, y=surv, col=strata), shape=shape) 
} 

#Survival stepped line 
kmplot<-kmplot+geom_step(data=sdata, aes(x=time, y=surv, col=strata, linetype=strata)) 

#Return the ggplot2 object 
kmplot 
} 

我使用的每一層循環寫了以前的版本,但速度較慢。由於我不是程序員,因此我尋求改進功能的建議。也許增加一個風險患者的數據表,或者更好地整合ggplot2框架。

感謝

回答

6

你可以嘗試的東西與CI之間陰影區下面:

(我在這裏使用開發版本,因爲是在生產模式的參數alpha一個缺陷(沒有按」對於非默認值,正確填充上方矩形)否則功能相同)。

library(devtools) 
dev_mode(TRUE) # in case you don't want a permanent install 
install_github("survMisc", "dardisco") 
library("survMisc", lib.loc="C:/Users/c/R-dev") # or wherever you/devtools has put it 
data(kidney, package="KMsurv") 
p1 <- autoplot(survfit(Surv(time, delta) ~ type, data=kidney), 
       type="fill", survSize=2, palette="Pastel1", 
       fillLineSize=0.1, alpha=0.4)$plot 
p1 + theme_classic() 
dev_mode(FALSE) 

,並提供:

enter image description here

而對於經典的情節和表:

autoplot(autoplot(survfit(Surv(time, delta) ~ type, data=kidney), 
        type="CI")) 

enter image description here

更多選項見?survMisc::autoplot.survfit?survMisc::autoplot.tableAndPlot

+0

那正是我需要的,但試圖利用與填充選項我得到以下錯誤的圖形:在vecseq錯誤(f__,len__,如果(allow.cartesian || notjoin)NULL別的as.integer (max(nrow(x),: 以32行連接結果;超過28 = max(nrow(x),nrow(i))...如果您確定要繼續,請重新運行allow.cartesian =真的......我試圖理解它的含義,但我不明白!你有什麼想法嗎? –

0

我想做同樣的事情,也從錯誤的笛卡爾錯誤。另外,我想要在我的代碼和事件數量中進行審查。所以我寫了這個小片段。仍然有點生,但也許對一些有用。

ggsurvplot<-function( 
    time, 
    event, 
    event.marker=1, 
    marker, 
    tabletitle="tabletitle", 
    xlab="Time(months)", 
    ylab="Disease Specific Survival", 
    ystratalabs=c("High", "Low"), 
    pv=TRUE, 
    legend=TRUE, 
    n.risk=TRUE, 
    n.event=TRUE, 
    n.cens=TRUE, 
    timeby=24, 
    xmax=120, 
    panel="A") 

{ 
    require(ggplot2) 
    require(survival) 
    require(gridExtra) 

    s.fit=survfit(Surv(time, event==event.marker)~marker) 
    s.diff=survdiff(Surv(time, event=event.marker)~marker) 


    #Build a data frame with all the data 
    sdata<-data.frame(time=s.fit$time, 
        surv=s.fit$surv, 
        lower=s.fit$lower, 
        upper=s.fit$upper, 
        n.censor=s.fit$n.censor, 
        n.event=s.fit$n.event, 
        n.risk=s.fit$n.risk) 
    sdata$strata<-rep(names(s.fit$strata), s.fit$strata) 
    m <- max(nchar(ystratalabs)) 
    if(xmax<=max(sdata$time)){ 
    xlims=c(0, round(xmax/timeby, digits=0)*timeby) 
    }else{ 
    xlims=c(0, round((max(sdata$time))/timeby, digits=0)*timeby) 
    } 
    times <- seq(0, max(xlims), by = timeby) 
    subs <- 1:length(summary(s.fit,times=times,extend = TRUE)$strata) 
    strata = factor(summary(s.fit,times = times,extend = TRUE)$strata[subs]) 
    time = summary(s.fit, time = times, extend = TRUE)$time 


    #Buidling the plot basics 
    p<-ggplot(data = sdata, aes(colour = strata, group = strata, shape=strata)) + 
         theme_classic()+ 
         geom_step(aes(x = time, y = surv), direction = "hv")+ 
         scale_x_continuous(breaks=times)+ 
         scale_y_continuous(breaks=seq(0,1,by=0.1)) + 
         geom_ribbon(aes(x = time, ymax = upper, ymin = lower, fill = strata), directions = "hv", linetype = 0,alpha = 0.10) + 
         geom_point(data = subset(sdata, n.censor == 1), aes(x = time, y = surv), shape = 3) + 
         labs(title=tabletitle)+ 
         theme(
          plot.margin=unit(c(1,0.5,(2.5+length(levels(factor(marker)))*2),2), "lines"), 
          legend.title=element_blank(), 
          legend.background=element_blank(), 
          legend.position=c(0.2,0.2))+ 
         scale_colour_discrete(
          breaks=c(levels(factor(sdata$strata))), 
          labels=ystratalabs) + 
         scale_shape_discrete(
          breaks=c(levels(factor(sdata$strata))), 
          labels=ystratalabs) + 
         scale_fill_discrete(
          breaks=c(levels(factor(sdata$strata))), 
          labels=ystratalabs) + 
         xlab(xlab)+ 
         ylab(ylab)+ 
         coord_cartesian(xlim = xlims, ylim=c(0,1)) 

         #addping the p-value 
         if (pv==TRUE){ 
           pval <- 1 - pchisq(s.diff$chisq, length(s.diff$n) - 1) 
           pvaltxt<-if(pval>=0.001){ 
               paste0("P = ", round(pval, digits=3)) 
              }else{ 
               "P < 0.001" 
              } 
              p <- p + annotate("text", x = 0.85 * max(xlims), y = 0.1, label = pvaltxt) 
         } 

         #adding information for tables 
         times <- seq(0, max(xlims), by = timeby) 
         subs <- 1:length(summary(s.fit,times=times,extend = TRUE)$strata) 

         risk.data<-data.frame(strata = factor(summary(s.fit,times = times,extend = TRUE)$strata[subs]), 
               time = summary(s.fit, time = times, extend = TRUE)$time[subs], 
               n.risk = summary(s.fit,times = times,extend = TRUE)$n.risk[subs], 
               n.cens = summary(s.fit, times=times, extend=TRUE)$n.cens[subs], 
               n.event=summary(s.fit, times=times, extend=TRUE)$n.event[subs]) 
         #adding the risk table 
         if(n.risk==TRUE){ 
           p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=-0.15, label="Numbers at risk") 
           for (q in 1:length(levels(factor(marker)))){   
            p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.15+(-0.05*q)), label=paste0(ystratalabs[q])) 
            for(i in ((q-1)*length(times)+1):(q*length(times))){ 
              p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.15+(-0.05*q)), label=paste0(risk.data$n.risk[i])) 
            } 
           } 
         } 
         #adding the event table 
         if(n.event==TRUE){ 
           p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=(-0.20+(-0.05*length(levels(factor(marker))))), label="Number of events") 
           for (q in 1:length(levels(factor(marker)))){   
            p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.20+(-0.05*length(levels(factor(marker))))+(-0.05*q)), label=paste0(ystratalabs[q])) 
           for(i in ((q-1)*length(times)+1):(q*length(times))){ 
            p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.20+(-0.05*length(levels(factor(marker))))+(-0.05*q)), label=paste0(risk.data$n.event[i])) 
            } 
           } 
           } 
         #adding the cens table 
         if(n.event==TRUE){ 
           p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=(-0.25+(-0.05*length(levels(factor(marker)))*2)), label="Number of censored") 
           for (q in 1:length(levels(factor(marker)))){   
            p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.25+(-0.05*length(levels(factor(marker)))*2)+(-0.05*q)), label=paste0(ystratalabs[q])) 
           for(i in ((q-1)*length(times)+1):(q*length(times))){ 
            p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.25+(-0.05*length(levels(factor(marker)))*2)+(-0.05*q)), label=paste0(risk.data$n.cens[i])) 
            } 
           } 
           } 

         #adding panel marker 
           p <- p + annotate("text", cex=10, x= -0.2*max(xlims), y=1.1, label=panel) 
         #drawing the plot with the tables outside the margins 
           gt <- ggplot_gtable(ggplot_build(p)) 
           gt$layout$clip[gt$layout$name=="panel"] <- "off" 
           grid.draw(gt) 
}