2015-05-18 49 views
0

的給定百分比我情節圓/凸包以防萬一點

x=rnorm(100) 
y=rnorm(100) 
plot(x,y) 
abline(h=0); abline(v=0) 

從點(0,0)和去向外,我想畫一個輪廓/圓/橢圓/寫意凸包包圍任何給定的百分比點。

是否有任何功能或包可以自動執行此操作?到目前爲止,我嘗試了以下方法,但我只能用一些推斷和近似來得到一個圓。

到目前爲止,我已經試過這樣:

#calculate radius 
r<- sqrt(x^2+y^2) 

df<-data.frame(radius=seq(0,3,0.1), percentage=NA) 

#get the percentage of points that have a smaller radius than i 
k<-1 
for (i in seq(0,3,0.1)){ 
    df$percentage[k] <- sum(r<i)/length(r) 
    k<-k+1 
} 

#extrapolation function 
prox.function<- approxfun(df$percentage, df$radius) 


#get the radius of the circle that encloses about 50% of 
prox.function(.50) 

#draw the circle 
library(plotrix) 
draw.circle(0,0,prox.function(.50)) 

enter image description here

回答

6

半徑包圍分數f要點是:

f <- 0.5 # use half for this example as in the question 
sort(r)[ ceiling(f * length(r)) ] 
4

是的,我們可以創建一個新的GEOM ggplot,它將圍繞數據中所有點的任何給定百分比繪製凸包。這與bagplot類似,並使用aplpack包中的bagplot函數中的一些代碼(固定爲50%的點數)。

這是新的GEOM,使您可以選擇附上什麼個百分點的定義:

library(ggplot2) 

# Here's the stat_ 
StatBag <- ggproto("Statbag", Stat, 
        compute_group = function(data, scales, prop = 0.5) { 

        ################################# 
        ################################# 
        # originally from aplpack package, plotting functions removed 
        plothulls_ <- function(x, y, fraction, n.hull = 1, 
              col.hull, lty.hull, lwd.hull, density=0, ...){ 
         # function for data peeling: 
         # x,y : data 
         # fraction.in.inner.hull : max percentage of points within the hull to be drawn 
         # n.hull : number of hulls to be plotted (if there is no fractiion argument) 
         # col.hull, lty.hull, lwd.hull : style of hull line 
         # plotting bits have been removed, BM 160321 
         # pw 130524 
         if(ncol(x) == 2){ y <- x[,2]; x <- x[,1] } 
         n <- length(x) 
         if(!missing(fraction)) { # find special hull 
         n.hull <- 1 
         if(missing(col.hull)) col.hull <- 1 
         if(missing(lty.hull)) lty.hull <- 1 
         if(missing(lwd.hull)) lwd.hull <- 1 
         x.old <- x; y.old <- y 
         idx <- chull(x,y); x.hull <- x[idx]; y.hull <- y[idx] 
         for(i in 1:(length(x)/3)){ 
          x <- x[-idx]; y <- y[-idx] 
          if((length(x)/n) < fraction){ 
          return(cbind(x.hull,y.hull)) 
          } 
          idx <- chull(x,y); x.hull <- x[idx]; y.hull <- y[idx]; 
         } 
         } 
         if(missing(col.hull)) col.hull <- 1:n.hull 
         if(length(col.hull)) col.hull <- rep(col.hull,n.hull) 
         if(missing(lty.hull)) lty.hull <- 1:n.hull 
         if(length(lty.hull)) lty.hull <- rep(lty.hull,n.hull) 
         if(missing(lwd.hull)) lwd.hull <- 1 
         if(length(lwd.hull)) lwd.hull <- rep(lwd.hull,n.hull) 
         result <- NULL 
         for(i in 1:n.hull){ 
         idx <- chull(x,y); x.hull <- x[idx]; y.hull <- y[idx] 
         result <- c(result, list(cbind(x.hull,y.hull))) 
         x <- x[-idx]; y <- y[-idx] 
         if(0 == length(x)) return(result) 
         } 
         result 
        } # end of definition of plothulls 
        ################################# 


        # prepare data to go into function below 
        the_matrix <- matrix(data = c(data$x, data$y), ncol = 2) 

        # get data out of function as df with names 
        setNames(data.frame(plothulls_(the_matrix, fraction = prop)), nm = c("x", "y")) 
        # how can we get the hull and loop vertices passed on also? 
        }, 

        required_aes = c("x", "y") 
) 

# Here's the stat_ function 
#' @inheritParams ggplot2::stat_identity 
#' @param prop Proportion of all the points to be included in the bag (default is 0.5) 
stat_bag <- function(mapping = NULL, data = NULL, geom = "polygon", 
        position = "identity", na.rm = FALSE, show.legend = NA, 
        inherit.aes = TRUE, prop = 0.5, alpha = 0.3, ...) { 
    layer(
    stat = StatBag, data = data, mapping = mapping, geom = geom, 
    position = position, show.legend = show.legend, inherit.aes = inherit.aes, 
    params = list(na.rm = na.rm, prop = prop, alpha = alpha, ...) 
) 
} 

# here's the geom_ 
geom_bag <- function(mapping = NULL, data = NULL, 
        stat = "identity", position = "identity", 
        prop = 0.5, 
        alpha = 0.3, 
        ..., 
        na.rm = FALSE, 
        show.legend = NA, 
        inherit.aes = TRUE) { 
    layer(
    data = data, 
    mapping = mapping, 
    stat = StatBag, 
    geom = GeomBag, 
    position = position, 
    show.legend = show.legend, 
    inherit.aes = inherit.aes, 
    params = list(
     na.rm = na.rm, 
     alpha = alpha, 
     prop = prop, 
     ... 
    ) 
) 
} 

#' @rdname ggplot2-ggproto 
#' @format NULL 
#' @usage NULL 
#' @export 
GeomBag <- ggproto("GeomBag", Geom, 
        draw_group = function(data, panel_scales, coord) { 
        n <- nrow(data) 
        if (n == 1) return(zeroGrob()) 

        munched <- coord_munch(coord, data, panel_scales) 
        # Sort by group to make sure that colors, fill, etc. come in same order 
        munched <- munched[order(munched$group), ] 

        # For gpar(), there is one entry per polygon (not one entry per point). 
        # We'll pull the first value from each group, and assume all these values 
        # are the same within each group. 
        first_idx <- !duplicated(munched$group) 
        first_rows <- munched[first_idx, ] 

        ggplot2:::ggname("geom_bag", 
             grid:::polygonGrob(munched$x, munched$y, default.units = "native", 
                 id = munched$group, 
                 gp = grid::gpar(
                  col = first_rows$colour, 
                  fill = alpha(first_rows$fill, first_rows$alpha), 
                  lwd = first_rows$size * .pt, 
                  lty = first_rows$linetype 
                 ) 
            ) 
        ) 


        }, 

        default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1, 
            alpha = NA, prop = 0.5), 

        handle_na = function(data, params) { 
        data 
        }, 

        required_aes = c("x", "y"), 

        draw_key = draw_key_polygon 
) 

下面是一些例子。我們可以用不同的α水平堆疊三個凸殼一起顯示在數據中心,它的傳播:

ggplot(mpg, aes(displ, hwy, fill = drv, colour = drv)) + 
    geom_point() + 
    geom_bag(prop = 0.95) + # enclose 95% of points 
    geom_bag(prop = 0.5, alpha = 0.5) + # enclose 50% of points 
    geom_bag(prop = 0.1, alpha = 0.8) # enclose 5% of points 

enter image description here

ggplot(iris, aes(Sepal.Length, Petal.Length, colour = Species, fill = Species)) + 
    geom_point() + 
    stat_bag(prop = 0.95) + # enclose 95% of points 
    stat_bag(prop = 0.5, alpha = 0.5) + # enclose 50% of points 
    stat_bag(prop = 0.05, alpha = 0.9) # enclose 5% of points 

enter image description here