2017-08-06 80 views
0

感謝您抽出寶貴的時間來閱讀。嵌套for循環 - 標記奪回

下面的代碼會繪製一張圖:共抽取 100 個樣本,每個樣本的大小介於總體(400)的 5% 到 15% 之間。

我現在想做的,是在這張圖上再加上另外兩個部分。大致如下:

第 1–100 個樣本:抽取 100 個樣本,每個樣本的大小介於總體(400)的 5% 到 15% 之間。第 101–200 個樣本:抽取 100 個樣本,每個樣本的大小介於總體(800)的 5% 到 15% 之間。第 201–300 個樣本:抽取 100 個樣本,每個樣本的大小介於總體(300)的 5% 到 15% 之間。

我認爲這需要用到嵌套循環。有沒有人能建議該如何做到這一點?

謝謝你的時間。基爾斯滕

# Population of N individuals, identified by the integers 1..N.
N <- 400
pop <- c(1:N)

# Smallest possible sample size: 5% of the population, rounded.
lower.bound <- round(x = .05 * N, digits = 0)
lower.bound

# Largest possible sample size: 15% of the population, rounded.
upper.bound <- round(x = .15 * N, digits = 0)
upper.bound

# Total number of possible sample sizes, ranging from lower.bound to
# upper.bound inclusive.
length.ss.interval <- length(c(lower.bound:upper.bound))
length.ss.interval

# Example draw of a single sample size; every size is equally likely.
sample(x = c(lower.bound:upper.bound),
       size = 1,
       prob = c(rep(1/length.ss.interval, length.ss.interval)))

# Number of samples (capture occasions) to simulate.
n.samples <- 100

# Capture-history matrix: one row per individual. Column 1 stores the
# individual IDs; columns 2..(n.samples + 1) each store one sample's
# capture indicator (1 = caught, 0 = not caught).
dat <- matrix(data = NA,
              nrow = length(pop),
              ncol = n.samples + 1)
dat[, 1] <- pop

# Candidate sample sizes and their (uniform) selection weights.
size.choices <- c(lower.bound:upper.bound)
size.weights <- c(rep(1/length.ss.interval, length.ss.interval))

for (i in 2:ncol(dat)) {
    # Draw this sample's size first, then that many individuals without
    # replacement.
    this.size <- sample(x = size.choices, size = 1, prob = size.weights)
    a.sample <- sample(x = pop, size = this.size, replace = FALSE)
    # Flag the individuals that were caught in this sample.
    dat[, i] <- dat[, 1] %in% a.sample
}
# Schnabel bookkeeping table: one row per sample.
# n.sampled is the catch size of each sample, i.e. the column totals of the
# 0/1 capture indicators. Column 1 of `dat` is skipped because it holds the
# individual IDs rather than indicators.
schnabel.comp <- data.frame(sample = seq_len(n.samples),
                            n.sampled = colSums(dat)[-1])

# n.prev.sampled[k]: individuals caught in sample k that had already been
# caught in at least one earlier sample. The first sample has none.
n.prev.sampled <- c(0, rep(NA, n.samples - 1))
n.prev.sampled

# Sample k lives in column k + 1 of `dat`. drop = FALSE keeps the block of
# earlier samples a matrix even when it is a single column, so sample 2
# needs no special case; seq_len()[-1] is empty when n.samples < 2, so the
# loop never counts backwards.
for (k in seq_len(n.samples)[-1]) {
    seen.before <- rowSums(dat[, 2:k, drop = FALSE]) > 0
    n.prev.sampled[k] <- sum(dat[, k + 1] == 1 & seen.before)
}

schnabel.comp$n.prev.sampled <- n.prev.sampled
schnabel.comp$n.newly.sampled <- with(schnabel.comp,
                                      n.sampled - n.prev.sampled)

# cum.sampled[k]: distinct individuals caught before sample k, i.e. the
# running total of newly caught individuals shifted down by one sample.
# head(x, -1) drops the last element explicitly, avoiding the precedence
# trap in `2:n.samples-1`, which parses as `(2:n.samples) - 1`.
newly.cum <- cumsum(schnabel.comp$n.newly.sampled)
schnabel.comp$cum.sampled <- c(0, head(newly.cum, -1))
schnabel.comp$numerator <- with(schnabel.comp,
                                n.sampled * cum.sampled)

# Running population estimate after each sample: cumulative numerator
# divided by cumulative recaptures. The first entry is 0/0 = NaN because
# both running totals are still zero after the first sample.
schnabel.comp$pop.estimate <- cumsum(schnabel.comp$numerator) /
    cumsum(schnabel.comp$n.prev.sampled)

# Plotting dependencies: install on first use, then attach. library() is
# used for attaching so that a failed installation stops the script
# instead of silently continuing.
if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")
library(ggplot2)
if (!requireNamespace("scales", quietly = TRUE)) install.packages("scales")
library(scales)


small.sample.dat <- schnabel.comp

# Running population estimate against sample number, with the population
# size N drawn as a red horizontal reference line.
small.sample <- ggplot(data = small.sample.dat,
                       mapping = aes(x = sample, y = pop.estimate)) +
    geom_point(size = 2) +
    geom_line() +
    geom_hline(yintercept = N, col = "red", lwd = 1) +
    # Limits are c(min, max) pairs; a full sequence such as 0:100 is not a
    # valid limit specification.
    coord_cartesian(xlim = c(0, 100), ylim = c(300, 500)) +
    scale_x_continuous(breaks = pretty_breaks(11)) +
    scale_y_continuous(breaks = pretty_breaks(11)) +
    labs(x = "\nSample", y = "Population estimate\n",
         title = "Sample sizes are between 5% and 15%\nof the population") +
    theme_bw(base_size = 12) +
    theme(aspect.ratio = 1)

我的想法是使用嵌套的 ifelse 語句,如下所示:

# Sizes of the second and third populations.
N.2 <- 800
N.3 <- 300

# Individuals are labelled 1..N.2 and 1..N.3 so that each population has
# exactly as many members as its declared size. (`c(401:N.2)` holds only
# 400 individuals, and `c(801:N)` counts downwards whenever N is below 801.)
pop.2 <- seq_len(N.2)
pop.3 <- seq_len(N.3)

# Smallest / largest sample sizes for population 2: 5% and 15% of N.2.
lower.bound.2 <- round(x = .05 * N.2, digits = 0)
upper.bound.2 <- round(x = .15 * N.2, digits = 0)

# Smallest / largest sample sizes for population 3: 5% and 15% of N.3.
lower.bound.3 <- round(x = .05 * N.3, digits = 0)
upper.bound.3 <- round(x = .15 * N.3, digits = 0)

也許有些置換...

# NOTE(review): this is a sketch of an idea, not runnable R. `imatrix` is
# not defined in the visible code, `,=` is not an R operator, `> 201` has no
# left-hand operand, and `yes = nrow = ...` is not a valid argument form.
# The apparent intent is to choose the number of matrix rows from the length
# of pop, pop.2 or pop.3 depending on n.samples -- TODO confirm with author.
dat <- imatrix(ifelse(n.samples ,= 100), 
       yes = nrow = length(pop), 
       no = ifelse(n.samples > 100 & > 201), 
       yes = nrow = length(pop.2), 
       no = nrow = length(pop.3), 
       ncol = n.samples + 1) 
+0

「從1-100樣本取100個樣本是5%的人口(400)的15%之間」是什麼意思? –

+0

如果'N = 400',爲什麼你要把人口3變成'c(801:N)'? – Onyambu

+0

你可以嘗試換個方式表述嗎?很難看懂你想做什麼 –

回答

0

這是不是你想要做的?我在下面寫的函數 mark_recapture 有四個參數(樣本數、總體大小、樣本大小的下限和上限),並輸出一個矩陣,其中行代表總體中的個體,列代表樣本。如果某個個體在給定的樣本中被捕獲,對應的值爲 1,否則爲 0。定義函數後,您可以用 3 種不同的總體大小運行 3 次,得到 3 個不同的矩陣。

# simulation settings
num_samp <- 100          # number of samples to draw per population
upper_sampsize <- 0.15   # largest sample size, as a fraction of the population
lower_sampsize <- 0.05   # smallest sample size, as a fraction of the population

# Simulate capture histories for a mark-recapture study.
#
# Arguments:
#   num_samp        number of samples (capture occasions) to draw
#   pop_size        number of individuals in the population
#   lower_sampsize  smallest sample size, as a fraction of pop_size
#   upper_sampsize  largest sample size, as a fraction of pop_size
#
# Returns a pop_size x num_samp matrix of 0/1 values: entry [r, c] is 1 when
# individual r was caught in sample c, and 0 otherwise.
#
# Stops with an error when no whole-number sample size lies between the
# two bounds.
mark_recapture <- function (num_samp, pop_size, lower_sampsize, upper_sampsize) {

    # one row per individual, one column per sample, nothing caught yet
    mat <- matrix(0, pop_size, num_samp)

    # smallest and largest whole-number sample sizes inside the bounds
    # (named *_size so that base min()/max() are not shadowed)
    min_size <- ceiling(lower_sampsize*pop_size)
    max_size <- floor(upper_sampsize*pop_size)
    if (min_size > max_size) {
        stop("no whole-number sample size between lower_sampsize and ",
             "upper_sampsize for pop_size = ", pop_size, call. = FALSE)
    }

    # One random size per sample, uniform on min_size..max_size. Drawing an
    # offset with sample.int() sidesteps sample()'s special case in which a
    # single number x is treated as 1:x, which would give wrong sizes
    # whenever min_size == max_size.
    n_sizes <- max_size - min_size + 1
    samp_sizes <- min_size - 1 + sample.int(n_sizes, num_samp, replace = TRUE)

    # mark the individuals caught in each sample; seq_len() makes the loop
    # a no-op when num_samp is 0
    for (i in seq_len(num_samp)) {
        mat[sample.int(pop_size, samp_sizes[i]), i] <- 1
    }

    mat
}

# simulate one capture matrix per population, in the order 400, 800, 300
sims <- lapply(c(400, 800, 300), function(size) {
    mark_recapture(num_samp=num_samp, pop_size=size,
                   lower_sampsize=lower_sampsize, upper_sampsize=upper_sampsize)
})
mat1 <- sims[[1]]
mat2 <- sims[[2]]
mat3 <- sims[[3]]

雖然超出了這個問題的範圍,但我想提一下,有專門用於分析和模擬標誌重捕數據的 R 程序包,例如 multimark。只需 Google「CRAN mark recapture」即可找到許多選項。我建議仔細看一看這些程序包,並仔細想想你在這裏要做什麼,因爲你可能是在重新發明輪子。