2017-10-20 65 views
0

我寫了一個循環,遍歷給定.csv的各列,並對每一列運行anova和事後(post hoc)檢驗。然後我想把每次的結果合併到一個數據框中,並將其導出到一個.csv文件。但是,我無法讓rbind()正確構建我的data.frame:rbind()似乎只處理了最後3個anova結果?對此有何幫助?下面是該腳本:

# Runs a one-way ANOVA and a Tukey HSD post-hoc test for each measurement
# column of the data sheet, collects the results into two data frames and
# exports them as CSV files. Only base R is used.

# NOTE(review): everything below depends on this directory existing; verify
# the path before running.
setwd("~/School/Lab/mice/sugar_study_2015/MG-RAST and Metagenassist/Trimmed/R. CSV")

# Save your datasheet into variable x
x <- read.csv("T0_B_Class_Anova.csv")

# Keep at most the first nine rows and treat missing values as zero.
x <- head(x, 9L)
x[is.na(x)] <- 0

if (!"Group" %in% names(x)) {
  stop("The data sheet must contain a 'Group' column.", call. = FALSE)
}

# TukeyHSD() only compares the levels of factors, so make sure the grouping
# variable is one (read.csv() returns character columns since R 4.0).
x$Group <- factor(x$Group)

# Columns to test: as in the original loop bounds, the first and the last
# column are skipped.
# NOTE(review): confirm that the last column is meant to be excluded.
test_cols <- names(x)[seq_len(ncol(x) - 1L)[-1L]]
test_cols <- setdiff(test_cols, "Group")

if (length(test_cols) == 0L) {
  stop("No measurement columns found to test.", call. = FALSE)
}

# Collect one result table per column and bind them once after the loop.
# Growing a data frame with rbind() inside the loop is slow, and rebuilding
# the name column on every pass made cbind() fail on mismatched row counts.
anova_list <- vector("list", length(test_cols))
tukey_list <- vector("list", length(test_cols))

for (k in seq_along(test_cols)) {
  columns <- test_cols[k]

  ## Fit the model once - 'Group' being the grouping factor
  fit <- aov(x[[columns]] ~ Group, data = x)

  ## ANOVA table for this column
  anovaresult <- anova(fit)
  anova_list[[k]] <- as.data.frame(anovaresult)

  ## Tukey's post-hoc test on the same fit
  posthocresult <- TukeyHSD(fit)
  tukey_list[[k]] <- as.data.frame(posthocresult[["Group"]])

  ## Print the per-column results to the console
  print(columns)
  print(anovaresult)
  print(posthocresult)
}

DF.Anova <- do.call(rbind, anova_list)
DF.Tukey <- do.call(rbind, tukey_list)

# Label every result row with the column it came from. The number of rows
# per test is read from each table instead of being hard-coded (2 and 3),
# and the labels follow the order in which the tests were run, so they
# always line up with the results.
### dont forget to change this **************************
finalanova_BFT0 <- cbind(
  rn = rownames(DF.Anova),
  DF.Anova,
  Names = rep(test_cols, times = vapply(anova_list, nrow, integer(1)))
)

### dont forget to change this ****************************
finalposthoc_BFT0 <- cbind(
  rn = rownames(DF.Tukey),
  DF.Tukey,
  Names = rep(test_cols, times = vapply(tukey_list, nrow, integer(1)))
)

write.csv(finalanova_BFT0, file = "testfinalanova_BCT0")

write.csv(finalposthoc_BFT0, file = "finalposthoc_BCT0")

你可以找到示例.csv here

+1

你確定你的數據被正確讀取了嗎?你的`setwd`調用看起來不正確 – tbradley

+0

如果你提供一個[最小可重現的例子](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example),而不是只鏈接到數據,回答你的問題會更容易。 –

+0

我已編輯數據集以包含3列。我不想從腳本中刪除任何內容,因爲我不確定它在哪裏出錯。我盯着這件事一兩個小時,無法弄清楚。 – Haley

回答

0

假設您想要的輸出是兩個數據框,分別彙總兩種不同檢驗的結果,您可以使用purrr包的map函數和broom包的tidy函數來完成。我把您發佈的csv保存爲anova-question-data.csv。如果您打算使用setwd,我建議先確認您的數據是否被正確讀取。下面是我用來得到這兩個數據框的代碼:

# Load the example data set
df <- read_csv(file = "anova-question-data.csv")

# Names of every column except the first; each one is tested in turn
loop_list <- colnames(df[, -1])

# One tidy ANOVA table per tested column
anova_list <- map(loop_list, function(col_name) {
  fit <- aov(df[[col_name]] ~ df[["Group"]])
  tidied <- broom::tidy(anova(fit))

  # Tag the rows with the name of the column that was tested.
  # "bacteria" is only a label; rename it freely, nothing else depends on it.
  mutate(tidied, bacteria = col_name)
})

# Stack the per-column tables into a single data frame
anova_df <- do.call(rbind, anova_list)

# Same procedure again, with `TukeyHSD` in place of `anova`
posthoc_list <- map(loop_list, function(col_name) {
  fit <- aov(df[[col_name]] ~ df[["Group"]])
  tidied <- broom::tidy(TukeyHSD(fit))

  mutate(tidied, bacteria = col_name)
})

posthoc_df <- do.call(rbind, posthoc_list)

這會給出以下兩個輸出(我只顯示了前5行):

> head(anova_df, 5) 
     term df  sumsq  meansq statistic p.value   bacteria 
1 df[["Group"]] 2 1.265562e-07 6.327809e-08 0.02650174 0.9739597  Acidobacteria 
2  Residuals 6 1.432617e-05 2.387695e-06   NA  NA  Acidobacteria 
3 df[["Group"]] 2 9.332880e-02 4.666440e-02 0.84001916 0.4768300  Actinobacteria 
4  Residuals 6 3.333096e-01 5.555159e-02   NA  NA  Actinobacteria 
5 df[["Group"]] 2 9.114521e-04 4.557261e-04 1.08994816 0.3946484 Alphaproteobacteria 


> head(posthoc_df, 5) 
      term comparison  estimate  conf.low conf.high adj.p.value  bacteria 
1 df[["Group"]]  HF-CO 2.234233e-04 -0.003647709 0.004094556 0.9829095 Acidobacteria 
2 df[["Group"]]  HFS-CO -4.903533e-05 -0.003920168 0.003822097 0.9991677 Acidobacteria 
3 df[["Group"]]  HFS-HF -2.724587e-04 -0.004143591 0.003598674 0.9747264 Acidobacteria 
4 df[["Group"]]  HF-CO 2.345822e-01 -0.355886402 0.825050849 0.4856694 Actinobacteria 
5 df[["Group"]]  HFS-CO 1.907267e-01 -0.399741917 0.781195333 0.6084817 Actinobacteria