2012-07-31 158 views
0

我想用ICC計算的輸出創建一個數據框(請參閱下面的示例數據和代碼)。查看新的數據框時,一切看起來都正常,但用str()檢查後卻發現所有變量都是因子:我的結果實際上成了因子水平,實際數據只是以各因子水平的編號形式出現。這個問題對我影響很大,因爲我還想對這個新數據框做進一步處理。如何才能從ICC輸出正確地創建數據框?

我對R還比較陌生,不知道該如何修改我的代碼來糾正這個問題。

數據集:

# Example dataset: a 17-row data frame with one row per individual.
# Columns: Individual_ID (character), Region ("South" or "North"), and nine
# numeric measurement columns P1..P9 (columns 3 to 11).
WW_Wing_13C_Summary <- structure(list(Individual_ID = c("WW_08A_02", "WW_08A_03", 
"WW_08A_04", "WW_08A_05", "WW_08A_06", "WW_08A_08", "WW_08A_09", "WW_08A_13", 
"WW_08B_02", "WW_08G_01", "WW_08G_02", "WW_08G_05", "WW_08G_07", 
"WW_08I_01", "WW_08I_03", "WW_08I_07", "WW_08I_12"), Region = c("South", 
"South", "South", "South", "South", "South", "South", "South", 
"South", "North", "North", "North", "North", "North", "North", 
"North", "North"), P1 = c(-18.67, -20.06, -16.54, -20.33, -21.28, 
-23.86, -21.3, -21.34, -20.87, -20.32, -19.35, -21.2, -21.61, 
-18.3, -22.3, -21.6, -24.12), P2 = c(-19.16, -20.3, -15.6, -20.28, 
-21.24, -23.95, -21.44, -24.13, -20.95, -20.02, -19.38, -21.29, 
-21.42, -18.53, -22.2, -21.77, -24.08), P3 = c(-20.38, -21.21, 
-16.61, -20.58, -21.22, -24, -21.49, -23.03, -20.76, -19.92, 
-19.7, -21.85, -21.84, -19.55, -22.18, -22.17, -24), P4 = c(-20.96, 
-22.9, -19.65, -20.8, -21.2, -24.16, -21.49, -21.77, -20.9, -20.05, 
-19.94, -22.22, -21.68, -20.18, -22.14, -22.21, -24.2), P5 = c(-21.61, 
-22.87, -20.98, -21.24, -21.47, -24.93, -21.1, -21.4, -21.02, 
-20.23, -20.43, -22.34, -21.79, -20.96, -21.55, -22.24, -24.16 
), P6 = c(-21.65, -21.13, -21.18, -20.94, -21.23, -24.93, -20.84, 
-21.57, -20.84, -20.73, -20.08, -22.42, -21.49, -21.08, -20.85, 
-22.47, -22.87), P7 = c(-21.31, -20.68, -21.7, -20.54, -21.89, 
-24.48, -20.78, -21.45, -21.11, -20.91, -20.81, -22.69, -21.88, 
-21.5, -23.1, -22.19, -22.51), P8 = c(-20.8, -20.58, -21.18, 
-21.04, -21.89, -24.17, -21.58, -21.32, -20.64, -19.87, -20.9, 
-22.75, -21.62, -17.42, -20.75, -21.89, -22.12), P9 = c(-21.28, 
-20.69, -21.33, -20.42, -21.6, -23.1, -20.76, -21.59, -20.11, 
-19.58, -19.24, -22.73, -21.54, -13.18, -20.9, -21.89, -22.3)), 
.Names = c("Individual_ID", "Region", "P1", "P2", "P3", "P4", "P5", 
"P6", "P7", "P8", "P9"), class = "data.frame", row.names = c(NA, -17L)) 

代碼:

## split the complete dataset by regions 
## (yields a named list with one data frame per distinct value of Region)
WW_Wing_13C_Summary_Region <- split(WW_Wing_13C_Summary, WW_Wing_13C_Summary$Region) 

library(psych) 
# calculate ICC for delta-13C and create a data frame with results for each region 
# For each region: run ICC() on columns 3 to 11 (P1..P9), keep the "results"
# and "summary" components, and flatten them with unlist(). sapply() collects
# one flattened vector per region, t() turns regions into rows, and the
# trailing index keeps eight of the flattened fields by position.
# NOTE(review): unlist() produces a single atomic vector, so if any kept field
# is character, every value becomes character (and data.frame() may then turn
# the columns into factors) -- this looks like the cause of the str() output.
# NOTE(review): the positional indices (49, 52, ...) depend on the exact layout
# of the ICC() output -- confirm against the installed psych version.
WW_Wing_13C_ICC <- data.frame(t(sapply(WW_Wing_13C_Summary_Region, 
    function(temp) unlist(ICC(temp[ , c(3:11)])[c("results", "summary")]))))[ , c(49, 52, 55, 58, 61, 9, 39, 45)] 

# View results 
WW_Wing_13C_ICC 

# View structure of dataframe 
str(WW_Wing_13C_ICC) 

回答

1

之所以會這樣,是因爲你在下面這段代碼中:

# The call from the question, repeated unchanged: unlist() flattens the
# selected ICC() components into one atomic vector per region before the
# columns are picked by position.
WW_Wing_13C_ICC <- data.frame(t(sapply(WW_Wing_13C_Summary_Region, 
    function(temp) unlist(ICC(temp[ , c(3:11)])[c("results", "summary")]))))[ , c(49, 52, 55, 58, 61, 9, 39, 45)] 

……(這段代碼我可能會稍微整理一下)使用了unlist。這樣做會把字符型和數值型的值混在同一個向量裏,而R會自動把所有值都轉換成字符型。之後你雖然只選取了想要的那些數值,但它們實際上已經是字符/因子了。

兩種解決方法:

1. 改寫上面的匿名函數,只提取你需要的內容,並確保所有值都是數值型

如...

# Run ICC() on the measurement columns (3 to 11) of one region's data frame
# and return only the pieces of interest, each as a one-row data frame:
#   sum - first row of the first table stored under the "summary" component
#   res - third row of the "results" table, restricted to the ICC estimate
#         and its lower/upper bounds
SUM <- function(temp) {
    icc_fit <- ICC(temp[, 3:11])
    # "summary" holds a list of tables; only its first table is used.
    summary_table <- icc_fit[["summary"]][[1]]
    results_table <- icc_fit[["results"]]
    list(
        # check.names = FALSE keeps column names that contain spaces intact.
        sum = data.frame(summary_table, check.names = FALSE)[1, ],
        res = results_table[3, c("ICC", "lower bound", "upper bound")]
    )
}

# Apply SUM() to every region and flatten one level, so the list alternates
# "<region>.sum", "<region>.res", "<region>.sum", "<region>.res", ...
sumres <- unlist(lapply(WW_Wing_13C_Summary_Region, SUM), recursive = FALSE)

# Odd positions hold the "sum" rows and even positions the "res" rows; stack
# each set with rbind() and place them side by side. TRUE/FALSE are spelled
# out because T and F are ordinary variables that can be reassigned.
DF <- data.frame(do.call(rbind, sumres[c(TRUE, FALSE)]), 
    do.call(rbind, sumres[c(FALSE, TRUE)]), check.names = FALSE)

# The row names carry a trailing ".sum" from the flattened list names; strip
# only that suffix so region names that themselves contain "." stay intact.
rownames(DF) <- sub("\\.sum$", "", rownames(DF))
DF 

2.手動更改類別:

如...

# Convert every column to numeric, one column at a time. Going through
# as.character() first makes a factor yield its labels rather than its
# internal integer codes. Assigning into NEW[] keeps the data frame's shape,
# column names and row names; apply() would go through a matrix, lose the
# row names, and collapse to a plain vector when there is only one row.
NEW <- WW_Wing_13C_ICC
NEW[] <- lapply(NEW, function(column) as.numeric(as.character(column)))
str(NEW) 
+0

我還算是個新手,所以請再詳細說明一下。你能用我的數據集和代碼示範一下該如何「整理」嗎?另外,我也不確定你的第一種解決方法該怎麼做。 – 2012-07-31 14:03:18

+0

@Keith 我已經在上面補充了。可以使用`browser()`來弄清楚這些代碼在做什麼,不過代碼本身應該已經相當清楚了。 – 2012-07-31 14:36:20

+0

謝謝,這比我的方法更有意義,我學到了一些東西! – 2012-07-31 16:25:52