2016-03-10 36 views
1

開始之前:我知道關於這個主題已經有很多問題,也有各種處理方法(plyr、循環、data.table 等),但我希望有人能用一種非常簡潔的方式幫我解決這個問題。我有一個數據框(「df」),比方說有14列:1)因子列(「因子」),2)自變量列(「I」),以及 3)8 個因變量列(「D1:D10」)。我想對每個 D–I 組合分別擬合「lm」,並按因子分組。這個數據集中含有 NA。

按因子跨數據框應用 lm

我試過這種方法:先針對單個變量定義一個函數,然後嘗試把它應用到整個數據框:

# Attempt to fit one lm() per factor level for a single column `i`.
#
# i: the value handed over by the caller (one column of `df` when driven by
#    lapply over a data frame).
# Returns the result of by(), i.e. the lm() fit for each subset of `df`.
#
# NOTE(review): `d` is not defined anywhere in this snippet, and `i` is the
# function argument rather than a column of the per-group subset `x`. lm()
# therefore presumably evaluates `i` at its full length against the shorter
# subset, which matches the "variable lengths differ (found for 'i')" error
# reported in the question. Also, the sample data names its grouping column
# `Factor`, not `factor` -- confirm which one `df` actually uses.
onevariable <- function(i){ 
     by(df, df$factor, 
     function(x) lm(d ~ i, data = x)) 
} 

# Call onevariable() once for each of the columns at positions 3:10 of `df`;
# lapply passes every column as a bare vector, without its column name.
lapply(df[3:10], FUN = onevariable) 

但我得到的是「variable lengths differ (found for 'i')」(變量長度不同)的錯誤。我研究過其他方法,也理解它們如何與「mean」或其他更簡單的函數一起使用,但我承認自己太新手,還不清楚如何把這些通用的概念應用到我數據集的「lm」上。

基本上,我正在尋找一種簡潔的方式來做到這一點,以便把這些概念應用到我的其他工作中,因爲到目前爲止,我還沒能在這些數據上成功運用「by」、split-apply-combine 等方法。

任何幫助,非常感謝!

編輯:我的數據的實例子集:

dput(example) 
structure(list(Factor = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L), .Label = c("ALA010C1", "DEP010C1", "ECN010C1", "FEN020C1" 
), class = "factor"), I = c(3.48321887, 1.446336943, 2.362627675, 
3.456785613, 3.070335007, 4.882472376, 2.026688629, 0.978996431, 
3.558568307, -5.336255193, -5.60451918, -4.735481333, -3.726817281, 
1.498296682, 2.930967205, 0.805149501, 1.080092548, 0.72616109, 
1.099140743, 2.41711586, 0.792410476, 0.98957854, -2.775840991, 
-0.06779079, 1.713816308, 2.729120928, 2.7707559, 1.060674462, 
0.946561155, 1.815599002, 1.127049531, 2.109024916, 2.712345138, 
1.815599002, 0.878352905, 0.72616109, 1.446336943, 0.830150804, 
0.924338019, 1.734019015, 0.805149501, 0.72616109, 0.935511319, 
0.712367768, 0.578836375, 1.070430637, 0.712367768, 1.693197021, 
1.370175582, 1.63419868, 0.640394268, 0.739766742, 0.530046211, 
1.089661999, 1.050822166, 0.779507071, 0.792410476, 0.753189762, 
0.766434989, 0.842420896, 1.333541449, 1.206386273, 1.84287342, 
1.108530483, 1.239722694, 1.703559808, 0.98957854, 1.398546279, 
2.367946836, 2.640259411, 1.010412627, 0.890048945, 1.485557656 
), D1 = c(4.17438727, 4.766438334, 4.559126247, 4.127134385, 
4.317488114, 3.839452313, 4.40060302, 4.945207489, 4.051784948, 
5.49100171, 5.418320159, 5.409411414, 5.455321115, 4.430816799, 
3.998200702, 4.350277936, 4.859812404, 4.276666119, 4.043051268, 
4.085976313, 4.102643365, 4.182050143, 3.828641396, 5.92157842, 
4.158883083, 3.828641396, 3.860729711, 5.013963084, 5.033700567, 
4.40060302, 4.993828176, 4.151039906, 3.749504076, 4.483002552, 
7.66645609, 7.654443226, 7.320857814, 7.621684999, 7.725330038, 
4.543294782, 7.680637428, 7.686850772, 7.510704192, 7.674617497, 
7.556427969, 6.920177799, 7.558777533, 6.65544035, 7.288244401, 
6.914730893, 7.501082124, 7.495541944, 7.803843304, 7.823645931, 
7.644919345, 7.815610532, 7.849518807, 7.673223121, 7.835974582, 
7.769589825, 7.288586106, 7.260522598, 6.501289671, 7.337587744, 
7.528331767, 7.17395832, 7.74694896, 7.090076836, 6.478509642, 
5.647212123, 7.737834314, 7.887584032, 7.193685818), D2 = c(5.78, 
6.94, 6.64, 6.13, 5.82, 6, 6.92, 7.22, 5.22, 7.45, 7.25, 6.98, 
7.33, 3.96, 4.2, 3.95, 3.98, 4.05, 4.08, 4.07, 3.95, 3.84, 4.96, 
7.33, 5.78, 5.11, 5.24, 6.91, 7.02, 6.22, 6.8, 6.2, 5.22, 6.24, 
7.04, 7.43, 6.88, 7.3, 7.59, 6.86, 7.01, 7.37, 7.23, 7.07, 7.13, 
6.8, 7.42, 6.47, 6.66, 6.86, 7.21, 7.26, 7.62, 7.4, 6.98, 7.4, 
7.28, 7.2, 7.34, 7.22, 6.8, 6.81, 5.9, 6.95, 6.67, 6.67, 6.86, 
6.89, 6.65, 5.62, 7.055, 6.94, 6.71), D3 = c(0.598836501, -0.139262067, 
-0.314710745, -0.223143551, 0.058268908, -0.116533816, -0.223143551, 
-0.616186139, 0.182321557, -1.108662625, -1.049822124, -0.94160854, 
NA, 0.470003629, 0.182321557, 0.405465108, 0.78845736, 0.09531018, 
0.09531018, 0.182321557, 0.262364264, 0.2390169, 0.378436436, 
-0.139262067, 0.727548607, 0.770108222, 0.56815069, 0.39877612, 
0.343589704, 0.42199441, 0.438254931, 0.2390169, 0.165514438, 
0.524728529, 1.928618652, 2.312535424, 2.054123734, 1.859418118, 
1.791759469, 1.348073148, 1.982379829, 1.74745921, 1.795087259, 
1.774952351, 1.458615023, 1.40854497, 1.850028377, 1.324418957, 
1.736951233, 1.266947603, 1.885553349, 2.048982334, 2.1102132, 
2.117459609, NA, 2.302585093, 2.182674796, 2.210469804, 2.646174797, 
2.624668592, 2.071913275, 1.460937904, 0.88376754, 1.617406082, 
1.648658626, 1.335001067, 1.74745921, 1.32175584, 0.737164066, 
0.587786665, 1.064710737, 1.08180517, 1.098612289), D4 = c(-1.347073648, 
0.285178942, -0.579818495, -1.660731207, -0.798507696, -1.897119985, 
-0.84397007, -0.223143551, -1.609437912, -2.302585093, -2.207274913, 
-2.525728644, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -2.659260037, -2.302585093, 
-2.659260037, -2.525728644, NA, NA, -2.407945609, -2.302585093, 
-1.832581464, -1.771956842, NA, -2.525728644, -2.302585093, NA, 
NA, -3.506557897, -2.813410717, -2.525728644, -2.407945609, -2.525728644, 
NA, -2.407945609, -2.207274913, -2.995732274, -2.813410717, -0.385662481, 
-2.407945609, -2.813410717, -2.813410717, -2.813410717, -2.659260037, 
-2.525728644, -2.525728644, -3.218875825, -3.218875825, -2.040220829, 
-3.506557897, -2.659260037, -2.659260037), D5 = c(-1.937941979, 
-1.560647748, -2.111964733, -2.590267165, -1.826350914, -2.282782466, 
-1.826350914, -1.406497068, -1.443923474, -1.714798428, -1.660731207, 
-1.021651248, -1.609437912, NA, NA, NA, NA, NA, NA, NA, -1.021651248, 
-2.525728644, -1.973281346, -1.903808973, -2.385966702, -2.688247574, 
-2.419118909, -2.353878387, -2.465104022, -2.864704011, -2.603690186, 
-2.995732274, -2.882403588, -2.430418465, 0.438254931, 0.683096845, 
0.148420005, 0.841567186, 0.636576829, -0.28103753, 0.850150929, 
0.300104592, 0.703097511, 0.451075619, 0.625938431, 0.412109651, 
0.951657876, 0.78845736, 0.559615788, 0.019802627, 0.904218151, 
0.652325186, 0.802001585, 0.506817602, NA, 0.760805829, 0.667829373, 
0.932164081, 0.806475866, 0.774727168, 0.198850859, 0.2390169, 
-0.563874845, 0.765467842, 0, NA, 0.594431208, 0.009950331, -0.476424197, 
-1.505077897, 1.057790294, 0.620576488, 0.173953307), D7 = c(1.824549292, 
2.48490665, 2.332143895, 1.840549633, 2.32238772, 1.280933845, 
1.987874348, 2.985681938, 1.335001067, 1.098612289, NA, NA, NA, 
1.547562509, 1.252762968, 1.481604541, 1.360976553, 1.30833282, 
1.131402111, 0.916290732, 1.16315081, 0.993251773, 1.223775432, 
1.098612289, 0.916290732, 0.641853886, 0.875468737, 1.029619417, 
1.16315081, 1.029619417, 1.064710737, 0.832909123, 0.693147181, 
0.78845736, 5.765191103, 5.703782475, 5.472270674, 5.877735782, 
5.945420609, 5.135798437, 5.888877958, 5.111987788, 5.683579767, 
5.846438775, 5.897153868, 5.214935758, 5.762051383, 4.709530201, 
5.488937726, 5.209486153, 5.796057751, 5.869296913, 6.150602768, 
5.828945618, NA, 6.23636959, 8.196987927, 8.067776196, 8.457443187, 
5.950642553, 5.529429088, 5.641907071, 4.65396035, 5.572154032, 
5.65248918, 5.407171771, 6.070737728, 5.416100402, 4.229749199, 
3.478158423, 5.869296913, 6.104793232, 5.332718793), D8 = c(2.028148247, 
2.370243741, 2.714694744, 2.424802726, 2.433613355, 2.163323026, 
2.424802726, 2.4765384, 2.054123734, 1.85629799, NA, NA, NA, 
2.163323026, 2.00148, 2.48490665, 2.397895273, 2.140066163, 1.609437912, 
1.960094784, 2.014903021, 2.163323026, 2.312535424, 1.704748092, 
1.458615023, 1.16315081, 1.147402453, 1.740466175, 1.85629799, 
1.677096561, 1.686398954, 1.547562509, 1.280933845, 1.360976553, 
6.035481433, 5.993961427, 5.549076085, 5.926926026, 5.768320996, 
4.905274778, 5.905361848, 5.826000107, 5.908082938, 5.774551546, 
5.831882477, 5.135798437, 4.787491743, 4.86753445, 5.293304825, 
5.081404365, 5.707110265, 5.579729826, 5.805134969, 5.955837369, 
NA, 5.743003188, 5.991464547, 3.610917913, 5.828945618, 6.018593214, 
5.505331536, NA, 4.634728988, 5.521460918, 5.488937726, 5.068904202, 
5.480638923, 5.147494477, 4.248495242, 3.663561646, 5.669880923, 
5.843544417, 4.912654886), D9 = c(-2.488914671, -1.703748592, 
-2.538307427, -3.352407217, -2.3330443, -3.270169119, -2.198225078, 
-1.64506509, -1.807888851, -1.832581464, -1.771956842, -1.560647748, 
-1.660731207, -2.407945609, -2.882403588, -2.120263536, -2.207274913, 
NA, NA, -2.207274913, NA, -2.780620894, -2.103734234, -2.343407088, 
-3.381394754, -3.575550769, -2.946942109, -2.796881415, -2.748872196, 
-3.540459449, -2.995732274, -3.863232841, -3.816712826, -2.937463365, 
0.39877612, NA, -0.130108685, 0.548121409, 0.182321557, -0.62735944, 
0.604315967, -0.0965109, 0.620576488, 0.512823626, 0.58221562, 
-0.0965109, 0.678033543, 0.476234179, -0.151986357, -0.176737179, 
NA, NA, NA, NA, NA, 0.392042088, 0.21511138, NA, NA, 0.364643114, 
-0.336872317, -0.036663984, -0.703197516, -1.145703896, NA, 0.039220713, 
-0.40947313, -0.765717873, -0.823255866, -1.589635285, 0.774727168, 
-0.068278841, -0.134674903), D10 = c(1.481604541, 1.686398954, 
1.609437912, 1.360976553, 1.335001067, 1.098612289, 1.547562509, 
1.648658626, 1.30833282, 3.218875825, NA, NA, NA, 1.223775432, 
0.741937345, 1.029619417, 1.029619417, 0.875468737, 0.641853886, 
0.530628251, 0.832909123, 0.641853886, 1.386294361, 4.122283931, 
2.370243741, 1.916922612, 1.967112357, 3.33220451, 3.377587516, 
2.677590994, 3.335769576, 2.388762789, 2.028148247, 2.862200881, 
3.914021008, NA, 3.966511191, 4.087655574, 3.864931398, 3.42751469, 
4.043051268, 3.795489189, 3.891820298, 4.016383021, 4.286341385, 
3.514526067, 3.923951576, 2.939161922, 3.496507561, NA, NA, NA, 
NA, NA, NA, 4.018183201, 3.929862924, NA, NA, 3.875359021, 3.218875825, 
3.658420247, 2.694627181, 3.511545439, NA, 3.222867846, 3.883623531, 
3.258096538, 2.557227311, 1.824549292, 3.706228092, 3.756538103, 
3.314186005), D11 = c(0.587786665, 0.875468737, 0.916290732, 
0.693147181, 0.587786665, 0.405465108, 0.832909123, 0.955511445, 
0.530628251, 2.63905733, NA, NA, NA, 0.641853886, 0.182321557, 
0.262364264, 0.09531018, 0.182321557, -0.105360516, -0.356674944, 
0.09531018, 0, 0.78845736, 2.624668592, 0.955511445, 0.530628251, 
0.587786665, 1.85629799, 1.916922612, 1.266947603, 1.85629799, 
0.993251773, 0.530628251, 1.410986974, 2.970414466, NA, 2.624668592, 
2.862200881, 2.850706502, 2.292534757, 2.917770732, 2.772588722, 
2.76000994, 2.884800713, 2.954910279, 2.406945108, 2.856470206, 
2.079441542, 2.564949357, 2.208274414, NA, NA, NA, NA, NA, 3.015534901, 
2.867898902, NA, NA, 2.970414466, 2.549445171, 2.442347035, 1.757857918, 
2.501435952, NA, 2.116255515, 2.923161581, 2.360854001, 1.435084525, 
0.832909123, 2.753660712, 2.809402695, 2.360854001), D12 = c(4.17438727, 
4.766438334, 4.559126247, 4.127134385, 4.317488114, 3.839452313, 
4.40060302, 4.945207489, 4.051784948, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("Factor", 
"I", "D1", "D2", "D3", "D4", "D5", "D7", "D8", "D9", "D10", "D11", 
"D12"), class = "data.frame", row.names = c(NA, -73L)) 
+0

你的函數中的 'd' 是什麼?也許可以試試 `lapply(df[3:10], function(x) by(cbind(df[1:2], x), df$Site, function(y) lm(y[, 1] ~ y[, 3], data = y)))`。請提供一個示例數據。 – akrun

+0

前面的代碼中有一個錯誤。改用 `lapply(df[3:10], function(x) {df2 <- cbind(df[1:2], x); by(df2[-1], df2[1], FUN = function(y) lm(x ~ ., data = y))})`,它在我創建的示例上可以正常運行。 – akrun

+0

感謝您的快速回覆,@akrun。函數中的「d」表示一個因變量列。當我嘗試第二段代碼時,出現了「0 (non-NA) cases」(0 個非 NA 觀測)的錯誤,這是因爲我的某些因子水平在某些因變量上沒有任何值。我之前在這個數據集上也遇到過這個錯誤。有什麼辦法可以解決這個問題嗎? – user2943039

回答

1

我們可以用 if/else 條件:如果 'x' 中的所有值都是 NA,就返回 NA,否則執行 lm

# Fit one linear model per (dependent variable, grouping level) pair.
#
# `lst` is a list with one element per column at positions 3:10 of `example`.
# Each element is a `by` object holding, for every level of the first column
# of `example`, either an `lm` fit of that dependent variable on the
# predictor in the second column, or NA when no model can be fitted.
lst <- lapply(example[3:10], function(x) {
  # Columns 1:2 carry the grouping factor and the predictor; `x` is the
  # dependent variable currently being processed.
  df2 <- cbind(example[1:2], x)
  by(df2[-1], df2[1], FUN = function(y) {
    # lm() stops with "0 (non-NA) cases" when no row has every model
    # variable present. Checking for at least one complete row covers an
    # all-NA response as well as an all-NA (or non-overlapping) predictor.
    if (!any(complete.cases(y))) {
      NA
    } else {
      lm(x ~ ., data = y)
    }
  })
})
+0

謝謝 @akrun!請告訴我你對我同時想出的這個解決方案有什麼看法。 擬合< - 函數(df,id = c(「因子」,「I」)) m_c [m] (係數(mod),c(「截距」,「斜率」)) } 結果< - ddply(m.cc,。(因子(數值)) 結果< - ddply ,可變),適合)' – user2943039

+0

@user2943039 這看起來不錯。您把寬格式轉換成了長格式,並使用了其中一種分組(group by)方法。 – akrun

相關問題