2016-03-24 32 views
0

我是R新手,想弄清楚如何在R中做主成分分析(PCA)。到目前爲止,我已經嘗試了兩種方法,但都存在問題。我需要幫助:如何在R的PCA圖中按變量着色,以及如何添加橢圓。

這裏是第一代碼:

library("FactoMineR")

# Step 1: run the PCA on the raw input table and export the coordinates of
# the individuals on the first three components.
# (attach() was dropped: every column below is accessed explicitly via `$`,
# so attaching only risked masking other objects.)
my.data.7 <- read.csv("Principal Component Analysis Input 2.csv", header = TRUE)
head(my.data.7)
PCA_result_2 <- PCA(my.data.7, scale.unit = TRUE, ncp = 3, graph = TRUE)
result <- PCA_result_2$ind$coord
write.csv(result, file = "PCA ind_coord 2.csv")

# Step 2: read the table that holds the coordinates (Dim1..Dim3) together
# with the accession id (CDS) and the grouping column (Root.skin.colour).
# NOTE(review): presumably this file was assembled by hand from the export
# above -- confirm.
my.data.8 <- read.csv("Principal Component Analysis (second part) 2.csv")
head(my.data.8)
pc1 <- my.data.8$Dim1
pc2 <- my.data.8$Dim2
pc3 <- my.data.8$Dim3
acc <- my.data.8$CDS
colour <- my.data.8$Root.skin.colour

# Map each group to its plotting colour BY NAME. Indexing an unnamed palette
# with the factor's integer codes follows the alphabetical level order
# (Orange, Purple, Red, White, Yellow), which silently painted "Orange"
# black, "Purple" yellow, and so on. A named lookup cannot drift, and it
# also works when read.csv() returns a character column instead of a factor.
# "White" is drawn in black, as in the original palette (presumably so the
# points stay visible on a white background).
skin_palette <- c(
  Orange = "orange",
  Purple = "purple",
  Red    = "red",
  White  = "black",
  Yellow = "yellow"
)

# Fail loudly if the data contains a group the palette does not know about;
# otherwise those points would get an NA colour and vanish from the plot.
unmapped <- setdiff(as.character(na.omit(unique(colour))), names(skin_palette))
if (length(unmapped) > 0) {
  stop(
    "No plotting colour defined for: ", paste(unmapped, collapse = ", "),
    call. = FALSE
  )
}

point_col <- unname(skin_palette[as.character(colour)])
plot(pc1, pc3, col = point_col, pch = 19)

# The legend makes the group -> colour assignment verifiable on the plot.
legend(
  "topright",
  legend = names(skin_palette),
  col = skin_palette,
  pch = 19,
  title = "Root skin colour"
)

繪圖輸出是: Dim1, Dim3 plot

我對這個方法的問題是:1)我怎麼知道顏色已經被正確分配? 2)我如何添加橢圓?


這裏是我的第二個方法的代碼(我用這個網站尋求幫助,但我仍然堅持):

# One-time setup: install the helper packages only when they are missing,
# instead of reinstalling them on every run of the script.
# ggord is installed from GitHub (fawda123/ggord) via devtools.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("ggord", quietly = TRUE)) {
  devtools::install_github("fawda123/ggord")
}

library(devtools)
library(ggord)
library("FactoMineR")

# Read the input table. attach() was dropped: the columns are accessed
# explicitly below, so attaching only risked masking other objects.
my.data.9 <- read.csv("hello.csv")
head(my.data.9)

# PCA on columns 2 to 5, keeping three components.
woo <- PCA(my.data.9[, 2:5], scale.unit = TRUE, ncp = 3, graph = TRUE)

# Ordination plot of the PCA result, grouped by root skin colour.
ggord(woo, my.data.9$Root.skin.colour)

繪圖: Dim1, Dim2 plot

我在這裏的問題是,我想要像以前一樣繪製第一維和第三維,而不是第一和第二維,但不知道如何指定。另外我不明白如何改變顏色。

我這樣做的方向對嗎?如果可能的話,我會非常感謝任何幫助。我不太清楚自己在做什麼,現在本該在撰寫我的文章,卻還沒有完成分析。另外很抱歉,文件取了「hello」這樣奇怪的名字,我當時很沮喪。


這是你要的

> dput(my.data.8) 
structure(list(CDS = structure(1:69, .Label = c("CDS010", "CDS011", 
"CDS012", "CDS013", "CDS015", "CDS016", "CDS017", "CDS019", "CDS020", 
"CDS021", "CDS022", "CDS023", "CDS024", "CDS027", "CDS028", "CDS029", 
"CDS030", "CDS031", "CDS032", "CDS033", "CDS034", "CDS035", "CDS036", 
"CDS037", "CDS038", "CDS039", "CDS040", "CDS042", "CDS043", "CDS044", 
"CDS045", "CDS046", "CDS047", "CDS048", "CDS049", "CDS050", "CDS051", 
"CDS052", "CDS053", "CDS054", "CDS056", "CDS058", "CDS059", "CDS060", 
"CDS061", "CDS062", "CDS064", "CDS066", "CDS067", "CDS068", "CDS070", 
"CDS072", "CDS073", "CDS075", "CDS076", "CDS078", "CDS079", "CDS080", 
"CDS081", "CDS082", "CDS083", "CDS084", "CDS085", "CDS086", "CDS087", 
"CDS089", "CDS090", "CDS091", "CDS092"), class = "factor"), Dim1 = c(0.989923706, 
1.002847033, -0.323384931, 0.0280602, -2.103144589, -1.1750233, 
-0.297369615, -1.285073349, -1.18724867, -2.440381033, 2.451488481, 
0.432753586, -0.324628407, 1.83987238, 1.082150477, 1.222767528, 
0.648419317, 1.17034895, -0.959949524, 0.405826882, 3.578749912, 
1.315904789, -0.69599653, -2.650500936, 2.847954059, -1.124700789, 
-1.345309845, -1.571390397, 0.808331242, -0.126459344, 1.978484169, 
-0.372882529, -1.941508494, -1.895565455, -2.308079318, -2.734023717, 
-3.326982705, -0.62297258, 0.4400687, 0.878134622, -0.143118506, 
2.902361971, 1.852738657, 1.318157841, 1.525866109, 0.527018259, 
-2.17646324, -0.938267968, -0.663267011, -1.626999833, -0.725444227, 
4.181058153, -1.663567082, -0.797809065, -0.660857937, 1.275243335, 
-1.246799754, -0.658948097, 3.148052501, 1.22737428, 5.770370659, 
-0.659363823, 0.201377447, -0.250249239, -3.29492153, -2.525333499, 
0.451643578, -2.285229864, -2.05602107), Dim2 = c(0.114080736, 
0.189737473, 0.289738365, 0.15686147, -0.65967629, -0.618998916, 
0.752658445, -0.187202662, 0.601081452, -0.488843082, -0.461435771, 
0.376119902, 0.054640472, -0.352416385, -0.61155099, 0.287520862, 
2.072955276, 1.368287549, -1.598022058, -2.35115053, 0.362478564, 
-1.16829247, -1.161712522, 0.193574061, 0.582209805, 0.090423462, 
0.272987178, 0.762263319, 0.164563899, 1.271976678, 2.169652432, 
-0.304444502, 0.333864962, 0.086432067, 0.03375057, 0.42547905, 
-0.332663346, 0.230207958, 0.416122611, 0.807386059, -0.622165091, 
0.688807153, -0.419360229, -0.024587973, 0.099352776, 0.593489815, 
-0.571526951, -0.587510558, 1.141107254, 0.341089899, -0.234002113, 
-0.675011549, 0.523417802, 0.570074523, -0.2595101, -0.537050791, 
0.691005207, -0.259618613, -0.525167633, -0.181669151, -1.681387716, 
0.150321845, 0.528057749, 0.704124706, -1.3872153, 0.22736727, 
0.459455992, -0.278329399, -0.183119019), Dim3 = c(0.048353514, 
-0.025653037, 0.014924755, -0.00167208, -0.060333422, 0.020764933, 
0.043057079, 0.002591265, 0.02799806, -0.01339572, 0.292361681, 
0.094879935, 0.020261073, -0.147776529, -0.09613908, 0.015407622, 
0.505027604, -0.011850932, 0.162592304, 0.331023774, -0.276654985, 
0.13868844, -0.000362491, -0.124098518, -0.03418057, 0.055507617, 
-0.044387737, 0.08246021, -0.023457465, 0.070215547, 0.090226544, 
0.13499154, -0.115205136, -0.012187001, 0.016720661, -0.112492876, 
-0.156433429, 0.03202894, 0.064274887, -0.121235242, 0.014234763, 
-0.167012976, -0.063685493, 0.045759055, -0.047058619, -0.113183437, 
-0.077100994, 0.039550025, 0.003385481, 0.044618291, 0.025128582, 
0.047673827, -0.112599294, -0.096384527, 0.031407524, 0.095437746, 
0.037062126, 0.026957783, -0.181217407, 0.411980154, -0.189412218, 
-0.042853115, 0.040207038, -0.040919986, -0.082645255, -0.148945175, 
0.030315385, -0.094242334, -0.05746962), Root.skin.colour = structure(c(1L, 
5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 3L, 4L, 1L, 4L, 
4L, 4L, 4L, 1L, 1L, 2L, 4L, 4L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 1L, 1L, 1L, 1L, 4L, 2L, 3L, 5L, 1L, 2L, 5L, 4L, 1L, 1L, 
1L, 1L, 2L, 4L, 1L, 5L, 1L, 1L, 1L, 3L, 1L, 3L, 5L, 4L, 1L, 1L, 
4L, 1L, 2L, 1L), .Label = c("Orange", "Purple", "Red", "White", 
"Yellow"), class = "factor")), .Names = c("CDS", "Dim1", "Dim2", 
"Dim3", "Root.skin.colour"), class = "data.frame", row.names = c(NA, 
-69L)) 
+1

無法重現。另外,看在老天的份上,不要使用 `attach()`,並遠離任何教你這樣做的書或老師。 –

+0

你能告訴我們 `my.data.8` 長什麼樣嗎?請執行 `dput(my.data.8)`。 – Ananta

+0

謝謝您的回覆。它說粘貼在這裏太長了,但我已經複製了excel文件中的前幾行。 CDS CDS010 CDS011 CDS012 CDS013 CDS015 DIM1 0.989923706 1.002847033 -0.323384931 0.0280602 -2.103144589 DIM2 0.114080736 0.189737473 0.289738365 0.15686147 -0.65967629 DIM3 0.048353514 -0.025653037 0.014924755 -0.00167208 -0.060333422 根膚色 橙色 黃色 橙色 橙色 橙色 對不起,我不知道如何保持格式。 – user6111275

回答

0

您可以考慮使用factoextra包來做可視化。它可以接受不同包的PCA模型輸出(可能需要做一些格式調整)。此外,您可以在以下網站上找到一個有用的PCA可視化教程:STHDA: Principal Component Methods in R: Practical Guide。下面是一個基於您提供的數據的簡短代碼示例,幫助您入門。希望這正是您所需要的,並能幫助您解決問題。

library(factoextra)

# `my.data.8` is the data frame posted in the question (the dput() output).
# Bind it to an explicit name: the bare name `data` is never defined in this
# snippet and would resolve to the utils::data() function, which cannot be
# subset.
pca_input <- my.data.8

# Apply PCA to the numeric part of the data. The prcomp() argument is
# spelled `scale.` in full rather than relying on partial matching.
data.pca <- prcomp(
  pca_input[, c("Dim1", "Dim2", "Dim3")],
  center = TRUE,
  scale. = TRUE
)

str(data.pca) # in $x you can see that there are 3 components

fviz_pca_biplot(
  data.pca,
  # choose the components to plot, here 1 and 3
  axes = c(1, 3),
  geom = c("point", "text"),
  addEllipses = TRUE,
  # Colour the individual points by an additional factor, here the
  # Root.skin.colour column, which holds one value per data point.
  # as.factor() leaves a factor column untouched and converts a character
  # column (what read.csv() returns by default since R 4.0).
  col.ind = as.factor(pca_input[["Root.skin.colour"]]),
  var.axes = TRUE
)