2017-06-03 66 views
0

我正在學習 R。我有以下數據集,共 28 個變量,其中 5 個是標籤(Class、Crown、Root、Trunk、Collar)。首先,我嘗試用隨機森林(randomForest)根據 Class 標籤來預測一棵樹是否生病。(主題:R 中的多標籤分類)

接着,我需要預測樹的哪一部分生病了(樹冠 Crown、根 Root、根頸 Collar、樹幹 Trunk)。

我需要使用多標籤分類(randomForestSRC)或其他包。

如果有人可以提供一個示例如何使用多標籤分類(或將標籤類轉換爲二進制形式),我將非常感激。

預先感謝您!

dput(ML) 
structure(list(Sector = c(5L, 3L, 3L, 2L, 1L, 3L, 6L, 2L, 2L, 
5L, 3L, 4L, 5L, 1L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 3L, 5L, 4L, 2L, 
4L, 4L, 2L, 1L, 2L, 5L, 6L, 3L, 3L, 6L, 2L, 3L, 3L, 6L, 3L, 5L, 
6L, 3L, 4L, 5L, 1L, 3L, 5L, 3L, 2L, 3L, 6L, 5L), Plantation.year = c(2014L, 
2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 
2009L, 2004L, 2004L, 2005L, 2004L, 2004L, 2004L, 2004L, 2009L, 
2004L, 2006L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 
2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 
2009L, 2004L, 2005L, 2004L, 2004L, 2004L, 2004L, 2004L, 2014L, 
2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L), Diagnosis.year = c(2014L, 
2013L, NA, 2014L, 2013L, 2015L, 2013L, 2014L, 2013L, 2015L, 2015L, 
2014L, 2013L, 2015L, 2015L, 2015L, 2013L, 2015L, 2013L, 2013L, 
2015L, 2014L, 2013L, 2013L, 2014L, 2013L, 2014L, 2015L, 2014L, 
2014L, 2013L, 2014L, 2014L, 2013L, 2015L, 2014L, 2014L, 2013L, 
2014L, 2015L, 2015L, 2015L, 2015L, 2013L, 2015L, 2014L, 2014L, 
2013L, 2013L, 2013L, 2013L, 2013L, 2013L), Next.diagnosis.year = c(2019L, 
NA, 2014L, 2014L, NA, 2018L, 2020L, 2014L, NA, 2017L, NA, 2014L, 
2016L, 2018L, 2020L, NA, NA, 2016L, NA, 2018L, 2018L, 2014L, 
2016L, 2014L, 2014L, NA, 2015L, NA, 2014L, NA, NA, 2014L, NA, 
NA, 2018L, 2017L, 2014L, NA, 2014L, 2020L, 2017L, 2017L, 2016L, 
NA, 2018L, 2020L, 2019L, NA, NA, NA, NA, 2017L, 2018L), Stump.diameter = structure(c(2L, 
6L, 2L, 7L, 5L, 7L, 6L, 6L, 7L, 7L, 2L, 7L, 5L, 2L, 5L, 2L, 5L, 
5L, 1L, 6L, 4L, 1L, 6L, 6L, 2L, 7L, 3L, 5L, 2L, 7L, 6L, 6L, 2L, 
10L, 2L, 7L, 6L, 2L, 5L, 2L, 5L, 2L, 6L, 5L, 5L, 2L, 6L, 1L, 
8L, 2L, 9L, 8L, 11L), .Label = c("0 Ã 10 cm", "10 Ã 20 cm", 
"100 Ã 110 cm", "110 Ã 120 cm", "20 Ã 30 cm", "30 Ã 40 cm", 
"40 Ã 50 cm", "50 Ã 60 cm", "60 Ã 70 cm", "70 Ã 80 cm", "80 Ã 90 cm" 
), class = "factor"), Species = structure(c(4L, 1L, 6L, 7L, 9L, 
9L, 5L, 8L, 1L, NA, NA, 13L, 7L, 15L, NA, 12L, 11L, 7L, 9L, 1L, 
8L, 15L, 8L, 13L, 11L, 9L, 1L, 8L, 4L, 14L, 8L, 1L, 9L, 7L, 9L, 
2L, 8L, 9L, 8L, NA, 12L, 3L, 9L, 7L, 12L, 9L, 10L, 9L, 9L, 1L, 
11L, 13L, 1L), .Label = c("acerifolia", "betulus", "campestris", 
"cordata", "excelsior", "grandiflora", "japonica", "nigra", "Other ", 
"platanoides", "pseudoplatanus", "styraciflua", "tomentosa", 
"tulipifera", "verrucosa"), class = "factor"), Traffic.frequence = structure(c(2L, 
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 1L, 
1L, 1L, 1L, 1L), .Label = c("passages fréquents et arrêts fréquents", 
"passages fréquents ou arrêts", "quelques passages"), class = "factor"), 
    Botanical.category = structure(c(2L, 16L, 11L, 19L, 8L, 14L, 
    8L, 17L, 16L, 1L, 20L, 20L, 19L, 3L, 4L, 9L, 1L, 19L, 13L, 
    16L, 17L, 3L, 15L, 20L, 1L, 7L, 16L, 15L, 20L, 10L, 15L, 
    16L, 13L, 19L, 6L, 4L, 15L, 5L, 15L, 4L, 9L, 1L, 13L, 19L, 
    9L, 12L, 1L, 7L, 18L, 16L, 1L, 20L, 16L), .Label = c("Acer", 
    "Alnus", "Betula", "Carpinus", "Celtis", "Cercis", "Cupressus", 
    "Fraxinus", "Liquidambar", "Liriodendron", "Magnolia", "Malus", 
    "Other ", "Picea", "Pinus", "Platanus", "Populus", "Robinia", 
    "Sophora", "Tilia"), class = "factor"), PLU.ident.number = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), PLU.ProtectionCateg = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), Diagnosis.remarks = structure(c(4L, 
    4L, 4L, 4L, 4L, 2L, 4L, 1L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 
    4L, 2L, 4L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 4L, 4L, 3L, 4L, 3L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Arbre à abattre dans les 10 ans", 
    "Arbre à abattre dans les 5 ans", "Arbre d'avenir incertain", 
    "Arbre d'avenir normal"), class = "factor"), Diagnosis.renewal.priority = structure(c(3L, 
    3L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 
    2L, 1L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("de 1 Ã 5 ans", 
    "de 11 Ã 20 ans", "plus de 20 ans"), class = "factor"), 
    Reasoning.planting = structure(c(1L, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA), .Label = "Remplacement", class = "factor"), Subcategory = structure(c(1L, 
    1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
    1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 
    2L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("ESP151", "ESP174" 
    ), class = "factor"), Development.stage = structure(c(2L, 
    2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
    2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 
    2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 
    1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("Arbre d'espaces ouverts", 
    "Arbre de voirie"), class = "factor"), STADEDEDEVELOPPEMENT = structure(c(2L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Arbre adulte", "Arbre jeune", 
    "Arbre vieillissant"), class = "factor"), Development.stage.diagnosis = structure(c(2L, 
    1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 
    1L, 1L, 2L, 1L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    1L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Arbre adulte", "Arbre jeune", 
    "Arbre vieillissant"), class = "factor"), Caterpillar.treat.priority = structure(c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, 2L, NA, NA, 1L, 
    NA, NA, NA, NA, NA, 1L, NA, 1L, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), .Label = c("Haute", "Moyenne" 
    ), class = "factor"), Recommended.treatment = structure(c(2L, 
    NA, 2L, 8L, NA, 3L, 6L, 6L, NA, 6L, NA, 9L, 5L, 7L, 7L, NA, 
    NA, 1L, NA, 7L, 7L, 4L, 6L, 9L, 6L, NA, 6L, NA, 2L, NA, NA, 
    7L, NA, NA, 7L, 6L, 7L, NA, 7L, 7L, 7L, 7L, 7L, NA, 7L, 2L, 
    6L, NA, NA, NA, NA, 6L, 5L), .Label = c("Abattage", "Controle", 
    "Controle résistographe", "Controle tuteur, attache ou protection", 
    "Taille d'éclaircie", "Taille de bois mort", "Taille formation et mise au gabarit", 
    "Taille mise en sécurité", "Taille rideau"), class = "factor"), 
    Sidewalk = structure(c(1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 
    1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
    1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("non", 
    "oui"), class = "factor"), PLU.spatial.arrangement = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), Variety = structure(c(NA, NA, 
    NA, NA, NA, NA, NA, 3L, NA, NA, NA, NA, NA, NA, 4L, NA, NA, 
    NA, NA, NA, NA, NA, 1L, NA, NA, 2L, NA, 1L, NA, NA, 1L, NA, 
    NA, NA, NA, NA, 1L, NA, 1L, 4L, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), .Label = c("Austriaca", "Glauca", 
    "Italica", "Pyramidalis"), class = "factor"), Vigor = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 1L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, NA, 
    2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("vieillissement dépérissement", 
    "vigoureux", "vigueur intermédiaire"), class = "factor"), 
    Class = c(0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
    0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 
    0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L), Collar = c(0L, 
    0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 1L, 0L, 0L), Crown = c(0L, 0L, 0L, 1L, 0L, 
    0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
    1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    1L, 0L, 0L), Root = c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
    Trunk = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L)), .Names = c("Sector", 
"Plantation.year", "Diagnosis.year", "Next.diagnosis.year", "Stump.diameter", 
"Species", "Traffic.frequence", "Botanical.category", "PLU.ident.number", 
"PLU.ProtectionCateg", "Diagnosis.remarks", "Diagnosis.renewal.priority", 
"Reasoning.planting", "Subcategory", "Development.stage", "STADEDEDEVELOPPEMENT", 
"Development.stage.diagnosis", "Caterpillar.treat.priority", 
"Recommended.treatment", "Sidewalk", "PLU.spatial.arrangement", 
"Variety", "Vigor", "Class", "Collar", "Crown", "Root", "Trunk" 
), class = "data.frame", row.names = c(NA, -53L)) 

回答

0

從你的問題來看,你遇到的困難似乎是:有些變量帶有文字標籤,並不是數值?最簡單的處理方法是:在數據(矩陣)中爲這 5 個標籤變量各增加一個對應的變量,用數字編碼來表示標籤,例如 Acer = 1、Alnus = 2,依此類推(於是 class 變成 class.id)。

現在,使用 randomForest 包時,分類型的預測變量必須被指定爲因子(factor),否則它們會被錯誤地當作連續變量處理。要進行轉換,只需要添加以下代碼:

轉換爲因子:

# as.factor() returns a new vector and leaves its argument untouched, so the
# result must be assigned back for the conversion to take effect.
class.id <- as.factor(class.id)

檢查是否已經是因子:

# is.factor() returns TRUE only when class.id is stored as a factor.
is.factor(class.id)

然後你就可以使用 randomForest 包中的隨機森林代碼了。從你上面列出的數據中,我無法確定究竟是哪些變量決定樹是否生病,但基本形式如下:

根據各個變量預測是否生病:

# Fit a random forest that predicts the (factor) label class.id from the
# chosen predictors.
#
# - The function is randomForest() (capital F); R is case-sensitive.
# - Predictors inside a formula are joined with `+`, not commas. Replace
#   variable1 / variable2 with the real column names and append further
#   predictors with `+` (or use `class.id ~ .` to use every other column).
# - `matrix` is a placeholder for the data frame holding the columns
#   (e.g. the ML data frame from the question).
# - randomForest() stops on missing values by default (na.action = na.fail);
#   impute them first or pass an explicit na.action if the data contain NA.
library(randomForest)

sickness <- randomForest(
  class.id ~ variable1 + variable2,
  data = matrix,
  ntree = 4000,
  importance = TRUE,
  proximity = TRUE
)

下面的代碼可以讓你查看變量重要性的數值,並將其繪製成圖:

# Print the fitted object stored in `sickness`.
print(sickness) 
# Show the variable-importance values of the fitted model
# (presumably randomForest::importance(); requires importance = TRUE at fit
# time for the full set of measures -- confirm against the package docs).
importance(sickness) 
# Plot the variable-importance values.
varImpPlot(sickness) 
+0

謝謝您的回答。我要解決的問題分爲兩部分:1. 僅使用 Class 標籤判斷一棵樹是否生病(這一步我已經用隨機森林完成了);2. 我需要知道樹的哪一部分生病了(樹冠、根、樹幹、根頸)。我嘗試過使用 MLPUGS 包,但它並不適用,因爲我的數據中有非數值特徵。 –