2012-10-23 100 views
0

在對我的數據進行聚類時,我遇到了一個問題。我想按照疾病類型爲聚類樹的分支着色,所以寫了一個小腳本來完成這件事:匹配一個模式,並爲繪圖選擇正確的顏色。

# Look up one colour per label. match() compares whole strings exactly, so a
# label such as "106T SPORADIC" never equals ".SPORADIC" and yields NA.
col.int <- local({
  colours <- c("green", "red", "blue", "deeppink")
  disease_keys <- c(".SPORADIC", ".HNPCC_UV", ".NA_FAM", ".MUTYH")
  colours[match(fit$labels, disease_keys)]
})

但問題是,fit$labels 中不僅包含疾病類型,還包含病人 ID。所以,如果我嘗試將 SPORADIC 與標籤名稱匹配,只能得到 NA 值。我想知道如何讓腳本在標籤中查找某個模式,而不是匹配整個標籤名稱。

MYDATA如下所示:

structure(list(merge = structure(c(-1L, -2L, -26L, -32L, -40L, 
-50L, -76L, -81L, -84L, -96L, -3L, -55L, -56L, -72L, -5L, -17L, 
-19L, -27L, -36L, -37L, -42L, -44L, -54L, -67L, -85L, -87L, -90L, 
-185L, -6L, -33L, -38L, -58L, -63L, -69L, -94L, -115L, -7L, -35L, 
-75L, -83L, -10L, -14L, -112L, -15L, -20L, -49L, -65L, -68L, 
-105L, -22L, -60L, -78L, -206L, -24L, -64L, -80L, -118L, -29L, 
-30L, -77L, -163L, -31L, -86L, -97L, -98L, -103L, -109L, -141L, 
-187L, -198L, -204L, -41L, -43L, -52L, -70L, -79L, -101L, -116L, 
-92L, -95L, -121L, -161L, -125L, -145L, -193L, -216L, -127L, 
-162L, -181L, -133L, -140L, -164L, -167L, -174L, -188L, -102L, 
-214L, -12L, -170L, -156L, -104L, -11L, -142L, -202L, -53L, -51L, 
-155L, -89L, -180L, -34L, -113L, -46L, -120L, 89L, -136L, 41L, 
71L, -173L, -93L, -23L, -154L, -137L, 36L, 78L, -74L, -194L, 
79L, -111L, 86L, -28L, 61L, -110L, -178L, 10L, -117L, -4L, -150L, 
115L, 93L, 58L, 108L, -138L, -114L, 113L, -168L, -39L, -186L, 
80L, 82L, -130L, -82L, -147L, -191L, 138L, 124L, 122L, -132L, 
-200L, -106L, -189L, 92L, -176L, 120L, -160L, -48L, 106L, -124L, 
126L, -165L, -171L, -179L, 129L, 148L, -100L, 1L, 98L, -131L, 
-158L, -196L, -182L, -9L, 137L, -159L, 157L, 149L, -152L, 152L, 
-126L, -128L, 153L, 150L, 163L, 180L, 156L, 160L, -157L, 107L, 
162L, 164L, 182L, 112L, 195L, 196L, 159L, 183L, 181L, 202L, 199L, 
191L, 206L, 200L, 204L, 184L, 203L, 212L, -177L, -21L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 9L, -25L, 11L, 12L, 13L, -8L, 15L, 16L, 17L, 
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, -18L, 29L, 
30L, 31L, 32L, 33L, 34L, 35L, -13L, 37L, 38L, 39L, -88L, -73L, 
42L, -16L, -45L, 45L, 46L, 47L, 48L, -59L, 50L, 51L, 52L, -47L, 
54L, 55L, 56L, -99L, -62L, 59L, 60L, -66L, 62L, 63L, 64L, 65L, 
66L, 67L, 68L, 69L, 70L, -57L, -61L, -108L, -203L, -91L, 76L, 
77L, -119L, -212L, -122L, 81L, -144L, 83L, 84L, 85L, -134L, 87L, 
88L, -148L, -143L, -169L, -197L, -213L, -201L, 49L, 44L, -135L, 
94L, 53L, 28L, -192L, 72L, 75L, -215L, -183L, 14L, -153L, 57L, 
-71L, -172L, -139L, 40L, 95L, -184L, 74L, 91L, -175L, -107L, 
-211L, 97L, 102L, 109L, 101L, -149L, 90L, 116L, 110L, 99L, -208L, 
121L, 103L, 118L, 73L, -123L, 131L, -207L, 117L, 123L, 96L, 136L, 
114L, 43L, 127L, -205L, -210L, 133L, 104L, 139L, 143L, 134L, 
-195L, 132L, 142L, 125L, 130L, 145L, 146L, 140L, -190L, 100L, 
-199L, 141L, -166L, -151L, 144L, 135L, 147L, 111L, 165L, 119L, 
154L, 151L, 128L, 105L, 166L, 171L, 176L, 169L, -209L, 174L, 
161L, 167L, 172L, 155L, 158L, 170L, -129L, -146L, 168L, 173L, 
185L, 186L, 192L, 188L, 177L, 178L, 179L, 189L, 190L, 175L, 198L, 
201L, 197L, 187L, 193L, 205L, 207L, 194L, 209L, 208L, 210L, 211L, 
213L, 214L), .Dim = c(215L, 2L)), height = c(0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.00359389038634323, 0.00406394236181465, 0.00649350649350644, 
0.00912375832219081, 0.00925925925925927, 0.0118483412322275, 
0.0145454545454545, 0.0152885509601333, 0.0166566286517327, 0.0186046511627908, 
0.0208333333333334, 0.0209267563527653, 0.0214132762312634, 0.0224057413681743, 
0.0228571428571429, 0.0256410256410257, 0.025974025974026, 0.0294117647058824, 
0.0327645443307275, 0.0328947368421053, 0.0331603326665965, 0.0356567259715686, 
0.0443686006825939, 0.0475, 0.0484094052558783, 0.0518796353224032, 
0.0534487053706255, 0.0547077905882575, 0.0563718734450903, 0.0578231292517006, 
0.0613830613830613, 0.0622751723057615, 0.0634920634920635, 0.0644327567295862, 
0.0658682634730539, 0.0668128430870732, 0.0705853974392098, 0.0751236471810454, 
0.0791918615118165, 0.0814814814814815, 0.0840010548320977, 0.0875912408759124, 
0.0936648591902225, 0.0952071705226908, 0.0975452599352908, 0.101868037484979, 
0.107997171744147, 0.109056860742254, 0.11559433104575, 0.119804400977995, 
0.124629080118694, 0.140933816340968, 0.14179814636445, 0.163254725744272, 
0.163300668687379, 0.164550876331996, 0.17258883248731, 0.1728731431966, 
0.181699994974325, 0.183783241609719, 0.189923527478743, 0.193719375296218, 
0.197832447740333, 0.211200790020614, 0.211416490486258, 0.213745390488226, 
0.214849921011058, 0.215913282172105, 0.234806629834254, 0.242424242424242, 
0.246066211608438, 0.246427111438149, 0.24999095127792, 0.252724927051683, 
0.255913595401057, 0.274475840696771, 0.286483762400918, 0.287176198199807, 
0.303808873276086, 0.306025671419987, 0.328412033190212, 0.332126921889894, 
0.341734154210338, 0.34176559352234, 0.37150466045273, 0.376790317416398, 
0.381977523157772, 0.382997925922382, 0.384551895299024, 0.386213122564166, 
0.393014885652478, 0.400243405117492, 0.406593406593407, 0.432671081677704, 
0.44812132458409, 0.46358985724391, 0.513662061556124, 0.536640659139735, 
0.550285636317328, 0.602127569602188, 0.631010533104603, 0.698728773010427, 
0.72360776175643, 0.787536353244465, 0.7988812719814, 0.814115072824207, 
0.853874433030377, 0.917561212603516, 0.940247780670057, 0.943630213146685, 
1.07562939731944, 1.21827776986843, 1.34271282928207, 1.55806846198326, 
1.81384209156158, 2.12412215470848, 2.89211680358764, 3.37020319658011, 
10.5520617141179, 59.8855594110458), order = c(106L, 29L, 99L, 
102L, 105L, 68L, 65L, 49L, 20L, 45L, 155L, 72L, 56L, 55L, 3L, 
25L, 158L, 12L, 135L, 51L, 183L, 120L, 83L, 75L, 35L, 7L, 13L, 
92L, 119L, 10L, 88L, 52L, 108L, 9L, 100L, 111L, 34L, 71L, 182L, 
209L, 152L, 200L, 39L, 210L, 130L, 114L, 112L, 14L, 73L, 95L, 
212L, 202L, 70L, 203L, 82L, 96L, 84L, 81L, 76L, 50L, 40L, 32L, 
26L, 2L, 21L, 43L, 61L, 137L, 11L, 192L, 28L, 208L, 23L, 211L, 
89L, 153L, 4L, 163L, 77L, 30L, 62L, 154L, 214L, 15L, 16L, 161L, 
121L, 122L, 167L, 197L, 115L, 94L, 69L, 63L, 58L, 38L, 33L, 6L, 
18L, 180L, 118L, 80L, 64L, 24L, 47L, 116L, 101L, 79L, 91L, 104L, 
185L, 90L, 87L, 85L, 67L, 54L, 44L, 42L, 37L, 36L, 27L, 19L, 
17L, 5L, 8L, 74L, 149L, 157L, 131L, 179L, 93L, 107L, 46L, 139L, 
1L, 177L, 53L, 215L, 132L, 168L, 205L, 216L, 193L, 145L, 125L, 
144L, 170L, 174L, 213L, 136L, 184L, 204L, 198L, 187L, 141L, 109L, 
103L, 98L, 97L, 86L, 31L, 66L, 140L, 143L, 138L, 181L, 162L, 
127L, 134L, 188L, 201L, 150L, 207L, 164L, 169L, 156L, 206L, 78L, 
60L, 22L, 59L, 191L, 110L, 142L, 41L, 57L, 194L, 133L, 148L, 
186L, 178L, 173L, 175L, 160L, 166L, 128L, 146L, 189L, 190L, 126L, 
129L, 176L, 199L, 196L, 165L, 113L, 172L, 159L, 124L, 117L, 123L, 
147L, 195L, 171L, 48L, 151L), labels = c("00_11242T1 MUTYH", 
"01_677T HNPCC_UV", "106T SPORADIC", "106TV SPORADIC", "108T SPORADIC", 
"108TV SPORADIC", "124T SPORADIC", "124TV SPORADIC", "40T SPORADIC", 
"44T SPORADIC", "44TV SPORADIC", "511T SPORADIC", "511TV SPORADIC", 
"514T SPORADIC", "514TV SPORADIC", "56T SPORADIC", "92_11145T HNPCC_UV", 
"95_549T HNPCC_UV", "97_12714T1 MUTYH", "97_7999T HNPCC_UV", 
"97_8073T2 MUTYH", "99_2221T HNPCC_UV", "R01_80418T2 NA_FAM", 
"R01_81197T HNPCC_UV", "R02_80456T2 MUTYH", "R03_80586T HNPCC_UV", 
"R04_80227T NA_FAM", "R04_80577T NA_FAM", "R04_80584T HNPCC_UV", 
"R04_81371T NA_FAM", "R04_81372T1 NA_FAM", "R04_81449T NA_FAM", 
"R05_80481T NA_FAM", "R05_80611T NA_FAM", "00-04193T HNPCC_UV", 
"00-06583T HNPCC_UV", "00-06969T HNPCC_UV", "00-09107T HNPCC_UV", 
"00-10221T HNPCC_UV", "00-11063T HNPCC_UV", "00-11960T HNPCC_UV", 
"00-11963T HNPCC_UV", "00134T HNPCC_UV", "01-01171T HNPCC_UV", 
"03-05270T HNPCC_UV", "16924T1 MUTYH", "08731T6 MUTYH", "18250T1 MUTYH", 
"20699T1 MUTYH", "20699T2 MUTYH", "4497T1 MUTYH", "4497T4 MUTYH", 
"6103T MUTYH", "80-4124T1 MUTYH", "80456T1 MUTYH", "80456T2 MUTYH", 
"90-5023T1 MUTYH", "90-5023T2 MUTYH", "90-5023T3 MUTYH", "90-5023T4 MUTYH", 
"93-03554T HNPCC_UV", "93-7083T MUTYH", "95-16316T HNPCC_UV", 
"95-5959T HNPCC_UV", "97_8073T1 MUTYH", "97-07999T HNPCC_UV", 
"97-4772T2 MUTYH", "97-4772T8 MUTYH", "97-5332T1 MUTYH", "97-5332T2 MUTYH", 
"98-04559T HNPCC_UV", "98-11489T HNPCC_UV", "99-01796T HNPCC_UV", 
"99-02872T1 HNPCC_UV", "99-02872T2 HNPCC_UV", "99-10411T HNPCC_UV", 
"99-10647T2 HNPCC_UV", "H04-00107T HNPCC_UV", "R01-80145T HNPCC_UV", 
"R01-80242T HNPCC_UV", "R01-80377T HNPCC_UV", "R01-80499T HNPCC_UV", 
"R01-80748T HNPCC_UV", "R02-80670T HNPCC_UV", "R02-80744T HNPCC_UV", 
"R02-80813T HNPCC_UV", "R02-81415T1 HNPCC_UV", "R02-81415T2 HNPCC_UV", 
"R02-81709T HNPCC_UV", "R03-80064T HNPCC_UV", "R03-80732T HNPCC_UV", 
"R04-80248T1 NA_FAM", "R04-80248T2 NA_FAM", "R04-80575T NA_FAM", 
"R04-80576T1 NA_FAM", "R04-80576T2 NA_FAM", "R04-80599T2 NA_FAM", 
"R04-80599T3 NA_FAM", "R04-80599T4 NA_FAM", "R04-81767T1 NA_FAM", 
"R05-80533T NA_FAM", "R05-80546T NA_FAM", "R05-80824T HNPCC_UV", 
"t00-21127C2 SPORADIC", "t01-11711A3 SPORADIC", "t01-11711C2 SPORADIC", 
"t01-12237A3C1 SPORADIC", "t01-12237C2 SPORADIC", "t02-10005A1 SPORADIC", 
"t02-10005C2 SPORADIC", "t02-15497A2A3 SPORADIC", "t02-15497C2 SPORADIC", 
"t02-16230A1/2/3 SPORADIC", "t02-16230C2 SPORADIC", "t02-29970A3 SPORADIC", 
"t02-29970C2 SPORADIC", "t03-07016A1A2 SPORADIC", "t03-07016C1 SPORADIC", 
"t03-26385A3 SPORADIC", "t03-26385C1C2 SPORADIC", "t05-26388A2A3 SPORADIC", 
"t05-26388C2 SPORADIC", "t05-35275A2 SPORADIC", "t05-35275C2 SPORADIC", 
"GSM288035 SPORADIC", "GSM288037 SPORADIC", "GSM288039 SPORADIC", 
"GSM288041 SPORADIC", "GSM288043 SPORADIC", "GSM288045 SPORADIC", 
"GSM288047 SPORADIC", "GSM288049 SPORADIC", "GSM288051 SPORADIC", 
"GSM288053 SPORADIC", "GSM288055 SPORADIC", "GSM288057 SPORADIC", 
"GSM288059 SPORADIC", "GSM288061 SPORADIC", "GSM288063 SPORADIC", 
"GSM288065 SPORADIC", "GSM288067 SPORADIC", "GSM288069 SPORADIC", 
"GSM288071 SPORADIC", "GSM288073 SPORADIC", "GSM288075 SPORADIC", 
"GSM288077 SPORADIC", "GSM288079 SPORADIC", "GSM288081 SPORADIC", 
"GSM288083 SPORADIC", "GSM288085 SPORADIC", "GSM288089 SPORADIC", 
"GSM288091 SPORADIC", "GSM288093 SPORADIC", "GSM288095 SPORADIC", 
"GSM288097 SPORADIC", "GSM288099 SPORADIC", "GSM288101 SPORADIC", 
"GSM288103 SPORADIC", "GSM288105 SPORADIC", "GSM288107 SPORADIC", 
"GSM288109 SPORADIC", "GSM288111 SPORADIC", "GSM288113 SPORADIC", 
"GSM288115 SPORADIC", "GSM288117 SPORADIC", "GSM288119 SPORADIC", 
"GSM288121 SPORADIC", "GSM288123 SPORADIC", "GSM288125 SPORADIC", 
"GSM288127 SPORADIC", "GSM288129 SPORADIC", "GSM288131 SPORADIC", 
"GSM288133 SPORADIC", "GSM288135 SPORADIC", "GSM288137 SPORADIC", 
"GSM288139 SPORADIC", "GSM288141 SPORADIC", "GSM288143 SPORADIC", 
"GSM288145 SPORADIC", "GSM288147 SPORADIC", "GSM288149 SPORADIC", 
"GSM288151 SPORADIC", "GSM288153 SPORADIC", "GSM288155 SPORADIC", 
"GSM288157 SPORADIC", "GSM288159 SPORADIC", "GSM288161 SPORADIC", 
"GSM288163 SPORADIC", "GSM288165 SPORADIC", "GSM288167 SPORADIC", 
"GSM288169 SPORADIC", "GSM288171 SPORADIC", "GSM288173 SPORADIC", 
"GSM288175 SPORADIC", "GSM288177 SPORADIC", "GSM288179 SPORADIC", 
"GSM288181 SPORADIC", "GSM288183 SPORADIC", "GSM288185 SPORADIC", 
"GSM288189 SPORADIC", "GSM288191 SPORADIC", "GSM288193 SPORADIC", 
"GSM288195 SPORADIC", "GSM288197 SPORADIC", "GSM288199 SPORADIC", 
"GSM288201 SPORADIC", "GSM288203 SPORADIC", "GSM288205 SPORADIC", 
"GSM288207 SPORADIC", "GSM288209 SPORADIC", "GSM288211 SPORADIC", 
"GSM288213 SPORADIC", "GSM288215 SPORADIC", "GSM288217 SPORADIC", 
"GSM288219 SPORADIC", "GSM288221 SPORADIC"), method = "ward", 
    call = hclust(d = a, method = "ward"), dist.method = NULL), .Names = c("merge", 
"height", "order", "labels", "method", "call", "dist.method"), class = "hclust") 

回答

1

嗯,使用grepl:

# Colour each label by the first disease type whose name occurs in it, checked
# in the order SPORADIC, HNPCC_UV, NA_FAM, MUTYH; labels containing none of
# them get "brown".
col.int <- local({
  sample_labels <- fit$labels
  has_sporadic <- grepl("SPORADIC", sample_labels)
  has_hnpcc <- grepl("HNPCC_UV", sample_labels)
  has_na_fam <- grepl("NA_FAM", sample_labels)
  has_mutyh <- grepl("MUTYH", sample_labels)
  ifelse(
    has_sporadic, "green",
    ifelse(
      has_hnpcc, "red",
      ifelse(
        has_na_fam, "blue",
        ifelse(has_mutyh, "deeppink", "brown")
      )
    )
  )
})
+0

謝謝 m8 :) 我稍微修改了你的回答,因爲你漏掉了一個選項 MUTYH。 – Sanshine

+0

嗯,它隱含在最後一個 ifelse 裏……不過,這只在你只有這四個級別時才適用…… – EDi

+0

行,編輯答案... – EDi

1

看來 match 不能處理正則表達式(我假設 ".SPORADIC" 中的 . 是正則通配符),但我們可以用一個變通辦法:用 sub 修改樣本名稱。

# Map each label to the index of its disease type (1 = SPORADIC, 2 = HNPCC_UV,
# 3 = NA_FAM, 4 = MUTYH; NA if the type is not one of these).
#
# sub("^.* ", "", x) drops everything up to and including the last space, so
# "01_677T HNPCC_UV" becomes "HNPCC_UV". The earlier pattern "([A-Z]+)$"
# stopped at the underscore ("HNPCC_UV" -> "UV", "NA_FAM" -> "FAM"), and the
# lookup values carried a literal leading ".", which match() compares exactly,
# so every label came back as NA.
match(sub("^.* ", "", fit$labels),
      c("SPORADIC", "HNPCC_UV", "NA_FAM", "MUTYH"))
+0

感謝您的信息。但我仍然只有NA值。但無論如何感謝Backlin – Sanshine