2013-11-04 69 views
2

在我的上一個問題中,有人指出,作爲可重複示例的一部分,數據量少一些會更容易閱讀和理解。在準備再次提問時,我嘗試用dput(head(data))來縮短數據,但得到的結果與dput(data)、dput(data[1:6, ])甚至dput(data)[1:6, ]的結果相同(在最後一種情況下,整個dput輸出之後還會額外顯示數據的前6行)。如何縮短dput輸出的長度?

有沒有簡單的方法可以做到這一點?我在dput的選項中沒有找到相關內容,但應該有某種辦法,可以避免手動刪除我不想顯示的部分。

這裏是整個dput數據:

>dput(data) 
structure(list(GOterm = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 71L, 72L, 76L, 77L, 78L, 
83L, 87L, 88L, 89L, 93L, 96L, 97L, 101L, 103L, 104L, 105L, 106L, 
109L, 111L, 113L, 114L, 116L), .Label = c("GO:0000746", "GO:0000910", 
"GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399", "GO:0006412", 
"GO:0006457", "GO:0006464", "GO:0006468", "GO:0006486", "GO:0006520", 
"GO:0006725", "GO:0006766", "GO:0006810", "GO:0006811", "GO:0006839", 
"GO:0006897", "GO:0006950", "GO:0006970", "GO:0006974", "GO:0006979", 
"GO:0006986", "GO:0006997", "GO:0007005", "GO:0007010", "GO:0007029", 
"GO:0007031", "GO:0007033", "GO:0007034", "GO:0007049", "GO:0007059", 
"GO:0007114", "GO:0007124", "GO:0007126", "GO:0007165", "GO:0009408", 
"GO:0009409", "GO:0015031", "GO:0016044", "GO:0016050", "GO:0016070", 
"GO:0016071", "GO:0016072", "GO:0016192", "GO:0016567", "GO:0016568", 
"GO:0016570", "GO:0019725", "GO:0030435", "GO:0031505", "GO:0032196", 
"GO:0032989", "GO:0042221", "GO:0042254", "GO:0042594", "GO:0043543", 
"GO:0044255", "GO:0044257", "GO:0044262", "GO:0045333", "GO:0046483", 
"GO:0048193", "GO:0051169", "GO:0051186", "GO:0051276", "GO:0070271", 
"GO:0000278", "GO:0000902", "GO:0002181", "GO:0005975", "GO:0006325", 
"GO:0006353", "GO:0006360", "GO:0006366", "GO:0006383", "GO:0006397", 
"GO:0006401", "GO:0006414", "GO:0006418", "GO:0006470", "GO:0006605", 
"GO:0006629", "GO:0006865", "GO:0006869", "GO:0006873", "GO:0006887", 
"GO:0006914", "GO:0008033", "GO:0008213", "GO:0008643", "GO:0009311", 
"GO:0009451", "GO:0015931", "GO:0016197", "GO:0023052", "GO:0031399", 
"GO:0032543", "GO:0042255", "GO:0042273", "GO:0042274", "GO:0043144", 
"GO:0043934", "GO:0045454", "GO:0051052", "GO:0051321", "GO:0051603", 
"GO:0051604", "GO:0051726", "GO:0055086", "GO:0070647", "GO:0000054", 
"GO:0001403", "GO:0006352", "GO:0006354", "GO:0006364", "GO:0006413", 
"GO:0006417", "GO:0006497", "GO:0008380", "GO:0009072", "GO:0051049", 
"GO:0061025", "GO:0071554"), class = "factor"), GOdesc = structure(c(16L, 
17L, 23L, 19L, 58L, 62L, 59L, 37L, 39L, 40L, 38L, 3L, 4L, 67L, 
60L, 27L, 30L, 20L, 51L, 48L, 46L, 49L, 52L, 33L, 29L, 18L, 21L, 
34L, 64L, 63L, 2L, 14L, 1L, 43L, 28L, 56L, 47L, 45L, 41L, 9L, 
65L, 54L, 31L, 55L, 66L, 42L, 12L, 26L, 7L, 57L, 22L, 61L, 6L, 
44L, 53L, 50L, 35L, 8L, 10L, 5L, 11L, 25L, 24L, 32L, 15L, 13L, 
36L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA), .Label = c("cell budding", "cell cycle", 
"cellular amino acid and metabolic process", "cellular aromatic compound metabolic process", 
"cellular carbohydrate metabolic process", "cellular component morphogenesis", 
"cellular homeostasis", "cellular lipid metabolic process", "cellular membrane organization", 
"cellular protein catabolic process", "cellular respiration", 
"chromatin modification", "chromosome organization and biogenesis", 
"chromosome segregation", "cofactor metabolic process", "conjugation", 
"cytokinesis", "cytoskeleton organization and biogenesis", "DNA metabolic process", 
"endocytosis", "ER organization and biogenesis", "fungal-type cell wall organization", 
"generation of precursor metabolites and energy", "golgi vesicle transport", 
"heterocycle metabolic process", "histone modification", "ion transport", 
"meiosis", "mitchondrion organization", "mitochondrial transport", 
"mRNA metabolic process", "nuclear transport", "nucleus organization", 
"peroxisome organization", "protein acylation", "protein complex biogenesis", 
"protein folding", "protein glycosylation", "protein modification process", 
"protein phosphorylation", "protein transport", "protein ubiquitination", 
"pseudohyphal growth", "response to chemical stimulus", "response to cold", 
"response to DNA damage stimulus", "response to heat", "response to osmotic stress", 
"response to oxidative stress", "response to starvation", "response to stress", 
"response to unfolded protein", "ribosome biogenesis", "RNA metabolic process", 
"rRNA metabolic process", "signal transduction", "sporulation resulting in formation of a cellular spore", 
"transcription", "translation", "transport", "transposition", 
"tRNA metabolic process", "vacuolar transport", "vacuole organizations", 
"vesicle organization", "vesicle-mediated transport", "vitamin metabolic process" 
), class = "factor"), GSA_p33_SC = c(NA, -1, NA, NA, NA, NA, 
NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, -1, NA, NA, 
-1, -1, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA, NA, NA, 
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, 1, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA), GSA_p38_SC = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
1, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, NA, NA, -1, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA), GSA_p38_X33 = c(NA, 
1, NA, NA, NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 1, 
1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, -1, NA, NA, 1, NA, NA), GSA_p52_SC = c(NA, NA, NA, NA, 
NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, 
-1, -1, NA, NA, NA), GSA_p52_X33 = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 
NA, -1, NA, 1, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, -1, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, -1, NA, 
NA, NA, NA), GSA_p64_SC = c(NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA, 1, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
1, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, -1, NA, -1, -1, 
NA, NA, NA, -1, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, -1, 1, 
-1, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA 
), GSA_p64_X33 = c(1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 1, NA, NA, 
NA, NA, NA, NA, -1, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, -1, -1), GSA_SC_X33 = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, 1, NA, NA, NA, NA, NA, NA, 1, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, 
1, NA, NA, 1, -1, NA, -1, NA, NA, NA, -1, 1, NA, NA, NA, NA, 
NA, -1, NA, NA, NA, NA, NA, NA)), .Names = c("GOterm", "GOdesc", 
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
-89L), class = "data.frame") 

縮短的版本可能是這樣的:

structure(list(GOterm = structure(c(1L, 2L, 3L, 4L, 5L, 6L), 
.Label = c("GO:0000746", "GO:0000910", "GO:0006091", "GO:0006259", 
"GO:0006351", "GO:0006399"), class = "factor"), 
GOdesc = structure(c(16L,17L, 23L, 19L, 58L, 62L), 
.Label = c("cell budding", "cell cycle", 
    "cellular amino acid and metabolic process", "cellular aromatic compound 
metabolic process", "cellular carbohydrate metabolic process", "cellular 
component morphogenesis"), class = "factor"), 
GSA_p33_SC = c(NA, -1, NA, NA, NA, NA), 
GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA), 
GSA_p38_SC = c(NA, NA, NA, NA, NA, NA), 
GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA), 
GSA_p52_SC = c(NA, NA, NA, NA, NA, NA), 
GSA_p52_X33 = c(NA, NA, NA, NA, NA, NA), 
GSA_p64_SC = c(NA, NA, NA, NA, NA, NA), 
GSA_p64_X33 = c(1, NA, NA, NA, NA, NA), 
GSA_SC_X33 = c(NA, NA, NA, NA, NA, NA)), 
.Names = c("GOterm", "GOdesc", 
    "GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
    "GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
    -6L), class = "data.frame")) 

回答

3

嗯,所有多餘的內容都來自factor的水平(levels)。如果你確定在去掉這些未使用的水平之後,你的問題仍然可以重現,那麼可以考慮使用(沒錯,就是它)droplevels:

> dput(droplevels(head(data))) 
structure(list(GOterm = structure(1:6, .Label = c("GO:0000746", 
"GO:0000910", "GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399" 
), class = "factor"), GOdesc = structure(c(1L, 2L, 4L, 3L, 5L, 
6L), .Label = c("conjugation", "cytokinesis", "DNA metabolic process", 
"generation of precursor metabolites and energy", "transcription", 
"tRNA metabolic process"), class = "factor"), GSA_p33_SC = c(NA, 
-1, NA, NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA), 
    GSA_p38_SC = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA), GSA_p52_SC = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p52_X33 = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_SC = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_X33 = c(1, 
    NA, NA, NA, NA, NA), GSA_SC_X33 = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_)), .Names = c("GOterm", "GOdesc", 
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
6L), class = "data.frame") 

用下面這個例子更容易說明這一點:

x <- factor("A", levels = LETTERS) 
x 
# [1] A 
# Levels: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 
dput(x) 
# structure(1L, .Label = c("A", "B", "C", "D", "E", "F", "G", "H", 
# "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", 
# "V", "W", "X", "Y", "Z"), class = "factor") 
dput(droplevels(x)) 
# structure(1L, .Label = "A", class = "factor") 
+0

所以'dput'的輸出這麼長是因爲因子水平?我的意思是,這個方法對我有用,但有沒有更通用的解決方案? – Llopis

+0

@Llopis,看看我的編輯,但總而言之,是的,你的'dput'的長度是如此之大,因爲'因子'變量中的'levels'。另一種方法是創建一個小例子,它不一定使用您的原始數據,但仍然會重現您嘗試解決的問題。處理這些樣本數據有時也有助於您自己解決問題,因爲在創建樣本數據時,您必須考慮問題可能出在哪裏。 – A5C1D2H2I1M1N2O1R2T1

0

另一種縮短輸出的方法是在dput之前將各列轉換爲character。之後可以用as.data.frame把數據讀回來,並保留因子水平。(注意:如下面的輸出所示,數值列也會被轉換爲字符,需要時須在讀回後自行轉回數值。)

首先取子集:

> data2 <- data[sample(nrow(data), 4), ] 

然後將各列轉換爲字符後再dput:

> d <- dput(lapply(data2, as.character)) 
structure(list(GOterm = c("GO:0000746", "GO:0070647", "GO:0006914", 
"GO:0007010"), GOdesc = c("conjugation", NA, NA, "cytoskeleton organization and biogenesis" 
), GSA_p33_SC = c(NA_character_, NA_character_, NA_character_, 
NA_character_), GSA_p33_X33 = c(NA, NA, "1", "1"), GSA_p38_SC = c(NA_character_, 
NA_character_, NA_character_, NA_character_), GSA_p38_X33 = c(NA_character_, 
NA_character_, NA_character_, NA_character_), GSA_p52_SC = c(NA, 
"-1", NA, NA), GSA_p52_X33 = c(NA, NA, NA, "1"), GSA_p64_SC = c(NA, 
NA, NA, "1"), GSA_p64_X33 = c("1", NA, NA, NA), GSA_SC_X33 = c(NA, 
NA, NA, "1")), .Names = c("GOterm", "GOdesc", "GSA_p33_SC", "GSA_p33_X33", 
"GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", "GSA_p52_X33", "GSA_p64_SC", 
"GSA_p64_X33", "GSA_SC_X33")) 

最後把數據讀回來:

> as.data.frame(d)