2015-04-20 59 views
0

我有一個包含超過50,000行推文(Tweets)的列表。我已經從該列表中導出了主題標籤(hashtag),但現在卡在了幾千行主題標籤上:我想把同一行上的多個主題標籤兩兩連接起來。每一行看起來像這樣:

hashtag1; hashtag2; hashtag3; hashtag4

因爲我想做主題標籤共現(co-hashtag)分析,我正在尋找一種方法,把每一行中的多個hashtag兩兩相連、轉換爲無向邊,而無需手動處理這些行。例如:

hashtag1; hashtag2
hashtag1; hashtag3
hashtag1; hashtag4
hashtag2; hashtag3
hashtag2; hashtag4
hashtag3; hashtag4

那麼,你們有什麼辦法可以完成這項任務嗎(例如通過R)?我是一名R新手,對其他語言的「精通」程度甚至更低,但我渴望學習。

structure(list(V1 = structure(c(1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 
 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 7L, 8L, 8L, 9L, 10L, 
 
10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 13L, 
 
13L, 13L, 13L, 14L, 14L), .Label = c("profitkapital", "resupply", 
 
"robotik", "rudidutschke", "russland", "sanktionen", "sanktionieren", 
 
"schiller", "siegertyp", "snowden", "sockeleinkommen", "solidarity", 
 
"sozialismus", "sozialphilosoph"), class = "factor"), V2 = structure(c(4L, 
 
3L, 2L, 7L, 7L, 7L, 7L, 17L, 6L, 8L, 9L, 10L, 10L, 11L, 12L, 
 
13L, 18L, 18L, 1L, 15L, 15L, 14L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
 
4L, 4L, 4L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 16L, 16L), .Label = c("alltag", 
 
"arbeit", "bbq", "bge", "blockupy", "deutschland", "digitalisierung", 
 
"griechenland", "grundeinkommen", "hartziv", "kenfm", "kirche", 
 
"kopf", "kraft", "marx", "negt", "piraten", "sanktion"), class = "factor"), 
 
    V3 = structure(c(1L, 3L, 2L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 
 
    4L, 4L, 13L, 10L, 13L, 4L, 14L, 14L, 7L, 6L, 6L, 15L, 8L, 
 
    8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 12L, 12L, 11L, 11L, 
 
    11L, 11L, 9L, 9L), .Label = c("", "abitur", "bbqrub", "bge", 
 
    "brd", "brecht", "deutschen", "fsa", "grundeinkommen", "hartziv", 
 
    "linkezukunft", "ows", "vatikan", "widerspruch", "würde" 
 
    ), class = "factor"), V4 = structure(c(1L, 3L, 6L, 1L, 1L, 
 
    1L, 1L, 1L, 8L, 1L, 2L, 1L, 9L, 5L, 9L, 10L, 4L, 4L, 7L, 
 
    3L, 3L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
 
    12L, 12L, 1L, 1L, 1L, 1L, 3L, 3L), .Label = c("", "bank", 
 
    "bge", "eilantrag", "haarp", "job", "jobcentern", "merkel", 
 
    "pastor", "probleme", "super", "unibrennt"), class = "factor"), 
 
    V5 = structure(c(1L, 3L, 5L, 1L, 1L, 1L, 1L, 1L, 7L, 1L, 
 
    10L, 1L, 2L, 9L, 2L, 4L, 8L, 8L, 6L, 1L, 1L, 6L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 
 
    1L, 1L), .Label = c("", "bge", "bgenation", "fliegen", "geld", 
 
    "hartziv", "hitler", "sg", "ttip", "vorbild"), class = "factor"), 
 
    V6 = structure(c(1L, 5L, 2L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 
 
    1L, 1L, 8L, 4L, 8L, 7L, 4L, 4L, 4L, 1L, 1L, 4L, 1L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 
 
    1L), .Label = c("", "altersarmut", "antifa", "bge", "deeznuts", 
 
    "holocaust", "klatsch", "sex"), class = "factor"), V7 = structure(c(1L, 
 
    1L, 2L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 
 
    4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
 
    1L, 1L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "bge", 
 
    "cia", "hartz", "spanishrevolution", "wahre"), class = "factor"), 
 
    V8 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 
 
    1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 
 
    1L), .Label = c("", "cityoflondon", "grund", "peace"), class = "factor"), 
 
    V9 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
 
    1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 
 
    1L), .Label = c("", "bge", "occupy", "rothschild"), class = "factor"), 
 
    V10 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
 
    1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 
 
    1L), .Label = c("", "ard", "gezi"), class = "factor"), V11 = structure(c(1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
 
    1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "refugeeswelcome", 
 
    "zdf"), class = "factor"), V12 = structure(c(1L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
 
    2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nolegida", 
 
    "wdr"), class = "factor"), V13 = structure(c(1L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
 
    2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nopegida", 
 
    "swr"), class = "factor"), V14 = structure(c(1L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 
 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
 
    2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "nocastor", 
 
    "zukunft"), class = "factor")), .Names = c("V1", "V2", "V3", 
 
"V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13", 
 
"V14"), class = "data.frame", row.names = c(NA, -41L))

回答

0

你可以試試combinat包中的combn函數,它會生成所有兩兩配對的組合:

# combinat provides combn(), which enumerates every size-m subset of a vector.
library(combinat)

# Ask for all unordered pairs (m = 2) of the four example hashtags.
# The result is a 2 x 6 character matrix: each column is one pair, i.e. one
# undirected edge between two hashtags.
combn(
  x = c("hashtag1", "hashtag2", "hashtag3", "hashtag4"),
  m = 2
)
    [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  
[1,] "hashtag1" "hashtag1" "hashtag1" "hashtag2" "hashtag2" "hashtag3" 
[2,] "hashtag2" "hashtag3" "hashtag4" "hashtag3" "hashtag4" "hashtag4" 
+0

您好,感謝您的答覆! :) 我嘗試了你的建議,覺得這(在某種程度上)是正確的方向。然而,把它應用到我的數據文件時,我得到的是一個只有一列的表,所有內容都是「NA」。然後,我試着改動「,2)」這一部分,用「min」或「fun」替換它,但由於hashtags不是數字,所以它們不起作用。你有什麼進一步的建議嗎? :) – quaak

+0

你可以編輯你的問題,用 `dput` 貼出你的數據(或至少其中一部分)嗎? –

+0

你的意思是這樣的? (抱歉,從未這樣做過) – quaak

相關問題