2015-11-10 131 views
0

我必須繪製出txt文件中每個單詞的出現頻率。到目前爲止,我已經有一個字典,其中包含每個單詞以及它在txt文件中出現的次數。爲了繪圖,我(假設)必須將該字典轉換爲一個新的字典,用來統計每個頻率對應的單詞數量。例如,如果有5個單詞在txt文件中各出現3次,那麼這些單詞應歸爲同一組:頻率(3)繪製在x軸上,具有該頻率的單詞數(5)繪製在y軸上。(標題:Python Pyplot 單詞出現頻率)

我目前的代碼完全無法運行:

def plot(word_dict):
    """Plot how many words share each occurrence count.

    Args:
        word_dict: Mapping of word -> number of times the word occurs.

    The x axis shows each distinct frequency found in ``word_dict`` and the
    y axis shows the number of words that occur with that frequency. The
    figure is displayed with ``pyplot.show()``; nothing is returned.
    """
    # Histogram of the frequencies: frequency -> number of words having it.
    # The lookup must be made against this new dict, not against word_dict
    # (whose keys are words, not frequencies).
    freq_counts = {}
    for frequency in word_dict.values():
        freq_counts[frequency] = freq_counts.get(frequency, 0) + 1

    # Sort the frequencies so the line is drawn from left to right instead
    # of zig-zagging in dict iteration order.
    x = sorted(freq_counts)
    y = [freq_counts[frequency] for frequency in x]

    pyplot.plot(x, y)
    pyplot.show()

數據的樣本:

{'bangs': 1, 'sees': 1, 'stuff,': 1, 'Knox....': 1, 'Well': 1, 'about': 2, 'your': 1, 'blocks.': 1, 'what': 4, 'beetles....': 1, 'Boom': 1, 'blue': 1, 'paddled': 1, 'mixed': 1, 'fox': 5, 'Through': 1, 'on': 16, 'trick,': 2, 'When': 4, '...a': 1, 'silly': 1, 'band.': 2, 'come.': 3, "We'll": 2, 'likes': 2, 'slick,': 1, 'comes?': 1, 'chick': 1, 'goo,': 1, "it's": 2, 'then,': 1, 'muddled': 1, 'Now': 3, 'not': 1, 'flew,': 1, 'If,': 1, 'sneeze.': 1, 'bottled': 1, 'paddle': 4, 'called': 1, 'Goo-Goose,': 1, 'Blue': 2, 'Come': 1, 'fox.': 1, 'can': 3, 'poodle,': 1, 'this': 7, "Sue's": 4, 'Ben': 5, 'is': 7, 'goes': 1, 'to': 10, 'Crow': 4, 'cheese': 2, 'quick': 5, 'sir.': 27, 'easy,': 1, 'Clocks': 2, 'Fox': 6, 'Stop': 2, 'up': 1, 'be': 1, 'Well...': 1, 'hose': 2, 'Rose': 1, 'three': 4, 'Freezy': 2, 'New': 3, 'hate': 1, 'broom': 2, 'quite': 1, 'duck': 3, 'we': 1, 'done,': 1, 'tick.': 2, "can't": 5, 'beetles?': 1, 'well,': 2, 'box.': 4, "That's": 4, 'Do,': 1, 'say': 4, 'chicks': 5, '...': 1, 'enough,': 1, 'brick': 1, 'lot': 1, 'You': 4, 'sick': 2, 'that': 1, 'goo.': 4, 'Gooey': 1, 'made': 3, 'new': 5, 'noodles...': 1, 'Knox,': 6, 'for': 2, 'muddle.': 1, 'Bricks': 1, 'Luck': 4, 'Bim': 5, 'minute,': 1, 'brings': 2, 'bottle': 4, 'duddled': 1, "I'll": 3, 'come': 2, 'battles': 1, 'clocks': 2, 'such': 2, 'Then': 1, 'in': 19, 'sir....': 1, 'Two': 1, 'Knox.': 2, "Luke's": 1, 'lakes.': 4, 'trees': 3, "isn't": 2, 'band!': 4, 'our': 1, 'And': 2, 'blubber!': 1, 'another': 1, 'sews': 9, "bottle's": 1, "Crow's": 3, 'Step': 1, 'What': 1, 'grows': 1, 'like': 1, 'ticks': 2, 'too': 1, 'trick': 4, 'Fox,': 3, 'goo': 2, 'chewing!': 1, 'blocks': 3, 'fleas': 3, 'a': 24, 'lakes': 2, "don't": 2, 'those': 1, 'Luke': 4, 'sorry,': 1, 'tocks,': 2, 'Whose': 1, 'you': 3, 'Here': 1, 'tricks': 2, "poodle's": 1, 'they': 3, 'that.': 1, 'doing.': 1, 'Gluey.': 2, 'eating': 1, 'sir!': 1, 'breeze': 2, 'My': 4, 'tweetle': 11, 'these': 5, 'puddle,': 2, 'chewy': 1, 'tongue': 3, 'talk': 1, 'with': 11, 'beetles': 6, 
'noodle': 2, 'make': 5, 'who': 1, 'lame,': 1, 'flew.': 1, "I'm": 1, 'Fox!': 2, 'Nose': 1, 'the': 7, 'I': 9, "crow's": 2, 'Thank': 1, 'easy': 2, 'likes.': 2, 'battle': 7, 'licks': 4, 'goes.': 1, 'socks': 4, 'lead': 1, 'muddle': 1, 'shame,': 1, 'Please,': 1, 'fight,': 1, 'fun,': 1, 'chew,': 2, 'fuddled': 1, 'Broom': 1, 'No,': 1, 'Hose': 1, 'something': 2, 'find': 3, 'know': 1, 'Who': 4, 'call...': 1, 'First,': 1, 'Gooey.': 2, 'Look,': 2, 'fight': 1, 'This': 1, "Luck's": 1, 'poor': 2, 'now.': 6, 'freeze.': 2, 'game': 4, "Ben's": 5, 'it!': 2, 'Joe': 5, 'their': 2, 'you,': 1, 'Box': 1, 'bands.': 2, 'it': 3, 'bands': 1, 'bricks': 5, "here's": 1, "Let's": 3, 'Sue': 5, 'when': 2, 'clocks,': 2, 'breaks.': 2, 'puddle': 8, 'Socks': 4, 'sir,': 6, 'an': 2, "Bim's": 5, 'Pig': 2, 'now....': 1, 'battle.': 4, 'Slow': 5, 'sew': 2, 'blew.': 1, 'bring': 1, 'game,': 1, 'AND...': 3, 'and': 16, 'brooms.': 1, 'way.': 2, 'booms.': 1, 'lots': 1, 'clock': 1, 'comes.': 4, 'please....': 1, 'then...': 1, '...they': 2, 'say....': 1, 'beetle': 7, 'nose.': 1, 'slow,': 1, 'or': 1, 'Six': 2, 'AND': 1, 'block': 1, 'broom.': 4, 'do': 6, 'it,': 1, 'some.': 2, 'Duck': 1, 'sir?': 2, 'grows.': 1, 'this,': 1, 'Very': 2, 'Big': 2, 'whose': 3, 'noodle-eating': 1, 'chew': 2, 'choose': 2, 'Mr.': 13, 'band': 2, "Here's": 2, 'it.': 2, 'call': 3, 'dumb': 1, 'have': 2, 'so': 2, 'Goo-Goose': 1, 'say.': 2, 'socks.': 5, "trees'": 1, 'poodle': 3, 'socks,': 4, 'my': 1, 'While': 1, 'play.': 2, 'Chicks': 3, 'stack.': 4, 'rose': 2, 'freezy': 1, 'clothes.': 3, 'makes': 1, 'little': 1, 'paddles': 3, 'box': 2, 'all': 1, 'free': 2, 'blocks,': 1, 'Do': 1, 'blab': 1, 'THIS': 1, 'thing': 1, 'bends': 2, 'bent': 2, 'Knox': 8, 'socks?': 2, 'tock.': 2, 'wuddled': 1, 'much': 1, 'takes': 2, 'bends.': 2, 'wait': 1, 'see': 1, 'rubber.': 1, 'of': 4, 'clothes?': 2, 'mouth': 3, 'bottle...': 1, 'too,': 1, 'blibber': 1, 'Try': 2, 'where': 1, "won't": 2, 'get': 1} 
+0

你能發表你的數據樣本嗎? –

回答

0

使用collections庫中的Counter

由於要計數的對象是您的word_dict的值(即每個單詞的頻率),您需要用這些值初始化一個Counter實例,例如freq = Counter(word_dict.values())。然後,您可以使用freq.keys()和freq.values()爲您的繪圖提取x和y序列。

+0

謝謝,但我不允許使用Counter – carroll

0

看起來您正在嘗試沿x軸繪製字符串,即您正在使用的鍵。pyplot不是這樣工作的。您需要針對一個數字向量(通常是numpy數組)繪製這些值(values)。完成此操作後,您可以使用xticks命令重新標記自變量(x)軸上的刻度。

# dict.keys is a method and has to be called; materialize the keys once so
# the tick positions and the tick labels are guaranteed to line up.
labels = list(new_dict.keys())
# One evenly spaced numeric position (0, 1, ..., n-1) per label.
x = numpy.linspace(0, len(labels) - 1, len(labels))
pyplot.xticks(x, labels)
0

假設你的意思是反轉鍵和值,你可以這樣做:

>>> di={'bangs': 1, 'sees': 1, 'stuff,': 1, 'Knox....': 1, 'Well': 1, 'about': 2, 'your': 1, 'blocks.': 1, 'what': 4, 'beetles....': 1, 'Boom': 1, 'blue': 1, 'paddled': 1, 'mixed': 1, 'fox': 5, 'Through': 1, 'on': 16, 'trick,': 2, 'When': 4, '...a': 1, 'silly': 1, 'band.': 2, 'come.': 3, "We'll": 2, 'likes': 2, 'slick,': 1, 'comes?': 1, 'chick': 1, 'goo,': 1, "it's": 2, 'then,': 1, 'muddled': 1, 'Now': 3, 'not': 1, 'flew,': 1, 'If,': 1, 'sneeze.': 1, 'bottled': 1, 'paddle': 4, 'called': 1, 'Goo-Goose,': 1, 'Blue': 2, 'Come': 1, 'fox.': 1, 'can': 3, 'poodle,': 1, 'this': 7, "Sue's": 4, 'Ben': 5, 'is': 7, 'goes': 1, 'to': 10, 'Crow': 4, 'cheese': 2, 'quick': 5, 'sir.': 27, 'easy,': 1, 'Clocks': 2, 'Fox': 6, 'Stop': 2, 'up': 1, 'be': 1, 'Well...': 1, 'hose': 2, 'Rose': 1, 'three': 4, 'Freezy': 2, 'New': 3, 'hate': 1, 'broom': 2, 'quite': 1, 'duck': 3, 'we': 1, 'done,': 1, 'tick.': 2, "can't": 5, 'beetles?': 1, 'well,': 2, 'box.': 4, "That's": 4, 'Do,': 1, 'say': 4, 'chicks': 5, '...': 1, 'enough,': 1, 'brick': 1, 'lot': 1, 'You': 4, 'sick': 2, 'that': 1, 'goo.': 4, 'Gooey': 1, 'made': 3, 'new': 5, 'noodles...': 1, 'Knox,': 6, 'for': 2, 'muddle.': 1, 'Bricks': 1, 'Luck': 4, 'Bim': 5, 'minute,': 1, 'brings': 2, 'bottle': 4, 'duddled': 1, "I'll": 3, 'come': 2, 'battles': 1, 'clocks': 2, 'such': 2, 'Then': 1, 'in': 19, 'sir....': 1, 'Two': 1, 'Knox.': 2, "Luke's": 1, 'lakes.': 4, 'trees': 3, "isn't": 2, 'band!': 4, 'our': 1, 'And': 2, 'blubber!': 1, 'another': 1, 'sews': 9, "bottle's": 1, "Crow's": 3, 'Step': 1, 'What': 1, 'grows': 1, 'like': 1, 'ticks': 2, 'too': 1, 'trick': 4, 'Fox,': 3, 'goo': 2, 'chewing!': 1, 'blocks': 3, 'fleas': 3, 'a': 24, 'lakes': 2, "don't": 2, 'those': 1, 'Luke': 4, 'sorry,': 1, 'tocks,': 2, 'Whose': 1, 'you': 3, 'Here': 1, 'tricks': 2, "poodle's": 1, 'they': 3, 'that.': 1, 'doing.': 1, 'Gluey.': 2, 'eating': 1, 'sir!': 1, 'breeze': 2, 'My': 4, 'tweetle': 11, 'these': 5, 'puddle,': 2, 'chewy': 1, 'tongue': 3, 'talk': 1, 'with': 11, 'beetles': 
6, 'noodle': 2, 'make': 5, 'who': 1, 'lame,': 1, 'flew.': 1, "I'm": 1, 'Fox!': 2, 'Nose': 1, 'the': 7, 'I': 9, "crow's": 2, 'Thank': 1, 'easy': 2, 'likes.': 2, 'battle': 7, 'licks': 4, 'goes.': 1, 'socks': 4, 'lead': 1, 'muddle': 1, 'shame,': 1, 'Please,': 1, 'fight,': 1, 'fun,': 1, 'chew,': 2, 'fuddled': 1, 'Broom': 1, 'No,': 1, 'Hose': 1, 'something': 2, 'find': 3, 'know': 1, 'Who': 4, 'call...': 1, 'First,': 1, 'Gooey.': 2, 'Look,': 2, 'fight': 1, 'This': 1, "Luck's": 1, 'poor': 2, 'now.': 6, 'freeze.': 2, 'game': 4, "Ben's": 5, 'it!': 2, 'Joe': 5, 'their': 2, 'you,': 1, 'Box': 1, 'bands.': 2, 'it': 3, 'bands': 1, 'bricks': 5, "here's": 1, "Let's": 3, 'Sue': 5, 'when': 2, 'clocks,': 2, 'breaks.': 2, 'puddle': 8, 'Socks': 4, 'sir,': 6, 'an': 2, "Bim's": 5, 'Pig': 2, 'now....': 1, 'battle.': 4, 'Slow': 5, 'sew': 2, 'blew.': 1, 'bring': 1, 'game,': 1, 'AND...': 3, 'and': 16, 'brooms.': 1, 'way.': 2, 'booms.': 1, 'lots': 1, 'clock': 1, 'comes.': 4, 'please....': 1, 'then...': 1, '...they': 2, 'say....': 1, 'beetle': 7, 'nose.': 1, 'slow,': 1, 'or': 1, 'Six': 2, 'AND': 1, 'block': 1, 'broom.': 4, 'do': 6, 'it,': 1, 'some.': 2, 'Duck': 1, 'sir?': 2, 'grows.': 1, 'this,': 1, 'Very': 2, 'Big': 2, 'whose': 3, 'noodle-eating': 1, 'chew': 2, 'choose': 2, 'Mr.': 13, 'band': 2, "Here's": 2, 'it.': 2, 'call': 3, 'dumb': 1, 'have': 2, 'so': 2, 'Goo-Goose': 1, 'say.': 2, 'socks.': 5, "trees'": 1, 'poodle': 3, 'socks,': 4, 'my': 1, 'While': 1, 'play.': 2, 'Chicks': 3, 'stack.': 4, 'rose': 2, 'freezy': 1, 'clothes.': 3, 'makes': 1, 'little': 1, 'paddles': 3, 'box': 2, 'all': 1, 'free': 2, 'blocks,': 1, 'Do': 1, 'blab': 1, 'THIS': 1, 'thing': 1, 'bends': 2, 'bent': 2, 'Knox': 8, 'socks?': 2, 'tock.': 2, 'wuddled': 1, 'much': 1, 'takes': 2, 'bends.': 2, 'wait': 1, 'see': 1, 'rubber.': 1, 'of': 4, 'clothes?': 2, 'mouth': 3, 'bottle...': 1, 'too,': 1, 'blibber': 1, 'Try': 2, 'where': 1, "won't": 2, 'get': 1} 

# Invert the mapping: group the words of `di` by their occurrence count,
# producing count -> list of words that occur that many times.
new_di = {}
for word, count in di.items():
    if count not in new_di:
        # First word seen with this count: start its group.
        new_di[count] = []
    new_di[count].append(word)

>>> new_di 
{1: ['What', 'game,', 'Whose', 'Thank', 'Broom', 'goo,', 'bring', 'fuddled', 'hate', 'Hose', 'then,', 'sneeze.', 'Here', 'sir....', 'Please,', '...', 'it,', 'get', 'Goo-Goose', 'bands', 'muddle', 'nose.', 'Goo-Goose,', 'sorry,', 'not', "I'm", 'little', 'No,', 'like', 'THIS', 'poodle,', 'Knox....', 'Bricks', 'blibber', 'chick', 'where', 'Rose', 'see', 'noodle-eating', 'call...', 'fun,', 'blue', 'chewing!', 'clock', 'lots', 'slow,', 'sir!', 'chewy', 'goes', 'beetles?', 'Do', 'goes.', 'flew.', 'Box', 'be', 'we', 'eating', 'this,', 'stuff,', "poodle's", 'Duck', 'Well...', 'then...', 'quite', 'minute,', 'Step', 'doing.', 'wait', 'brooms.', 'bottle...', 'thing', 'bangs', 'mixed', 'fight,', 'makes', 'or', 'grows.', 'duddled', 'all', 'too,', 'Two', 'Gooey', 'Boom', 'another', 'If,', 'done,', 'your', '...a', 'First,', 'now....', 'fight', 'muddle.', "trees'", 'too', 'lot', 'enough,', 'blew.', 'brick', 'This', 'Come', 'easy,', 'that', 'Well', "Luke's", 'those', "here's", 'say....', 'up', 'you,', 'freezy', 'silly', 'flew,', 'wuddled', 'dumb', 'my', 'called', 'lame,', 'sees', 'Do,', 'comes?', "Luck's", 'blubber!', 'rubber.', 'shame,', 'paddled', 'Then', 'blab', 'battles', 'booms.', 'bottled', 'please....', 'Through', 'grows', 'muddled', 'that.', 'our', 'who', 'much', 'slick,', 'Nose', 'blocks,', "bottle's", 'While', 'beetles....', 'noodles...', 'lead', 'fox.', 'AND', 'blocks.', 'block', 'talk', 'know'], 2: ['Blue', "don't", 'choose', 'clocks', 'band.', 'tock.', 'Big', 'broom', 'some.', "crow's", 'easy', 'it.', 'it!', 'Try', 'tocks,', 'Pig', 'Clocks', "isn't", 'likes', 'sew', 'chew', 'bends', 'Very', 'box', 'puddle,', 'Knox.', 'band', 'Six', 'for', 'ticks', '...they', "Here's", 'hose', 'And', 'free', 'say.', 'come', 'about', 'chew,', 'likes.', 'Freezy', 'way.', 'tick.', 'rose', 'cheese', 'bent', 'takes', 'their', "it's", "We'll", 'Fox!', 'brings', 'noodle', 'clocks,', 'Gooey.', 'Gluey.', 'sir?', 'when', 'breaks.', 'have', 'an', 'well,', 'something', 'clothes?', 'bends.', 'Stop', 
'trick,', 'sick', 'poor', "won't", 'bands.', 'goo', 'play.', 'socks?', 'such', 'tricks', 'freeze.', 'breeze', 'so', 'lakes', 'Look,'], 3: ['find', 'Now', 'mouth', 'trees', 'they', 'Chicks', 'fleas', 'New', 'come.', 'whose', 'AND...', 'tongue', 'poodle', 'duck', 'call', 'Fox,', "I'll", 'made', 'can', 'paddles', 'it', 'clothes.', "Let's", 'you', 'blocks', "Crow's"], 4: ['goo.', 'band!', 'game', 'socks', 'battle.', 'My', 'lakes.', 'broom.', 'what', 'paddle', "Sue's", 'of', 'When', 'Socks', 'three', 'box.', 'licks', "That's", 'trick', 'socks,', 'say', 'comes.', 'You', 'stack.', 'Luke', 'Who', 'Luck', 'Crow', 'bottle'], 5: ['chicks', 'Bim', 'quick', 'Sue', 'fox', 'Joe', 'new', "Bim's", "can't", 'bricks', 'socks.', "Ben's", 'Ben', 'Slow', 'make', 'these'], 6: ['Fox', 'Knox,', 'do', 'now.', 'sir,', 'beetles'], 7: ['beetle', 'battle', 'this', 'is', 'the'], 8: ['Knox', 'puddle'], 9: ['sews', 'I'], 10: ['to'], 11: ['tweetle', 'with'], 13: ['Mr.'], 16: ['and', 'on'], 19: ['in'], 24: ['a'], 27: ['sir.']} 
0

我不知道你用什麼對數據進行分詞(tokenizing),但一個快速的解決方案是使用nltk

下面是一個演示其用法的小例子:

# necessary imports 
from nltk import FreqDist # used later to plot and get count 
from nltk.tokenize import word_tokenize # tokenizes our sentence by word 

# sample text
# Adjacent string literals inside parentheses are concatenated, which lets
# the sentence span two source lines (a plain quoted string cannot).
text = ('this is a super long text, that has some random words in it. '
        'It is not really that long, but could be very long.')
tknz = word_tokenize(text) # tokenizes the text into ('this', 'is',...)
fdist = FreqDist(tknz) # creates frequency distribution from the tokenized words

從這裏,你只需調用fdist.plot(),就會得到:

enter image description here

這樣你就得到了一個可以繼續編輯的matplotlib圖,而且只需要幾行代碼即可獲得。

您可以在這裏(here)找到有關FreqDist的更多信息。它的行爲也類似於字典:

>>> fdist.items() 
dict_items([(',', 2), ('in', 1), ('a', 1), ('very', 1), ('really', 1), ('be', 1), ...])