from collections import Counter 
import re 

def openfile(filename):
    """Return the entire contents of the text file at `filename`.

    The file is opened read-only (nothing is ever written to it, so write
    permission is not required) and is closed before returning, even if
    reading fails.

    Raises whatever `open` or `read` raise, e.g. when the file is missing.
    """
    with open(filename, "r") as fh:
        return fh.read()

def removegarbage(str):
    """Return `str` lowercased, with every run of non-word characters
    replaced by a single space.

    "Word characters" are whatever the regex class ``\\w`` matches (letters,
    digits and underscore). A leading or trailing run of non-word characters
    becomes a leading or trailing space; it is not stripped.

    NOTE: the parameter name shadows the builtin `str`. It is kept so that
    existing keyword callers keep working, but it is no longer reassigned.
    """
    cleaned = re.sub(r'\W+', ' ', str)
    return cleaned.lower()

def getwordbins(words):
    """Return a `Counter` mapping each item of `words` to its number of
    occurrences.

    `words` may be any iterable of hashable items; an empty iterable yields
    an empty `Counter`.
    """
    # Counter's constructor tallies an iterable directly.
    return Counter(words)

def main(filename, topwords):
    """Print the `topwords` most frequent words of the file `filename`.

    The text is read with `openfile`, normalised with `removegarbage`,
    tallied with `getwordbins`, and each of the most common entries is
    printed as "<word> <count>" on its own line.
    """
    txt = openfile(filename)
    txt = removegarbage(txt)
    # split() without an argument drops the empty strings that split(' ')
    # produces for leading/trailing/repeated spaces, so '' is never counted
    # as a word.
    words = txt.split()
    bins = getwordbins(words)
    for key, value in bins.most_common(topwords):
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("%s %s" % (key, value))


# The example invocation lives outside the function body; nested inside
# `main` it would recurse without bound.
if __name__ == '__main__':
    main('filename.txt', 10)

# Words whose frequencies we want to report.
interesting_words = ["ipsum","dolor"]

some_text = """ 
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec viverra consectetur sapien, sed posuere sem rhoncus quis. Mauris sit amet ligula et nulla ultrices commodo sed sit amet odio. Nullam vel lobortis nunc. Donec semper sem ut est convallis posuere adipiscing eros lobortis. Nullam tempus rutrum nulla vitae pretium. Proin ut neque id nisi semper faucibus. Sed sodales magna faucibus lacus tristique ornare. 
"""

# Tally every whitespace-separated token. split() does not remove
# punctuation, so e.g. "amet," and "amet" are counted as different tokens.
d = Counter(some_text.split())

# Keep only the (word, count) pairs for the interesting words. A list
# comprehension yields a real list on both Python 2 and Python 3, whereas
# filter() returns a lazy iterator on Python 3.
final_list = [item for item in d.items() if item[0] in interesting_words]




# def function if desired 
# you may have the filepath/specific words etc as parameters 

# Scan "filename.txt" line by line and print every occurrence of
# `specificword`.
# NOTE(review): `specificword` is not defined in the visible source; it must
# be bound to the word being searched for before this loop runs.
with open("filename.txt") as f:  # closes the file when the scan ends
    for line in f:
        # Replace sentence punctuation with spaces so that any interesting
        # word ends up surrounded by whitespace and split() can isolate it.
        # (The original maketrans call was not imported and was given
        # arguments of unequal length, which maketrans rejects.)
        line = re.sub(r"[.,!?]", " ", line)
        words = line.split()  # splits on any run of whitespace
        for word in words:
            if word == specificword:
                # or use a list of specific words:
                # if word in specificwordlist:
                print(word)
                # you could also append the words to some list,
                # create a dictionary etc

from collections import Counter 
import re 

def words(filename):
    """Lazily yield every word of the file `filename`, lowercased.

    A word is a maximal run of characters matched by the regex ``\\w+``.
    The file is read one line at a time and is closed once the generator
    is exhausted.
    """
    word_pattern = re.compile(r'\w+')
    with open(filename) as handle:
        for text_line in handle:
            for match in word_pattern.finditer(text_line):
                yield match.group(0).lower()


# Variant 1: count every word in the file, then look up the ones of interest.
wordcount = Counter(words('filename.txt'))
for word in ['foo', 'bar']:
    # Parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print("%s %s" % (word, wordcount[word]))

# Variant 2: count only the words of interest while streaming the file.
words_to_count = {'foo', 'bar'}
wordcount = Counter(word for word in words('filename.txt')
                    if word in words_to_count)
# list(...) so a plain list of (word, count) pairs is printed on Python 3
# too, where items() returns a view object.
print(list(wordcount.items()))




# Pseudocode: if 'pie' in words_in_my_dict: <do something>


def get_word_counts(words_to_count, filename):
    """Count occurrences of the keys of `words_to_count` in a text.

    Args:
        words_to_count: dict mapping each word of interest to its running
            count. It is updated in place, so repeated calls accumulate.
        filename: despite its name, the text itself (the code tokenises
            this string; it never opens a file).

    Returns:
        The same `words_to_count` dict, with counts incremented.

    The text is split into ``\\w+`` tokens, so punctuation attached to a
    word ("Alice's", "genre,") no longer hides it. A token is credited to
    the key that matches it exactly; otherwise to the key matching its
    lowercase form, so "Alice" and "The" count towards 'alice' and 'the'.
    """
    for token in re.findall(r'\w+', filename):
        if token in words_to_count:
            words_to_count[token] += 1
        else:
            lowered = token.lower()
            if lowered in words_to_count:
                words_to_count[lowered] += 1
    return words_to_count

if __name__ == '__main__':
    # Demo: count a few words in an in-memory string that stands in for the
    # contents of a file.

    fake_file_contents = (
        "Alice's Adventures in Wonderland (commonly shortened to "
        "Alice in Wonderland) is an 1865 novel written by English"
        " author Charles Lutwidge Dodgson under the pseudonym Lewis"
        " Carroll.[1] It tells of a girl named Alice who falls "
        "down a rabbit hole into a fantasy world populated by peculiar,"
        " anthropomorphic creatures. The tale plays with logic, giving "
        "the story lasting popularity with adults as well as children."
        "[2] It is considered to be one of the best examples of the literary "
        "nonsense genre,[2][3] and its narrative course and structure, "
        "characters and imagery have been enormously influential[3] in "
        "both popular culture and literature, especially in the fantasy genre."
    )

    # Words of interest, each starting with a count of zero.
    words_to_count = {
        'alice': 0,
        'and': 0,
        'the': 0,
    }

    print(get_word_counts(words_to_count, fake_file_contents))


# Example output when tokens are matched exactly (case-sensitive, split on
# single spaces):
# {'and': 4, 'the': 5, 'alice': 0}




# If you want to count every word and then look up a specific set of them,
# a dictionary is still a great (and fast!) fit for this task.


def get_all_word_counts(filename):
    """Return a dict mapping every space-separated token to its count.

    Despite its name, `filename` is the text itself: the string is split on
    single space characters and never opened as a file. Consecutive spaces
    (and an empty input) therefore produce the empty string as a token,
    which is counted like any other.
    """
    tally = {}
    for token in filename.split(' '):
        # get() supplies 0 the first time a token is seen.
        tally[token] = tally.get(token, 0) + 1
    return tally


