2013-10-24 118 views
-4

爲什麼我在這個腳本上出現'invalid syntax'錯誤?我正在使用Python 3.3.2 Shell,當我試圖運行模塊時,只會彈出一個'無效語法'。Python:無效的語法錯誤

import os, re, csv 
import email, email.message, email.header 

MAILDIR = 'mails' 
FEATURES = ['from', 'domain', 'ip', 'country', 'content-type', 'charset', 'reply', 'recipients', 'images', 'urls'] 

try: 
    geoip = False 
    import pygeoip 
    gi = pygeoip.GeoIP('GeoIP.dat', pygeoip.MEMORY_CACHE) 
    geoip = True 
except ImportError: 
    pass 
except IOError: 
    print 'GeoIP.dat not found' 

class Mail(email.message.Message): 
    def count_html_tag(self, tag): 
     tags = 0 
     for part in email.iterators.typed_subpart_iterator(self, 'text', 'html'): 
      html = part.get_payload(decode=True) 
      tags += html.lower().count('<' + tag) 
     return tags 

def feature(self, feature): 
    # content-type: plain, html, multipart 
    if feature == 'content-type': 
     if self.is_multipart(): 
      return 'multipart' 
     else: 
      return self.get_content_subtype() 

    # charset: list of charsets (if multipart) 
    elif feature == 'charset': 
     return ','.join(sorted(set(self.get_charsets()) - {None})) 

    # from: claimed email address of sender 
    elif feature == 'from': 
     addr = self.get('from', self.get('sender', '')) 
     return email.utils.parseaddr(addr)[1] 

    # domain: domain name of sender 
    elif feature == 'domain': 
     return self.feature('from').split('@', 1)[1] 

    # ip: possible ip address of sender 
    elif feature == 'ip': 
     if 'x-original-ip' in self: 
      return self['x-original-ip'] 
     else: 
      for received in reversed(self.get_all('received')): 
       m = re.search('\[([0-9.]*)\]', received) 
       if m and m.group(1)[:3] not in ('127', '192', '172', '10.'): 
        return m.group(1) 
      else: 
       return '' 

    # country: country of sender, based on ip 
    elif feature == 'country': 
     if geoip: 
      ip = self.feature('ip') 
      return gi.country_code_by_addr(ip) if ip else '' 
     else: 
      return '' 

    # recipient: number of known recipients 
    elif feature == 'recipients': 
     fields = ('to', 'cc', 'resent-to', 'resent-cc') 
     return sum(len(email.utils.getaddresses(self.get_all(field, []))) for field in fields) 

    # reply: re, fw 
    elif feature == 'reply': 
     subject = email.header.decode_header(self.get('Subject', ''))[0][0] 
     m = re.match('(re|fw)d?:', subject.lower()) 
     return m.group(1) if m else '' 

    # images: number of images in html 
    elif feature == 'images': 
     return self.count_html_tag('img') 

    # images: number of urls in html 
    elif feature == 'urls': 
     return self.count_html_tag('a') 

    else: 
     raise KeyError() 

def features(self, lst=FEATURES): 
    return {feature: self.feature(feature) for feature in lst} 

def main(): 
    with open('features.csv', 'wb') as csvfile: 
     writer = csv.DictWriter(csvfile, FEATURES) 
     writer.writeheader() 
     for mpath in os.listdir(MAILDIR): 
      with open(os.path.join(MAILDIR, mpath)) as mfile: 
       mail = email.message_from_file(mfile, Mail) 
      writer.writerow(mail.features(FEATURES)) 

if __name__ == '__main__': 
    main() 

在此先感謝!我對Python的

+1

要知道,錯誤信息也會告訴你錯誤所在。 (相反,你不要告訴我們錯誤在哪裏) – Sneftel

+1

@Ben作爲一個經驗法則,無論何時涉及到python 3,我總是檢查它們在哪裏打印! :P –

回答

4

還挺新的第16行:

... 
except IOError: 
    print 'GeoIP.dat not found' # Print is a function 

這應該是print(....)