2014-02-23 48 views
1

過去幾天我在終端上運行以下腳本。我沒有做任何改變。以前,它一直工作完美,現在我有錯誤:現在我有錯誤:Python正在工作 - 沒有改變 - 將不再工作。發生了什麼事?

Traceback (most recent call last): 
    File "GetAlexRanking.py", line 193, in <module> 
    print("%s:%d" % (p.__class__.__name__, p.get_rank(url 
TypeError: %d format: a number is required, not NoneType 

這裏是代碼的全面性。應該可以將它保存在.py文件中,使用命令python filename.py www.google.com從終端shell運行它以打印Google的一些流量統計信息。然而,這將不再適用於我。這裏有什麼問題 ?

import struct 
import sys 
import urllib 
import urllib2 
import httplib 
import re 
import xml.etree.ElementTree 

class RankProvider(object): 
    """Abstract class for obtaining the page rank (popularity) 
from a provider such as Google or Alexa. 

""" 
    def __init__(self, host, proxy=None, timeout=30): 
     """Keyword arguments: 
host -- toolbar host address 
proxy -- address of proxy server. Default: None 
timeout -- how long to wait for a response from the server. 
Default: 30 (seconds) 

""" 
     self._opener = urllib2.build_opener() 
     if proxy: 
      self._opener.add_handler(urllib2.ProxyHandler({"http": proxy})) 

     self._host = host 
     self._timeout = timeout 

    def get_rank(self, url): 
     """Get the page rank for the specified URL 

Keyword arguments: 
url -- get page rank for url 

""" 
     raise NotImplementedError("You must override get_rank()") 


class AlexaTrafficRank(RankProvider): 
    """ Get the Alexa Traffic Rank for a URL 

""" 
    def __init__(self, host="xml.alexa.com", proxy=None, timeout=30): 
     """Keyword arguments: 
host -- toolbar host address: Default: joolbarqueries.google.com 
proxy -- address of proxy server (if required). Default: None 
timeout -- how long to wait for a response from the server. 
Default: 30 (seconds) 

""" 
     super(AlexaTrafficRank, self).__init__(host, proxy, timeout) 

    def get_rank(self, url): 
     """Get the page rank for the specified URL 

Keyword arguments: 
url -- get page rank for url 

""" 
     query = "http://%s/data?%s" % (self._host, urllib.urlencode((
      ("cli", 10), 
      ("dat", "nsa"), 
      ("ver", "quirk-searchstatus"), 
      ("uid", "20120730094100"), 
      ("userip", "192.168.0.1"), 
      ("url", url)))) 

     response = self._opener.open(query, timeout=self._timeout) 
     if response.getcode() == httplib.OK: 
      data = response.read() 

      element = xml.etree.ElementTree.fromstring(data) 
      for e in element.iterfind("SD"): 
       popularity = e.find("POPULARITY") 
       if popularity is not None: 
        return int(popularity.get("TEXT")) 


class GooglePageRank(RankProvider): 
    """ Get the google page rank figure using the toolbar API. 
Credits to the author of the WWW::Google::PageRank CPAN package 
as I ported that code to Python. 

""" 
    def __init__(self, host="toolbarqueries.google.com", proxy=None, timeout=30): 
     """Keyword arguments: 
host -- toolbar host address: Default: toolbarqueries.google.com 
proxy -- address of proxy server (if required). Default: None 
timeout -- how long to wait for a response from the server. 
Default: 30 (seconds) 

""" 
     super(GooglePageRank, self).__init__(host, proxy, timeout) 
     self._opener.addheaders = [("User-agent", "Mozilla/4.0 (compatible; \ 
GoogleToolbar 2.0.111-big; Windows XP 5.1)")] 

    def get_rank(self, url): 
     # calculate the hash which is required as part of the get 
     # request sent to the toolbarqueries url. 
     ch = '6' + str(self._compute_ch_new("info:%s" % (url))) 

     query = "http://%s/tbr?%s" % (self._host, urllib.urlencode((
      ("client", "navclient-auto"), 
      ("ch", ch), 
      ("ie", "UTF-8"), 
      ("oe", "UTF-8"), 
      ("features", "Rank"), 
      ("q", "info:%s" % (url))))) 

     response = self._opener.open(query, timeout=self._timeout) 
     if response.getcode() == httplib.OK: 
      data = response.read() 
      match = re.match("Rank_\d+:\d+:(\d+)", data) 
      if match: 
       rank = match.group(1) 
       return int(rank) 

    @classmethod 
    def _compute_ch_new(cls, url): 
     ch = cls._compute_ch(url) 
     ch = ((ch % 0x0d) & 7) | ((ch/7) << 2); 

     return cls._compute_ch(struct.pack("<20L", *(cls._wsub(ch, i * 9) for i in range(20)))) 

    @classmethod 
    def _compute_ch(cls, url): 
     url = struct.unpack("%dB" % (len(url)), url) 
     a = 0x9e3779b9 
     b = 0x9e3779b9 
     c = 0xe6359a60 
     k = 0 

     length = len(url) 

     while length >= 12: 
      a = cls._wadd(a, url[k+0] | (url[k+1] << 8) | (url[k+2] << 16) | (url[k+3] << 24)); 
      b = cls._wadd(b, url[k+4] | (url[k+5] << 8) | (url[k+6] << 16) | (url[k+7] << 24)); 
      c = cls._wadd(c, url[k+8] | (url[k+9] << 8) | (url[k+10] << 16) | (url[k+11] << 24)); 

      a, b, c = cls._mix(a, b, c) 

      k += 12 
      length -= 12 

     c = cls._wadd(c, len(url)); 

     if length > 10: c = cls._wadd(c, url[k+10] << 24) 
     if length > 9: c = cls._wadd(c, url[k+9] << 16) 
     if length > 8: c = cls._wadd(c, url[k+8] << 8) 
     if length > 7: b = cls._wadd(b, url[k+7] << 24) 
     if length > 6: b = cls._wadd(b, url[k+6] << 16) 
     if length > 5: b = cls._wadd(b, url[k+5] << 8) 
     if length > 4: b = cls._wadd(b, url[k+4]) 
     if length > 3: a = cls._wadd(a, url[k+3] << 24) 
     if length > 2: a = cls._wadd(a, url[k+2] << 16) 
     if length > 1: a = cls._wadd(a, url[k+1] << 8) 
     if length > 0: a = cls._wadd(a, url[k]) 

     a, b, c = cls._mix(a, b, c); 

     # integer is always positive 
     return c 

    @classmethod 
    def _mix(cls, a, b, c): 
     a = cls._wsub(a, b); a = cls._wsub(a, c); a ^= c >> 13; 
     b = cls._wsub(b, c); b = cls._wsub(b, a); b ^= (a << 8) % 4294967296; 
     c = cls._wsub(c, a); c = cls._wsub(c, b); c ^= b >>13; 
     a = cls._wsub(a, b); a = cls._wsub(a, c); a ^= c >> 12; 
     b = cls._wsub(b, c); b = cls._wsub(b, a); b ^= (a << 16) % 4294967296; 
     c = cls._wsub(c, a); c = cls._wsub(c, b); c ^= b >> 5; 
     a = cls._wsub(a, b); a = cls._wsub(a, c); a ^= c >> 3; 
     b = cls._wsub(b, c); b = cls._wsub(b, a); b ^= (a << 10) % 4294967296; 
     c = cls._wsub(c, a); c = cls._wsub(c, b); c ^= b >> 15; 

     return a, b, c 

    @staticmethod 
    def _wadd(a, b): 
     return (a + b) % 4294967296 

    @staticmethod 
    def _wsub(a, b): 
     return (a - b) % 4294967296 


if __name__ == "__main__": 
    url = sys.argv[1] 
    providers = (AlexaTrafficRank(), GooglePageRank(),) 

    print("Traffic stats for: %s" % (url)) 
    for p in providers: 
     print("%s:%d" % (p.__class__.__name__, p.get_rank(url))) 

回答

2

在您的兩個get_rank實例方法,你也行:

if response.getcode() == httplib.OK: 

然後另一個功能特定if。如果同時評估True(加上,在AlexaTrafficRank,element.iterfind("SD")必須具有值),則該方法將只有return值;否則,他們將隱含return None,因此你看到的錯誤。

+0

@jonrharpe謝謝你。所以我可以簡單地通過刪除突出顯示的if語句來刪除問題,或者除此之外還有更多的問題嗎?另外,你認爲可能會導致它突然變藍?非常感謝你:) –

+0

@SimonKiely在使用在線資源時有很多可能會出錯(或只是改變)。我並不是說你應該刪除'if's,但是你需要在方法內部或者調用者內部處理它們不會成爲'True'的可能性。 – jonrsharpe