2011-06-20 49 views
0
import httplib 
import urlparse 

def getUrl(url):
    """Send a HEAD request for ``url`` and print whether it answered 200.

    Prints 'Awesome' when the response status is 200 and 'Error' for any
    other status.  Any exception raised along the way (for example a DNS
    failure for a host that cannot be resolved) is caught and printed
    instead, so the status check is never reached in that case.

    The connection is always made to port 80 of the URL's network location.

    Args:
        url: Absolute HTTP URL to probe.
    """
    try:
        parts = urlparse.urlsplit(url)
        server = parts[1]  # network location part of the URL
        path = parts[2]
        conn = httplib.HTTPConnection(server, 80)
        try:
            conn.connect()
            conn.putrequest('HEAD', path)
            conn.putheader('Accept', '*/*')
            conn.endheaders()
            response = conn.getresponse()
        finally:
            # Release the connection even when the request fails part-way.
            conn.close()
        if response.status != 200:
            print('Error')
        else:
            print('Awesome')
    except Exception as e:
        print(e)

我寫了上面的代碼來檢查給定的 URL 是否有效。但不知何故，當我測試它時，對於每個無效的 URL，它都會引發異常。（問題標題：無效網址拋出異常 - python）

>>> getUrl('http://www.idlebrfffain.com') 
[Errno -2] Name or service not known 

Python版本:

[email protected]:~$ python -V 
Python 2.6.4 

誰能幫我找出究竟是哪裡出錯了？

+3

由於無法解析域名，所以會引發異常。爲什麼這是你的代碼的問題？ –

+0

我確切的問題是：當無法解析域名時，我希望我的代碼跳到 if 條件。有沒有其他方法可以做到這一點？我的目標是驗證給定的 URL 是否有效。 – LearnCode

+0

'if'與捕捉異常無關。再試一次。 –

回答

2

你必須捕獲 socket.error：

import httplib, socket 
import urlparse 

def getUrl(url):
    """Probe ``url`` with a HEAD request and print the outcome.

    Connection problems are reported separately from HTTP results:

    * "Host ... does not exist" when the host name cannot be resolved
      (``socket.gaierror``).
    * "Cannot connect to ..." for any other socket-level failure while
      connecting (``socket.error``).
    * 'Awesome' when the response status is 200, 'Error' otherwise.

    The connection is always made to port 80 of the URL's network location.
    Exceptions raised after the connection is established are not caught
    here; they propagate to the caller once the connection has been closed.

    Args:
        url: Absolute HTTP URL to probe.
    """
    parts = urlparse.urlsplit(url)
    server = parts[1]  # network location part of the URL
    path = parts[2]
    conn = httplib.HTTPConnection(server, 80)

    # Only the connect step is guarded: these are the failures we expect
    # and know how to describe.
    try:
        conn.connect()
    except socket.gaierror:
        print("Host %s does not exist" % server)
        return
    except socket.error:
        print("Cannot connect to %s:%s." % (server, 80))
        return

    try:
        conn.putrequest('HEAD', path)
        conn.putheader('Accept', '*/*')
        conn.endheaders()
        response = conn.getresponse()
    finally:
        # Close the connection even if sending or reading the reply fails.
        conn.close()

    if response.status != 200:
        print('Error')
    else:
        print('Awesome')


# Demo calls: one URL per connection-failure case.
for demo_url in (
    'http://www.idlebrfffain.com',  # not a registered domain
    'http://8.8.8.8',  # not a http server
):
    getUrl(demo_url)

只在特定的行周圍使用 try: except:，而且僅當你知道可能會發生什麼異常時才這樣做。Python 會向您顯示未捕獲異常的追溯（traceback），因此您可以輕鬆找出問題所在。

2

這是預期的行爲。之所以拋出異常，是因爲無法解析 URL 中的域名。異常是在您的 if response.status != 200 這一行之前拋出的，因此控制權直接轉到了您的 except 塊。

您需要花一些時間研究Exceptions的工作方式。這裏有一個你可以嘗試的例子。

def getUrl(url):
    """Skeleton: report a result whether or not the request code succeeds.

    ``status`` starts as None and is only overwritten when the code in the
    ``try`` block completes.  The ``finally`` clause always runs, so exactly
    one of "Error" / "Awesome" is printed on every call.

    Args:
        url: URL to check (unused until the placeholder is filled in).
    """
    status = None
    try:
        # do your normal stuff...
        # Placeholder: the request code elided above is expected to define
        # ``response``.  Until it does, this lookup raises NameError, which
        # the handler below swallows, so "Error" is printed.
        status = response.status
    except Exception:
        # do whatever you want here...
        pass
    finally:
        if status != 200:
            print("Error")
        else:
            print("Awesome")
+1

我會用 'else' 來做這件事，而不是 'finally'。 –

1
# The following code validates a URL in two steps: first the domain is checked, then the path attached to that domain.
from urlparse import urlparse 
import urllib2 
import socket 
class ValidateURL:
    """Validate a URL in two steps and print the verdict.

    Step one resolves the URL's domain; step two, only if the domain
    resolves, tries to open the full URL.
    """

    def __init__(self, url):
        """Store the URL to validate; no network access happens here."""
        self._url = url

    def startActivity(self):
        """Run both checks on the stored URL and print the result.

        Prints that the domain does not exist when resolution fails;
        otherwise prints whether the URL is valid or its path is not.
        """
        self._parts = urlparse(self._url)
        domain = self._parts[1]
        if not self._checkDomain(domain):
            print('%s  domain does not exist' % (domain,))
            return
        if self._checkUrl(self._url) == 1:
            print('%s  is valid' % (self._url,))
        else:
            print('The path  %s  is not valid' % (self._parts[2],))

    # Checks whether the domain is right or not
    def _checkDomain(self, domain):
        """Return 1 if ``domain`` resolves, 0 if the lookup fails.

        Any other exception propagates instead of being reported as a
        successful lookup.
        """
        try:
            socket.gethostbyname_ex(domain)
        except (socket.error, UnicodeError):
            # socket.gaierror is a subclass of socket.error, so one clause
            # covers both.  UnicodeError is what the host-name encoding
            # step raises for names it cannot encode.
            return 0
        return 1

    # Checks whether the path is right or not
    def _checkUrl(self, url):
        """Return 1 if ``url`` can be opened with urllib2, 0 otherwise.

        The request object is kept on ``self._req``.
        """
        try:
            self._req = urllib2.Request(url)
            response = urllib2.urlopen(self._req)
        except Exception:
            # Any failure to fetch means the URL could not be confirmed.
            # Unlike a ``return`` inside ``finally``, this still lets
            # KeyboardInterrupt and SystemExit propagate, and never reports
            # a failed fetch as valid.
            return 0
        # Release the connection held by the response object.
        response.close()
        return 1

if __name__ == "__main__":
    # Validate two sample URLs on the same domain, one after the other.
    for sample_url in (
        'http://stackoverflow.com/questions/invalid-urls-throw-an-exception-python',
        'http://stackoverflow.com/questions/6414417/invalid-urls-throw-an-exception-python',
    ):
        ValidateURL(sample_url).startActivity()

希望我推導出的解決方案是明智的。