2011-06-23 21 views
3

有人能幫我看看下面這段 Python 代碼嗎?這是一段開源代碼,我爲了自己的用途稍微做了修改。我試圖訪問一台使用 NTLM 身份驗證的 Windows 服務器上的網頁。起初我的問題是如何保持持久連接,以免收到 HTTP 401 錯誤。現在這個問題已經解決了,但服務器會返回 HTTP 302 重定向,並在響應的 Set-Cookie 頭中發回一個 cookie。因此我添加了一個 cookie 處理程序,但沒有任何效果。此外,服務器返回的「Location」字段正是我最初提交的 URL。我不明白這一點:爲什麼服務器返回的重定向位置與我提交的 URL 完全相同?標題:因 HTTP 302 而無法使用 Python 檢索 NTLM 認證的網頁

import urllib2 
import httplib, socket 
import cookielib 
import ntlm 
from ntlm import ntlm 

class AbstractNtlmAuthHandler:
    """Shared NTLM handshake logic for urllib2 authentication handlers.

    Subclasses must define ``auth_header`` (the request header that carries
    the NTLM messages) and an ``http_error_<code>`` method that forwards to
    ``http_error_authentication_required``.
    """

    # NOTE: class-definition side effect kept from the original -- this sets
    # the class-wide default debug level of httplib.HTTPConnection to 1.
    httplib.HTTPConnection.debuglevel = 1

    # Optional URL to use for the credential lookup. When left empty the URL
    # of the request being retried is used instead.
    url1 = ""

    def __init__(self, password_mgr=None):
        """Store the password manager, creating a default one when omitted.

        Args:
            password_mgr: object providing ``add_password`` and
                ``find_user_password``; defaults to a new
                ``urllib2.HTTPPasswordMgr``.
        """
        if password_mgr is None:
            password_mgr = urllib2.HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def http_error_authentication_required(self, auth_header_field, req, fp, headers):
        """Retry the request with NTLM when the server offers NTLM.

        Args:
            auth_header_field: name of the response header holding the
                authentication challenge.
            req: the request that was rejected.
            fp: the rejected response; closed before the retry.
            headers: headers of the rejected response.

        Returns:
            The result of ``retry_using_http_NTLM_auth`` when the challenge
            header mentions NTLM, otherwise ``None``.
        """
        auth_header_value = headers.get(auth_header_field, None)
        if auth_header_value is not None and 'ntlm' in auth_header_value.lower():
            fp.close()
            return self.retry_using_http_NTLM_auth(req, auth_header_field, None, headers)
        return None

    def retry_using_http_NTLM_auth(self, req, auth_header_field, realm, headers):
        """Perform the NTLM negotiate/authenticate exchange on one connection.

        Args:
            req: the request to replay.
            auth_header_field: response header that carries the server's
                NTLM challenge.
            realm: realm passed to the password manager lookup.
            headers: ignored; the outgoing headers are rebuilt from ``req``.

        Returns:
            The raw httplib response to the authenticated request, or
            ``None`` when no password is stored for the lookup URL or the
            request already carries the same negotiate message.

        Raises:
            urllib2.URLError: when the request has no host or the final
                request fails with a socket error.
        """
        # Debug output kept from the original.
        print(req.get_full_url())
        print("\n\n")

        # Use the class/instance attribute rather than a module global, so
        # the handler also works when this module is imported.
        lookup_url = self.url1 or req.get_full_url()
        user, pw = self.passwd.find_user_password(realm, lookup_url)
        if pw is None:
            return None

        # ntlm secures a socket, so we must use the same socket for the
        # complete handshake
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        auth = 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(user)

        if req.headers.get(self.auth_header, None) == auth:
            # The negotiate message was already sent once; do not loop.
            return None
        headers[self.auth_header] = auth

        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')
        if req.get_full_url().startswith('https://'):
            h = httplib.HTTPSConnection(host)  # will parse host:port
        else:
            h = httplib.HTTPConnection(host)  # will parse host:port

        # we must keep the connection because NTLM authenticates the
        # connection, not single requests
        headers["Connection"] = "Keep-Alive"
        headers = dict((name.title(), val) for name, val in headers.items())
        h.request(req.get_method(), req.get_selector(), req.data, headers)
        r = h.getresponse()
        r.begin()
        # NOTE(review): int() raises TypeError when the challenge response
        # has no Content-Length header -- confirm the server always sends it.
        r._safe_read(int(r.getheader('content-length')))
        if r.getheader('set-cookie'):
            # this is important for some web applications that store
            # authentication-related info in cookies
            headers['Cookie'] = r.getheader('set-cookie')
        # remove the reference to the socket, so that it can not be closed by
        # the response object (we want to keep the socket open)
        r.fp = None

        auth_header_value = r.getheader(auth_header_field, None)
        # Skip the leading "NTLM " scheme prefix (5 characters).
        (ServerChallenge, NegotiateFlags) = ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value[5:])
        # NOTE(review): assumes ``user`` has the form DOMAIN\\username; a name
        # without a backslash raises IndexError below.
        user_parts = user.split('\\', 1)
        DomainName = user_parts[0].upper()
        UserName = user_parts[1]
        auth = 'NTLM %s' % ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
            ServerChallenge, UserName, DomainName, pw, NegotiateFlags)
        # The dict keys were title-cased above, so write the title-cased key;
        # otherwise e.g. 'Proxy-authorization' and 'Proxy-Authorization' would
        # both exist and collide unpredictably in the next normalisation.
        headers[self.auth_header.title()] = auth
        headers["Connection"] = "Close"
        headers = dict((name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            # none of the configured handlers are triggered, for example
            # redirect-responses are not handled!
            return h.getresponse()
        except socket.error as err:
            raise urllib2.URLError(err)


class HTTPNtlmAuthHandler(AbstractNtlmAuthHandler, urllib2.BaseHandler):
    """urllib2 handler that routes HTTP 401 responses to the NTLM retry logic."""

    # Request header used to send the authentication messages.
    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Forward a 401 response to ``http_error_authentication_required``.

        The challenge is looked up in the ``www-authenticate`` response
        header; ``code`` and ``msg`` are accepted but not used here.
        """
        challenge_field = 'www-authenticate'
        return self.http_error_authentication_required(challenge_field, req, fp, headers)


class ProxyNtlmAuthHandler(AbstractNtlmAuthHandler, urllib2.BaseHandler):
    """urllib2 handler that routes HTTP 407 responses to the NTLM retry logic."""

    # Request header used to send the authentication messages.
    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Forward a 407 response to ``http_error_authentication_required``.

        The challenge is looked up in the ``proxy-authenticate`` response
        header; ``code`` and ``msg`` are accepted but not used here.
        """
        challenge_field = 'proxy-authenticate'
        return self.http_error_authentication_required(challenge_field, req, fp, headers)


if __name__ == "__main__":
    # Placeholders: fill in the target page and the account to log in with.
    url = 'HTTP WEB ADDRESS HERE'
    # Second module-level name for the same URL (kept; other code in this
    # file may refer to `url1`).
    url1 = url
    user = 'USERNAME'
    password = 'PASSWORD'

    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    # NOTE(review): urllib2 issues a POST whenever `data` is not None, so an
    # empty string here still results in a POST -- confirm this is intended.
    data = ""

    # One password manager shared by every authentication handler.
    passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
    passman.add_password(None, url, user, password)

    cookie_jar = cookielib.CookieJar()

    # Handlers are passed to build_opener in this order: cookies, NTLM,
    # basic, digest, redirect.
    handlers = [
        urllib2.HTTPCookieProcessor(cookie_jar),
        HTTPNtlmAuthHandler(passman),
        urllib2.HTTPBasicAuthHandler(passman),
        urllib2.HTTPDigestAuthHandler(passman),
        urllib2.HTTPRedirectHandler(),
    ]
    opener = urllib2.build_opener(*handlers)
    urllib2.install_opener(opener)

    req = urllib2.Request(url, data, headers)
    response = urllib2.urlopen(req)
+1

回答你問題的最後一部分:服務器是要求你重新提交對該頁面的請求,但這次要*帶上* cookie 數據(這樣它就知道你已經通過了身份驗證,可以向你提供你有權查看的內容)。 – ewall

+0

請原諒我的無知,但 Python 的 cookie 處理程序不是應該替我處理這件事嗎?難道我必須手動重新提交? :-/ – Jay

+0

我看到代碼在服務器的響應中收到它時保存了cookie,這很好。我認爲下一步是按照服務器建議(301代碼)重新加載頁面 - 但是這次您存儲的cookie包含在請求中。 – ewall

回答

4

一旦您從服務器獲得響應,請檢查其狀態是否爲 302。如果是 302,則取出響應中的 cookie,並帶上該 cookie 信息重新請求同一個 URL:

# On a 302, request the same URL again, sending back the cookie the server
# just set; then read the body of whichever response is current.
if response.code == 302:
    header = {'Cookie': response.headers['Set-Cookie']}
    req = urllib2.Request(thesameurl, None, header)
    response = urllib2.urlopen(req)
response.read()