您可能需要先建立一個 URL opener(開啟器):
def createOpener(self):
    """Build a urllib2 opener that stores cookies in a ``MyCookieJar``.

    The jar is given a default cookie policy with RFC 2965 handling
    enabled, and the opener's default headers are set to the value of
    ``self.getUserAgent()`` plus a fixed ``Host`` entry.

    Returns:
        The configured opener object.
    """
    jar = MyCookieJar()
    jar.set_policy(cookielib.DefaultCookiePolicy(rfc2965=True))

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    # NOTE(review): Host is hard-coded to google.com -- confirm this is
    # intended when the opener is used for URLs on other hosts.
    opener.addheaders = [
        ('User-Agent', self.getUserAgent()),
        ('Host', 'google.com'),
    ]
    return opener
其中 cookie jar(MyCookieJar)的定義如下:
class MyCookieJar(cookielib.CookieJar):
    """Cookie jar that removes double quotes from a cookie's version."""

    def _cookie_from_cookie_tuple(self, tup, request):
        """Clean the ``version`` attribute, then defer to ``CookieJar``.

        ``tup`` is unpacked as ``(name, value, standard, rest)``. When
        ``standard`` holds a non-None ``version``, every ``"`` character is
        removed from it and the cleaned value is written back into
        ``standard`` (the caller's dict is modified in place).

        Returns:
            Whatever the base-class ``_cookie_from_cookie_tuple`` returns.
        """
        _name, _value, attributes, _rest = tup
        raw_version = attributes.get('version')
        if raw_version is not None:
            # presumably some servers send version="1" with quotes, which
            # the base class would not accept as-is -- TODO confirm
            attributes["version"] = raw_version.replace('"', '')
        return cookielib.CookieJar._cookie_from_cookie_tuple(self, tup, request)
接著建立 opener,並透過它開啟 URL、讀取資料,例如:
def fetchURL(self, url, data=None, headers=None):
    """Request ``url`` through a newly created opener and return the body.

    Args:
        url: Address to request.
        data: Optional request body handed to ``urllib2.Request``.
        headers: Optional mapping of extra request headers. ``None``
            (the default) is treated as an empty mapping.

    Returns:
        The result of ``read()`` on the opened response.
    """
    # Avoid a shared mutable default argument.
    if headers is None:
        headers = {}
    request = urllib2.Request(url, data, headers)
    # A new opener is created on every call and kept on the instance.
    self.opener = self.createOpener()
    urlHandle = self.opener.open(request)
    try:
        return urlHandle.read()
    finally:
        # Always release the response, even if read() raises.
        urlHandle.close()
最好準備一份 User-Agent 清單檔,並從檔案中讀取:
# Load the candidate User-Agent strings: one list entry per line of the
# file at ffpath.
with open(ffpath) as ua_file:
    USER_AGENTS_LIST = ua_file.read().splitlines()
並從該清單中取得
# Pick one User-Agent at random. randint's upper bound is inclusive,
# hence the "- 1".
index = random.randint(0, len(USER_AGENTS_LIST) - 1)
uA = USER_AGENTS_LIST[index]
隨機的一個。至於使用者代理(User-Agent)的清單,可參考 here。
以上只是不依賴任何外部框架的做法示意。
也可以試試 requests 套件:http://docs.python-requests.org/en/master/user/quickstart/#cookies ,它比 urllib 更容易使用。 –