我試圖解開很多我在urlSet中使用的URL。以下代碼大部分時間都適用。但有時需要很長時間才能完成。例如,我在urlSet中有2950。 stderr告訴我2900完成了,但getUrlMapping沒有完成。python grequests需要很長時間才能完成
def getUrlMapping(urlSet):
# get the url mapping
urlMapping = {}
#rs = (grequests.get(u) for u in urlSet)
rs = (grequests.head(u) for u in urlSet)
res = grequests.imap(rs, size = 100)
counter = 0
for x in res:
counter += 1
if counter % 50 == 0:
sys.stderr.write('Doing %d url_mapping length %d \n' %(counter, len(urlMapping)))
urlMapping[ getOriginalUrl(x) ] = getGoalUrl(x)
return urlMapping
def getGoalUrl(resp):
url=''
try:
url = resp.url
except:
url = 'NULL'
return url
def getOriginalUrl(resp):
url=''
try:
url = resp.history[0].url
except IndexError:
url = resp.url
except:
url = 'NULL'
return url