The code reads URLs from a file, pushes them onto a queue shared by worker threads, and makes a third-party web API call for each one; the results go into a global list. Sometimes when I run this program it goes all the way to the end and finishes processing (prints done), and sometimes it gets stuck and never finishes. Why does it sometimes not finish?
It looks like there is an exception ("We failed to reach a server") that holds on to the process so it never ends. I believe this is a threading problem.
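A minimal sketch of how queue.join() can block forever when a single worker never reaches task_done(); here time.sleep() just stands in for whatever is holding that worker up (this is only my assumption about the hang):

import threading
import Queue
import time

q = Queue.Queue()

def worker():
    while True:
        item = q.get()
        if item == "slow":
            time.sleep(10 ** 6)  # stands in for a call that never returns
        q.task_done()            # never reached for the "slow" item

t = threading.Thread(target=worker)
t.setDaemon(True)
t.start()

q.put("fast")
q.put("slow")
q.join()        # blocks forever: task_done() was only called for "fast"
print "done"    # never printed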
Can anybody figure out what the problem is? Thanks in advance.
Below is the code:
import threading
import Queue
import hmac
import hashlib
import base64
import urllib2
from urllib2 import Request, urlopen, URLError, HTTPError
import sys
import httplib, urllib, time, random, os
import json
from urlparse import urlparse
import time
#Number of threads
n_thread = 50
#Create queue
queue = Queue.Queue()
domainBlacklistDomain=[]
urlList=[]
def checkBlackList(domain, line):
    testUrl = 'https://test.net'
    apiToken = 'aaaaa'
    secretKey = 'bbbb'
    signature_data = 'GET\n/v1/blacklist/lookup\nurl=' + domain + '\n\n\n'
    digest = hmac.new(secretKey, signature_data, hashlib.sha1).digest()
    digest_base64 = base64.encodestring(digest)
    req = urllib2.Request('https://test.net/v1/blacklist/lookup?url=' + domain)
    req.add_header('Authorization', 'Test' + apiToken + ':' + digest_base64)
    req.add_header('Connection', 'Keep-Alive')
    try:
        page = urlopen(req)
        length = str(page.info())
        if length.find("Content-Length: 0") != -1:
            url = str(line.strip())
            urlList.append(url)
        else:
            json_data = json.load(page)
            domainBlacklistDomain.append(json_data['url'])
            if int(json_data['score']) > 10:
                print json_data['url']
    except HTTPError, e:
        print 'The server couldn\'t fulfill the request.'
    except URLError, e:
        print 'We failed to reach a server.'
class ThreadClass(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        # Each thread works off the shared queue
        self.queue = queue

    def run(self):
        while True:
            # Get a job from the queue
            host = self.queue.get()
            parsed_uri = urlparse(host)
            domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
            if "\n" in domain:
                domain = domain.replace('\n', '').replace('\r', '')
                if domain not in domainBlacklistDomain:
                    checkBlackList(domain, host)
            else:
                if domain not in domainBlacklistDomain:
                    checkBlackList(domain, host)
            # Signal to the queue that this job is done
            self.queue.task_done()
# Create the worker threads
for i in range(n_thread):
    t = ThreadClass(queue)
    t.setDaemon(True)
    # Start thread
    t.start()

# Read the file line by line
hostfile = open("result_url.txt", "r")
for line in hostfile:
    # Put the line on the queue
    queue.put(line)

# Wait on the queue until everything has been processed
queue.join()

fo = open("final_result.txt", "w+b")
for item in urlList:
    fo.write("%s\n" % item)
print "done??"
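In case it helps narrow this down, the snippet below is only a diagnostic sketch of mine (the 120-second delay is an arbitrary value I picked): started near the top of the script, it waits until the hang should have happened and then prints where every thread is currently blocked, which should show whether the workers are stuck inside urlopen().

import sys
import time
import threading
import traceback

def dump_stacks():
    # Wait long enough for the hang to occur, then print every thread's stack
    time.sleep(120)
    for thread_id, frame in sys._current_frames().items():
        print "Thread %s:" % thread_id
        traceback.print_stack(frame)

dumper = threading.Thread(target=dump_stacks)
dumper.setDaemon(True)
dumper.start()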