如何在所有線程完成後退出程序？

#!/usr/bin/env python 

import threading 
import urllib, sys,os 
import Queue 


# Number of Scanner worker threads started by main().
concurrent = 200
# Bounded work queue: put() blocks once concurrent*2 paths are waiting.
queue = Queue.Queue(concurrent*2)

# Command line: argv[1] is the base URL, argv[2] is the wordlist file.
try:
    aim = sys.argv[1].lower()
    dic = open(sys.argv[2], 'r')
except (IndexError, IOError):
    # IndexError: an argument is missing; IOError: the wordlist cannot be
    # opened.  A bare ``except:`` would also trap SystemExit and
    # KeyboardInterrupt, so only these two are handled here.
    print("Usage: %s url wordlist" % sys.argv[0])
    sys.exit(1)

class Scanner(threading.Thread):
    """Worker thread that probes ``aim + '/' + path`` for each queued path.

    Each path taken from ``queue`` is requested once; the line
    ``<url>=><status>`` is printed and appended to ``result.txt``.
    """

    def __init__(self, queue):
        """Store the work queue this worker reads paths from."""
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Process queued paths forever.

        The loop never returns by itself; main() marks the workers as daemon
        threads so they do not keep the process alive.
        """
        while True:
            self.path = self.queue.get()
            try:
                # Report the same URL that is actually requested.
                self.url = aim + '/' + self.path
                try:
                    self.geturl = urllib.urlopen(self.url)
                    try:
                        self.status = self.geturl.getcode()
                    finally:
                        self.geturl.close()
                except Exception as e:
                    # A failed request (e.g. the socket error raised as
                    # IOError) is recorded as the status instead of killing
                    # the thread, so every path yields a result line.
                    self.status = 'error (%s)' % e
                self.result = self.url + '=>' + str(self.status)
                print(self.result)
                self.writeresult(self.result)
            finally:
                # Always acknowledge the item: if an exception skipped this
                # call, queue.join() in main() would never return.
                self.queue.task_done()

    def writeresult(self, result):
        """Append ``result`` followed by a newline to ``result.txt``."""
        with open('result.txt', 'a+') as fp:
            fp.write(result + '\n')


def main():
    """Start the daemon Scanner workers, feed them the wordlist, wait for all.

    Returns once every queued path has been acknowledged with task_done().
    """
    for _ in range(concurrent):
        worker = Scanner(queue)
        # Daemon threads do not keep the interpreter alive after main() ends.
        worker.setDaemon(True)
        worker.start()

    try:
        # Iterate the file lazily: the queue is bounded, so put() throttles
        # reading and the whole wordlist need not be held in memory.
        for line in dic:
            queue.put(line.strip())
    finally:
        dic.close()

    queue.join()


if __name__ == '__main__':
    main()

這是一個掃描網站目錄的 Python 程序。掃描完成後它不會退出，甚至按 CTRL + C 也無法終止。我想知道掃描完成後如何讓程序自動退出。如何在所有線程完成後退出程序？

另外，在運行過程中，它還會出現類似下面的錯誤：

Exception in thread Thread-130: 
Traceback (most recent call last): 
    File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/threading.py", line 551, in __bootstrap_inner 
    self.run() 
    File "tt.py", line 28, in run 
    self.geturl = urllib.urlopen(aim+'/'+self.path) 
    File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 86, in urlopen 
    return opener.open(url) 
    File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 207, in open 
    return getattr(self, name)(url) 
    File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 344, in open_http 
    h.endheaders(data) 
    File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 954, in endheaders 
    self._send_output(message_body) 
    File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 814, in _send_output 
    self.send(msg) 
    File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 776, in send 
    self.connect() 
    File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 757, in connect 
    self.timeout, self.source_address) 
    File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 553, in create_connection 
    for res in getaddrinfo(host, port, 0, SOCK_STREAM): 
IOError: [Errno socket error] [Errno 8] nodename nor servname provided, or not known

來源

2013-10-13 user2876146

既然您的 Scanner.run 方法停留在無限循環（`while True:`）中，您爲什麼會期望程序退出？ – selbie

我想練習一下，所以動手試了試，並改動了不少。它能給你完整的結果嗎？你需要把 paths 換成原來從命令行參數讀取的方式。

有了這些線程，也許你會得到未處理的異常導致缺少結果？我添加了一種機制，在讀取過程中捕獲任何錯誤並將其傳遞給結果編寫者。
我想從多個線程向文件追加內容應該沒問題，但我還是添加了一個寫入線程，以便更乾淨地管理文件
大部分對 self 的賦值是不必要的
如果仍然出現 socket 錯誤，請檢查結果文件中對應的路徑，再決定要如何處理這些結果（如果需要處理的話）
我不是專家，所以不要把這個作爲最佳實踐

import threading 
import urllib 
import Queue 

# Number of Scanner worker threads started by main().
concurrent = 5

# Base URL; each entry of ``paths`` is requested as aim + '/' + path.
aim = 'http://edition.cnn.com'

# Sample paths to probe; the last two are deliberately invalid.
paths = [
    '2013/10/12/opinion/kazin-tea-party/index.html?hpt=hp_t5',
    '2013/10/11/opinion/opinion-hay-nobel-opcw/index.html?hpt=hp_t5',
    '2013/10/11/opinion/rosin-women-in-charge/index.html?hpt=hp_t5',
    'some invalid path',
    '2013',  # also an invalid path
]


def main():
    """Scan every entry of ``paths`` and write the results to a file.

    Starts ``concurrent`` Scanner threads and one ResultWriter thread, queues
    all paths, waits until every path has been processed, then stops the
    threads with their stop tokens and waits for them to finish.
    """
    # Single source of truth for the output location: used both by the
    # writer and by the final message.
    results_file_path = 'results.txt'
    work_q = Queue.Queue()
    result_q = Queue.Queue()

    # start the scanners and the result writer
    scanners = [Scanner(work_q, result_q) for _ in range(concurrent)]
    for s in scanners:
        s.start()
    result_writer = ResultWriter(result_q, results_file_path)
    result_writer.start()

    # send all the work and wait for it to be completed
    for path in paths:
        work_q.put(path.strip())
    work_q.join()

    # tell everyone to stop: one token per scanner, since each scanner
    # consumes exactly one token before leaving its loop
    for _ in scanners:
        work_q.put(Scanner.STOP_TOKEN)
    # the writer must be stopped explicitly so that it closes the file
    result_q.put(ResultWriter.STOP_TOKEN)

    # wait for everyone to actually stop
    for s in scanners:
        s.join()
    result_writer.join()
    print('the scan has finished and results are in {}'.format(results_file_path))


class Scanner(threading.Thread):
    """Worker thread that requests ``aim + '/' + path`` for each queued path.

    For every path taken from ``work_q`` a ``(path, status)`` tuple is put on
    ``result_q``; ``status`` is the HTTP status code, or an
    ``'unhandled error (...)'`` string when the request raised.
    """

    # Putting this value on the work queue makes one scanner leave its loop.
    STOP_TOKEN = '<<stop>>'

    def __init__(self, work_q, result_q):
        """Store the queue to read paths from and the queue to report to."""
        threading.Thread.__init__(self)
        self.work_q = work_q
        self.result_q = result_q

    def run(self):
        """Process paths until STOP_TOKEN is received."""
        while True:
            # Blocking get: shutdown is signalled with STOP_TOKEN, so there
            # is no need to wake up every few microseconds to poll.
            path = self.work_q.get()
            if path == self.STOP_TOKEN:
                self.work_q.task_done()  # keep the queue's accounting balanced
                break  # stop looking for work
            try:
                response = urllib.urlopen(aim + '/' + path)
                try:
                    status = response.getcode()
                finally:
                    response.close()  # release the connection
            except Exception as e:
                # Report the failure as the result so that no path is
                # missing from the output.
                status = 'unhandled error ({})'.format(e)
            self.result_q.put((path, status))
            self.work_q.task_done()


class ResultWriter(threading.Thread):
    """Thread that writes ``(path, status)`` results to a file.

    Each result taken from ``result_q`` is written as ``<path>=><status>`` on
    its own line.  The file is opened in ``'w'`` mode when the thread starts
    and closed when STOP_TOKEN is received.
    """

    # Putting this value on the result queue makes the writer finish.
    STOP_TOKEN = '<<stop>>'

    def __init__(self, result_q, results_file_path):
        """Store the queue to read results from and the output file path."""
        threading.Thread.__init__(self)
        self.result_q = result_q
        self.results_file_path = results_file_path

    def run(self):
        """Write results until STOP_TOKEN is received, then close the file."""
        with open(self.results_file_path, 'w') as results_file:
            # iter(callable, sentinel) calls the blocking get() repeatedly
            # and stops as soon as it returns STOP_TOKEN, so no polling with
            # a tiny timeout is needed.
            for result in iter(self.result_q.get, self.STOP_TOKEN):
                path, status = result
                results_file.write('{}=>{}\n'.format(path, status))


# Run the scan only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

來源

2013-10-13 17:36:53 KobeJohn

您是專家，謝謝 – user2876146

程序按現在的寫法，當所有線程都完成後就會關閉。但要輕鬆擺脫所有這些錯誤，請在 Scanner 類的 run 函數中，在 `while True:` 之後，把後面的所有內容放進一個 try: … except: 子句裏，像這樣：

# Illustrative pattern only: ``code`` stands in for the statements to guard.
try: 
    code 
# NOTE(review): a bare ``except:`` followed by ``pass`` silently discards
# every exception, including KeyboardInterrupt and SystemExit.
except: 
    pass

這不是最乾淨的做法，但就你的目的而言，它能完成任務，並讓你擺脫這些異常——這些異常意味着某些 URL 已超時。

來源

2013-10-13 14:58:05

我已經擺脫了這些異常，但是我的 path.txt 中有 730 個目錄要掃描，而 result.txt 中只有 660 條結果，你能幫我解決嗎？另外，我想在掃描完成後打印「掃描已完成」之類的內容，然後退出程序。 – user2876146

如何在所有線程完成後退出程序？

回答

相關問題