2013-08-27 188 views
0

我用下面這個 Python 腳本來發送 HTTP 請求:

from twisted.internet import reactor, threads 
from urlparse import urlparse 
import httplib 
import itertools 


# Number of blocking requests allowed in flight; used as the suggested
# size of the reactor's thread pool.
concurrent = 200
reactor.suggestThreadPoolSize(concurrent)
# 1-based counter of finished tasks; processedOne() compares it to `added`.
finished = itertools.count(1)

def getStatus(ourl):
    """Send a blocking HEAD request for ``ourl`` and return the status code.

    :param ourl: URL to check; ``http://`` is assumed when no scheme is given.
    :returns: the integer HTTP status of the response.
    :raises: whatever ``httplib`` or the socket layer raises when the
        request cannot be completed.
    """
    # Without a scheme urlparse() leaves .netloc empty and puts the host
    # into .path, so there would be nothing to connect to.
    if "://" not in ourl:
        ourl = "http://" + ourl
    url = urlparse(ourl)
    # An empty path is not a valid request target; "/" addresses the root.
    target = url.path or "/"
    if url.query:
        target += "?" + url.query
    # NOTE(review): url.scheme is not consulted, so https URLs are still
    # requested over plain HTTP.
    conn = httplib.HTTPConnection(url.netloc)
    try:
        conn.request("HEAD", target)
        return conn.getresponse().status
    finally:
        # Release the socket whether or not the request succeeded.
        conn.close()

def processResponse(response,url):
    """Success callback: print the status and its URL, then count the task as done."""
    print("%s %s" % (response, url))
    processedOne()

def processError(error,url):
    """Errback: print the URL together with the failure reason, then count the task.

    :param error: the ``Failure`` Twisted passes to an errback.
    :param url: URL whose check failed.
    """
    # Show the reason as well; without it every failure prints identically
    # and cannot be diagnosed.
    print("error %s %s" % (url, error.getErrorMessage()))
    processedOne()

def processedOne():
    """Advance the finished-task counter and stop the reactor once it reaches ``added``."""
    if next(finished) != added:
        return
    reactor.stop()

def addTask(url):
    """Run ``getStatus(url)`` via ``deferToThread`` and attach the result handlers."""
    threads.deferToThread(getStatus, url).addCallback(
        processResponse, url
    ).addErrback(processError, url)

# Number of queued URLs; processedOne() stops the reactor once this many
# tasks have finished.
added = 0
# `with` closes the list file as soon as every task has been queued.
with open('urllist.txt') as urlFile:
    for line in urlFile:
        url = line.strip()
        if not url:
            # A blank line is not a URL; queueing it only produces an error.
            continue
        added += 1
        addTask(url)

# With nothing queued, no handler would ever call reactor.stop() and the
# reactor would run forever.
if added:
    try:
        reactor.run()
    except KeyboardInterrupt:
        # Only stop a reactor that is still running; stopping one that has
        # already stopped raises an error.
        if reactor.running:
            reactor.stop()

當我嘗試用 `$ python test.py` 運行腳本時,

它只是打印網址,並沒有執行 curl 或發送 HTTP 請求。

我怎樣才能對每一個網址都發送請求?

感謝

+0

你的函數是在哪裏添加到 `reactor` 的? – dg123

+0

@user1436026 你能再解釋得詳細一點嗎? – SimpleojbC

+1

爲什麼你在這裏使用 `httplib` 而不是 Twisted 自帶的 HTTP 代碼?或者,如果你想用 `httplib` 並且爲每個連接開一個線程,那你爲什麼還要使用 Twisted 並啓動 reactor? – abarnert

回答

0

如果你的 URL 不包含「http://」,下面的代碼應該可以工作;如果 URL 確實包含「http://」,請改用註釋中給出的寫法。

import httplib 

def requester(url):
    """Send a HEAD request for ``url`` and print the response status and reason.

    :param url: ``host[/path]``, optionally prefixed with ``http://``;
        surrounding whitespace such as a trailing newline is ignored.
    :raises: whatever ``httplib`` or the socket layer raises when the
        request cannot be completed.
    """
    url = url.strip()
    if url.startswith('http://'):
        url = url[len('http://'):]
    # Everything before the first "/" is the host, the remainder the path.
    host, _, rest = url.partition('/')
    conn = httplib.HTTPConnection(host)
    try:
        # `rest` has no leading slash, so the request target is exactly
        # "/path" (or "/" when the URL has no path) rather than "//path".
        conn.request("HEAD", "/" + rest)
        response = conn.getresponse()
        print("%s %s" % (response.status, response.reason))
        # A HEAD response carries no body; switch the method to GET and
        # call response.read() if the data itself is needed.
    finally:
        # Release the socket whether or not the request succeeded.
        conn.close()

# The file name must be a string; `with` closes the file when the loop ends.
with open('urls.txt') as url_file:
    for url in url_file:
        url = url.strip()
        if not url:
            # Skip blank lines instead of requesting an empty host.
            continue
        try:
            requester(url)
        except (httplib.HTTPException, IOError) as e:
            # Socket errors are IOError subclasses since Python 2.6, so this
            # covers DNS, connection and protocol failures; report the URL
            # and carry on with the next one.
            print("Error %s %s" % (url, e))

這應該可以解決你的問題,另外我建議查閱一下 httplib 的文檔。

+0

這就是我所尋找的,非常感謝 – SimpleojbC

0

下面是經過測試的代碼,它使用 inlineCallbacks 和 deferToThread 來執行 HTTP(或 curl)請求。另外還使用 defer.gatherResults 來得知所有 deferred 何時處理完畢(而不是像提問者那樣使用計數器):

from twisted.internet import reactor, defer, utils 
from twisted.internet.threads import deferToThread 
from urlparse import urlparse 
import httplib 

# function.__get__(obj) binds obj as the first positional argument, so
# decorating f with this turns a call f(*args) into deferToThread(f, *args).
threadDeferred = deferToThread.__get__

@threadDeferred
def get_url_head(url_arg):
    """Send a blocking HEAD request for ``url_arg`` and return the status code.

    Because of the decorator, calling this name goes through
    ``deferToThread`` instead of running the body directly.

    :param url_arg: absolute URL including its scheme.
    :returns: the integer HTTP status of the response.
    :raises: whatever ``httplib`` or the socket layer raises when the
        request cannot be completed.
    """
    url = urlparse(url_arg)
    # An empty path is not a valid request target; "/" addresses the root.
    target = url.path or "/"
    if url.query:
        target += "?" + url.query
    # NOTE(review): url.scheme is not consulted, so https URLs are still
    # requested over plain HTTP.
    conn = httplib.HTTPConnection(url.netloc)
    try:
        conn.request("HEAD", target)
        res = conn.getresponse()
        return res.status
    finally:
        # Close even when the request raises so the socket is not leaked.
        conn.close()

@defer.inlineCallbacks
def check_url(sem,url_arg):
    """Check ``url_arg`` while holding one token of ``sem``.

    :param sem: object whose ``acquire()`` can be yielded on and which
        offers ``release()``; ``run`` passes a ``defer.DeferredSemaphore``.
    :param url_arg: URL handed to ``get_url_head``.
    :returns: via ``defer.returnValue``, the result of ``get_url_head``.
    """
    yield sem.acquire()
    try:
        result = yield get_url_head(url_arg)
        defer.returnValue(result)
    finally:
        # Give the token back on success and on failure alike.
        sem.release()

@defer.inlineCallbacks
def run(reactor,SEMAPHORE_SIZE=10):
    """Check every URL listed in ``urllist.txt``, then stop the reactor.

    :param reactor: the reactor to stop once all checks are done.
    :param SEMAPHORE_SIZE: token count of the ``DeferredSemaphore`` that
        every ``check_url`` call must acquire.
    """
    sem = defer.DeferredSemaphore(SEMAPHORE_SIZE)
    deferreds = []
    failed_urls = []
    responded_urls = []
    try:
        with open('urllist.txt','r') as f:
            for line in f:
                url_arg = line.strip()
                if not url_arg:
                    # Blank lines are not URLs.
                    continue
                d = check_url(sem,url_arg)
                d.addCallback(processResult,url_arg,responded_urls).addErrback(processErr,url_arg,failed_urls)
                deferreds.append(d)
        # Wait until every check has reported through one of the handlers.
        yield defer.gatherResults(deferreds)
        # Do something else with failed_urls and responded_urls
    finally:
        # Always schedule the stop, even if the list could not be read;
        # otherwise the reactor would keep running with nothing to do.
        reactor.callLater(0,reactor.stop)

def main():
    """Register ``run`` to be called when the reactor starts, then start the reactor."""
    from twisted.internet import reactor as event_loop
    event_loop.callWhenRunning(run, event_loop)
    event_loop.run()

def processResult(result,url_arg,responded_urls):
    """Print a successful check and append ``(url_arg, result)`` to ``responded_urls``."""
    message = "Reponse %s from %s" % (result, url_arg)
    print(message)
    entry = (url_arg, result)
    responded_urls.append(entry)

def processErr(err,url_arg,failed_urls):
    """Print a failed check and append ``(url_arg, err.value)`` to ``failed_urls``."""
    cause = err.value
    print("Error checking %s: %s" % (url_arg, repr(cause)))
    entry = (url_arg, cause)
    failed_urls.append(entry)

# Start the checks only when this file is executed as a script.
if __name__ == "__main__":
    main()
+0

我用了它,很快就得到了結果,非常感謝 – SimpleojbC