我有一個腳本,它會從輸入文件中讀取URL,並收集每個URL的response header信息。
目前,輸入文件(Input File)是從外部以命令行參數(Argument)的形式傳入的。
執行方式(Execution Method):python collect.py <Input.txt>
(標題:Python:命令行參數問題)
輸入文件:
1,http://www.example.com
2,http://www.blahblah.com
3,......
現在,我希望把ID連同URL一起作爲單個參數直接在命令行傳入,例如:
python collect.py 1,http://www.example.com
然後執行腳本,並將結果寫入到輸出文件(outfile)。
#!/usr/bin/python
import subprocess
import json
import sys
import httplib
import urlparse
import pickle
import sys
class HeaderFetcher:
    """Collect HTTP response headers for a URL, following 301/302 redirects.

    After ``fetch()`` returns, ``report`` maps every URL that was requested
    (the initial URL plus each redirect target) to the list of
    ``(name, value)`` header pairs returned for it.  URLs that could not be
    loaded are absent from ``report``; the failure is appended to ``errors``
    as a ``(url, message)`` tuple instead of being silently discarded.
    """

    # User-Agent sent when the caller does not supply request headers.
    DEFAULT_USER_AGENT = ('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; '
                          'rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
    # Upper bound on the number of URLs recorded per fetcher; this is what
    # stops an endless redirect chain.
    MAX_REPORT_ENTRIES = 40
    # Only these status codes are treated as redirects to follow.
    REDIRECT_STATUSES = (301, 302)
    # Socket timeout, in seconds, for each connection.
    TIMEOUT_SECONDS = 10

    def __init__(self, url, headers=None):
        """Remember the start URL and the request headers to send.

        ``headers`` is a dict of request headers.  When omitted, a fresh
        dict holding only the default User-Agent is created for this
        instance, so instances never share a mutable default.
        """
        self.report = {}
        self.errors = []
        self.initial_url = url
        if headers is None:
            headers = {'User-Agent': self.DEFAULT_USER_AGENT}
        self.request_headers = headers

    def fetch(self):
        """Fetch headers for the initial URL and any redirects it leads to."""
        self.fetchheaders(self.initial_url, self.request_headers)

    def fetchheaders(self, url, req_headers):
        """Record the response headers of ``url`` and of each redirect hop.

        Results are stored in ``self.report``; nothing is returned.  URLs
        whose scheme is neither ``http`` nor ``https`` are ignored.
        """
        # Iterative instead of recursive: each step yields the next URL to
        # request, or None when the chain ends.
        while url is not None:
            url = self._fetch_one(url, req_headers)

    def _fetch_one(self, url, req_headers):
        """Request one URL; return the redirect target to follow, or None."""
        con = None
        try:
            parts = urlparse.urlparse(url)
            if parts.scheme == 'http':
                connection_class = httplib.HTTPConnection
            elif parts.scheme == 'https':
                connection_class = httplib.HTTPSConnection
            else:
                return None
            con = connection_class(parts.hostname, parts.port,
                                   timeout=self.TIMEOUT_SECONDS)
            con.request("GET", url, None, req_headers)
            res = con.getresponse()
            self.report[url] = res.getheaders()
            if res.status not in self.REDIRECT_STATUSES:
                return None
            location = res.getheader('Location')
        except Exception as exc:
            # Best effort: one bad URL must not abort the whole run, but the
            # failure is kept for inspection rather than thrown away.
            self.errors.append((url, str(exc)))
            return None
        finally:
            if con is not None:
                con.close()

        if not location:
            # Redirect status without a Location header: nothing to follow.
            return None
        # Location may be relative; resolve it against the current URL.
        next_url = urlparse.urljoin(url, location)
        if next_url in self.report:
            return None
        if len(self.report) >= self.MAX_REPORT_ENTRIES:
            return None
        return next_url
def process(infile='Input.txt'):
    """Fetch response headers for every ``id,url`` line of ``infile``.

    Each non-empty line of ``infile`` must look like ``<id>,<url>``.  For
    every line the URL is fetched with HeaderFetcher (redirects included)
    and the headers of all hops are merged into one dict per id.  One line
    per id is then written to ``Headers_Final_List.txt`` in the form
    ``<id>,<header-name>,<header-name>...``, listing which of the
    server-identifying headers were present.

    Lines that are blank or contain no comma are skipped.  If an id occurs
    more than once, the last occurrence wins.
    """
    # The User-Agent must be a plain string; a set here makes the request
    # fail while it is being built.
    user_agent = ('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; '
                  'rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
    wanted = ('x-powered-by', 'server', 'x-aspnet-version',
              'x-aspnetmvc-version')

    headers_by_id = {}
    # Read from the ``infile`` argument rather than reaching into sys.argv,
    # and make sure the handle is closed.
    with open(infile, "r") as source:
        for line in source:
            # Split on the first comma only, so URLs containing commas
            # stay intact.
            fields = line.strip().split(',', 1)
            if len(fields) < 2:
                continue
            app_id, app_url = fields

            fetcher = HeaderFetcher(app_url, {'User-Agent': user_agent})
            fetcher.fetch()

            merged = {}
            for hop_headers in fetcher.report.values():
                for header in hop_headers:
                    merged[header[0]] = header[1]
            headers_by_id[app_id] = merged

    with open("Headers_Final_List.txt", 'wb') as out:
        for app_id, headers in headers_by_id.items():
            # NOTE(review): this writes the matching header *names*, not
            # their values, as the original code did -- confirm that is the
            # intended output.
            found = [h for h in headers if h.lower() in wanted]
            out.write('{},{}'.format(app_id, ','.join(found)) + '\n')
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the path of the
    # input file.  Without this check a missing argument would either raise a
    # bare IndexError or make the script try to read its own source file
    # (sys.argv[-1] is the script path when no arguments are given).
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: python collect.py <Input.txt>\n')
        sys.exit(2)
    process(sys.argv[1])
請問有沒有關於如何從命令行解析這樣的單個參數的建議?
你能解釋一下你當前的代碼具體是哪裏不起作用嗎? – Stuart
在將參數傳遞給'open'之前,在參數上做一個'split'。 –