我有下面的預期輸出。嘗試讀取URL,能夠成功讀取, 然而,當我嘗試捕獲塊「Combo」下的數據時,上午遇到錯誤,有關如何解決此問題的任何輸入?閱讀URL後捕獲一塊數據
# Version YYYYMMDD
version = "20121112"
# File type to be output to logs
# Should be changed to exe before building the exe.
fileType = "py"
# Import sys to read command line arguments
import sys, getopt
#import pdb
#pdb.set_trace()
import argparse
import urllib
import urllib2
import getpass
import re
def update (url):
print url
authhost = 'https://login.company.com'
# Siteminder test server
user = getpass.getuser()
password = getpass.getpass()
realm = None
# handle the authentication and cookies
cookiehand = urllib2.HTTPCookieProcessor()
password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(user=user,
passwd=password,
uri=authhost,
realm=realm)
auth_handler = urllib2.HTTPBasicAuthHandler(password_mgr)
opener = urllib2.build_opener(auth_handler, cookiehand)
urllib2.install_opener(opener)
#make the request
req = urllib2.Request(url=url)
try:
f = urllib2.urlopen(req)
txt = f.read()
f.close()
except urllib2.HTTPError, e:
txt = ''
print 'An error occured connecting to the wiki. No wiki page will be generated.'
return '<font color=\"red\">QWiki</font>'
# Find the start tag of the textarea with Regular Expressions
print txt
p = re.compile('<Combo[^>]*>')
m = p.search(txt)
(tagStart, tagEnd) = m.span()
# Find the end of the textarea
endTag = txt.index("</textarea>")
def main():
#For logging
print "test"
parser = argparse.ArgumentParser(description='This is the update.py script created by test')
parser.add_argument('-u','--url',action='store',dest='url',default=None,help='<Required> url link',required=True)
results = parser.parse_args()# collect cmd line args
url = results.url
#print url
update(url)
if __name__ == '__main__':
main()
電流輸出: -
C:\Dropbox\scripts>python announce_update.py --u "http://qwiki.company.com/component/w/index.php?title=Test1&action=raw"
test
http://qwiki.company.com/component/w/index.php?title=Test1&action=raw
Password:
==== <font color="#008000">Combo</font> ====
{| border="1" cellspacing="1" cellpadding="1"
|-
! bgcolor="#67B0F9" scope="col" | test1
! bgcolor="#67B0F9" scope="col" | test2
! bgcolor="#67B0F9" scope="col" | test3
! bgcolor="#67B0F9" scope="col" | test4
|-
| [http:link.com]
|}
==== <font color="#008000">COde:</font> ====
Traceback (most recent call last):
File "announce_update.py", line 66, in <module>
main()
File "announce_update.py", line 64, in main
update(url)
File "announce_update.py", line 52, in update
(tagStart, tagEnd) = m.span()
AttributeError: 'NoneType' object has no attribute 'span'
預期輸出: -
{| border="1" cellspacing="1" cellpadding="1"
|-
! bgcolor="#67B0F9" scope="col" | test1
! bgcolor="#67B0F9" scope="col" | test2
! bgcolor="#67B0F9" scope="col" | test3
! bgcolor="#67B0F9" scope="col" | test4
|-
| [http:link.com]
|}
考慮'美麗的湯'或另一個HTML解析庫。正則表達式不適合這個任務。 – mpen 2013-03-02 07:50:41