1
下面的代碼工作正常我的機器上,但它是在該行發出一個錯誤出錯無效字面對於int()與底座10:「XBB」
soup = BeautifulSoup(html)
當它在另一臺機器上運行。它將解析一系列雅虎體育活躍NBA球員的名單,並將他們的名字和職位存儲到一個文本文件中。
from bs4 import BeautifulSoup
import urllib2
'''
scraping the labeled data from yahoo sports
'''
def scrape(filename):
base_url = "http://sports.yahoo.com/nba/players?type=position&c=NBA&pos="
positions = ['G', 'F', 'C']
players = 0
with open(filename, 'w') as names:
for p in positions:
html = urllib2.urlopen(base_url + p).read()
soup = BeautifulSoup(html) #throws the error!
table = soup.find_all('table')[9]
cells = table.find_all('td')
for i in xrange(4, len(cells) - 1, 3):
names.write(cells[i].find('a').string + '\t' + p + '\n')
players += 1
print "...success! %r players downloaded." % players
它拋出的錯誤是:
Traceback (most recent call last):
File "run_me.py", line 9, in <module>
scrapenames.scrape('namelist.txt')
File "/Users/brapse/Downloads/bball/scrapenames.py", line 15, in scrape
soup = BeautifulSoup(html)
File "/usr/local/Cellar/python/2.6.5/lib/python2.6/site-packages/bs4/__init__.py", line 100, in __init__
self._feed()
File "/usr/local/Cellar/python/2.6.5/lib/python2.6/site-packages/bs4/__init__.py", line 113, in _feed
self.builder.feed(self.markup)
File "/usr/local/Cellar/python/2.6.5/lib/python2.6/site-packages/bs4/builder/_htmlparser.py", line 46, in feed
super(HTMLParserTreeBuilder, self).feed(markup)
File "/usr/local/Cellar/python/2.6.5/lib/python2.6/HTMLParser.py", line 108, in feed
self.goahead(0)
File "/usr/local/Cellar/python/2.6.5/lib/python2.6/HTMLParser.py", line 171, in goahead
self.handle_charref(name)
File "/usr/local/Cellar/python/2.6.5/lib/python2.6/site-packages/bs4/builder/_htmlparser.py", line 58, in handle_charref
self.handle_data(unichr(int(name)))
ValueError: invalid literal for int() with base 10: 'xBB'