python • web-scraping • 2015-06-30
    import requests
    from bs4 import BeautifulSoup
    import csv
    from urlparse import urljoin   # (unused in this script)
    import urllib2                 # (unused in this script)

    outfile = open("./battingall.csv", "wb")
    writer = csv.writer(outfile)

    base_url = 'http://www.baseball-reference.com'
    player_url = 'http://www.baseball-reference.com/players/'
    alphabet = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
    players = 'shtml'
    gamel = '&t=b&year='
    game_logs = 'http://www.baseball-reference.com/players/gl.cgi?id='
    years = ['2015','2014','2013','2012','2011','2010','2009','2008']

    # One player-index page per letter of the alphabet
    drounders = []
    for dround in alphabet:
        drounders.append(player_url + dround)

    # Collect every link found on each index page
    urlz = []
    for ab in drounders:
        data = requests.get(ab)
        soup = BeautifulSoup(data.content)
        for link in soup.find_all('a'):
            if link.has_attr('href'):
                urlz.append(base_url + link['href'])

    # Keep only the player pages and build a game-log URL for each year
    yent = []
    for ant in urlz:
        for d in drounders:
            for y in years:
                if players in ant:
                    if len(ant) < 60:
                        if d in ant:
                            yent.append(game_logs + ant[44:-6] + gamel + y)

    # Scrape each game-log page and write its rows to the CSV
    for j in yent:
        try:
            data = requests.get(j)
            soup = BeautifulSoup(data.content)
            table = soup.find('table', attrs={'id': 'batting_gamelogs'})
            tablea = j[52:59]                 # player id sliced from the URL
            tableb = soup.find("b", text='Throws:').next_sibling.strip()
            tablec = soup.find("b", text='Height:').next_sibling.strip()
            tabled = soup.find("b", text='Weight:').next_sibling.strip()
            list_of_rows = []
            for row in table.findAll('tr'):
                list_of_cells = []
                list_of_cells.append(tablea)
                list_of_cells.append(j[len(j)-4:])   # year taken from the URL
                list_of_cells.append(tableb)
                list_of_cells.append(tablec)
                list_of_cells.append(tabled)
                for cell in row.findAll('td'):
                    text = cell.text.replace('&nbsp;', '').encode("utf-8")
                    list_of_cells.append(text)
                list_of_rows.append(list_of_cells)
            print list_of_rows
            writer.writerows(list_of_rows)
        except (AttributeError, NameError):
            pass

When I run the code above to pull the game-log batting data, I keep getting an error while scraping with Python, and I need a way around it:

    Traceback (most recent call last):
      File "battinggamelogs.py", line 44, in <module>
        data = requests.get(j)
      File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests/api.py", line 65, in get
        return request('get', url, **kwargs)
      File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests/api.py", line 49, in request
        response = session.request(method=method, url=url, **kwargs)
      File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests/sessions.py", line 461, in request
        resp = self.send(prep, **send_kwargs)
      File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests/sessions.py", line 573, in send
        r = adapter.send(request, **kwargs)
      File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests/adapters.py", line 415, in send
        raise ConnectionError(err, request=request)
    requests.exceptions.ConnectionError: ('Connection aborted.', BadStatusLine("''",))
    

I need a way to get past this error and keep going. I think the error happens because there is no table to pull data from.


It looks like the request just timed out. Try navigating to the exact URL in a browser and see what happens. – That1Guy

Answer


You can wrap your requests.get() block in a try/except. You need to catch the requests.exceptions.ConnectionError that is being raised:

    for ab in drounders:
        try:
            data = requests.get(ab)
            soup = BeautifulSoup(data.content)
            for link in soup.find_all('a'):
                if link.has_attr('href'):
                    urlz.append(base_url + link['href'])
        except requests.exceptions.ConnectionError:
            pass
    

This is happening because there is a problem with the connection itself, not because there is no data in the table. You are not even getting that far.

Note: this swallows the exception entirely by simply using pass (as you also do later in your code block). It might be better to do something like this:

    except requests.exceptions.ConnectionError: 
        print("Failed to open {}".format(ab)) 
    

This gives you a message on the console telling you which URL failed.
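If you would rather retry a flaky URL a couple of times before giving up, the same idea can be extended with a small helper. This is only a sketch: fetch_with_retry, the attempt count, and the delay are illustrative names and values, not part of the answer above.

    import time

    def fetch_with_retry(url, attempts=3, delay=2):
        # Hypothetical helper: try the request a few times, sleeping between
        # attempts, and return None if every attempt hits a connection error.
        for attempt in range(attempts):
            try:
                return requests.get(url)
            except requests.exceptions.ConnectionError:
                print("Attempt {} failed for {}".format(attempt + 1, url))
                time.sleep(delay)
        return None

    for j in yent:
        data = fetch_with_retry(j)
        if data is None:
            continue  # skip this URL after repeated failures
        soup = BeautifulSoup(data.content)
        # ... parse the batting_gamelogs table as before ...

Either way, the loop keeps moving instead of dying on the first bad connection.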
