我試過了這兩種不同的方式,無法讓它們工作。我正試圖在「團隊統計」內抓取此網頁上的統計數據:http://www.cbssports.com/nfl/gametracker/boxscore/[email protected]/。我需要指定的統計類別前的數字:「NET YARDS RUSHING」。以下是我未嘗試過的嘗試。使用python刮掉表格中的數據2.7
第一種方式:
import pickle
import math
import os
import urllib2
from lxml import etree
from bs4 import BeautifulSoup
from urllib import urlopen
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.styles import Color, PatternFill, Font, Border
from openpyxl.styles import colors
from openpyxl.cell import Cell
Last Two Game info Home [H] or Away [A]
favLastGM = 'H' #Higher week number 2
favLastGM2 = 'A' #Lower week number 1
#Game Info (Favorite) Last Game Played - CBS Sports (Change Every Week)
favPrevGMInfoUrl = 'http://www.cbssports.com/nfl/gametracker/boxscore/[email protected]/'
response8 = urllib2.urlopen(favPrevGMInfoUrl)
htmlparser8 = etree.HTMLParser()
tree8 = etree.parse(response8,htmlparser8)
#FAVORITE
if favLastGM == 'A': #This Gives Opposite of Away Team Net Rushing Yards - SO HOME Net Rushing Yards
text = tree8.xpath('//td[contains(text(),"Net Yards Rushing")]/parent::td/following-sibling::td[1]/text()')
if text:
favDef_rushYards_L2_1 = int(text[0].strip())
print("test"),
print favDef_rushYards_L2_1
print ("Enter: Total Rushing Yards Allowed from Favored Team Defense for last game played: "),
print favDef_rushYards_L2_1
elif favLastGM == 'H': #This Gives Opposite of Home Team Net Rushing Yards - SO AWAY Net Rushing Yards
text = tree8.xpath('//td[contains(text(),"Net Yards Rushing")]/parent::td/following-sibling::td[0]/text()')
if text:
favDef_rushYards_L2_1 = int(text[0].strip())
print("test"),
print favDef_rushYards_L2_1
print ("Enter: Total Rushing Yards Allowed from Favored Team Defense for last game played: "),
print favDef_rushYards_L2_1
else:
print("***************************************************")
print("NOT A VALID ENTRY - favLastGM !")
print("***************************************************")
方式二:
import pickle
import math
import os
import urllib2
from lxml import etree
from bs4 import BeautifulSoup
from urllib import urlopen
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.styles import Color, PatternFill, Font, Border
from openpyxl.styles import colors
from openpyxl.cell import Cell
#Last Two Game info Home [H] or Away [A]
favLastGM = 'H' #Higher week number 2
favLastGM2 = 'A' #Lower week number 1
#Game Info (Favorite) Last Game Played - CBS Sports (Change Every Week)
favPrevGMInfoUrl = 'http://www.cbssports.com/nfl/gametracker/boxscore/[email protected]/'
favPrevGMhtml2 = urlopen(favPrevGMInfoUrl).read()
favPrevGMsoup2 = BeautifulSoup(favPrevGMhtml2)
favPrevGM2Reg = favPrevGMsoup2.find("table", { "class" : "team-stats" })
favPrevGM2Reg2 = []
if favLastGM == 'A': #This Gives Opposite of Away Team Net Rushing Yards - SO HOME Net Rushing Yards
rush = 'Net Yards Rushing'
for row in favPrevGM2Reg.findAll("tr"):
if rush in row.findNext('td'): #Change Year for every new season
for item in row.findAll("td"):
favPrevGM2Reg.append(item.text)
favDef_rushYards_L2_1 = float(favPrevGM2Reg[1])
print ("Enter: Total Rushing Yards Allowed from Favored Team Defense for last game played: "),
print favDef_rushYards_L2_1
elif favLastGM == 'H': #This Gives Opposite of Home Team Net Rushing Yards - SO AWAY Net Rushing Yards
rush = 'Net Yards Rushing'
for row in favPrevGM2Reg.findAll("tr"):
if rush in row.findNext('td'): #Change Year for every new season
for item in row.findAll("td"):
favPrevGM2Reg.append(item.text)
favDef_rushYards_L2_1 = float(favPrevGM2Reg[0])
print ("Enter: Total Rushing Yards Allowed from Favored Team Defense for last game played: "),
print favDef_rushYards_L2_1
else:
print("***************************************************")
print("NOT A VALID ENTRY - favLastGM !")
print("***************************************************")