2017-08-15 164 views
0

有人能向我解釋一下,爲什麼我運行這段代碼時得到的是空白的返回結果嗎?我只是想用 Beautiful Soup 打印某個 HTML 標籤的內容。代碼如下。(標題:網頁抓取 - 空白返回)

感謝

import urllib3 
# Silence urllib3's InsecureRequestWarning so it does not clutter the output.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) 
from bs4 import BeautifulSoup 
# Module-level connection pool; stats() issues its GET request through it.
http = urllib3.PoolManager() 

def stats(url='https://www.flashscore.com.au/football/usa/mls/results/'):
    """Fetch *url* and print the text of its results-table container.

    The page is downloaded through the module-level ``http`` pool, parsed
    with BeautifulSoup, and the text of the first
    ``<div class="fs-table tournament-page">`` is printed to stdout.

    Problems (non-200 status, missing container, empty container) are
    reported on stderr instead of raising or silently printing nothing.

    NOTE(review): the page reportedly fills this container client-side via
    JavaScript, so a plain HTTP fetch is expected to see it empty; a
    rendering engine is needed to obtain the populated table.

    Args:
        url: Page to scrape. Defaults to the MLS results page.
    """
    # Local import: the surrounding snippet does not import ``sys`` itself.
    import sys

    response = http.request('GET', url)
    if response.status != 200:
        sys.stderr.write('GET %s returned HTTP %s\n' % (url, response.status))
        return

    soup = BeautifulSoup(response.data, 'lxml')
    right_table = soup.find('div', {'class': 'fs-table tournament-page'})
    if right_table is None:
        # find() returns None when nothing matches; avoid AttributeError.
        sys.stderr.write(
            "No <div class='fs-table tournament-page'> found in %s\n" % url
        )
        return

    text = right_table.text
    if not text.strip():
        # The element exists but has no content in the static HTML.
        sys.stderr.write(
            'The results container in %s is empty; its content is probably '
            'rendered client-side by JavaScript.\n' % url
        )
    print(text)

# Unconditional module-level call: the scrape runs as soon as this file is
# executed (and also if it is imported).
stats()  
+0

問題在於該網頁是在客戶端渲染的,所以這個 div 最初沒有內容。需要使用不同的技術:https://stackoverflow.com/questions/2148493/scrape-html-generated-by-javascript-with-python –

+0

該頁面使用 JavaScript 和 WebSocket 來加載、獲取並呈現數據。關於如何渲染這樣的頁面,請參閱我在 https://stackoverflow.com/questions/45259232/scraping-google-finance-beautifulsoup/ 的回答。 –

+0

@Error-SyntacticalRemorse 已完成,謝謝你的提示 –

回答

0

正如您在評論中所問的,您可以使用 PyQt5 獲取並處理多個網址,像這樣:

from PyQt5.QtGui import * 
from PyQt5.QtCore import * 
from PyQt5.QtWebKit import * 
from PyQt5.QtWebKitWidgets import QWebPage 
from PyQt5.QtWidgets import QApplication 
import bs4 as bs 
import sys 


class Render(QWebPage):
    """Web page object that loads a sequence of URLs one after another.

    Each time the main frame reports that loading finished, the rendered
    HTML is parsed with BeautifulSoup and the text of the first
    ``<div class="fs-table tournament-page">`` is printed. Once every URL
    has been handled, the running Qt application is asked to quit.
    """

    def __init__(self):
        super(Render, self).__init__()
        # Start with an empty iterator so fetchNext() is safe before start().
        self._urls = iter(())
        self.mainFrame().loadFinished.connect(self.handleLoadFinished)

    def start(self, urls):
        """Begin loading *urls* (any iterable of URL strings) in order."""
        self._urls = iter(urls)
        self.fetchNext()

    def fetchNext(self):
        """Start loading the next pending URL.

        Returns:
            True if a load was started, False if no URLs remain.
        """
        try:
            url = next(self._urls)
        except StopIteration:
            return False
        self.mainFrame().load(QUrl(url))
        return True

    def processCurrentPage(self):
        """Print the current URL and the text of the results container."""
        frame = self.mainFrame()
        print(frame.url().toString())
        soup = bs.BeautifulSoup(frame.toHtml(), 'lxml')
        right_table = soup.find('div', {'class': 'fs-table tournament-page'})
        if right_table is None:
            # find() returns None when nothing matches; an exception raised
            # inside a Qt slot would otherwise abort handling of this page.
            sys.stderr.write(
                "No <div class='fs-table tournament-page'> found in %s\n"
                % frame.url().toString()
            )
            return
        print(right_table.text)

    def handleLoadFinished(self, ok=True):
        """Slot for ``loadFinished``: process the page, then move on.

        Args:
            ok: Success flag delivered by the ``loadFinished`` signal.
                Defaults to True so the slot can still be called without
                arguments.
        """
        if ok:
            self.processCurrentPage()
        else:
            sys.stderr.write(
                'Failed to load %s\n'
                % self.mainFrame().requestedUrl().toString()
            )
        if not self.fetchNext():
            # Look the application up at call time instead of relying on a
            # module-level ``app`` variable that only exists in script mode.
            application = QApplication.instance()
            if application is not None:
                application.quit()


if __name__ == '__main__':
    # Pages to render; they are processed sequentially by Render.
    urls = [
        "https://www.flashscore.com.au/football/usa/mls/results/",
        "https://www.flashscore.com.au/football/usa/mls/fixtures/",
    ]

    # The QApplication must exist before any web page object is created.
    app = QApplication(sys.argv)

    # Keep a reference so the renderer lives for the whole event loop.
    renderer = Render()
    renderer.start(urls)

    # Run the Qt event loop and propagate its exit status to the shell.
    exit_status = app.exec_()
    sys.exit(exit_status)