2
我正在嘗試編寫一些代碼,從由 JavaScript 渲染的頁面中抓取網頁內容。我在網上找到了一些使用 PyQt5 的例子。但是,當我在 Python 3.5 上安裝 PyQt5.5.7 之後,卻無法導入其模塊(ImportError:無法導入名稱 'QWebPage')。我附上以下代碼以供參考。如果有人能建議如何解決這個問題,或者提供其他抓取 JavaScript 渲染網頁內容的方法,我將非常感謝。標題:無法從 Python 3.5 中的 PyQt5 導入 QWebPage
# standard imports
import sys
# third-party imports
import requests
from bs4 import BeautifulSoup
from pyvirtualdisplay import Display
# QtWebEngineWidgets does not provide QWebPage (that class belongs to the
# QtWebKit bindings); the WebEngine page class is QWebEnginePage.
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication


class Render(QWebEnginePage):
    """Render an HTML string with Qt WebEngine and keep the resulting markup.

    Constructing an instance blocks inside the Qt event loop until the page
    has reported ``loadFinished`` and its HTML has been read back. The
    rendered markup is then available as the ``html`` attribute, which stays
    ``None`` until the read-back callback has run.

    Args:
        html: The source HTML to load into the page.
    """

    def __init__(self, html):
        # A QApplication must exist before any page object is created.
        # Reuse the running one if there is one, because constructing a
        # second QApplication in the same process is an error.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        super().__init__()
        self.app = app
        self.html = None
        self.loadFinished.connect(self._loadFinished)
        # QWebEnginePage has no mainFrame(); content is set on the page itself.
        self.setHtml(html)
        # Block here until _store_html() quits the event loop.
        self.app.exec_()

    def _loadFinished(self, result):
        """Request the rendered markup once the page has finished loading.

        Args:
            result: Success flag delivered by the ``loadFinished`` signal
                (not inspected; the markup is requested either way).
        """
        # toHtml() is asynchronous: it returns nothing and hands the markup
        # to the callback later, so the event loop must keep running until
        # the callback has fired.
        self.toHtml(self._store_html)

    def _store_html(self, html):
        """Store the markup delivered by ``toHtml`` and stop the event loop."""
        self.html = html
        self.app.quit()
url = 'https://impythonist.wordpress.com/2015/01/06/ultimate-guide-for-scraping-javascript-rendered-web-pages/'

# Get the raw HTML. A timeout keeps the script from hanging forever on an
# unresponsive server, and raise_for_status() stops an HTTP error page from
# being rendered and parsed as if it were the real content.
response = requests.get(url, timeout=30)
response.raise_for_status()
source_html = response.text

# Return the JavaScript-rendered HTML. The renderer runs inside a
# pyvirtualdisplay Display created with visible=0 (presumably so that no
# real screen is needed -- confirm for the target environment).
with Display(visible=0, size=(800, 600)):
    rendered_html = Render(source_html).html

# Parse the rendered markup with BeautifulSoup.
soup = BeautifulSoup(rendered_html, 'html.parser')

# select_one() returns None when nothing matches; report that instead of
# failing with AttributeError on `.text`.
title_tag = soup.select_one('title')
print('title is %r' % (title_tag.text if title_tag is not None else None))