2016-06-10 75 views
6

如何用PyQt5 v5.6 QWebEngineView「呈現」HTML?如何用PyQt5的QWebEngineView「呈現」HTML

我以前使用 PyQt5 v5.4.1 的 QWebPage 來完成這項任務,但有人建議(suggested)我改用較新的 QWebEngineView。

下面是原先的實現(通常能按預期工作,但在某些網站和某些情況下有無限期掛起的傾向):

def render(source_html):
    """Load an HTML string into a QtWebKit page and return the page's HTML.

    The HTML is handed to a ``QWebPage`` main frame, the Qt event loop is run
    until the page emits ``loadFinished``, and the frame's ``toHtml()`` output
    at that moment is returned.

    Args:
        source_html: HTML source to load into the page.

    Returns:
        The main frame's HTML captured in the ``loadFinished`` handler.
    """
    import sys
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebKitWidgets import QWebPage

    class _Renderer(QWebPage):
        """Page that records its own HTML when loading finishes."""

        def __init__(self, markup):
            # Create the application object before initialising the Qt
            # base class, as the original ordering does.
            self.app = QApplication(sys.argv)
            self.html = None
            super().__init__()
            self.loadFinished.connect(self._on_load_finished)
            self.mainFrame().setHtml(markup)
            # Runs the event loop until _on_load_finished() calls quit().
            self.app.exec_()

        def _on_load_finished(self, ok):
            # QtWebKit's toHtml() returns the markup directly, so it can be
            # stored before leaving the event loop.
            self.html = self.mainFrame().toHtml()
            self.app.quit()

    renderer = _Renderer(source_html)
    return renderer.html

import requests 
# `dummy_url` is not defined in this snippet; set it to the page address first.
response = requests.get(dummy_url)
sample_html = response.text
print(render(sample_html))

以下是我使用 QWebEngineView 的嘗試。首先,在 Ubuntu 上安裝並設置 PyQt5 v5.6:

# Install the PyQt5 wheel (v5.6 in the original post) from PyPI for the current user
pip3 install --user pyqt5

# Symlink the missing resource files and the locales directory into libexec/
ln -s \
    ../resources/icudtl.dat \
    ../resources/qtwebengine_resources.pak \
    ../resources/qtwebengine_resources_100p.pak \
    ../resources/qtwebengine_resources_200p.pak \
    ../translations/qtwebengine_locales \
    ~/.local/lib/python3.5/site-packages/PyQt5/Qt/libexec/

接下來是 Python 代碼……下面的代碼會導致分段錯誤(segmentation fault):

def render(source_html): 
    """Attempt to render HTML with QWebEngineView and return the page HTML.

    NOTE: this is the non-working attempt from the question. The page's
    ``toHtml`` is given a callback instead of returning the HTML, and
    ``_loadFinished`` calls ``quit()`` right after requesting it, so the
    callback may not have stored anything in ``self.html`` by the time the
    event loop exits and ``.html`` is returned.
    """ 

    import sys 
    from PyQt5.QtWidgets import QApplication 
    from PyQt5.QtWebEngineWidgets import QWebEngineView 

    class Render(QWebEngineView): 
     def __init__(self, html): 
      # Stays None until callable() receives the page HTML.
      self.html = None 
      self.app = QApplication(sys.argv) 
      QWebEngineView.__init__(self) 
      self.loadFinished.connect(self._loadFinished) 
      self.setHtml(html) 
      # Run the event loop; it ends when _loadFinished() calls quit().
      self.app.exec_() 

     def _loadFinished(self, result): 
      # what's going on here? how can I get the HTML from toHtml? 
      # toHtml() only registers self.callable as the receiver of the HTML.
      self.page().toHtml(self.callable) 
      # Quitting here does not wait for self.callable to be invoked.
      self.app.quit() 

     def callable(self, data): 
      # Receives the HTML handed over by toHtml().
      self.html = data 

    return Render(source_html).html 

import requests 
# `dummy_url` is not defined in this snippet; set it to the page address first.
response = requests.get(dummy_url)
sample_html = response.text
print(render(sample_html))

問題似乎出在對異步的 toHtml() 的調用上。這看起來應該相當簡單,但我不知道該如何處理。我發現這個問題在 C++ 的語境下已經有人討論過(discussed),但我不確定如何把它轉換爲 Python。我怎樣才能得到 HTML?

回答

7

關於該主題,下面這個討論串中已有相當多的討論:https://riverbankcomputing.com/pipermail/pyqt/2015-January/035324.html

新的 QWebEngine 接口考慮到了底層 Chromium 引擎是異步的這一事實。因此,我們必須將異步 API 轉換爲同步 API。

代碼如下:

def render(source_html):
    """Render an HTML string with QWebEngineView and return the page HTML.

    The page's ``toHtml`` hands its result to a callback, so after the load
    is started this function pumps the application's event queue by hand
    until that callback has stored the HTML.

    Args:
        source_html: HTML source to load into the view.

    Returns:
        The HTML string delivered to the ``toHtml`` callback.
    """
    import sys
    from PyQt5.QtCore import QEventLoop
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class _Renderer(QWebEngineView):
        """View that stores its page HTML once loading has finished."""

        def __init__(self, markup):
            # Create the application object before initialising the Qt
            # base class, as the original ordering does.
            self.app = QApplication(sys.argv)
            self.html = None
            super().__init__()
            self.loadFinished.connect(self._on_load_finished)
            self.setHtml(markup)

            wait_flags = (
                QEventLoop.ExcludeUserInputEvents
                | QEventLoop.ExcludeSocketNotifiers
                | QEventLoop.WaitForMoreEvents
            )
            # Keep processing events until _store_html() has run.
            while self.html is None:
                self.app.processEvents(wait_flags)
            self.app.quit()

        def _on_load_finished(self, ok):
            # Ask the page for its HTML; the result arrives in _store_html().
            self.page().toHtml(self._store_html)

        def _store_html(self, data):
            self.html = data

    renderer = _Renderer(source_html)
    return renderer.html

import requests 
# `dummy_url` is not defined in this snippet; set it to the page address first.
response = requests.get(dummy_url)
sample_html = response.text
print(render(sample_html))
4

正如你所指出的,Qt 5.4 依賴於異步調用。沒有必要使用 Loop(如你的答案所示),因爲你唯一的錯誤是在 toHtml 調用完成之前就調用了 quit。

def render(source_html):
    """Render an HTML string with QWebEngineView and return the page HTML.

    The Qt event loop is run with ``exec_()`` and is only left from inside
    the ``toHtml`` callback, after the HTML has been stored.

    Args:
        source_html: HTML source to load into the view.

    Returns:
        The HTML string delivered to the ``toHtml`` callback.
    """
    import sys
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class _Renderer(QWebEngineView):
        """View that stores its page HTML and then stops the event loop."""

        def __init__(self, markup):
            # Create the application object before initialising the Qt
            # base class, as the original ordering does.
            self.app = QApplication(sys.argv)
            self.html = None
            super().__init__()
            self.loadFinished.connect(self._on_load_finished)
            self.setHtml(markup)
            # Runs until _store_html() calls quit().
            self.app.exec_()

        def _on_load_finished(self, ok):
            # toHtml() delivers its result through the callback, so the
            # event loop must keep running until that callback is invoked.
            self.page().toHtml(self._store_html)

        def _store_html(self, data):
            self.html = data
            # The HTML is stored, so the event loop can be stopped now.
            self.app.quit()

    renderer = _Renderer(source_html)
    return renderer.html

import requests 
# `dummy_url` is not defined in this snippet; set it to the page address first.
response = requests.get(dummy_url)
sample_html = response.text
print(render(sample_html))
1

Six 和 Veehmot 的答案很棒,但我發現它們對我的用途來說還不夠,因爲它們沒有展開我想抓取的頁面中的下拉元素。稍作修改即可修復此問題:

def render(url):
    """Load a URL in QWebEngineView and return the loaded page's HTML.

    Unlike the ``setHtml`` variants, the view navigates to ``url`` itself.
    The application's event queue is then pumped by hand until the ``toHtml``
    callback has stored the HTML.

    Args:
        url: Address of the page to load, as a string accepted by ``QUrl``.

    Returns:
        The HTML string delivered to the ``toHtml`` callback.
    """
    import sys
    from PyQt5.QtCore import QEventLoop, QUrl
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class _Renderer(QWebEngineView):
        """View that stores its page HTML once loading has finished."""

        def __init__(self, address):
            # Create the application object before initialising the Qt
            # base class, as the original ordering does.
            self.app = QApplication(sys.argv)
            self.html = None
            super().__init__()
            self.loadFinished.connect(self._on_load_finished)
            self.load(QUrl(address))

            wait_flags = (
                QEventLoop.ExcludeUserInputEvents
                | QEventLoop.ExcludeSocketNotifiers
                | QEventLoop.WaitForMoreEvents
            )
            # Keep processing events until _store_html() has run.
            while self.html is None:
                self.app.processEvents(wait_flags)
            self.app.quit()

        def _on_load_finished(self, ok):
            # Ask the page for its HTML; the result arrives in _store_html().
            self.page().toHtml(self._store_html)

        def _store_html(self, data):
            self.html = data

    renderer = _Renderer(url)
    return renderer.html


# `dummy_url` is not defined in this snippet; set it to the page address first.
rendered_html = render(dummy_url)
print(rendered_html)