2013-04-13 33 views
0

我想從一個網站下載動態生成的圖像。該網站使用 JavaScript 代碼,通過點擊按鈕切換到上一張和下一張圖片。我在 Chrome 中檢查了 HTTP 請求和響應:除了圖像名稱之外,各個請求幾乎相同(名稱中的數字遞增,例如 000001.jpg、000002.jpg)。現在我已經可以訪問第一張圖片,並通過繼承 QWebView、配合自定義的 QNetworkAccessManager 將其保存到磁盤。我重載了 createRequest 函數。問題是:如何通過子類化 QNetworkAccessManager、使用 createRequest 來創建請求?

import sys,urllib,time,os 
from PyQt4.QtCore import * 
from PyQt4.QtGui import * 
from PyQt4.QtWebKit import * 
from PyQt4.QtNetwork import * 
from PIL import Image 

class NetworkAccessManager(QNetworkAccessManager):
    """Network access manager that captures page images requested through it.

    createRequest() inspects the URL of every request.  Replies to image
    requests are buffered by saveImage() and written to disk under the file
    name taken from the URL; the "nextPage()" signal is emitted after each
    image has been saved.  The most recent "uf=ssr" request is remembered in
    ``reqstr`` / ``current_req`` so that a follow-up request can be built
    from it.
    """

    def __init__(self, old_manager):
        """Create the manager, reusing the settings of ``old_manager``.

        old_manager -- the QNetworkAccessManager being replaced; its cache,
        cookie jar, proxy and proxy factory are installed on this manager.
        """
        QNetworkAccessManager.__init__(self)
        self.old_manager = old_manager
        self.setCache(old_manager.cache())
        self.setCookieJar(old_manager.cookieJar())
        self.setProxy(old_manager.proxy())
        self.setProxyFactory(old_manager.proxyFactory())
        self.imreply = None        # reply of the latest image request
        self.reqstr = None         # URL text of the latest "uf=ssr" request
        self.otherreply = None     # reply of the latest "uf=ssr" request
        self.current_req = None    # private copy of the latest "uf=ssr" request
        self.cnt = 0               # number of follow-up requests issued so far
        self.jpgcnt = 0            # page number parsed from the latest URL
        self.jpgName = "test.jpg"  # file name used for the next saved image
        self.first = True
        self.ba = QByteArray()     # bytes received so far for the current image

    @staticmethod
    def _parseInt(text):
        """Return int(text), or None when text is not a valid integer."""
        try:
            return int(text)
        except ValueError:
            return None

    def createRequest(self, operation, request, data):
        """Create the reply for ``request`` and record the ones of interest.

        Image requests (URL contains "zoom=" and "ss2jpg" but not "pi=2")
        update ``jpgName`` / ``jpgcnt`` and have their reply connected to
        saveImage().  Requests containing "uf=ssr" update ``reqstr``,
        ``current_req`` and ``jpgcnt``.  A URL whose number cannot be parsed
        is passed through untouched: an exception must not escape from this
        virtual method, which is called from Qt.

        Returns the QNetworkReply produced by the base implementation.
        """
        req = request.url().toString()
        if (req.contains(QString("zoom=")) and req.contains(QString("ss2jpg"))
                and not req.contains(QString("pi=2"))):
            strreq = str(req)
            l = strreq.find("jid=")
            r = strreq.find(".jpg&a")
            jpgcnt = None
            if l != -1 and r != -1:
                # NOTE(review): l + 5 skips one character after "jid=";
                # presumably a separator in front of the number - confirm.
                jpgcnt = self._parseInt(strreq[l + 5:r])
            if jpgcnt is not None:
                self.jpgName = strreq[l + 5:r + 4]
                self.jpgcnt = jpgcnt
                print("%s %s" % (self.jpgName, self.jpgcnt))
                self.imreply = QNetworkAccessManager.createRequest(
                    self, operation, request, data)
                self.connect(self.imreply, SIGNAL("readyRead()"),
                             self.saveImage)
                return self.imreply
        elif req.contains(QString("uf=ssr")):
            strreq = str(req)
            r = strreq.find("?")
            jpgcnt = None
            if r >= 6:
                # The six characters in front of "?" hold the page number.
                jpgcnt = self._parseInt(strreq[r - 6:r])
            if jpgcnt is not None:
                self.reqstr = strreq
                # Keep a copy: ``request`` is owned by the caller and must
                # not be used after this method returns.
                self.current_req = QNetworkRequest(request)
                self.jpgcnt = jpgcnt
                self.otherreply = QNetworkAccessManager.createRequest(
                    self, operation, request, data)
                return self.otherreply
        return QNetworkAccessManager.createRequest(
            self, operation, request, data)

    def saveImage(self):
        """Buffer newly arrived image bytes and save the image once complete.

        Connected to the readyRead() signal of the current image reply.
        Nothing is read unless the reply's content type is JPEG or PNG.  When
        the buffered size equals the Content-Length header, the image is
        written to ``jpgName`` and "nextPage()" is emitted.
        """
        contentType = self.imreply.header(
            QNetworkRequest.ContentTypeHeader).toString()
        if not (contentType.contains(QString("image/jpeg"))
                or contentType.contains(QString("image/png"))):
            return
        contentLen, flag = QString(
            self.imreply.rawHeader("Content-Length")).toInt()
        self.ba.append(self.imreply.readAll())
        if self.ba.size() != contentLen:
            return  # more data still to come
        try:
            im = QImage.fromData(self.ba)
            if not im.save(self.jpgName):
                print("could not decode or save image %s" % self.jpgName)
                return
            print("saving image %s %s" % (contentLen, self.jpgName))
            # NOTE(review): this round trip through PIL re-encodes the file
            # that was just written; kept from the original - confirm it is
            # really wanted.
            Image.open(self.jpgName).save(self.jpgName)
        finally:
            # Always start the next image with an empty buffer, even when
            # decoding or saving failed.
            self.ba.clear()
        self.emit(SIGNAL("nextPage()"))

class dxWebView(QWebView):
    """QWebView that can ask its network manager for the following page."""

    def __init__(self):
        """Create the view with default QWebView settings."""
        QWebView.__init__(self)

    def clickNext(self):
        """Request the page after the one most recently recorded.

        Reads ``cnt``, ``reqstr``, ``current_req`` and ``jpgcnt`` from the
        page's network access manager, builds the URL of the next page and
        issues a GET for it.  At most 50 follow-up requests are made.  Does
        nothing when the manager has not recorded a page request yet.
        """
        manager = self.page().networkAccessManager()
        if manager.cnt >= 50:
            return
        reqstr = manager.reqstr
        current = getattr(manager, "jpgcnt", None)
        if reqstr is None or manager.current_req is None or current is None:
            return
        q = reqstr.find("?")
        if q < 6:
            return
        # Rewrite only the six-character page number in front of "?" and keep
        # its zero padding.  A plain str.replace() of the unpadded number
        # would touch every matching digit run anywhere in the URL.
        nexturl = reqstr[:q - 6] + "%06d" % (current + 1) + reqstr[q:]
        print("next url %s" % nexturl)
        # Work on a copy so the request stored on the manager is not mutated.
        nextreq = QNetworkRequest(manager.current_req)
        nextreq.setUrl(QUrl(nexturl))
        manager.get(nextreq)
        manager.cnt = manager.cnt + 1

def main():
    """Open the start page in a dxWebView that uses the capturing manager.

    Enables plugins globally, swaps the page's network access manager for a
    NetworkAccessManager, connects its "nextPage()" signal to the view's
    clickNext(), loads the start URL and runs the Qt event loop until exit.
    """
    application = QApplication(sys.argv)
    QWebSettings.globalSettings().setAttribute(
        QWebSettings.PluginsEnabled, True)

    browser = dxWebView()
    page = browser.page()
    capturing_manager = NetworkAccessManager(page.networkAccessManager())
    page.setNetworkAccessManager(capturing_manager)
    QObject.connect(capturing_manager, SIGNAL("nextPage()"),
                    browser.clickNext)

    start_url = "http://www.yishuleia.cn/DrsPath.do?kid=686A67696A6F6A673134343438303337&username=gdnz2&spagenum=201&pages=50&fid=14813857&a=3fc3e380601ced0f08749c964294120e&btime=2013-04-03&etime=2013-04-23&template=bookdsr1&firstdrs=http%3A%2F%2Fbook.duxiu.com%2FbookDetail.jsp%3FdxNumber%3D000008299393%26d%3D592DC22226A893A958A6578E7D039A43"
    browser.load(QUrl(start_url))
    browser.show()
    sys.exit(application.exec_())

# Run the application only when this file is executed as a script.
if __name__ == '__main__':
    main()

保存第一張圖像後,clickNext 會被觸發,QNetworkAccessManager 應該發送下一個請求。但我發現 manager.get(nextreq) 沒有生效:HTTP 分析器沒有捕獲到任何 HTTP 請求和響應。我的 clickNext 函數寫錯了嗎?應該怎麼做?謝謝!

回答

1

這個 QNetworkAccessManager 是 QWebPage 對象的一部分;每當所加載的 HTML(以及其中包含的任何 JavaScript)請求資源時,都會調用它的 createRequest() 方法。根據我的理解,clickNext() 函數並沒有以您需要的方式真正訪問網頁的實際 DOM。

如果你的目標是建立一個可以下載這些圖片的應用程序,你可以在網頁上運行一些簡單的 JavaScript,自動點擊「下一張」按鈕。然後,如您已經做的那樣,在重載的 createRequest() 函數中觀察加載圖像的請求。

相關問題