Python 如何通过子类化QNetworkAccessManager使用createRequest伪造请求?
标签:python, qt, webkit, qtwebkit, qnetworkaccessmanager

我想从网站下载动态生成的图像。该网站有javascript代码,点击按钮可转到上一张图片和下一张图片。我在chrome中检查了http请求和响应。除了图像名之外,各个请求几乎完全相同(图像名是递增的数字,如:000001.jpg,000002.jpg)。现在,我可以通过给QWebView设置一个定制的QNetworkAccessManager子类来访问第一张图像并将其保存到磁盘。我重载了createRequest函数:
import sys,urllib,time,os
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
from PyQt4.QtNetwork import *
from PIL import Image
class NetworkAccessManager(QNetworkAccessManager):
    """Network access manager that saves page images while they download.

    Requests whose URL contains ``zoom=`` and ``ss2jpg`` (but not ``pi=2``)
    are treated as page images: their body is collected chunk by chunk and
    written to disk, after which the ``nextPage()`` signal is emitted.
    Requests whose URL contains ``uf=ssr`` are remembered in ``reqstr`` and
    ``current_req`` so that a follow-up request can be derived from them.

    Only one image download at a time is handled correctly, because
    ``imreply``, ``jpgName`` and the byte buffer ``ba`` are shared state.
    """

    def __init__(self, old_manager):
        """Create the manager, reusing the settings of ``old_manager``.

        The cache, cookie jar, proxy and proxy factory are taken from
        ``old_manager`` so the replacement behaves like the manager it
        replaces.
        """
        QNetworkAccessManager.__init__(self)
        self.old_manager = old_manager
        self.setCache(old_manager.cache())
        self.setCookieJar(old_manager.cookieJar())
        self.setProxy(old_manager.proxy())
        self.setProxyFactory(old_manager.proxyFactory())
        self.imreply = None        # reply of the image request being saved
        self.reqstr = None         # URL text of the last "uf=ssr" request
        self.otherreply = None     # reply of the last "uf=ssr" request
        self.current_req = None    # private copy of the last "uf=ssr" request
        self.cnt = 0               # number of follow-up requests issued so far
        # Initialised here so readers never hit a missing attribute before the
        # first matching request has been seen.
        self.jpgcnt = 0
        self.jpgName = "test.jpg"
        self.first = True
        self.ba = QByteArray()     # bytes of the image received so far

    @staticmethod
    def _parse_int(text):
        """Return ``int(text)``, or None when ``text`` is not a number."""
        try:
            return int(text)
        except ValueError:
            return None

    def _note_image_url(self, text):
        """Extract the image name and number from an image request URL.

        Returns True and updates ``jpgName``/``jpgcnt`` when the URL has the
        expected ``jid=...NNNNNN.jpg&a`` shape, False otherwise.
        """
        start = text.find("jid=")
        end = text.find(".jpg&a")
        if start == -1 or end == -1:
            return False
        # The name starts five characters after the start of "jid=", i.e. one
        # character past the "=".  NOTE(review): presumably that character is
        # a separator such as "/" -- confirm against a real URL.
        number = self._parse_int(text[start + 5:end])
        if number is None:
            return False
        self.jpgName = text[start + 5:end + 4]
        self.jpgcnt = number
        return True

    def _note_page_url(self, text, request):
        """Remember a ``uf=ssr`` request and the page number in its URL."""
        self.reqstr = text
        # ``request`` belongs to the caller; keep a copy rather than a
        # reference so the stored object stays valid after this call returns.
        self.current_req = QNetworkRequest(request)
        mark = text.find("?")
        if mark < 6:
            return
        # The page number is the six characters right before the "?".
        number = self._parse_int(text[mark - 6:mark])
        if number is not None:
            self.jpgcnt = number

    def createRequest(self, operation, request, data):
        """Create the reply for ``request``, hooking the ones of interest.

        Parsing problems never propagate out of this method: a URL that
        matches a pattern but cannot be parsed is simply passed through to
        the base implementation, so a reply is always returned.
        """
        url_text = request.url().toString()
        is_image = (url_text.contains(QString("zoom="))
                    and url_text.contains(QString("ss2jpg"))
                    and not url_text.contains(QString("pi=2")))
        if is_image:
            if self._note_image_url(str(url_text)):
                print("%s %s" % (self.jpgName, self.jpgcnt))
                # Drop leftovers of an earlier, unfinished download so they
                # cannot end up in front of this image's data.
                self.ba.clear()
                self.imreply = QNetworkAccessManager.createRequest(
                    self, operation, request, data)
                self.connect(self.imreply, SIGNAL("readyRead()"),
                             self.saveImage)
                return self.imreply
        elif url_text.contains(QString("uf=ssr")):
            self._note_page_url(str(url_text), request)
            self.otherreply = QNetworkAccessManager.createRequest(
                self, operation, request, data)
            return self.otherreply
        return QNetworkAccessManager.createRequest(
            self, operation, request, data)

    def saveImage(self):
        """Collect image data from ``imreply`` and save it once complete.

        Connected to the reply's ``readyRead()`` signal.  When the number of
        collected bytes equals the ``Content-Length`` header, the image is
        written to ``jpgName`` and ``nextPage()`` is emitted.
        """
        reply = self.imreply
        if reply is None:
            return
        content_type = reply.header(QNetworkRequest.ContentTypeHeader).toString()
        if not (content_type.contains(QString("image/jpeg"))
                or content_type.contains(QString("image/png"))):
            return
        contentLen, flag = QString(reply.rawHeader("Content-Length")).toInt()
        self.ba.append(reply.readAll())
        # Without a valid Content-Length the header parses to 0, which must
        # not be mistaken for "an empty image has been fully received".
        if not flag or self.ba.size() != contentLen:
            return
        im = QImage.fromData(self.ba)
        # Reset the buffer before anything below can fail, so a bad image
        # cannot leak its bytes into the next download.
        self.ba.clear()
        if im.isNull() or not im.save(self.jpgName):
            print("could not decode or save %s" % self.jpgName)
            return
        # The saved file is re-opened with PIL and written back to the same
        # path.  NOTE(review): this encodes the image a second time -- confirm
        # that it is needed.
        im = Image.open(self.jpgName)
        print("saving image %s %s" % (contentLen, self.jpgName))
        im.save(self.jpgName)
        self.emit(SIGNAL("nextPage()"))
class dxWebView(QWebView):
    """Web view that can request the page following the current one."""

    def __init__(self):
        QWebView.__init__(self)

    def clickNext(self):
        """Issue a request for the next page through the page's manager.

        The URL of the last remembered request (``manager.reqstr``) is reused
        with its page number replaced by ``manager.jpgcnt + 1``.  At most 50
        such requests are issued, counted in ``manager.cnt``.  Nothing happens
        when no request has been remembered yet or when its URL does not
        contain a page number.
        """
        manager = self.page().networkAccessManager()
        if manager.cnt >= 50:
            return
        if manager.reqstr is None or manager.current_req is None:
            return
        url = manager.reqstr
        # The page number is the six-digit, zero-padded field right before the
        # "?".  Only that field is rewritten: a plain text replace of the
        # unpadded number would also alter every other occurrence of the same
        # digits in the URL (for example inside hash parameters).
        mark = url.find("?")
        if mark < 6 or not url[mark - 6:mark].isdigit():
            print("no page number found in %s" % url)
            return
        nexturl = url[:mark - 6] + "%06d" % (manager.jpgcnt + 1) + url[mark:]
        print("next url %s" % nexturl)
        # Work on a copy so the request remembered by the manager is left
        # untouched.
        nextreq = QNetworkRequest(manager.current_req)
        nextreq.setUrl(QUrl(nexturl))
        manager.get(nextreq)
        manager.cnt = manager.cnt + 1
def main():
    """Open the start page in a dxWebView that uses the custom manager.

    The view's network access manager is replaced by a NetworkAccessManager
    built from the original one, and the manager's ``nextPage()`` signal is
    wired to the view's ``clickNext`` slot.  The function does not return:
    it exits the process with the event loop's return code.
    """
    start_url = "http://www.yishuleia.cn/DrsPath.do?kid=686A67696A6F6A673134343438303337&username=gdnz2&spagenum=201&pages=50&fid=14813857&a=3fc3e380601ced0f08749c964294120e&btime=2013-04-03&etime=2013-04-23&template=bookdsr1&firstdrs=http%3A%2F%2Fbook.duxiu.com%2FbookDetail.jsp%3FdxNumber%3D000008299393%26d%3D592DC22226A893A958A6578E7D039A43"

    app = QApplication(sys.argv)
    QWebSettings.globalSettings().setAttribute(QWebSettings.PluginsEnabled, True)

    view = dxWebView()
    # Build the replacement manager from the view's current one, then swap
    # it in.
    new_manager = NetworkAccessManager(view.page().networkAccessManager())
    view.page().setNetworkAccessManager(new_manager)
    QObject.connect(new_manager, SIGNAL("nextPage()"), view.clickNext)

    view.load(QUrl(start_url))
    view.show()

    exit_code = app.exec_()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
导入系统、urllib、时间、操作系统
从PyQt4.QtCore导入*
从PyQt4.QtGui导入*
从PyQt4.QtWebKit导入*
从PyQt4.qt网络导入*
从PIL导入图像
类NetworkAccessManager(QNetworkAccessManager):
定义初始(自我,旧经理):
QNetworkAccessManager.\uuuuu初始化\uuuuuuuuuu(自)
self.old_manager=old_manager
self.setCache(旧的\u manager.cache())
self.setCookieJar(旧的\u manager.cookieJar())
self.setProxy(旧的\u manager.proxy())
self.setProxyFactory(旧的\u manager.proxyFactory())
self.imreply=None
self.reqstr=None
self.otherreply=None
自身当前需求=无
self.cnt=0
self.jpgName=“test.jpg”
self.first=True
self.ba=QByteArray()
self.ba.clear()
def createRequest(自身、操作、请求、数据):
req=request.url().toString()
如果请求包含(QString(“zoom=”)和请求包含(QString(“ss2jpg”))而非请求包含(QString(“pi=2”):
strreq=str(需要)
l=strreq.find(“jid=”)
r=strreq.find(“.jpg&a”)
self.jpgName=streq[l+5:r+4]
self.jpgcnt=int(streq[l+5:r])
打印self.jpgName,self.jpgcnt
self.imreply=QNetworkAccessManager.createRequest(self、操作、请求、数据)
self.connect(self.imreply,SIGNAL(“readyRead()”),self.saveImage)
返回self.imreply
elif请求包含(QString(“uf=ssr”):
strreq=str(需要)
self.reqstr=strreq
self.current_req=请求
r=strreq.find(“?”)
self.jpgcnt=int(streq[r-6:r])
self.otherreply=QNetworkAccessManager.createRequest(self、操作、请求、数据)
返回self.otherreply
其他:
返回QNetworkAccessManager.createRequest(self、操作、请求、数据)
def saveImage(自我):
如果self.imreply.header(QNetworkRequest.ContentTypeHeader).toString()包含(QString(“图像/jpeg”))或self.imreply.header(QNetworkRequest.ContentTypeHeader).toString()包含(QString(“图像/png”):
contentLen,flag=QString(self.imreply.rawHeader(“内容长度”)).toInt()
self.ba=self.ba.append(self.imreply.readAll())
如果self.ba.size()==contentLen:
#self.ba=self.imreply.readAll()
im=QImage.fromData(self.ba)
im.save(self.jpgName)
im=Image.open(self.jpgName)
打印“保存图像”,contentLen,self.jpgName
im.save(self.jpgName)
self.ba.clear()
self.emit(信号(“nextPage()”)
类dxWebView(QWebView):
定义初始化(自):
QWebView.\uuuuu初始化\uuuuuuu(自)
def单击下一步(自我):
manager=self.page().networkAccessManager()
如果manager.cnt<50:
(以上机器翻译的代码副本在此处被截断,完整代码见上文。)

QNetworkAccessManager是QWebPage对象的一部分,每当渲染后的HTML(及其包含的任何javascript)请求资源时,都会调用createRequest()方法。据我所知,您的clickNext()函数实际上无法以您所需的方式访问网页的实际DOM。
如果您的目标是构建一个可以下载所有这些图片的应用程序,那么您可以在该站点上运行一些简单的javascript,自动点击“下一张图片”按钮。然后,像您已经做的那样,在重载的createRequest()函数中监视加载图像的请求。