Python PyQt 类在第二次使用时无法正常工作
我正在使用 PyQt 完整加载一个页面(包括 JS),并使用 BeautifulSoup 获取它的内容。第一次迭代时工作正常,但到第二次迭代时程序就崩溃了。我对 Python 了解不多,对 PyQt 了解更少,所以欢迎提供任何帮助。(所用的类借自其他来源,原链接已缺失。)标签:python, python-3.x, beautifulsoup, pyqt4
该示例崩溃,是因为 RenderPage 类试图为每个要加载的 url 都创建一个新的 QApplication 和事件循环。
相反,只应创建一个 QApplication,并且 QWebPage 子类应在处理完每个页面后加载下一个 url,而不是使用 for 循环。
下面是重新编写的示例,该示例应满足您的要求:
import sys, signal
from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit
from PyQt4 import QtCore, QtGui, QtWebKit
class WebPage(QtWebKit.QWebPage):
    """Load a series of URLs one after another and hand each page to a callback.

    Work items are ``(url, callback)`` pairs. After a page finishes loading,
    ``callback(url, html)`` is called with the main frame's HTML, and the next
    URL is requested. Once the items are exhausted the application is asked
    to quit.
    """

    def __init__(self):
        QtWebKit.QWebPage.__init__(self)
        # One permanent connection drives the whole sequence: every finished
        # load triggers the callback and then the next request.
        self.mainFrame().loadFinished.connect(self.handleLoadFinished)

    def process(self, items):
        """Begin loading *items*, an iterable of ``(url, callback)`` pairs.

        If *items* is empty, completion is scheduled immediately so that the
        event loop does not wait forever for a ``loadFinished`` signal that
        will never be emitted.
        """
        self._items = iter(items)
        if not self.fetchNext():
            # A direct quit() here could run before the event loop has been
            # started, so defer the completion step to the event loop itself.
            QtCore.QTimer.singleShot(0, self._finish)

    def fetchNext(self):
        """Request the next URL.

        Returns:
            True if a load was started, False if no items remain.
        """
        try:
            self._url, self._func = next(self._items)
        except StopIteration:
            return False
        self.mainFrame().load(QtCore.QUrl(self._url))
        return True

    def handleLoadFinished(self):
        """Pass the loaded page to its callback, then continue or finish."""
        self._func(self._url, self.mainFrame().toHtml())
        if not self.fetchNext():
            self._finish()

    def _finish(self):
        """Report completion and ask the application to quit."""
        print('# processing complete')
        QtGui.qApp.quit()
def funcA(url, html):
    """Announce that *url* is being processed.

    Placeholder page callback: *html* is accepted but not used yet.
    """
    prefix = '# processing:'
    print(prefix, url)
    # Parsing hook: build the soup from ``html`` here, e.g.
    # BeautifulSoup(UnicodeDammit(html).unicode_markup), then work with it.
def funcB(url, html):
    """Announce that *url* is being processed.

    Placeholder page callback: *html* is accepted but not used yet.
    """
    prefix = '# processing:'
    print(prefix, url)
    # Parsing hook: build the soup from ``html`` here, e.g.
    # BeautifulSoup(UnicodeDammit(html).unicode_markup), then work with it.
if __name__ == '__main__':
    # Each job pairs a URL with the callback that receives its HTML.
    jobs = [
        ('http://stackoverflow.com', funcA),
        ('http://google.com', funcB),
    ]

    # Use the default SIGINT action so Ctrl+C ends the program.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    print('Press Ctrl+C to quit\n')

    # A single application object is created and shared by every page load.
    app = QtGui.QApplication(sys.argv)
    page = WebPage()
    page.process(jobs)
    exit_code = app.exec_()
    sys.exit(exit_code)
import sys, signal
from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit
from PyQt4 import QtCore, QtGui, QtWebKit
class WebPage(QtWebKit.QWebPage):
    """Load a series of URLs one after another and hand each page to a callback.

    Work items are ``(url, callback)`` pairs. After a page finishes loading,
    ``callback(url, html)`` is called with the main frame's HTML, and the next
    URL is requested. Once the items are exhausted the application is asked
    to quit.
    """

    def __init__(self):
        QtWebKit.QWebPage.__init__(self)
        # One permanent connection drives the whole sequence: every finished
        # load triggers the callback and then the next request.
        self.mainFrame().loadFinished.connect(self.handleLoadFinished)

    def process(self, items):
        """Begin loading *items*, an iterable of ``(url, callback)`` pairs.

        If *items* is empty, completion is scheduled immediately so that the
        event loop does not wait forever for a ``loadFinished`` signal that
        will never be emitted.
        """
        self._items = iter(items)
        if not self.fetchNext():
            # A direct quit() here could run before the event loop has been
            # started, so defer the completion step to the event loop itself.
            QtCore.QTimer.singleShot(0, self._finish)

    def fetchNext(self):
        """Request the next URL.

        Returns:
            True if a load was started, False if no items remain.
        """
        try:
            self._url, self._func = next(self._items)
        except StopIteration:
            return False
        self.mainFrame().load(QtCore.QUrl(self._url))
        return True

    def handleLoadFinished(self):
        """Pass the loaded page to its callback, then continue or finish."""
        self._func(self._url, self.mainFrame().toHtml())
        if not self.fetchNext():
            self._finish()

    def _finish(self):
        """Report completion and ask the application to quit."""
        print('# processing complete')
        QtGui.qApp.quit()
def funcA(url, html):
    """Announce that *url* is being processed.

    Placeholder page callback: *html* is accepted but not used yet.
    """
    prefix = '# processing:'
    print(prefix, url)
    # Parsing hook: build the soup from ``html`` here, e.g.
    # BeautifulSoup(UnicodeDammit(html).unicode_markup), then work with it.
def funcB(url, html):
    """Announce that *url* is being processed.

    Placeholder page callback: *html* is accepted but not used yet.
    """
    prefix = '# processing:'
    print(prefix, url)
    # Parsing hook: build the soup from ``html`` here, e.g.
    # BeautifulSoup(UnicodeDammit(html).unicode_markup), then work with it.
if __name__ == '__main__':
    # Each job pairs a URL with the callback that receives its HTML.
    jobs = [
        ('http://stackoverflow.com', funcA),
        ('http://google.com', funcB),
    ]

    # Use the default SIGINT action so Ctrl+C ends the program.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    print('Press Ctrl+C to quit\n')

    # A single application object is created and shared by every page load.
    app = QtGui.QApplication(sys.argv)
    page = WebPage()
    page.process(jobs)
    exit_code = app.exec_()
    sys.exit(exit_code)