Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/360.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python PyQt类不适用于第二次使用_Python_Python 3.x_Beautifulsoup_Pyqt4 - Fatal编程技术网

Python PyQt类不适用于第二次使用

Python PyQt类不适用于第二次使用,python,python-3.x,beautifulsoup,pyqt4,Python,Python 3.x,Beautifulsoup,Pyqt4,我正在使用PyQt完全加载一个页面(包括JS),并使用BeautifulSoup获取它的内容。在第一次迭代时工作正常,但在第二次迭代后,它崩溃了。我对Python的了解不多,对PyQt的了解更少,所以欢迎提供任何帮助 从…借来的类 该示例崩溃,因为RenderPage类试图为尝试加载的每个url创建一个新的QApplication和事件循环 相反,只应创建一个QApplication,并且QWebPage子类应在处理每个页面后加载一个新url,而不是使用for循环 下面是重新编写的示例,该示例应

我正在使用 PyQt 完整加载一个页面(包括 JS),并使用 BeautifulSoup 获取其内容。第一次迭代时工作正常,但到第二次迭代时就崩溃了。我对 Python 了解不多,对 PyQt 了解更少,因此欢迎提供任何帮助

从…借来的类


该示例之所以崩溃,是因为 `RenderPage` 类试图为每个要加载的 url 都创建一个新的 `QApplication` 和事件循环。

相反,应当只创建一个 `QApplication`,并且 `QWebPage` 子类应在处理完每个页面后加载下一个 url,而不是使用 for 循环。

下面是重新编写的示例,该示例应满足您的要求:

import sys, signal
from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit
from PyQt4 import QtCore, QtGui, QtWebKit

class WebPage(QtWebKit.QWebPage):
    """Web page that loads a queue of URLs one after another.

    Each queued item is a ``(url, func)`` pair. When the main frame
    reports that a load has finished, ``func(url, html)`` is called with
    the frame's HTML and the next URL is requested. Once the queue is
    exhausted the application is asked to quit.
    """

    def __init__(self):
        QtWebKit.QWebPage.__init__(self)
        self.mainFrame().loadFinished.connect(self.handleLoadFinished)

    def process(self, items):
        """Start working through *items*.

        Args:
            items: Iterable of ``(url, func)`` pairs, where ``func`` is
                called as ``func(url, html)`` after ``url`` has loaded.
        """
        self._items = iter(items)
        if not self.fetchNext():
            # Nothing was queued, so loadFinished will never fire and the
            # application would otherwise wait forever. Defer the quit
            # with a zero-delay timer: quit() has no effect until the
            # event loop is running.
            QtCore.QTimer.singleShot(0, self._finish)

    def fetchNext(self):
        """Request the next queued URL.

        Returns:
            True if a load was started, False if the queue is exhausted.
        """
        try:
            self._url, self._func = next(self._items)
        except StopIteration:
            return False
        self.mainFrame().load(QtCore.QUrl(self._url))
        return True

    def handleLoadFinished(self):
        """Pass the loaded page to its handler, then move on or finish."""
        self._func(self._url, self.mainFrame().toHtml())
        if not self.fetchNext():
            self._finish()

    def _finish(self):
        """Report completion and ask the application to quit."""
        print('# processing complete')
        QtGui.qApp.quit()


def funcA(url, html):
    """Example page handler: report which URL is being processed.

    Args:
        url: Address of the page that was loaded.
        html: Markup of the loaded page; unused by this placeholder.
    """
    # Real work goes here, e.g. build a soup with
    # BeautifulSoup(UnicodeDammit(html).unicode_markup) and inspect it.
    message = ' '.join(('# processing:', str(url)))
    print(message)

def funcB(url, html):
    """Example page handler: report which URL is being processed.

    Args:
        url: Address of the page that was loaded.
        html: Markup of the loaded page; unused by this placeholder.
    """
    # Real work goes here, e.g. build a soup with
    # BeautifulSoup(UnicodeDammit(html).unicode_markup) and inspect it.
    message = ' '.join(('# processing:', str(url)))
    print(message)

if __name__ == '__main__':
    # Use the default SIGINT action rather than Python's handler, so that
    # Ctrl+C quits as the message below promises.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    print('Press Ctrl+C to quit\n')

    # Each entry pairs a URL with the handler that receives its HTML.
    items = [
        ('http://stackoverflow.com', funcA),
        ('http://google.com', funcB),
    ]

    # One application object serves every page load.
    app = QtGui.QApplication(sys.argv)
    webpage = WebPage()
    webpage.process(items)
    exit_code = app.exec_()
    sys.exit(exit_code)
import sys, signal
from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit
from PyQt4 import QtCore, QtGui, QtWebKit

class WebPage(QtWebKit.QWebPage):
    """Web page that loads a queue of URLs one after another.

    Each queued item is a ``(url, func)`` pair. When the main frame
    reports that a load has finished, ``func(url, html)`` is called with
    the frame's HTML and the next URL is requested. Once the queue is
    exhausted the application is asked to quit.
    """

    def __init__(self):
        QtWebKit.QWebPage.__init__(self)
        self.mainFrame().loadFinished.connect(self.handleLoadFinished)

    def process(self, items):
        """Start working through *items*.

        Args:
            items: Iterable of ``(url, func)`` pairs, where ``func`` is
                called as ``func(url, html)`` after ``url`` has loaded.
        """
        self._items = iter(items)
        if not self.fetchNext():
            # Nothing was queued, so loadFinished will never fire and the
            # application would otherwise wait forever. Defer the quit
            # with a zero-delay timer: quit() has no effect until the
            # event loop is running.
            QtCore.QTimer.singleShot(0, self._finish)

    def fetchNext(self):
        """Request the next queued URL.

        Returns:
            True if a load was started, False if the queue is exhausted.
        """
        try:
            self._url, self._func = next(self._items)
        except StopIteration:
            return False
        self.mainFrame().load(QtCore.QUrl(self._url))
        return True

    def handleLoadFinished(self):
        """Pass the loaded page to its handler, then move on or finish."""
        self._func(self._url, self.mainFrame().toHtml())
        if not self.fetchNext():
            self._finish()

    def _finish(self):
        """Report completion and ask the application to quit."""
        print('# processing complete')
        QtGui.qApp.quit()


def funcA(url, html):
    """Example page handler: report which URL is being processed.

    Args:
        url: Address of the page that was loaded.
        html: Markup of the loaded page; unused by this placeholder.
    """
    # Real work goes here, e.g. build a soup with
    # BeautifulSoup(UnicodeDammit(html).unicode_markup) and inspect it.
    message = ' '.join(('# processing:', str(url)))
    print(message)

def funcB(url, html):
    """Example page handler: report which URL is being processed.

    Args:
        url: Address of the page that was loaded.
        html: Markup of the loaded page; unused by this placeholder.
    """
    # Real work goes here, e.g. build a soup with
    # BeautifulSoup(UnicodeDammit(html).unicode_markup) and inspect it.
    message = ' '.join(('# processing:', str(url)))
    print(message)

if __name__ == '__main__':
    # Use the default SIGINT action rather than Python's handler, so that
    # Ctrl+C quits as the message below promises.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    print('Press Ctrl+C to quit\n')

    # Each entry pairs a URL with the handler that receives its HTML.
    items = [
        ('http://stackoverflow.com', funcA),
        ('http://google.com', funcB),
    ]

    # One application object serves every page load.
    app = QtGui.QApplication(sys.argv)
    webpage = WebPage()
    webpage.process(items)
    exit_code = app.exec_()
    sys.exit(exit_code)