Python 无法使用PyQt5获取HTML

Python 无法使用PyQt5获取HTML,python,pyqt5,Python,Pyqt5,我使用PyQt5获取html 它很好用 但是,我无法获取URL的第二页 我该怎么办 import sys from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets class WebPage(QtWebEngineWidgets.QWebEnginePage): def __init__(self): super(WebPage, self).__init__() self.loadFinished.

我使用 PyQt5 获取 HTML。

它很好用

但是,我无法获取URL的第二页

我该怎么办

import sys
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets

class WebPage(QtWebEngineWidgets.QWebEnginePage):
    """Web page that loads a sequence of URLs one by one and collects their HTML.

    Each URL is loaded in turn; when a load finishes, the page's HTML is
    requested and appended to ``data``.  Once no URLs remain, the running
    Qt application event loop is asked to quit, so results are only complete
    after ``app.exec_()`` has returned.
    """

    def __init__(self):
        super(WebPage, self).__init__()
        self.loadFinished.connect(self.handleLoadFinished)
        # HTML of every processed page, in the order the pages were processed.
        self.data = []

    def start(self, urls):
        """Begin loading *urls* sequentially, starting with the first one.

        If *urls* is empty there is nothing to load, so no ``loadFinished``
        signal would ever trigger the quit in ``processCurrentPage``.  In that
        case a quit is queued instead, so a subsequent ``app.exec_()`` returns
        rather than blocking forever.
        """
        self._urls = iter(urls)
        if not self.fetchNext():
            # A zero-delay timer fires as soon as the event loop runs; calling
            # quit() directly would be a no-op while no loop is running yet.
            QtCore.QTimer.singleShot(0, QtWidgets.qApp.quit)

    def fetchNext(self):
        """Start loading the next pending URL.

        Returns:
            True if a load was started, False if the URL iterator is exhausted.
        """
        try:
            url = next(self._urls)
        except StopIteration:
            return False
        self.load(QtCore.QUrl(url))
        return True

    def processCurrentPage(self, html):
        """Store *html* for the page just loaded, then move on to the next URL.

        Used as the callback for ``toHtml``.  When no URLs remain, the
        application event loop is quit.
        """
        self.data.append(html)
        if not self.fetchNext():
            QtWidgets.qApp.quit()

    def handleLoadFinished(self):
        """Slot for ``loadFinished``: request the page HTML via callback."""
        self.toHtml(self.processCurrentPage)

if __name__ == '__main__':
    urls = ['https://www.hello.com/p1', 'https://www.haha.com/p1']
    app = QtWidgets.QApplication(sys.argv)
    webpage = WebPage()
    # start() only kicks off the first load; HTML is appended to webpage.data
    # later, from the loadFinished -> toHtml -> processCurrentPage callbacks.
    webpage.start(urls)
    # NOTE(review): the event loop has not been started at this point
    # (app.exec_() is only reached on the last line), so presumably none of
    # those callbacks has run yet and the list is still empty here.
    first_result = webpage.data #empty, why?
    # needToGetHtml is not defined in this snippet -- assumed to be a
    # user-supplied predicate selecting URLs to fetch again; TODO confirm.
    new_urls = [i for i in urls if needToGetHtml(i)]
    # This second start() rebinds the page's URL iterator (self._urls), so the
    # URLs still pending from the first start() call are dropped.
    webpage.start(new_urls)
    sys.exit(app.exec_())
修改:

if __name__ == '__main__':
    urls = ['https://www.hello.com/p1', 'https://www.haha.com/p1']
    app = QtWidgets.QApplication(sys.argv)
    webpage = WebPage()

    # First batch: exec_() blocks until WebPage quits the event loop, which it
    # does only after the HTML of the last URL has been stored.
    webpage.start(urls)
    ret = app.exec_()
    # Take a copy: webpage.data is the very list later loads append to, so a
    # plain reference would silently grow during the second batch.
    first_result = list(webpage.data)

    # needToGetHtml is not defined in this snippet -- assumed to be a
    # user-supplied predicate selecting the URLs to fetch in the second pass.
    new_urls = [i for i in urls if needToGetHtml(i)]

    second_result = []
    # With nothing to load, no page would ever finish and quit the event
    # loop, so only run the second pass when there is work to do.
    if new_urls:
        # Fresh list so second_result holds only the second batch.
        webpage.data = []
        webpage.start(new_urls)
        ret = app.exec_()
        second_result = webpage.data
    sys.exit(ret)

请记住,Qt是异步的:按照您提供的代码,只有在事件循环(event loop)停止之后才能拿到完整的数据,而事件循环是在下面的条件成立时才退出的:

# Excerpt from WebPage.processCurrentPage: the event loop is quit only when
# fetchNext() returns False, i.e. when there are no more URLs left to load.
if not self.fetchNext():
    QtWidgets.qApp.quit()

谢谢,艾伦斯。我改了代码。但是PyQt5不稳定,有时会崩溃。代码 webpage.start(urls) 会使程序崩溃,为什么?

@Chan 很奇怪,我已经测试了你的代码,它工作正常。您的环境有哪些特点:操作系统、Python版本、PyQt5版本等?在使用PyQt5 5.15的Linux上,我没有遇到任何问题(我使用Python 3.8.5并在控制台上运行)。

我使用Windows 10、Python 3.8.5。奇怪的是,当我打印得到的html时,程序并没有崩溃。我的代码有问题吗?还是PyQt5不稳定?

@Chan 您是使用控制台(CMD)执行代码,还是使用某个IDE?
if __name__ == '__main__':
    # The application object must exist before any web page is created.
    application = QtWidgets.QApplication(sys.argv)
    page = WebPage()
    page.start(['https://www.hello.com/p1', 'https://www.haha.com/p1'])
    # Blocks until the page quits the event loop after its last URL.
    exit_code = application.exec_()
    # Results are only read once the event loop has finished.
    print(page.data)
    sys.exit(exit_code)