Python 3.x 检查路径是否在网页抓取中动态生成

Python 3.x 检查路径是否在web刮片中动态生成,python-3.x,web-scraping,beautifulsoup,scrapy,python-requests,Python 3.x,Web Scraping,Beautifulsoup,Scrapy,Python Requests,我正在从trip.com上抓取数据。这是一个酒店挂牌网站。在单击搜索按钮输入详细信息后,搜索结果将显示在一个新选项卡中,并动态生成结果。当我在网站上滚动时,会下载并显示更多结果。现在,据我所知,要动态生成数据并对其进行刮取,我需要获得有关动态返回JSON值的API头的信息。但这里的问题是,我正在抓取的这个站点的genrates是动态的头参数,也是加密格式的。我的意思是这是我的请求URL: Request URL: https://www.trip.com/restapi/soa2/16709/j

我正在从 trip.com 上抓取数据。这是一个酒店列表网站。输入详细信息并单击搜索按钮后,搜索结果会显示在一个新选项卡中,并且结果是动态生成的。当我在页面上向下滚动时,会加载并显示更多结果。据我所知,要抓取动态生成的数据,我需要获得动态返回 JSON 的那个 API 的请求头信息。但这里的问题是,我正在抓取的这个站点会动态生成请求头参数,而且是加密格式的。例如,这是我的请求 URL:

Request URL: https://www.trip.com/restapi/soa2/16709/json/rateplan?testab=ec23b14de9ad450c7b74612efc288bfdd523314036afe19b5fe135f206284aab
这是我的请求头:

:authority: www.trip.com
:method: POST
:path: /restapi/soa2/16709/json/rateplan?testab=ec23b14de9ad450c7b74612efc288bfdd523314036afe19b5fe135f206284aab
:scheme: https
accept: application/json
accept-encoding: gzip, deflate, br
accept-language: en-GB,en-US;q=0.9,en;q=0.8
cache-control: no-cache
content-length: 1697
content-type: application/json
cookie: ibulanguage=EN; cookiePricesDisplayed=USD; ibu_online_home_language_match={"isFromTWNotZh":false,"isFromIPRedirect":false,"isFromLastVisited":false,"isRedirect":false,"isShowSuggestion":false,"lastVisited":""}; _abtest_userid=55c19cf3-dcd6-4f4a-bfba-5965c52ac66c; _tp_search_latest_channel_name=hotels; _RF1=45.115.185.74; _RSG=BJ4Q9HdNV80BpEgEyf8ZZ9; _RDG=286d5feba1bdad2eee089fc228174f22ec; _RGUID=021f5e74-4968-44cb-98e3-229f0ea8eccb; ibulocale=en_us; g_state={"i_p":1600591022929,"i_l":3}; Union=AllianceID=1078337&SID=2036545&OUID=ctag.hash.d23ecf76442c&SourceID=&AppID=&OpenID=&Expires=1602581159329&createtime=1599989159; IBU_TRANCE_LOG_URL=/hotels/mumbai-hotel-detail-762871/grand-hyatt-mumbai/?checkIn=2020-09-14&checkOut=2020-09-15&cityId=724&adult=2&children=0&ages=&crn=1&travelpurpose=0&curr=USD&showtotalamt=0&hoteluniquekey=H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA(; librauuid=3lSNuDO18464CG5a; intl_ht1=h4%3D724_762871; hotel=762871; hotelhst=1164390341; _bfa=1.1599889636407.b231b.1.1599996200640.1600004365027.18.57; _bfs=1.1; _bfi=p1%3D10320668147%26p2%3D10320668147%26v1%3D57%26v2%3D56; IBU_TRANCE_LOG_P=22266407054
origin: https://www.trip.com
p: 22266407054
pid: 584e7499-4df6-45dd-8242-94cb5dec36c5
pragma: no-cache
referer: https://www.trip.com/hotels/mumbai-hotel-detail-762871/grand-hyatt-mumbai/?checkIn=2020-09-14&checkOut=2020-09-15&cityId=724&adult=2&children=0&ages=&crn=1&travelpurpose=0&curr=USD&showtotalamt=0&hoteluniquekey=H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA(
sec-fetch-dest: empty
sec-fetch-mode: cors
sec-fetch-site: same-origin
user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36

现在,当我在页面中向下滚动时,testab 参数的值是动态生成的。但我无法理解 testab 的值是如何生成的。它是通过加密其余的请求头信息生成的吗?仅供参考,除了“path”值之外,我有所有的请求头信息。因此,如果该值是通过加密生成的,那么我该如何继续抓取?此外,我不能在这里使用 selenium 或任何基于浏览器的抓取方式。

使用文件中的以下JavaScript随机生成
testab

改编自eyllanesc的作者

输出链接(和一些警告),例如

根据评论更新:你只需要获取 cookie 并发起请求。下面是一段非常简陋的速成代码

import requests
import sys
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor
from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEnginePage, QWebEngineProfile
from PyQt5.QtNetwork import QNetworkCookie


class WebEngineUrlRequestInterceptor(QWebEngineUrlRequestInterceptor):
    """Interceptor that hands the rateplan API request to a callback.

    Each intercepted request is inspected; the first one whose URL begins
    with the rateplan endpoint (up to and including ``testab=``) is passed
    to ``on_network_call``, after which ``sys.exit()`` is called.
    """

    def __init__(self, on_network_call):
        """Store the callable that receives the matching request's info object."""
        super().__init__()
        self.on_network_call = on_network_call

    def interceptRequest(self, info):
        """Forward the rateplan request to the callback, then exit.

        Requests whose URL does not start with the rateplan prefix are ignored.
        """
        requested = info.requestUrl().url()
        if not requested.startswith('https://www.trip.com/restapi/soa2/16709/json/rateplan?testab='):
            return
        self.on_network_call(info)
        # Only the first matching request is handled; the script stops here.
        sys.exit()


class MyWebEnginePage(QWebEnginePage):
    """Page subclass whose navigation hook defers to ``QWebEnginePage``."""

    def acceptNavigationRequest(self, url, _type, isMainFrame):
        """Return the base class's decision for this navigation request."""
        return super().acceptNavigationRequest(url, _type, isMainFrame)


def on_network_call(info):
    """Replay the intercepted rateplan request with ``requests`` and print the JSON.

    The target URL is read from ``info``; cookies come from
    ``to_cookie_dict()``. The headers and the JSON payload are hard-coded
    literals for a single hotel.

    NOTE(review): the ``checkIn``/``checkOut`` dates inside the payload are
    fixed; presumably they must be updated once they lie in the past.

    Args:
        info: intercepted request object exposing ``requestUrl()``.

    Raises:
        requests.exceptions.Timeout: if no response arrives within the timeout.
    """
    target = info.requestUrl().url()
    print(target)
    headers = {
        'authority': 'www.trip.com',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'accept': 'application/json',
        'dnt': '1',
        'p': '99783168614',
        'pid': '256f8038-1c06-4173-99b5-880dc120042f',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
        'content-type': 'application/json',
        'origin': 'https://www.trip.com',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://www.trip.com/hotels/mumbai-hotel-detail-762871/grand-hyatt-mumbai/?checkIn=2020-09-14&checkOut=2020-09-15&cityId=724&adult=2&children=0&ages=&crn=1&travelpurpose=0&curr=USD&showtotalamt=0&hoteluniquekey=H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA(sec-fetch-dest:%20empty',
        'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
    }

    data = '{"checkIn":"2020-09-15","checkOut":"2020-09-16","priceType":"0","adult":2,"popularFacilityType":"","hotelUniqueKey":"H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA(sec-fetch-dest:%20empty","child":0,"roomNum":1,"masterHotelId":762871,"age":"","cityId":"724","hotel":"762871","versionControl":[{"key":"RoomCardVersionB","value":"T"}],"signInRoomKey":"","signInType":0,"filterCondition":null,"unAvailableRoomInfo":null,"minPriceRoomKey":"","Head":{"Locale":"en-XX","Currency":"USD","AID":"","SID":"","ClientID":"1600039009299.2v21ry","OUID":"","CAID":"","CSID":"","COUID":"","TimeZone":"1","PageID":"10320668147","HotelExtension":{"WebpSupport":true,"Qid":"","hasAidInUrl":false,"group":"TRIP","PID":"256f8038-1c06-4173-99b5-880dc120042f","hotelUuidKey":"S96K39i7Te47IA7idYlfYp6E3YLpemawnOWOYhgjs6wZFv0lEPYtNjoSwHSybpjsY1pKL4KazvlLjFYoTvU1YQByTZjUBvc9ed7YG9jHZy5Y1fekTv0NEghwGqWbsenZi8BwMYtY5OInLeo9YmDvFSeDrNbeUZjnkwDfY7bwzSEkY1dRSYX0INbWBYaqYonikdikSiXNj5Y5bjSQi4gYBkwPoJoGRcaYT7woY0ZR7fwa7W6XW4hR7BRqpJT4JMfy9SEcbRgaE4ZEaY4FyfQK11xomETtvc1KQtY3aWGBr90yBXET9vSOvhkyg1E9DJGYUaRkNwG3W9fW6QWf7iDOv5DWqbWFHvfSYHdvdtvOYaXjOcwLkvthjUYAqR9ZwqdjAHW53eZPROqWzSJ3PWPYPnRgqwmFW43jDSePDRBPWtcY3niTYHpRqLwUgWz6WPURD1RUZJ8bJ73ytTEFlWGmW6G","hotelUuid":"dhX4uhn0MdpHusaD"},"Frontend":{"vid":"1600039009299.2v21ry","sessionID":2,"pvid":6},"P":"99783168614","Device":"PC","Version":"0"}}'

    # Without a timeout a stalled server would block this callback forever.
    r = requests.post(target, cookies=to_cookie_dict(), data=data, headers=headers, timeout=30)
    print(r.json())


def on_cookie_added(cookie):
    """Record ``cookie`` in the module-level ``cookies`` list unless already known.

    A cookie counts as known when an existing entry reports
    ``hasSameIdentifier(cookie)``; otherwise a ``QNetworkCookie`` copy of it
    is appended.
    """
    if any(known.hasSameIdentifier(cookie) for known in cookies):
        return
    cookies.append(QNetworkCookie(cookie))


def to_cookie_dict():
    """Return the collected cookies as a ``{name: value}`` dict of ``str``.

    Each entry's ``name()`` and ``value()`` are wrapped in ``bytearray`` and
    decoded with the default codec. The dict is printed before it is returned.
    """
    jar = {
        bytearray(entry.name()).decode(): bytearray(entry.value()).decode()
        for entry in cookies
    }
    print(jar)
    return jar


if __name__ == "__main__":
    # Script entry point: load the hotel detail page in a QWebEngineView with
    # the request interceptor and the cookie listener attached to its profile.
    app = QApplication(sys.argv)
    browser = QWebEngineView()
    interceptor = WebEngineUrlRequestInterceptor(on_network_call)
    profile = QWebEngineProfile()
    cookie_store = profile.cookieStore()
    # on_cookie_added() is called for each cookie the store reports as added.
    cookie_store.cookieAdded.connect(on_cookie_added)
    # Module-level list shared by on_cookie_added() and to_cookie_dict().
    cookies = []
    profile.setRequestInterceptor(interceptor)
    # The page is created on the profile configured above.
    page = MyWebEnginePage(profile, browser)
    url = 'https://www.trip.com/hotels/mumbai-hotel-detail-762871/grand-hyatt-mumbai/?checkIn=2020-09-14&checkOut=2020-09-15&cityId=724&adult=2&children=0&ages=&crn=1&travelpurpose=0&curr=USD&showtotalamt=0&hoteluniquekey=H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA('
    page.setUrl(QUrl(url))
    browser.setPage(page)
    browser.show()
    # Run the Qt event loop and use its return value as the exit status.
    sys.exit(app.exec_())

多亏了Eril marimon和eyllanesc的作者,
testab
值是使用文件中的以下JavaScript随机生成的

改编自eyllanesc的作者

输出链接(和一些警告),例如

根据评论更新:你只需要获取 cookie 并发起请求。下面是一段非常简陋的速成代码

import requests
import sys
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor
from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEnginePage, QWebEngineProfile
from PyQt5.QtNetwork import QNetworkCookie


class WebEngineUrlRequestInterceptor(QWebEngineUrlRequestInterceptor):
    """Interceptor that hands the rateplan API request to a callback.

    Each intercepted request is inspected; the first one whose URL begins
    with the rateplan endpoint (up to and including ``testab=``) is passed
    to ``on_network_call``, after which ``sys.exit()`` is called.
    """

    def __init__(self, on_network_call):
        """Store the callable that receives the matching request's info object."""
        super().__init__()
        self.on_network_call = on_network_call

    def interceptRequest(self, info):
        """Forward the rateplan request to the callback, then exit.

        Requests whose URL does not start with the rateplan prefix are ignored.
        """
        requested = info.requestUrl().url()
        if not requested.startswith('https://www.trip.com/restapi/soa2/16709/json/rateplan?testab='):
            return
        self.on_network_call(info)
        # Only the first matching request is handled; the script stops here.
        sys.exit()


class MyWebEnginePage(QWebEnginePage):
    """Page subclass whose navigation hook defers to ``QWebEnginePage``."""

    def acceptNavigationRequest(self, url, _type, isMainFrame):
        """Return the base class's decision for this navigation request."""
        return super().acceptNavigationRequest(url, _type, isMainFrame)


def on_network_call(info):
    """Replay the intercepted rateplan request with ``requests`` and print the JSON.

    The target URL is read from ``info``; cookies come from
    ``to_cookie_dict()``. The headers and the JSON payload are hard-coded
    literals for a single hotel.

    NOTE(review): the ``checkIn``/``checkOut`` dates inside the payload are
    fixed; presumably they must be updated once they lie in the past.

    Args:
        info: intercepted request object exposing ``requestUrl()``.

    Raises:
        requests.exceptions.Timeout: if no response arrives within the timeout.
    """
    target = info.requestUrl().url()
    print(target)
    headers = {
        'authority': 'www.trip.com',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'accept': 'application/json',
        'dnt': '1',
        'p': '99783168614',
        'pid': '256f8038-1c06-4173-99b5-880dc120042f',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
        'content-type': 'application/json',
        'origin': 'https://www.trip.com',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://www.trip.com/hotels/mumbai-hotel-detail-762871/grand-hyatt-mumbai/?checkIn=2020-09-14&checkOut=2020-09-15&cityId=724&adult=2&children=0&ages=&crn=1&travelpurpose=0&curr=USD&showtotalamt=0&hoteluniquekey=H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA(sec-fetch-dest:%20empty',
        'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
    }

    data = '{"checkIn":"2020-09-15","checkOut":"2020-09-16","priceType":"0","adult":2,"popularFacilityType":"","hotelUniqueKey":"H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA(sec-fetch-dest:%20empty","child":0,"roomNum":1,"masterHotelId":762871,"age":"","cityId":"724","hotel":"762871","versionControl":[{"key":"RoomCardVersionB","value":"T"}],"signInRoomKey":"","signInType":0,"filterCondition":null,"unAvailableRoomInfo":null,"minPriceRoomKey":"","Head":{"Locale":"en-XX","Currency":"USD","AID":"","SID":"","ClientID":"1600039009299.2v21ry","OUID":"","CAID":"","CSID":"","COUID":"","TimeZone":"1","PageID":"10320668147","HotelExtension":{"WebpSupport":true,"Qid":"","hasAidInUrl":false,"group":"TRIP","PID":"256f8038-1c06-4173-99b5-880dc120042f","hotelUuidKey":"S96K39i7Te47IA7idYlfYp6E3YLpemawnOWOYhgjs6wZFv0lEPYtNjoSwHSybpjsY1pKL4KazvlLjFYoTvU1YQByTZjUBvc9ed7YG9jHZy5Y1fekTv0NEghwGqWbsenZi8BwMYtY5OInLeo9YmDvFSeDrNbeUZjnkwDfY7bwzSEkY1dRSYX0INbWBYaqYonikdikSiXNj5Y5bjSQi4gYBkwPoJoGRcaYT7woY0ZR7fwa7W6XW4hR7BRqpJT4JMfy9SEcbRgaE4ZEaY4FyfQK11xomETtvc1KQtY3aWGBr90yBXET9vSOvhkyg1E9DJGYUaRkNwG3W9fW6QWf7iDOv5DWqbWFHvfSYHdvdtvOYaXjOcwLkvthjUYAqR9ZwqdjAHW53eZPROqWzSJ3PWPYPnRgqwmFW43jDSePDRBPWtcY3niTYHpRqLwUgWz6WPURD1RUZJ8bJ73ytTEFlWGmW6G","hotelUuid":"dhX4uhn0MdpHusaD"},"Frontend":{"vid":"1600039009299.2v21ry","sessionID":2,"pvid":6},"P":"99783168614","Device":"PC","Version":"0"}}'

    # Without a timeout a stalled server would block this callback forever.
    r = requests.post(target, cookies=to_cookie_dict(), data=data, headers=headers, timeout=30)
    print(r.json())


def on_cookie_added(cookie):
    """Record ``cookie`` in the module-level ``cookies`` list unless already known.

    A cookie counts as known when an existing entry reports
    ``hasSameIdentifier(cookie)``; otherwise a ``QNetworkCookie`` copy of it
    is appended.
    """
    if any(known.hasSameIdentifier(cookie) for known in cookies):
        return
    cookies.append(QNetworkCookie(cookie))


def to_cookie_dict():
    """Return the collected cookies as a ``{name: value}`` dict of ``str``.

    Each entry's ``name()`` and ``value()`` are wrapped in ``bytearray`` and
    decoded with the default codec. The dict is printed before it is returned.
    """
    jar = {
        bytearray(entry.name()).decode(): bytearray(entry.value()).decode()
        for entry in cookies
    }
    print(jar)
    return jar


if __name__ == "__main__":
    # Script entry point: load the hotel detail page in a QWebEngineView with
    # the request interceptor and the cookie listener attached to its profile.
    app = QApplication(sys.argv)
    browser = QWebEngineView()
    interceptor = WebEngineUrlRequestInterceptor(on_network_call)
    profile = QWebEngineProfile()
    cookie_store = profile.cookieStore()
    # on_cookie_added() is called for each cookie the store reports as added.
    cookie_store.cookieAdded.connect(on_cookie_added)
    # Module-level list shared by on_cookie_added() and to_cookie_dict().
    cookies = []
    profile.setRequestInterceptor(interceptor)
    # The page is created on the profile configured above.
    page = MyWebEnginePage(profile, browser)
    url = 'https://www.trip.com/hotels/mumbai-hotel-detail-762871/grand-hyatt-mumbai/?checkIn=2020-09-14&checkOut=2020-09-15&cityId=724&adult=2&children=0&ages=&crn=1&travelpurpose=0&curr=USD&showtotalamt=0&hoteluniquekey=H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA('
    page.setUrl(QUrl(url))
    browser.setPage(page)
    browser.show()
    # Run the Qt event loop and use its return value as the exit status.
    sys.exit(app.exec_())

感谢Eril marimon和eyllanesc的作者

非常感谢您的回复。答案很有用。这解决了我获取url的一半问题。但问题是,我不能再次使用此URL从服务器获取响应。所以我想要的是如何从这个请求URL返回响应。我已经添加了非常快速和肮脏的代码来解决您的问题。我刚刚注意到
p
pid
请求头发生了变化,但您似乎可以忽略它们,请求仍然有效。如果你在今天之后运行,记得更改日期。兄弟,你太棒了,非常感谢!!!只需要最后一点帮助/信息。此页面是动态生成的,即当您向下滚动时,会加载越来越多的数据,那么我如何使用PyQt实现这种一直向下滚动到页面末尾的行为呢?这实际上是另外一两个问题,首先您需要从
interceptRequest()中删除
sys.exit()
这将允许它继续加载页面,但在其中一个URL上会遇到超时,您必须修复此问题。然后在页面加载后,您需要在页面上运行JavaScript以向下滚动页面。因此,如果我是你,我会把这些问题作为单独的问题发布,也许在发布第二个问题之前先得到超时问题的答案。但是要确保这些问题还没有被问到,否则它们会被标记为重复的问题。感谢您的回答。答案很有用。这解决了我获取url的一半问题。但问题是,我不能再次使用此URL从服务器获取响应。所以我想要的是如何从这个请求URL返回响应。我已经添加了非常快速和肮脏的代码来解决您的问题。我刚刚注意到
p
pid
请求头发生了变化,但您似乎可以忽略它们,请求仍然有效。如果你在今天之后运行,记得更改日期。兄弟,你太棒了,非常感谢!!!只需要最后一点帮助/信息。此页面是动态生成的,即当您向下滚动时,会加载越来越多的数据,那么我如何使用PyQt实现这种一直向下滚动到页面末尾的行为呢?这实际上是另外一两个问题,首先您需要从
interceptRequest()中删除
sys.exit()
这将允许它继续加载页面,但在其中一个URL上会遇到超时,您必须修复此问题。然后在页面加载后,您需要在页面上运行JavaScript以向下滚动页面。因此,如果我是你,我会把这些问题作为单独的问题发布,也许在发布第二个问题之前先得到超时问题的答案。确保这些问题还没有被问过,否则会被标记为重复问题
https://www.trip.com/restapi/soa2/16709/json/rateplan?testab=15feb5b1067d2e4e2b979fe97830d884c5e3a07*e145f7¼(5955400Z380ac6a6
import requests
import sys
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInterceptor
from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEnginePage, QWebEngineProfile
from PyQt5.QtNetwork import QNetworkCookie


class WebEngineUrlRequestInterceptor(QWebEngineUrlRequestInterceptor):
    """Interceptor that hands the rateplan API request to a callback.

    Each intercepted request is inspected; the first one whose URL begins
    with the rateplan endpoint (up to and including ``testab=``) is passed
    to ``on_network_call``, after which ``sys.exit()`` is called.
    """

    def __init__(self, on_network_call):
        """Store the callable that receives the matching request's info object."""
        super().__init__()
        self.on_network_call = on_network_call

    def interceptRequest(self, info):
        """Forward the rateplan request to the callback, then exit.

        Requests whose URL does not start with the rateplan prefix are ignored.
        """
        requested = info.requestUrl().url()
        if not requested.startswith('https://www.trip.com/restapi/soa2/16709/json/rateplan?testab='):
            return
        self.on_network_call(info)
        # Only the first matching request is handled; the script stops here.
        sys.exit()


class MyWebEnginePage(QWebEnginePage):
    """Page subclass whose navigation hook defers to ``QWebEnginePage``."""

    def acceptNavigationRequest(self, url, _type, isMainFrame):
        """Return the base class's decision for this navigation request."""
        return super().acceptNavigationRequest(url, _type, isMainFrame)


def on_network_call(info):
    """Replay the intercepted rateplan request with ``requests`` and print the JSON.

    The target URL is read from ``info``; cookies come from
    ``to_cookie_dict()``. The headers and the JSON payload are hard-coded
    literals for a single hotel.

    NOTE(review): the ``checkIn``/``checkOut`` dates inside the payload are
    fixed; presumably they must be updated once they lie in the past.

    Args:
        info: intercepted request object exposing ``requestUrl()``.

    Raises:
        requests.exceptions.Timeout: if no response arrives within the timeout.
    """
    target = info.requestUrl().url()
    print(target)
    headers = {
        'authority': 'www.trip.com',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'accept': 'application/json',
        'dnt': '1',
        'p': '99783168614',
        'pid': '256f8038-1c06-4173-99b5-880dc120042f',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
        'content-type': 'application/json',
        'origin': 'https://www.trip.com',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://www.trip.com/hotels/mumbai-hotel-detail-762871/grand-hyatt-mumbai/?checkIn=2020-09-14&checkOut=2020-09-15&cityId=724&adult=2&children=0&ages=&crn=1&travelpurpose=0&curr=USD&showtotalamt=0&hoteluniquekey=H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA(sec-fetch-dest:%20empty',
        'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
    }

    data = '{"checkIn":"2020-09-15","checkOut":"2020-09-16","priceType":"0","adult":2,"popularFacilityType":"","hotelUniqueKey":"H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA(sec-fetch-dest:%20empty","child":0,"roomNum":1,"masterHotelId":762871,"age":"","cityId":"724","hotel":"762871","versionControl":[{"key":"RoomCardVersionB","value":"T"}],"signInRoomKey":"","signInType":0,"filterCondition":null,"unAvailableRoomInfo":null,"minPriceRoomKey":"","Head":{"Locale":"en-XX","Currency":"USD","AID":"","SID":"","ClientID":"1600039009299.2v21ry","OUID":"","CAID":"","CSID":"","COUID":"","TimeZone":"1","PageID":"10320668147","HotelExtension":{"WebpSupport":true,"Qid":"","hasAidInUrl":false,"group":"TRIP","PID":"256f8038-1c06-4173-99b5-880dc120042f","hotelUuidKey":"S96K39i7Te47IA7idYlfYp6E3YLpemawnOWOYhgjs6wZFv0lEPYtNjoSwHSybpjsY1pKL4KazvlLjFYoTvU1YQByTZjUBvc9ed7YG9jHZy5Y1fekTv0NEghwGqWbsenZi8BwMYtY5OInLeo9YmDvFSeDrNbeUZjnkwDfY7bwzSEkY1dRSYX0INbWBYaqYonikdikSiXNj5Y5bjSQi4gYBkwPoJoGRcaYT7woY0ZR7fwa7W6XW4hR7BRqpJT4JMfy9SEcbRgaE4ZEaY4FyfQK11xomETtvc1KQtY3aWGBr90yBXET9vSOvhkyg1E9DJGYUaRkNwG3W9fW6QWf7iDOv5DWqbWFHvfSYHdvdtvOYaXjOcwLkvthjUYAqR9ZwqdjAHW53eZPROqWzSJ3PWPYPnRgqwmFW43jDSePDRBPWtcY3niTYHpRqLwUgWz6WPURD1RUZJ8bJ73ytTEFlWGmW6G","hotelUuid":"dhX4uhn0MdpHusaD"},"Frontend":{"vid":"1600039009299.2v21ry","sessionID":2,"pvid":6},"P":"99783168614","Device":"PC","Version":"0"}}'

    # Without a timeout a stalled server would block this callback forever.
    r = requests.post(target, cookies=to_cookie_dict(), data=data, headers=headers, timeout=30)
    print(r.json())


def on_cookie_added(cookie):
    """Record ``cookie`` in the module-level ``cookies`` list unless already known.

    A cookie counts as known when an existing entry reports
    ``hasSameIdentifier(cookie)``; otherwise a ``QNetworkCookie`` copy of it
    is appended.
    """
    if any(known.hasSameIdentifier(cookie) for known in cookies):
        return
    cookies.append(QNetworkCookie(cookie))


def to_cookie_dict():
    """Return the collected cookies as a ``{name: value}`` dict of ``str``.

    Each entry's ``name()`` and ``value()`` are wrapped in ``bytearray`` and
    decoded with the default codec. The dict is printed before it is returned.
    """
    jar = {
        bytearray(entry.name()).decode(): bytearray(entry.value()).decode()
        for entry in cookies
    }
    print(jar)
    return jar


if __name__ == "__main__":
    # Script entry point: load the hotel detail page in a QWebEngineView with
    # the request interceptor and the cookie listener attached to its profile.
    app = QApplication(sys.argv)
    browser = QWebEngineView()
    interceptor = WebEngineUrlRequestInterceptor(on_network_call)
    profile = QWebEngineProfile()
    cookie_store = profile.cookieStore()
    # on_cookie_added() is called for each cookie the store reports as added.
    cookie_store.cookieAdded.connect(on_cookie_added)
    # Module-level list shared by on_cookie_added() and to_cookie_dict().
    cookies = []
    profile.setRequestInterceptor(interceptor)
    # The page is created on the profile configured above.
    page = MyWebEnginePage(profile, browser)
    url = 'https://www.trip.com/hotels/mumbai-hotel-detail-762871/grand-hyatt-mumbai/?checkIn=2020-09-14&checkOut=2020-09-15&cityId=724&adult=2&children=0&ages=&crn=1&travelpurpose=0&curr=USD&showtotalamt=0&hoteluniquekey=H4sIAAAAAAAAAOPaycjFK8Fk8B8GGIWYOBilFjNyfJl7U12Iy9DE0sTczNzQwMhgCrNFs44jAwgcaHDwBDMKWh0CeCYxSnKCeef3OAiC6AbVnQ5OrBxr_SRYZjB-P663gpFxIyNEY5LDDkamE4x-C5j-PnnDvIuJleM1uwTTISA9SVCC5RQTwyUmhltMDI-YGF4xMXxiYvgFVdHEzNDFzDCJGaJuFjPDImYGIRaQG6UUjMxTjI0NE00tzYzMTSwT00B0qplJYpKxUXKiuaW5ArdG16GPv1iNGKyYpRjdPBiD2Iwd3SyMXKJkuJg9_YIE4xpqS16d2m4vxRwa7KKoqyj_JSdM2iGJNTVPNyIi4x1LAWMXI5MA4yRGTo7m3U8-Mp5gTAYA1R43aDgBAAA('
    page.setUrl(QUrl(url))
    browser.setPage(page)
    browser.show()
    # Run the Qt event loop and use its return value as the exit status.
    sys.exit(app.exec_())