HTML值在使用Python单击Selenium按钮后不会更改_Python_Html_Selenium_Web Scraping_Beautifulsoup

HTML值在使用Python单击Selenium按钮后不会更改

python html selenium web-scraping

HTML值在使用Python单击Selenium按钮后不会更改,python,html,selenium,web-scraping,beautifulsoup,Python,Html,Selenium,Web Scraping,Beautifulsoup,我正在使用Soup和Selenium访问此页面，并试图获得所有包装类型的价格和评级列表下面是我的代码： import requests import time from bs4 import BeautifulSoup from selenium import webdriver headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0'}

我正在使用 BeautifulSoup 和 Selenium 访问此页面，并试图获取所有包装类型（规格）的价格和评级列表。

下面是我的代码：

import requests
import time
from bs4 import BeautifulSoup
from selenium import webdriver

# Browser-like User-Agent sent with every requests.get() call below.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0'}

# use Selenium to get buttons through all pages


# Fetch the product page once with requests (no browser) to count the buttons.
test_url = 'https://www.chewy.com/blue-buffalo-basics-limited/dp/37047'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

# For each child <div> of a .js-sku-selector element, count how many times the
# text 'isSelected' occurs in its data-attributes value (presumably once per
# selectable option -- verify against the page markup).
btn_count = []
for btn_cnt in test.select('.js-sku-selector > div'):
    btn_cnt = btn_cnt['data-attributes'].count('isSelected')
    btn_count.append(btn_cnt)
# NOTE(review): this uses btn_cnt left over from the LAST loop iteration, not
# the btn_count list; if the selector matched nothing, btn_cnt is undefined here.
buttons = list(range(1,btn_cnt+1))

# Build one XPath per button index (1-based) under the "variation-Size" element.
xpath = []
for b in buttons:
    btn_path = '//*[@id="variation-Size"]/div[2]/div[' + str(b) + ']/div/label'
    print(btn_path)
    xpath.append(btn_path)


# Header row, then one formatted row per scraped record.
print('{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'.format('brand', 'product', 'id','auto_ship', 'regular','rating'))
for btn in xpath:
    test_url = 'https://www.chewy.com/blue-buffalo-basics-limited/dp/37047'
    # NOTE(review): `test` is parsed from a brand-new requests.get() response,
    # not from the Selenium-driven browser. The click performed below therefore
    # cannot change anything that is read from `test`, so every iteration
    # parses the same HTML. This is the problem the question is about.
    test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')
    # A new Chrome instance is started for every button.
    driver = webdriver.Chrome(executable_path=r'C:\Users\public\chromedriver')
    driver.get(test_url)
    time.sleep(5)
    driver.find_element_by_xpath(btn).click()
    time.sleep(5)
    # zip() stops at the shortest result list, so a row is produced only when
    # every one of the six lookups found a matching element.
    for brand, product, id, auto_ship, price, rating in zip(test.findAll('span', attrs={'itemprop': 'brand'}),
                                                            test.findAll('div', attrs={'id': 'product-title'}),
                                                            test.findAll('div', attrs={'class': 'value js-part-number'}),
                                                            test.findAll('p', attrs={'class': 'autoship-pricing p'}),
                                                            test.findAll('span', attrs={'class': 'ga-eec__price'}),
                                                            test.select('div.ugc')):
        #date = date.today()
        brand = brand.text
        # ' '.join(x.split()) collapses runs of whitespace into single spaces.
        product = ' '.join(product.h1.text.split())
        id = ' '.join(id.span.text.split())
        # Keep only the text before the first '('; .index raises ValueError if absent.
        p1 = auto_ship.text.index('(')
        auto_ship = ' '.join(auto_ship.text[:p1].split())
        regular_price = ' '.join(price.text.split())
        # Takes three characters from the image URL just before its last four
        # characters and swaps '_' for '.' (presumably e.g. "4_5" -> "4.5";
        # verify against the actual src format).
        rating = rating.picture.img['src'][-7:-4].replace('_', '.')
        print('{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'.format(brand, product, id, auto_ship, regular_price, rating))
        # NOTE(review): quit() is inside the row loop, so it runs once per row
        # and never runs when zip() yields no rows (the browser stays open).
        driver.quit()

导入请求
导入时间
从bs4导入BeautifulSoup
从selenium导入webdriver
headers={'User-Agent'：'Mozilla/5.0（X11；Ubuntu；Linux x86_64；rv:77.0）Gecko/20100101 Firefox/77.0'}
#使用Selenium在所有页面中获取按钮
测试https://www.chewy.com/blue-buffalo-basics-limited/dp/37047'
test=BeautifulSoup（requests.get（test\url，headers=headers.content，'html.parser'）
btn_计数=[]
对于测试中的btn_cnt。选择（'.js sku selector>div'）：
btn_cnt=btn_cnt['data-attributes'].计数（'isSelected'））
btn\u计数。追加（btn\u cnt）
按钮=列表（范围（1，btn\U cnt+1））
xpath=[]
对于b in按钮：
btn_path='/*[@id=“variation Size”]/div[2]/div['+str（b）+']/div/label'
打印（btn\U路径）
xpath.append（btn_路径）
print（“{：
我找到了问题所在：我没有把点击之后的当前页面加载到 soup 中，而是重新请求并加载了一个全新的源页面。
单击之后，我改为读取 driver.page_source，先给浏览器足够的时间加载（10秒），然后再用 BeautifulSoup 解析该页面源。现在可以正常工作了。
# Scrape brand, title, part number, prices and rating for every size option of
# one product page. Each option is selected by clicking its button in a
# Selenium-driven Chrome, and the HTML is taken from driver.page_source AFTER
# the click, so the parsed document reflects the option that was selected.
# Relies on `headers`, `requests`, `time`, `BeautifulSoup` and `webdriver`
# already being defined/imported earlier in the script.

test_url = 'https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

# One entry per SKU selector: the number of times 'isSelected' occurs in its
# data-attributes value (presumably once per option -- verify against markup).
btn_count = [
    selector['data-attributes'].count('isSelected')
    for selector in test.select('.js-sku-selector > div')
]
# As before, the LAST selector's count decides how many buttons are clicked.
# When no selector matched, fall back to 0 instead of raising NameError.
btn_cnt = btn_count[-1] if btn_count else 0
buttons = list(range(1, btn_cnt + 1))

# One XPath per 1-based button index under the "variation-Size" element.
xpath = []
for b in buttons:
    btn_path = '//*[@id="variation-Size"]/div[2]/div[' + str(b) + ']/div/label'
    print(btn_path)
    xpath.append(btn_path)


row_format = '{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'
print(row_format.format('brand', 'product', 'id', 'auto_ship', 'regular', 'rating'))
for btn in xpath:
    # A fresh browser per option keeps each click independent of the last one.
    driver = webdriver.Chrome(executable_path=r'C:\Users\public\chromedriver')
    try:
        driver.get(test_url)
        time.sleep(1)
        driver.find_element_by_xpath(btn).click()
        # Give the page time to re-render for the newly selected option.
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always close the browser exactly once, even if the click fails or
        # the page yields no rows (previously quit() lived inside the row
        # loop, so it could be skipped entirely or called repeatedly).
        driver.quit()

    # zip() stops at the shortest list, so a row is printed only when all six
    # lookups found a matching element.
    rows = zip(
        soup.findAll('span', attrs={'itemprop': 'brand'}),
        soup.findAll('div', attrs={'id': 'product-title'}),
        soup.findAll('div', attrs={'class': 'value js-part-number'}),
        soup.findAll('p', attrs={'class': 'autoship-pricing p'}),
        soup.findAll('span', attrs={'class': 'ga-eec__price'}),
        soup.select('div.ugc'),
    )
    for brand_tag, title_tag, part_tag, autoship_tag, price_tag, rating_tag in rows:
        brand = brand_tag.text
        # ' '.join(x.split()) collapses runs of whitespace into single spaces.
        product = ' '.join(title_tag.h1.text.split())
        part_number = ' '.join(part_tag.span.text.split())
        # Keep only the text before the first '('; partition() tolerates a
        # missing parenthesis where .index('(') would raise ValueError.
        auto_ship = ' '.join(autoship_tag.text.partition('(')[0].split())
        regular_price = ' '.join(price_tag.text.split())
        # Three characters just before the URL's last four characters, with
        # '_' turned into '.' (presumably e.g. "4_5" -> "4.5" -- verify).
        rating = rating_tag.picture.img['src'][-7:-4].replace('_', '.')
        print(row_format.format(brand, product, part_number, auto_ship, regular_price, rating))

#使用Selenium让按钮浏览所有页面
测试https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test=BeautifulSoup（requests.get（test\url，headers=headers.content，'html.parser'）
btn_计数=[]
对于测试中的btn_cnt。选择（'.js sku selector>div'）：
btn_cnt=btn_cnt['data-attributes'].计数（'isSelected'））
btn\u计数。追加（btn\u cnt）
按钮=列表（范围（1，btn\U cnt+1））
xpath=[]
对于b in按钮：
btn_path='/*[@id=“variation Size”]/div[2]/div['+str（b）+']/div/label'
打印（btn\U路径）
xpath.append（btn_路径）
print（“{：
我找到了问题所在：我没有把点击之后的当前页面加载到 soup 中，而是重新请求并加载了一个全新的源页面。
单击之后，我改为读取 driver.page_source，先给浏览器足够的时间加载（10秒），然后再用 BeautifulSoup 解析该页面源。现在可以正常工作了。
# Scrape brand, title, part number, prices and rating for every size option of
# one product page. Each option is selected by clicking its button in a
# Selenium-driven Chrome, and the HTML is taken from driver.page_source AFTER
# the click, so the parsed document reflects the option that was selected.
# Relies on `headers`, `requests`, `time`, `BeautifulSoup` and `webdriver`
# already being defined/imported earlier in the script.

test_url = 'https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

# One entry per SKU selector: the number of times 'isSelected' occurs in its
# data-attributes value (presumably once per option -- verify against markup).
btn_count = [
    selector['data-attributes'].count('isSelected')
    for selector in test.select('.js-sku-selector > div')
]
# As before, the LAST selector's count decides how many buttons are clicked.
# When no selector matched, fall back to 0 instead of raising NameError.
btn_cnt = btn_count[-1] if btn_count else 0
buttons = list(range(1, btn_cnt + 1))

# One XPath per 1-based button index under the "variation-Size" element.
xpath = []
for b in buttons:
    btn_path = '//*[@id="variation-Size"]/div[2]/div[' + str(b) + ']/div/label'
    print(btn_path)
    xpath.append(btn_path)


row_format = '{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'
print(row_format.format('brand', 'product', 'id', 'auto_ship', 'regular', 'rating'))
for btn in xpath:
    # A fresh browser per option keeps each click independent of the last one.
    driver = webdriver.Chrome(executable_path=r'C:\Users\public\chromedriver')
    try:
        driver.get(test_url)
        time.sleep(1)
        driver.find_element_by_xpath(btn).click()
        # Give the page time to re-render for the newly selected option.
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always close the browser exactly once, even if the click fails or
        # the page yields no rows (previously quit() lived inside the row
        # loop, so it could be skipped entirely or called repeatedly).
        driver.quit()

    # zip() stops at the shortest list, so a row is printed only when all six
    # lookups found a matching element.
    rows = zip(
        soup.findAll('span', attrs={'itemprop': 'brand'}),
        soup.findAll('div', attrs={'id': 'product-title'}),
        soup.findAll('div', attrs={'class': 'value js-part-number'}),
        soup.findAll('p', attrs={'class': 'autoship-pricing p'}),
        soup.findAll('span', attrs={'class': 'ga-eec__price'}),
        soup.select('div.ugc'),
    )
    for brand_tag, title_tag, part_tag, autoship_tag, price_tag, rating_tag in rows:
        brand = brand_tag.text
        # ' '.join(x.split()) collapses runs of whitespace into single spaces.
        product = ' '.join(title_tag.h1.text.split())
        part_number = ' '.join(part_tag.span.text.split())
        # Keep only the text before the first '('; partition() tolerates a
        # missing parenthesis where .index('(') would raise ValueError.
        auto_ship = ' '.join(autoship_tag.text.partition('(')[0].split())
        regular_price = ' '.join(price_tag.text.split())
        # Three characters just before the URL's last four characters, with
        # '_' turned into '.' (presumably e.g. "4_5" -> "4.5" -- verify).
        rating = rating_tag.picture.img['src'][-7:-4].replace('_', '.')
        print(row_format.format(brand, product, part_number, auto_ship, regular_price, rating))

#使用Selenium让按钮浏览所有页面
测试https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test=BeautifulSoup（requests.get（test\url，headers=headers.content，'html.parser'）
btn_计数=[]
对于测试中的btn_cnt。选择（'.js sku selector>div'）：
btn_cnt=btn_cnt['data-attributes'].计数（'isSelected'））
btn\u计数。追加（btn\u cnt）
按钮=列表（范围（1，btn\U cnt+1））
xpath=[]
对于b in按钮：
btn_path='/*[@id=“variation Size”]/div[2]/div['+str（b）+']/div/label'
打印（btn\U路径）
xpath.append（btn_路径）
打印（“{：
您应该单击的是单选按钮，而现在您单击的是标签。