使用Python的Selenium单击按钮后,HTML中的值没有变化

HTML值在使用Python单击Selenium按钮后不会更改,python,html,selenium,web-scraping,beautifulsoup,Python,Html,Selenium,Web Scraping,Beautifulsoup,我正在使用Soup和Selenium访问此页面,并试图获得所有包装类型的价格和评级列表 下面是我的代码: import requests import time from bs4 import BeautifulSoup from selenium import webdriver headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0'}

我正在使用BeautifulSoup和Selenium访问此页面,并试图获取所有包装规格的价格和评分列表。

下面是我的代码:

import requests
import time
from bs4 import BeautifulSoup
from selenium import webdriver

# Browser-like User-Agent header sent with every requests.get() call below.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0'}

# use Selenium to get buttons through all pages


# Fetch the product page once with requests and parse it, in order to
# discover how many size buttons there are.
test_url = 'https://www.chewy.com/blue-buffalo-basics-limited/dp/37047'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

# For each selector div, count how often 'isSelected' occurs in its
# data-attributes string (presumably once per size option -- TODO confirm).
btn_count = []
for btn_cnt in test.select('.js-sku-selector > div'):
    btn_cnt = btn_cnt['data-attributes'].count('isSelected')
    btn_count.append(btn_cnt)
# NOTE(review): this uses btn_cnt (the count from the last div visited), not
# the btn_count list; btn_cnt is undefined if the selector matched nothing.
buttons = list(range(1,btn_cnt+1))

# Build one XPath per 1-based button index, pointing at the <label> inside
# the element with id "variation-Size".
xpath = []
for b in buttons:
    btn_path = '//*[@id="variation-Size"]/div[2]/div[' + str(b) + ']/div/label'
    print(btn_path)
    xpath.append(btn_path)


# Table header; the same column widths are reused for every data row.
print('{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'.format('brand', 'product', 'id','auto_ship', 'regular','rating'))
for btn in xpath:
    test_url = 'https://www.chewy.com/blue-buffalo-basics-limited/dp/37047'
    # NOTE(review): `test` is parsed from a fresh requests.get() response. It
    # is independent of the Selenium browser session opened below, so the
    # click performed in the browser cannot change what is read from `test`.
    test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')
    # A new Chrome instance is started for every button.
    driver = webdriver.Chrome(executable_path=r'C:\Users\public\chromedriver')
    driver.get(test_url)
    time.sleep(5)
    driver.find_element_by_xpath(btn).click()
    time.sleep(5)
    # Walk the six element lists in lockstep; zip() stops at the shortest one.
    for brand, product, id, auto_ship, price, rating in zip(test.findAll('span', attrs={'itemprop': 'brand'}),
                                                            test.findAll('div', attrs={'id': 'product-title'}),
                                                            test.findAll('div', attrs={'class': 'value js-part-number'}),
                                                            test.findAll('p', attrs={'class': 'autoship-pricing p'}),
                                                            test.findAll('span', attrs={'class': 'ga-eec__price'}),
                                                            test.select('div.ugc')):
        #date = date.today()
        brand = brand.text
        # ' '.join(s.split()) collapses all runs of whitespace to one space.
        product = ' '.join(product.h1.text.split())
        id = ' '.join(id.span.text.split())
        # Keep only the text before the first '('; index() raises ValueError
        # when the autoship text contains no '('.
        p1 = auto_ship.text.index('(')
        auto_ship = ' '.join(auto_ship.text[:p1].split())
        regular_price = ' '.join(price.text.split())
        # Take the three characters preceding the last four of the image src
        # and turn '_' into '.' (presumably '..._4_5.png' -> '4.5'; verify).
        rating = rating.picture.img['src'][-7:-4].replace('_', '.')
        print('{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'.format(brand, product, id, auto_ship, regular_price, rating))
        # NOTE(review): quit() sits inside the row loop, so it runs once per
        # zipped row and not at all when zip() yields nothing.
        driver.quit()
导入请求
导入时间
从bs4导入BeautifulSoup
从selenium导入webdriver
headers={'User-Agent':'Mozilla/5.0(X11;Ubuntu;Linux x86_64;rv:77.0)Gecko/20100101 Firefox/77.0'}
#使用Selenium在所有页面中获取按钮
测试https://www.chewy.com/blue-buffalo-basics-limited/dp/37047'
test=BeautifulSoup(requests.get(test\url,headers=headers.content,'html.parser')
btn_计数=[]
对于测试中的btn_cnt。选择('.js sku selector>div'):
btn_cnt=btn_cnt['data-attributes'].计数('isSelected'))
btn\u计数。追加(btn\u cnt)
按钮=列表(范围(1,btn\U cnt+1))
xpath=[]
对于b in按钮:
btn_path='/*[@id=“variation Size”]/div[2]/div['+str(b)+']/div/label'
打印(btn\U路径)
xpath.append(btn_路径)

我找到了问题所在。我没有把浏览器中的当前页面加载到soup中,而是重新加载了一个全新的源页面。

单击之后,我给浏览器足够的时间加载(10秒),然后添加了driver.page_source来读取浏览器中的页面源码,并用BeautifulSoup解析它。现在可以正常工作了。

# Scrape brand, product, part number, prices and rating for every size option
# of one product page.
#
# The number of size buttons is discovered from the static HTML fetched with
# requests. Each button is then clicked in a real browser, and the page source
# reported by that browser (driver.page_source) is parsed, so the scraped
# values come from the page as it stands after the click.
#
# Uses requests, time, BeautifulSoup, webdriver and `headers` defined earlier
# in the file.

test_url = 'https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

# Count the 'isSelected' occurrences in each selector div's data-attributes
# string. btn_cnt starts at 0 so that a page without any selector div simply
# produces no buttons instead of raising NameError further down.
btn_count = []
btn_cnt = 0
for selector in test.select('.js-sku-selector > div'):
    btn_cnt = selector['data-attributes'].count('isSelected')
    btn_count.append(btn_cnt)
buttons = list(range(1, btn_cnt + 1))

# One XPath per 1-based button index, pointing at the button's <label>.
xpath = []
for b in buttons:
    btn_path = '//*[@id="variation-Size"]/div[2]/div[' + str(b) + ']/div/label'
    print(btn_path)
    xpath.append(btn_path)

# Shared column layout for the header and every data row.
row_format = '{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'
print(row_format.format('brand', 'product', 'id', 'auto_ship', 'regular', 'rating'))

for btn in xpath:
    # A fresh browser per button keeps every click starting from the same
    # freshly loaded page.
    driver = webdriver.Chrome(executable_path=r'C:\Users\public\chromedriver')
    try:
        driver.get(test_url)
        time.sleep(1)
        driver.find_element_by_xpath(btn).click()
        # Give the page time to update before reading its source.
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always close the browser: also when the click fails, and exactly
        # once regardless of how many rows are parsed afterwards.
        driver.quit()

    # Walk the six element lists in lockstep; zip() stops at the shortest one.
    rows = zip(soup.findAll('span', attrs={'itemprop': 'brand'}),
               soup.findAll('div', attrs={'id': 'product-title'}),
               soup.findAll('div', attrs={'class': 'value js-part-number'}),
               soup.findAll('p', attrs={'class': 'autoship-pricing p'}),
               soup.findAll('span', attrs={'class': 'ga-eec__price'}),
               soup.select('div.ugc'))
    for brand, product, part_number, auto_ship, price, rating in rows:
        brand = brand.text
        # ' '.join(s.split()) collapses all runs of whitespace to one space.
        product = ' '.join(product.h1.text.split())
        part_number = ' '.join(part_number.span.text.split())
        # Keep the text before the first '('; partition() returns the whole
        # text when there is no '(' instead of raising ValueError.
        auto_ship = ' '.join(auto_ship.text.partition('(')[0].split())
        regular_price = ' '.join(price.text.split())
        # Three characters preceding the last four of the image src, with
        # '_' turned into '.'.
        rating = rating.picture.img['src'][-7:-4].replace('_', '.')
        print(row_format.format(brand, product, part_number, auto_ship, regular_price, rating))
#使用Selenium让按钮浏览所有页面
测试https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test=BeautifulSoup(requests.get(test\url,headers=headers.content,'html.parser')
btn_计数=[]
对于测试中的btn_cnt。选择('.js sku selector>div'):
btn_cnt=btn_cnt['data-attributes'].计数('isSelected'))
btn\u计数。追加(btn\u cnt)
按钮=列表(范围(1,btn\U cnt+1))
xpath=[]
对于b in按钮:
btn_path='/*[@id=“variation Size”]/div[2]/div['+str(b)+']/div/label'
打印(btn\U路径)
xpath.append(btn_路径)

我找到了问题所在。我没有把浏览器中的当前页面加载到soup中,而是重新加载了一个全新的源页面。

单击之后,我给浏览器足够的时间加载(10秒),然后添加了driver.page_source来读取浏览器中的页面源码,并用BeautifulSoup解析它。现在可以正常工作了。

# Scrape brand, product, part number, prices and rating for every size option
# of one product page.
#
# The number of size buttons is discovered from the static HTML fetched with
# requests. Each button is then clicked in a real browser, and the page source
# reported by that browser (driver.page_source) is parsed, so the scraped
# values come from the page as it stands after the click.
#
# Uses requests, time, BeautifulSoup, webdriver and `headers` defined earlier
# in the file.

test_url = 'https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

# Count the 'isSelected' occurrences in each selector div's data-attributes
# string. btn_cnt starts at 0 so that a page without any selector div simply
# produces no buttons instead of raising NameError further down.
btn_count = []
btn_cnt = 0
for selector in test.select('.js-sku-selector > div'):
    btn_cnt = selector['data-attributes'].count('isSelected')
    btn_count.append(btn_cnt)
buttons = list(range(1, btn_cnt + 1))

# One XPath per 1-based button index, pointing at the button's <label>.
xpath = []
for b in buttons:
    btn_path = '//*[@id="variation-Size"]/div[2]/div[' + str(b) + ']/div/label'
    print(btn_path)
    xpath.append(btn_path)

# Shared column layout for the header and every data row.
row_format = '{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'
print(row_format.format('brand', 'product', 'id', 'auto_ship', 'regular', 'rating'))

for btn in xpath:
    # A fresh browser per button keeps every click starting from the same
    # freshly loaded page.
    driver = webdriver.Chrome(executable_path=r'C:\Users\public\chromedriver')
    try:
        driver.get(test_url)
        time.sleep(1)
        driver.find_element_by_xpath(btn).click()
        # Give the page time to update before reading its source.
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always close the browser: also when the click fails, and exactly
        # once regardless of how many rows are parsed afterwards.
        driver.quit()

    # Walk the six element lists in lockstep; zip() stops at the shortest one.
    rows = zip(soup.findAll('span', attrs={'itemprop': 'brand'}),
               soup.findAll('div', attrs={'id': 'product-title'}),
               soup.findAll('div', attrs={'class': 'value js-part-number'}),
               soup.findAll('p', attrs={'class': 'autoship-pricing p'}),
               soup.findAll('span', attrs={'class': 'ga-eec__price'}),
               soup.select('div.ugc'))
    for brand, product, part_number, auto_ship, price, rating in rows:
        brand = brand.text
        # ' '.join(s.split()) collapses all runs of whitespace to one space.
        product = ' '.join(product.h1.text.split())
        part_number = ' '.join(part_number.span.text.split())
        # Keep the text before the first '('; partition() returns the whole
        # text when there is no '(' instead of raising ValueError.
        auto_ship = ' '.join(auto_ship.text.partition('(')[0].split())
        regular_price = ' '.join(price.text.split())
        # Three characters preceding the last four of the image src, with
        # '_' turned into '.'.
        rating = rating.picture.img['src'][-7:-4].replace('_', '.')
        print(row_format.format(brand, product, part_number, auto_ship, regular_price, rating))
#使用Selenium让按钮浏览所有页面
测试https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test=BeautifulSoup(requests.get(test\url,headers=headers.content,'html.parser')
btn_计数=[]
对于测试中的btn_cnt。选择('.js sku selector>div'):
btn_cnt=btn_cnt['data-attributes'].计数('isSelected'))
btn\u计数。追加(btn\u cnt)
按钮=列表(范围(1,btn\U cnt+1))
xpath=[]
对于b in按钮:
btn_path='/*[@id=“variation Size”]/div[2]/div['+str(b)+']/div/label'
打印(btn\U路径)
xpath.append(btn_路径)

您应该单击的是单选按钮,而现在您单击的是标签。