HTML值在使用Python单击Selenium按钮后不会更改
标签:python, html, selenium, web-scraping, beautifulsoup
我正在使用 BeautifulSoup 和 Selenium 访问此页面,并试图获取所有包装类型的价格和评级列表。下面是我的代码:
import requests
import time
from bs4 import BeautifulSoup
from selenium import webdriver
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0'}

# Question snippet: click every size button with Selenium and print one row
# of product details per button.
test_url = 'https://www.chewy.com/blue-buffalo-basics-limited/dp/37047'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

# The number of 'isSelected' occurrences in a selector's data-attributes is
# used as the number of buttons; the value from the last selector wins.
btn_count = []
for selector_div in test.select('.js-sku-selector > div'):
    btn_cnt = selector_div['data-attributes'].count('isSelected')
    btn_count.append(btn_cnt)
buttons = list(range(1, btn_cnt + 1))

# One XPath per button position (XPath positions are 1-based).
xpath = []
for position in buttons:
    btn_path = '//*[@id="variation-Size"]/div[2]/div[{}]/div/label'.format(position)
    print(btn_path)
    xpath.append(btn_path)

row_format = '{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'
print(row_format.format('brand', 'product', 'id', 'auto_ship', 'regular', 'rating'))

for btn in xpath:
    # NOTE: `test` is parsed from a fresh requests download, not from the
    # Selenium-driven browser, so the click below never affects the values
    # printed in this loop. This is the behaviour the question asks about and
    # it is intentionally left as posted.
    test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

    driver = webdriver.Chrome(executable_path=r'C:\Users\public\chromedriver')
    driver.get(test_url)
    time.sleep(5)
    driver.find_element_by_xpath(btn).click()
    time.sleep(5)

    brands = test.findAll('span', attrs={'itemprop': 'brand'})
    titles = test.findAll('div', attrs={'id': 'product-title'})
    part_numbers = test.findAll('div', attrs={'class': 'value js-part-number'})
    autoship_prices = test.findAll('p', attrs={'class': 'autoship-pricing p'})
    list_prices = test.findAll('span', attrs={'class': 'ga-eec__price'})
    ratings = test.select('div.ugc')

    for brand_tag, title_tag, part_tag, autoship_tag, price_tag, rating_tag in zip(
            brands, titles, part_numbers, autoship_prices, list_prices, ratings):
        brand = brand_tag.text
        product = ' '.join(title_tag.h1.text.split())
        part_number = ' '.join(part_tag.span.text.split())
        # Keep only the text in front of the first '('.
        autoship_text = autoship_tag.text
        paren_at = autoship_text.index('(')
        auto_ship = ' '.join(autoship_text[:paren_at].split())
        regular_price = ' '.join(price_tag.text.split())
        # Three characters just before the last four of the image URL, with
        # '_' turned into '.'.
        rating = rating_tag.picture.img['src'][-7:-4].replace('_', '.')
        print(row_format.format(brand, product, part_number, auto_ship, regular_price, rating))

    # Assumed to belong to the per-button loop (indentation was lost in the paste).
    driver.quit()
导入请求
导入时间
从bs4导入BeautifulSoup
从selenium导入webdriver
headers={'User-Agent':'Mozilla/5.0(X11;Ubuntu;Linux x86_64;rv:77.0)Gecko/20100101 Firefox/77.0'}
#使用Selenium在所有页面中获取按钮
测试https://www.chewy.com/blue-buffalo-basics-limited/dp/37047'
test=BeautifulSoup(requests.get(test\url,headers=headers.content,'html.parser')
btn_计数=[]
对于测试中的btn_cnt。选择('.js sku selector>div'):
btn_cnt=btn_cnt['data-attributes'].计数('isSelected'))
btn\u计数。追加(btn\u cnt)
按钮=列表(范围(1,btn\U cnt+1))
xpath=[]
对于b in按钮:
btn_path='/*[@id=“variation Size”]/div[2]/div['+str(b)+']/div/label'
打印(btn\U路径)
xpath.append(btn_路径)
print(“{:
我找到了问题所在:我没有把浏览器当前的页面加载到 soup 中,而是重新加载了一个全新的源页面。
单击之后,我先给浏览器足够的时间加载(10秒),再读取 driver.page_source 并用 BeautifulSoup 解析该页面源码。现在可以正常工作了。
# Use Selenium to click every size button on the product page and print one
# row of product details per button, read from the browser's own page source
# after the click.
test_url = 'https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

# The number of 'isSelected' occurrences in a selector's data-attributes is
# used as the number of buttons to click.
btn_count = [
    selector['data-attributes'].count('isSelected')
    for selector in test.select('.js-sku-selector > div')
]
# Use the count of the last selector found; fall back to zero buttons when
# nothing matched instead of failing with a NameError.
btn_cnt = btn_count[-1] if btn_count else 0
buttons = list(range(1, btn_cnt + 1))

# One XPath per button position (XPath positions are 1-based).
xpath = []
for b in buttons:
    btn_path = '//*[@id="variation-Size"]/div[2]/div[' + str(b) + ']/div/label'
    print(btn_path)
    xpath.append(btn_path)

row_format = '{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'
print(row_format.format('brand', 'product', 'id', 'auto_ship', 'regular', 'rating'))

for btn in xpath:
    driver = webdriver.Chrome(executable_path=r'C:\Users\public\chromedriver')
    try:
        driver.get(test_url)
        time.sleep(1)
        driver.find_element_by_xpath(btn).click()
        # Give the page time to update before reading it back.
        time.sleep(5)

        # Parse what the browser currently shows, not a fresh download.
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')

        # zip() stops at the shortest list, so a field missing from the page
        # yields no row rather than an error.
        for brand, product, part_number, auto_ship, price, rating in zip(
                soup.findAll('span', attrs={'itemprop': 'brand'}),
                soup.findAll('div', attrs={'id': 'product-title'}),
                soup.findAll('div', attrs={'class': 'value js-part-number'}),
                soup.findAll('p', attrs={'class': 'autoship-pricing p'}),
                soup.findAll('span', attrs={'class': 'ga-eec__price'}),
                soup.select('div.ugc')):
            brand = brand.text
            product = ' '.join(product.h1.text.split())
            part_number = ' '.join(part_number.span.text.split())
            # Keep the text in front of the first '('; partition() also copes
            # with text that has no parenthesis at all.
            auto_ship = ' '.join(auto_ship.text.partition('(')[0].split())
            regular_price = ' '.join(price.text.split())
            # Three characters just before the last four of the image URL,
            # with '_' turned into '.' (presumably e.g. '4_5' ahead of a
            # 4-character extension -- TODO confirm against the live page).
            rating = rating.picture.img['src'][-7:-4].replace('_', '.')
            print(row_format.format(brand, product, part_number, auto_ship, regular_price, rating))
    finally:
        # Always close the browser, even when the click or parsing fails.
        driver.quit()
#使用Selenium让按钮浏览所有页面
测试https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test=BeautifulSoup(requests.get(test\url,headers=headers.content,'html.parser')
btn_计数=[]
对于测试中的btn_cnt。选择('.js sku selector>div'):
btn_cnt=btn_cnt['data-attributes'].计数('isSelected'))
btn\u计数。追加(btn\u cnt)
按钮=列表(范围(1,btn\U cnt+1))
xpath=[]
对于b in按钮:
btn_path='/*[@id=“variation Size”]/div[2]/div['+str(b)+']/div/label'
打印(btn\U路径)
xpath.append(btn_路径)
print(“{:
我找到了问题所在:我没有把浏览器当前的页面加载到 soup 中,而是重新加载了一个全新的源页面。
单击之后,我先给浏览器足够的时间加载(10秒),再读取 driver.page_source 并用 BeautifulSoup 解析该页面源码。现在可以正常工作了。
# Use Selenium to click every size button on the product page and print one
# row of product details per button, read from the browser's own page source
# after the click.
test_url = 'https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test = BeautifulSoup(requests.get(test_url, headers=headers).content, 'html.parser')

# The number of 'isSelected' occurrences in a selector's data-attributes is
# used as the number of buttons to click.
btn_count = [
    selector['data-attributes'].count('isSelected')
    for selector in test.select('.js-sku-selector > div')
]
# Use the count of the last selector found; fall back to zero buttons when
# nothing matched instead of failing with a NameError.
btn_cnt = btn_count[-1] if btn_count else 0
buttons = list(range(1, btn_cnt + 1))

# One XPath per button position (XPath positions are 1-based).
xpath = []
for b in buttons:
    btn_path = '//*[@id="variation-Size"]/div[2]/div[' + str(b) + ']/div/label'
    print(btn_path)
    xpath.append(btn_path)

row_format = '{:<25}{:<100}{:<15}{:<15}{:<15}{:<15}'
print(row_format.format('brand', 'product', 'id', 'auto_ship', 'regular', 'rating'))

for btn in xpath:
    driver = webdriver.Chrome(executable_path=r'C:\Users\public\chromedriver')
    try:
        driver.get(test_url)
        time.sleep(1)
        driver.find_element_by_xpath(btn).click()
        # Give the page time to update before reading it back.
        time.sleep(5)

        # Parse what the browser currently shows, not a fresh download.
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')

        # zip() stops at the shortest list, so a field missing from the page
        # yields no row rather than an error.
        for brand, product, part_number, auto_ship, price, rating in zip(
                soup.findAll('span', attrs={'itemprop': 'brand'}),
                soup.findAll('div', attrs={'id': 'product-title'}),
                soup.findAll('div', attrs={'class': 'value js-part-number'}),
                soup.findAll('p', attrs={'class': 'autoship-pricing p'}),
                soup.findAll('span', attrs={'class': 'ga-eec__price'}),
                soup.select('div.ugc')):
            brand = brand.text
            product = ' '.join(product.h1.text.split())
            part_number = ' '.join(part_number.span.text.split())
            # Keep the text in front of the first '('; partition() also copes
            # with text that has no parenthesis at all.
            auto_ship = ' '.join(auto_ship.text.partition('(')[0].split())
            regular_price = ' '.join(price.text.split())
            # Three characters just before the last four of the image URL,
            # with '_' turned into '.' (presumably e.g. '4_5' ahead of a
            # 4-character extension -- TODO confirm against the live page).
            rating = rating.picture.img['src'][-7:-4].replace('_', '.')
            print(row_format.format(brand, product, part_number, auto_ship, regular_price, rating))
    finally:
        # Always close the browser, even when the click or parsing fails.
        driver.quit()
#使用Selenium让按钮浏览所有页面
测试https://www.chewy.com/wellness-large-breed-complete-health/dp/34356'
test=BeautifulSoup(requests.get(test\url,headers=headers.content,'html.parser')
btn_计数=[]
对于测试中的btn_cnt。选择('.js sku selector>div'):
btn_cnt=btn_cnt['data-attributes'].计数('isSelected'))
btn\u计数。追加(btn\u cnt)
按钮=列表(范围(1,btn\U cnt+1))
xpath=[]
对于b in按钮:
btn_path='/*[@id=“variation Size”]/div[2]/div['+str(b)+']/div/label'
打印(btn\U路径)
xpath.append(btn_路径)
打印(“{:
您应该单击单选按钮本身,而您现在单击的是标签。