无法使用Selenium Python打印来自多个URL的数据
这段代码本身可以运行,但我遇到的问题是:只有第一个URL的数据能被成功抓取,之后就会抛出错误(如下所示)。有人能帮我解决这个问题吗?它只打印出一个链接的数据,随后就报"会话未创建"(session not created)错误。无法使用Selenium Python打印来自多个URL的数据,python,selenium,beautifulsoup,webdriver,Python,Selenium,Beautifulsoup,Webdriver,这段代码本身可以运行,但我遇到的问题是:只有第一个URL的数据能被成功抓取,之后就会抛出错误(如下所示)。有人能帮我解决这个问题吗?它只打印出一个链接的数据,随后就报"会话未创建"(session not created)错误。 import requests from bs4 import BeautifulSoup from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.keys im
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import pandas as pd
import time
# Product pages to scrape, visited in order.
url = [
    "https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html",
    "https://www.skechers.com/women/shoes/ultra-flex---statements/12841.html",
    "https://www.skechers.com/women/shoes/ultra-flex/12843.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---extreme-outlook/124580.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---lunar-mars/124577.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst/124575.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---coco-jazz/124603.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---magnificent-glow/124602.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy/124191.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy---paradise/15601.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---vivid-blush/149408.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---kaleidoscope-splash/149134.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0/149062.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---rising-gaze/149611.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-air---twirl/124073.html",
    "https://www.skechers.com/women/shoes/gowalk-air---whirl/124074.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine---her-best/104092.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine/104090.html",
    "https://www.skechers.com/women/shoes/glide-step---stepping-up/104086.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---sharp-witted/149007.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---paradise-sky/149274.html",
    "https://www.skechers.com/women/shoes/skechers-gorun-smart/128230.html",
]


def _make_chrome_options():
    """Return a fresh ChromeOptions object configured for one browser launch."""
    opts = webdriver.ChromeOptions()
    opts.add_experimental_option("excludeSwitches", ['enable-automation'])
    opts.add_argument('--disable-blink-features=AutomationControlled')
    opts.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
    opts.add_argument("--remote-debugging-port=9222")
    return opts


# NOTE: this is the version from the question. A new Chrome instance is
# launched for every URL on the same fixed remote-debugging port and is never
# quit; the author reports that only the first URL is scraped before a
# "session not created" error is raised.
for endpoint in url:
    options = _make_chrome_options()
    driver = webdriver.Chrome('F:/chromedriver.exe', options=options)
    driver.get(endpoint)
    time.sleep(5)  # fixed pause before reading the page source
    image = None
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    content = soup.find_all(
        'div',
        class_='c-product-description-features s-product-description-features row')
    pid = soup.find('span', class_='product-id').text
    # Print one line per <img> that carries a srcset attribute.
    for img_tag in soup.find_all('img', srcset=True):
        print(pid, img_tag['srcset'], img_tag['alt'])
请在 for 循环之外创建 Chrome 驱动程序实例。我还没有测试过,但这样应该可以正常工作:
# Create a single Chrome session up front and reuse it for every URL, so the
# fixed remote-debugging port is only ever claimed by one browser instance.
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ['enable-automation'])
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
options.add_argument("--remote-debugging-port=9222")
driver = webdriver.Chrome('F:/chromedriver.exe', options=options)

# Product pages to scrape, visited in order.
url = [
    "https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html",
    "https://www.skechers.com/women/shoes/ultra-flex---statements/12841.html",
    "https://www.skechers.com/women/shoes/ultra-flex/12843.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---extreme-outlook/124580.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---lunar-mars/124577.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst/124575.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---coco-jazz/124603.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---magnificent-glow/124602.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy/124191.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy---paradise/15601.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---vivid-blush/149408.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---kaleidoscope-splash/149134.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0/149062.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---rising-gaze/149611.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-air---twirl/124073.html",
    "https://www.skechers.com/women/shoes/gowalk-air---whirl/124074.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine---her-best/104092.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine/104090.html",
    "https://www.skechers.com/women/shoes/glide-step---stepping-up/104086.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---sharp-witted/149007.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---paradise-sky/149274.html",
    "https://www.skechers.com/women/shoes/skechers-gorun-smart/128230.html",
]

try:
    for endpoint in url:
        driver.get(endpoint)
        time.sleep(5)  # fixed pause before reading the page source
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # A page without the product-id span would otherwise raise
        # AttributeError on `.text` and abort the remaining URLs.
        pid_tag = soup.find('span', class_='product-id')
        if pid_tag is None:
            print("product id not found, skipping:", endpoint)
            continue
        pid = pid_tag.text

        # Print one line per <img> that carries a srcset attribute; `alt` is
        # optional on <img>, so fall back to an empty string instead of
        # raising KeyError.
        for link in soup.find_all('img', srcset=True):
            print(pid, link['srcset'], link.get('alt', ''))
finally:
    # Always shut the browser down, even when a page fails, so the Chrome
    # process and its debugging port are released.
    driver.quit()
或者,在处理完每个 URL 之后调用 driver.quit() 退出会话:
# Product pages to scrape, visited in order.
url = [
    "https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html",
    "https://www.skechers.com/women/shoes/ultra-flex---statements/12841.html",
    "https://www.skechers.com/women/shoes/ultra-flex/12843.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---extreme-outlook/124580.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---lunar-mars/124577.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst/124575.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---coco-jazz/124603.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---magnificent-glow/124602.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy/124191.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy---paradise/15601.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---vivid-blush/149408.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---kaleidoscope-splash/149134.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0/149062.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---rising-gaze/149611.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-air---twirl/124073.html",
    "https://www.skechers.com/women/shoes/gowalk-air---whirl/124074.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine---her-best/104092.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine/104090.html",
    "https://www.skechers.com/women/shoes/glide-step---stepping-up/104086.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---sharp-witted/149007.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---paradise-sky/149274.html",
    "https://www.skechers.com/women/shoes/skechers-gorun-smart/128230.html",
]

# The browser options never change between URLs, so build them once.
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ['enable-automation'])
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
options.add_argument("--remote-debugging-port=9222")

for endpoint in url:
    # One browser session per URL; it must be quit before the next launch
    # because every session asks for the same remote-debugging port.
    driver = webdriver.Chrome('F:/chromedriver.exe', options=options)
    try:
        driver.get(endpoint)
        time.sleep(5)  # fixed pause before reading the page source
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # A page without the product-id span would otherwise raise
        # AttributeError on `.text`.
        pid_tag = soup.find('span', class_='product-id')
        if pid_tag is None:
            print("product id not found, skipping:", endpoint)
            continue  # the finally clause below still quits the driver
        pid = pid_tag.text

        # Print one line per <img> that carries a srcset attribute; `alt` is
        # optional on <img>, so fall back to an empty string instead of
        # raising KeyError.
        for link in soup.find_all('img', srcset=True):
            print(pid, link['srcset'], link.get('alt', ''))
    finally:
        # Quit even when scraping this page failed; otherwise the leftover
        # browser keeps the port and the next launch cannot create a session.
        driver.quit()
问题:
问题出在远程调试端口(--remote-debugging-port=9222)上:该端口已经被第一个浏览器实例占用,后续新建的实例无法再使用它。
解决方案:
您有什么理由必须打开多个不同的浏览器会话吗?您完全可以只用一个浏览器会话,依次导航到各个URL——反正您并不是在并行执行。如果确实要为每个URL新建会话,为了避免这个错误,请在每次处理完后使用
driver.quit() 退出驱动程序。
@KunduK 可以在同一个浏览器中打开多个URL吗??请检查您使用的 chromedriver 版本是否正确。我用的是最新的 Chrome 驱动程序,昨天刚更新过。
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
url = ["https://www.skechers.com/women/shoes/ultra-flex/12843.html",
"https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html",
"https://www.skechers.com/women/shoes/ultra-flex---statements/12841.html",
"https://www.skechers.com/women/shoes/ultra-flex/12843.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---extreme-outlook/124580.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---lunar-mars/124577.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst/124575.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-stability---coco-jazz/124603.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-stability---magnificent-glow/124602.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-joy/124191.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-joy---paradise/15601.html",
"https://www.skechers.com/women/shoes/skech-air-element-2.0---vivid-blush/149408.html",
"https://www.skechers.com/women/shoes/skech-air-element-2.0---kaleidoscope-splash/149134.html",
"https://www.skechers.com/women/shoes/skech-air-element-2.0/149062.html",
"https://www.skechers.com/women/shoes/skech-air-element-2.0---rising-gaze/149611.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-air---twirl/124073.html",
"https://www.skechers.com/women/shoes/gowalk-air---whirl/124074.html",
"https://www.skechers.com/women/shoes/skechers-arch-fit-refine---her-best/104092.html",
"https://www.skechers.com/women/shoes/skechers-arch-fit-refine/104090.html",
"https://www.skechers.com/women/shoes/glide-step---stepping-up/104086.html",
"https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---sharp-witted/149007.html",
"https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---paradise-sky/149274.html",
"https://www.skechers.com/women/shoes/skechers-gorun-smart/128230.html"]
count=0
for endpoint in url:
print(count)
count+=1
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ['enable-automation'])
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_argument(
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
#options.add_argument("--remote-debugging-port=9222")
driver = webdriver.Chrome('./chromedriver.exe', options=options)
driver.get(endpoint)