为什么Python Selenium经常导致页面无法加载?
这更多的是一个帮助我理解(并安抚我的沮丧情绪)的问题,而不是一个如何解决的问题,但正如标题所述:为什么在Selenium上加载URL/页面(在我的例子中使用Python)经常加载失败并抛出NoSuchElementException错误?我理解,与正常浏览一样,有时网页无法加载。但我发现,我加载URL/页面的尝试中有25%-50%在30秒超时的情况下不起作用,因此,在URL/页面最终加载成功之前,我必须重试多达10次,每次尝试之间的超时时间不断增加。如果你能帮我理解,我将不胜感激。先谢谢你的解释。示例代码如下。
从selenium导入webdriver
从selenium.webdriver.chrome.options导入选项
从selenium.webdriver.common.by导入
从selenium.webdriver.support.ui导入WebDriverWait
从selenium.webdriver.support将预期的_条件导入为EC
从selenium.common.exceptions导入NoSuchElementException
导入mysql.connector
导入时间
导入日期时间
从pyvirtualdisplay导入显示
显示=显示(可见=0,大小=(19201080))
display.start()
chrome_options=options()
chrome\u选项。添加\u参数(“--no sandbox”)
chrome_选项。添加_参数(“--disable setuid sandbox”)
driver=webdriver.Chrome(Chrome\u选项=Chrome\u选项)
con=mysql.connector.connect(*****)
cursor=con.cursor()
sql_用户_搜索=“**”
cursor.execute(sql\u用户\u搜索)
searches=cursor.fetchall()
对于搜索中的z:
偏移量=0
url=”https://www.carsales.com.au/cars/{0}/{1}/”。格式(z[2],z[4],偏移量)
睡眠时间=5
重试次数=100次
错误=0
对于范围内的循环Cow(0,重试次数):
尝试:
错误=0
获取驱动程序(url)
时间。睡眠(睡眠时间)
驱动程序。通过xpath(“”/*[@class=“result set container”]”)查找元素。获取属性(“outerHTML”)
打印(“成功”)
除无任何例外:
打印(“错误”)
错误=1
通过
如果错误==1:
时间。sleep(sleep#时间)#在再次尝试获取数据之前等待
sleep_time+=1#在此处实施退避算法,即指数退避
其他:
打破
total_pagination=driver。通过xpath(“”/div[@class=“tabbed pagination”]/div[@class=“pagination container”]/div[@class=“pagination container”]/div[@class=“pagination”]/p”“”[0]查找_元素。文本
页数拆分=分页总数拆分(“”)
页数=int(页数拆分[1])
第页=0
当页面<页数\u时:
偏移量=第12页
url=”https://www.carsales.com.au/cars/{0}/{1}/?offset={2}”。格式(z[2],z[4],offset)
打印(url)
睡眠时间=5
重试次数=100次
错误=0
对于范围内的LoopyLop(0,重试次数):
尝试:
错误=0
获取驱动程序(url)
时间。睡眠(睡眠时间)
驱动程序。通过xpath(“”/*[@class=“result set container”]”)查找元素。获取属性(“outerHTML”)
打印(“成功”)
除无任何例外:
打印(“错误”)
错误=1
通过
如果错误==1:
时间。sleep(sleep#时间)#在再次尝试获取数据之前等待
sleep_time+=1#在此处实施退避算法,即指数退避
其他:
打破
rows=driver.find\u elements\u by_xpath(“//div[contains(@class,“listing item”)]”)
计数=len(行)
i=0
当我数的时候:
title=rows[i]。通过xpath(“//div[contains(@class,“title”)]/a/h2”“”[i]查找元素[u]。文本
i=i+1
query=“”****”。格式(*****)
cursor.execute(查询)
con.commit()
第页=第+1页
cursor.close()
con.close()
driver.quit()
display.popen.kill()
打印(“成功”)
具有30秒超时的第二个示例代码
该网站是
从selenium导入webdriver
从selenium.webdriver.chrome.options导入选项
从selenium.webdriver.common.by导入
从selenium.webdriver.support.ui导入WebDriverWait
从selenium.webdriver.support将预期的_条件导入为EC
导入mysql.connector
导入时间
从pyvirtualdisplay导入显示
显示=显示(可见=0,大小=(19201080))
display.start()
chrome_options=options()
chrome\u选项。添加\u参数(“--no sandbox”)
chrome_选项。添加_参数(“--disable setuid sandbox”)
driver=webdriver.Chrome(Chrome\u选项=Chrome\u选项)
date=int(time.strftime(“%d”))
月=int(time.strftime(“%m”))
con=mysql.connector.connect(*****)
cursor=con.cursor()
对于范围(11,13)内的z:
如果z==9:
结束日期=31
elif z==10:
结束日期=32
elif z==11:
结束日期=31
elif z==12:
结束日期=32
elif z==8:
结束日期=32
开始日期=1
如果z==月份且(结束日期-日期)<5:
开始日期=结束日期
elif z==(月+1)和(结束日期-日期)<5:
开始日期=开始日期+4-(结束日期-日期)
elif z>月份:
开始日期=1
其他:
开始日期=日期
打印(z)
打印(开始日期)
打印(结束日期)
对于范围内的x(开始日期、结束日期):
时间。睡眠(2)
x_url=str(x).zfill(2)
z_url=str(z).zfill(2)
日期=x_url+“-”+z_url
url=”https://www.tiket.com/pesawat/cari?d=DPS&a=JKT&date=2017-{1} -{0}&成人=2&儿童=0&婴儿=0“。格式(x_url,z_url)
打印(url)
获取驱动程序(url)
时间。睡眠(30)
last_height=驱动程序。执行_脚本(“return document.body.scrollHeight”)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import mysql.connector
import time
import datetime
from pyvirtualdisplay import Display
# --- Headless display + browser setup --------------------------------------
# pyvirtualdisplay starts a virtual X framebuffer so Chrome can run on a
# server with no physical display; visible=0 keeps it off-screen.
display = Display(visible=0, size=(1920, 1080))
display.start()
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-setuid-sandbox")
# NOTE(review): chrome_options= is the Selenium 3 keyword; Selenium 4 renamed
# it to options= — confirm the installed Selenium version.
driver = webdriver.Chrome(chrome_options=chrome_options)

# --- Load the saved user searches from MySQL (credentials/SQL redacted) ----
con = mysql.connector.connect(*****)  # placeholder: connection args redacted
cursor = con.cursor()
sql_user_searches = "****"  # placeholder: the SELECT statement was redacted
cursor.execute(sql_user_searches)
searches = cursor.fetchall()

# One scrape per saved-search row; z[2] and z[4] are spliced into the URL.
for z in searches:
    offset = 0
    # NOTE(review): three args are passed but the template only uses {0}/{1},
    # so offset is silently ignored on this first page load.
    url = "https://www.carsales.com.au/cars/{0}/{1}/".format(z[2],z[4],offset)
    sleep_time = 5
    num_retries = 100
    error = 0
    # Retry loop: load the page, sleep a fixed interval, then probe for the
    # results container; NoSuchElementException means the page did not render.
    for loopingcow in range(0, num_retries):
        try:
            error = 0
            driver.get(url)
            time.sleep(sleep_time)  # fixed wait; WebDriverWait would be more robust
            driver.find_element_by_xpath("""//*[@class="result-set-container "]""").get_attribute("outerHTML")
            print("success")
        except NoSuchElementException:
            print("error")
            error = 1
            pass  # redundant: the except body already ends here
        if error == 1:
            time.sleep(sleep_time) # wait before trying to fetch the data again
            sleep_time += 1 # Implement your backoff algorithm here i.e. exponential backoff
        else:
            break
    # Read the pagination text to learn the total number of result pages.
    # NOTE(review): if every retry above failed, this still runs against
    # whatever page is currently loaded and may raise IndexError.
    total_pagination = driver.find_elements_by_xpath("""//div[@class="tabbed-pagination"]/div[@class="pagination-container"]/div[@class="pagination-container"]/div[@class="pagination"]/p""")[0].text
    number_of_pages_split = total_pagination.split(" ")
    number_of_pages = int(number_of_pages_split[1])
    page = 0
    # Walk the result pages; the site appears to paginate 12 listings per page.
    while page < number_of_pages:
        offset = page * 12
        url = "https://www.carsales.com.au/cars/{0}/{1}/?offset={2}".format(z[2],z[4],offset)
        print(url)
        sleep_time = 5
        num_retries = 100
        error = 0
        # Same retry-with-linear-backoff pattern as the first page load.
        for loopyloop in range(0, num_retries):
            try:
                error = 0
                driver.get(url)
                time.sleep(sleep_time)
                driver.find_element_by_xpath("""//*[@class="result-set-container "]""").get_attribute("outerHTML")
                print("success")
            except NoSuchElementException:
                print("error")
                error = 1
                pass  # redundant: the except body already ends here
            if error == 1:
                time.sleep(sleep_time) # wait before trying to fetch the data again
                sleep_time += 1 # Implement your backoff algorithm here i.e. exponential backoff
            else:
                break
        # Collect every listing on the page and store one row per listing.
        rows = driver.find_elements_by_xpath("""//div[contains(@class,"listing-item")]""")
        count = len(rows)
        i = 0
        while i < count:
            # NOTE(review): this XPath starts with // so it searches the whole
            # document, NOT just rows[i]; indexing the result with [i] only
            # lines up by coincidence — a relative .// query would be correct.
            title = rows[i].find_elements_by_xpath("""//div[contains(@class,"title ")]/a/h2""")[i].text
            i = i + 1
            query = """****""".format(*****)  # placeholder: INSERT statement redacted
            cursor.execute(query)
            con.commit()  # commits per listing; batching per page would be cheaper
        page = page + 1

# --- Teardown ---------------------------------------------------------------
cursor.close()
con.close()
driver.quit()
display.popen.kill()
print("success")
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import mysql.connector
import time
from pyvirtualdisplay import Display
# --- Headless display + browser setup (same pattern as the first script) ---
display = Display(visible=0, size=(1920, 1080))
display.start()
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-setuid-sandbox")
driver = webdriver.Chrome(chrome_options=chrome_options)

# Today's day-of-month and month number, used to pick the date window below.
date = int(time.strftime("%d"))
month = int(time.strftime("%m"))
con = mysql.connector.connect(*****)  # placeholder: connection args redacted
cursor = con.cursor()

# z is the month number to scrape (here only November and December).
for z in range(11, 13):
    # end_date is the exclusive range() bound: days-in-month + 1.
    # NOTE(review): for any z outside 8-12 end_date is never assigned and the
    # range() below would raise NameError (harmless with range(11, 13)).
    if z == 9:
        end_date = 31
    elif z == 10:
        end_date = 32
    elif z == 11:
        end_date = 31
    elif z == 12:
        end_date = 32
    elif z == 8:
        end_date = 32
    start_date = 1
    # Choose where in the month to start: near month-end, spill the first few
    # days into the next month; otherwise start today (current month) or on
    # the 1st (future months).
    if z == month and (end_date - date) < 5:
        start_date = end_date
    elif z == (month + 1) and (end_date - date) < 5:
        start_date = start_date + 4 - (end_date - date)
    elif z > month:
        start_date = 1
    else:
        start_date = date
    print(z)
    print(start_date)
    print(end_date)
    # One flight-search page per calendar day in the window.
    for x in range(start_date, end_date):
        time.sleep(2)  # polite delay between requests
        x_url = str(x).zfill(2)  # zero-padded day, e.g. "05"
        z_url = str(z).zfill(2)  # zero-padded month
        # NOTE(review): this rebinds the module-level int `date` to a string
        # ("DD-MM"), so the (end_date - date) arithmetic above would raise a
        # TypeError on the next month's iteration — likely a bug.
        date = x_url + "-" + z_url
        url = "https://www.tiket.com/pesawat/cari?d=DPS&a=JKT&date=2017-{1}-{0}&adult=2&child=0&infant=0".format(x_url,z_url)
        print(url)
        driver.get(url)
        time.sleep(30)  # fixed 30s wait for the JS results to render
        last_height = driver.execute_script("return document.body.scrollHeight")
        print(last_height)
        w = 0
        # NOTE(review): w is set equal to last_height on the first pass, so
        # this "loop" executes at most once (a disguised `if last_height > 0`).
        while w < last_height:
            print("Success")
            w = last_height
            try:
                time.sleep(30)
                print(driver.find_element_by_xpath("""//*[@id="tbody_depart"]""").get_attribute("outerHTML"))
                # Each departing flight is a <tr> carrying data-* attributes.
                rows = driver.find_elements_by_xpath("""//tr[contains(@id,"flight")]""")
                for row in rows:
                    airline = row.get_attribute("data-airlinesname")
                    price = row.get_attribute("data-price")
                    departure = row.get_attribute("data-depart")
                    arrival = row.get_attribute("data-arrival")
                    baggage = row.get_attribute("data-baggage")
                    stops = row.get_attribute("data-stoptext")
                    query = """****""".format(******)  # placeholder: INSERT redacted
                    print(query)
                    cursor.execute(query)
                    con.commit()
            # NOTE(review): bare except retries the page exactly once and
            # swallows every error type (even KeyboardInterrupt); a second
            # failure inside this handler is unhandled and kills the script.
            except:
                driver.get(url)
                time.sleep(30)
                print(driver.find_element_by_xpath("""//*[@id="tbody_depart"]""").get_attribute("outerHTML"))
                rows = driver.find_elements_by_xpath("""//tr[contains(@id,"flight")]""")
                for row in rows:
                    airline = row.get_attribute("data-airlinesname")
                    price = row.get_attribute("data-price")
                    departure = row.get_attribute("data-depart")
                    arrival = row.get_attribute("data-arrival")
                    baggage = row.get_attribute("data-baggage")
                    stops = row.get_attribute("data-stoptext")
                    query = """*****""".format(*****)  # placeholder: INSERT redacted
                    print(query)
                    cursor.execute(query)
                    con.commit()

# --- Teardown ---------------------------------------------------------------
cursor.close()
con.close()
driver.close()  # NOTE(review): closes the window only; driver.quit() also ends the session
display.popen.kill()