Python 继续使用Selenium获取StaleElement异常
我试图通过使用搜索词,然后对搜索结果进行爬网,来刮取Xero的应用程序目录。我尝试了隐式等待,在main和其他函数中捕获陈旧的元素异常,并将脚本的速度减慢到无效。问题似乎出在页面爬网代码中,但不知道为什么应用程序对象中的内容总是过时的Python 继续使用Selenium获取StaleElement异常,python,selenium-chromedriver,Python,Selenium Chromedriver,我试图通过使用搜索词,然后对搜索结果进行爬网,来刮取Xero的应用程序目录。我尝试了隐式等待,在main和其他函数中捕获陈旧的元素异常,并将脚本的速度减慢到无效。问题似乎出在页面爬网代码中,但不知道为什么应用程序对象中的内容总是过时的 from selenium import webdriver from selenium.webdriver.common.keys import Keys from time import sleep from random import randint fr
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
from random import randint
from selenium.common import exceptions
driver = webdriver.Chrome()
driver.implicitly_wait(5) #let DOM load
#load search URL
searchTerms = ["food and beverage","dentistry","auto dealerships","biotechnology","family office","clinic","laboratory","distribution","distributor","wholesale","calculate quotes","get funded","make payments","manage customers","manage human resources","manage projects","perform analytics","prepare taxes","receive payments","run payroll","send invoices","sync data","track expenses","track inventory","track time","agriculture","automotive","construction","education","franchise","health","hospitality","manufacturing","not for profit","professional services","realty","property","hospitality","retail","tourism","bills","CRM","Conversions","debtor","documents","e-commerce","financial services","inventory","invoicing","payments","payroll","HR","point of sale","practice management","reporting","consolidation","cashflow","investments"]
baseURL = "https://apps.xero.com/us/search?q="
#grab cards for each app in search term using class name
def main(searchTerms):
with open('XeroScrapeout.csv','w',newline='') as f:
try:
#limited the search terms to a few I know will need page crawls
for term in searchTerms[48:50]:
#generate new list of apps and ratings for each page load based on search term
apps= loadPage(baseURL,term)
#write the apps from each page after it loads and specify which term is for each batch of apps
appsScraped = writeApps(apps,term)
writeApp(appsScraped,f)
except exceptions.StaleElementReferenceException as e:
print(e,term,"len(apps) is...",len(apps),"appsScraped...",appsScraped)
pass
f.close()
driver.close()
def loadPage(baseURL,term):
apps =[]
#sleep(randint(10,30))
url = baseURL+term
driver.get(url)
#allow following pages
pages2crawl = driver.find_elements_by_class_name("xui-button-medium")
for page in pages2crawl:
try:
page2click = driver.find_element_by_tag_name("a")
page2click.click()
sleep(2)
apps.append(driver.find_elements_by_class_name("mp-card__content"))
except exceptions.StaleElementReferenceException as e:
print(e,term,"x is...",x,"page2click is...",page2click.text,"\n","len apps is...",len(apps))
pass
#apps = driver.find_elements_by_class_name("mp-card__content")
return apps
#need to add code that grabs url of app
#writes text in m-card_content - app name, description, number of ratings
def writeApps(apps,term):
appsScraped=[]
for x in range(0,len(apps[0])):
try:
#code to write app.text to csv
#adding again due to stale error
import pdb;pdb.set_trace()
apps = driver.find_elements_by_class_name("mp-card__content")
a=apps[0][x].text
a = a.split('\n')
line = term+";"+a[0]+";"+a[1]+";"+a[2]+";"+a[3]+";"
appsScraped.append(line)
except exceptions.StaleElementReferenceException as e:
print(e)
pass
return appsScraped
def writeApp(appsScraped,f):
if appsScraped:
try:
for x in range(0,len(appsScraped)):
finalLine = appsScraped[x]+";"+'\n'
f.write(finalLine)
except:
pass
main(searchTerms)
当页面中生成了新的元素时,我遇到了过时的元素,比如我正在抓取的弹出窗口或滚动库。试着让你的抓取更加集中,减少错误元素的空间,或者检查页面并禁用任何未使用的元素,然后看看它是否有效 陈旧元素=元素不再存在