Python 继续使用Selenium获取StaleElement异常_Python_Selenium Chromedriver

Python 继续使用Selenium获取StaleElement异常

python

Python 继续使用Selenium获取StaleElement异常,python,selenium-chromedriver,Python,Selenium Chromedriver,我试图通过使用搜索词，然后对搜索结果进行爬网，来刮取Xero的应用程序目录。我尝试了隐式等待，在main和其他函数中捕获陈旧的元素异常，并将脚本的速度减慢到无效。问题似乎出在页面爬网代码中，但不知道为什么应用程序对象中的内容总是过时的 from selenium import webdriver from selenium.webdriver.common.keys import Keys from time import sleep from random import randint fr

我试图通过使用搜索词，然后对搜索结果进行爬网，来刮取Xero的应用程序目录。我尝试了隐式等待，在main和其他函数中捕获陈旧的元素异常，并将脚本的速度减慢到无效。问题似乎出在页面爬网代码中，但不知道为什么应用程序对象中的内容总是过时的

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
from random import randint 
from selenium.common import exceptions  


driver = webdriver.Chrome()
driver.implicitly_wait(5) #let DOM load
#load search URL
searchTerms = ["food and beverage","dentistry","auto dealerships","biotechnology","family office","clinic","laboratory","distribution","distributor","wholesale","calculate quotes","get funded","make payments","manage customers","manage human resources","manage projects","perform analytics","prepare taxes","receive payments","run payroll","send invoices","sync data","track expenses","track inventory","track time","agriculture","automotive","construction","education","franchise","health","hospitality","manufacturing","not for profit","professional services","realty","property","hospitality","retail","tourism","bills","CRM","Conversions","debtor","documents","e-commerce","financial services","inventory","invoicing","payments","payroll","HR","point of sale","practice management","reporting","consolidation","cashflow","investments"]
baseURL = "https://apps.xero.com/us/search?q="


#grab cards for each app in search term using class name

def main(searchTerms):
    with open('XeroScrapeout.csv','w',newline='') as f:
        try:
            #limited the search terms to a few I know will need page crawls
            for term in searchTerms[48:50]:
                #generate new list of apps and ratings for each page load based on search term
                apps= loadPage(baseURL,term)
                #write the apps from each page after it loads and specify which term is for each batch of apps
                appsScraped = writeApps(apps,term)
                writeApp(appsScraped,f)
        except exceptions.StaleElementReferenceException as e:
            print(e,term,"len(apps) is...",len(apps),"appsScraped...",appsScraped)
            pass
    f.close()
    driver.close()

def loadPage(baseURL,term):
    apps =[]
    #sleep(randint(10,30))
    url = baseURL+term
    driver.get(url)
    
    #allow following pages     
    pages2crawl = driver.find_elements_by_class_name("xui-button-medium")
    for page in pages2crawl:
        try:
            page2click = driver.find_element_by_tag_name("a")
            page2click.click()
            sleep(2)
            apps.append(driver.find_elements_by_class_name("mp-card__content"))
        except exceptions.StaleElementReferenceException as e:
            print(e,term,"x is...",x,"page2click is...",page2click.text,"\n","len apps is...",len(apps))
            pass
    #apps = driver.find_elements_by_class_name("mp-card__content")
    return apps

#need to add code that grabs url of app
#writes text in m-card_content - app name, description, number of ratings
def writeApps(apps,term):
    appsScraped=[]
    for x in range(0,len(apps[0])):
        try:
            #code to write app.text to csv
            #adding again due to stale error
            import pdb;pdb.set_trace()
            apps = driver.find_elements_by_class_name("mp-card__content")
            a=apps[0][x].text
            a = a.split('\n')
            line = term+";"+a[0]+";"+a[1]+";"+a[2]+";"+a[3]+";"
            appsScraped.append(line)
        except exceptions.StaleElementReferenceException as e:
            print(e)
            pass
    return appsScraped

def writeApp(appsScraped,f):
    if appsScraped:
        try:
            for x in range(0,len(appsScraped)):
                finalLine = appsScraped[x]+";"+'\n'
                f.write(finalLine)
        except:
            pass    
main(searchTerms)

当页面中生成了新的元素时，我遇到了过时的元素，比如我正在抓取的弹出窗口或滚动库。试着让你的抓取更加集中，减少错误元素的空间，或者检查页面并禁用任何未使用的元素，然后看看它是否有效

陈旧元素=元素不再存在