Python 脚本在元素未定位时无法继续工作

Python 脚本在元素未定位时无法继续工作,python,python-3.x,selenium,selenium-webdriver,web-scraping,Python,Python 3.x,Selenium,Selenium Webdriver,Web Scraping,我试图使网站自动化,但当它转到一个没有任何元素的页面时,它就不再运行了。例如,本页: 我想做的是,如果它不存在任何细节,它应该返回,然后继续下一个。 谢谢你的帮助。 这是我的代码:

我试图使网站自动化,但当它转到一个没有任何元素的页面时,它就不再运行了。例如,本页: 我想做的是,如果它不存在任何细节,它应该返回,然后继续下一个

谢谢你的帮助。 这是我的密码:

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time

option = Options()
option.add_argument("--disable-infobars")
option.add_argument("start-maximized")
option.add_argument("--disable-extensions")
option.add_experimental_option("excludeSwitches", ['enable-automation'])

# Pass the argument 1 to allow and 2 to block
# option.add_experimental_option("prefs", { 
#     "profile.default_content_setting_values.notifications": 1 
# })
driver = webdriver.Chrome(chrome_options=option, executable_path='C:\\Users\\Awais\\Desktop\\web crawling\\chromedriver.exe')

# Keep the implicit wait short: with 100 s, every *missing* element stalls
# the script for 100 seconds before NoSuchElementException is raised.
driver.implicitly_wait(2)

url = "https://www.marks4sure.com/allexams.html"

driver.get(url)

links = []      # detail-page URLs
exam_code = []  # exam code text, or 'N/A' when missing
exam_name = []  # exam name text, or 'N/A' when missing
total_q = []    # total-question text, or 'N/A' when missing


def _text_or_na(xpath):
    """Return the .text of the element at *xpath*, or 'N/A' if absent.

    Always returning a value (instead of only printing on failure, as the
    original did) keeps the four result lists the same length, so zip()
    below does not silently drop rows.
    """
    try:
        return driver.find_element_by_xpath(xpath).text
    except NoSuchElementException:
        return 'N/A'


for x in range(70):          # pagination loop over listing pages
    for i in range(1, 57):   # rows on the current listing page
        # Guard the row lookup: a row index that does not exist on this
        # page must not abort the whole run — skip to the next index.
        try:
            more_details = driver.find_element_by_xpath(f'//*[@id="content"]/div/div[2]/div[{i}]/div/h5/a')
        except NoSuchElementException:
            continue

        links.append(more_details.get_attribute('href'))
        more_details.click()

        exam_code.append(_text_or_na('//*[@id="content"]/div/div[1]/div[2]/div[2]/div[2]'))
        exam_name.append(_text_or_na('//*[@id="content"]/div/div[1]/div[2]/div[3]/div[2]/a'))
        total_q.append(_text_or_na('//*[@id="content"]/div/div[1]/div[2]/div[4]/div[2]/strong'))

        driver.back()

    next_page = driver.find_element_by_xpath('//*[@id="yw0"]/li[13]')
    next_page.click()

# BUG FIX: the original zipped exam_name twice and omitted exam_code,
# so the "Exam Code" CSV column actually contained exam names.
all_info = list(zip(links, exam_code, exam_name, total_q))
print(all_info)

df = pd.DataFrame(all_info, columns=["Links", "Exam Code", "Exam Name", "Total Question"])
df.to_csv("data.csv", encoding='utf-8')

driver.close()

在查找
更多详细信息时,您需要捕获
NoSuchElementException
,返回并继续循环的下一次迭代:

# Guard the "more details" lookup: on rows where the link element does not
# exist, NoSuchElementException is caught, the browser navigates back, and
# the loop moves on to the next index instead of aborting the whole run.
for x in range(70):
    for i in range(1, 57):
        more_details = None

        try:
            more_details = driver.find_element_by_xpath(f'//*[@id="content"]/div/div[2]/div[{i}]/div/h5/a')
        except NoSuchElementException:
            # Element missing on this row: return to the listing page and
            # continue with the next row.
            driver.back()
            continue

        links.append(more_details.get_attribute('href'))
        # rest of the code...

您没有检查更多详细信息元素

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time

option = Options()
option.add_argument("--disable-infobars")
option.add_argument("start-maximized")
option.add_argument("--disable-extensions")
option.add_experimental_option("excludeSwitches", ['enable-automation'])

# Pass the argument 1 to allow and 2 to block
# option.add_experimental_option("prefs", {
#     "profile.default_content_setting_values.notifications": 1
# })

driver = webdriver.Chrome(chrome_options=option, executable_path='C:\\Users\\Awais\\Desktop\\web crawling\\chromedriver.exe')


driver.implicitly_wait(0.5)

url = "https://www.marks4sure.com/allexams.html"

driver.get(url)

links = []      # detail-page URLs
exam_code = []  # exam code text, or 'N/A' when missing
exam_name = []  # exam name text, or 'N/A' when missing
total_q = []    # total-question text, or 'N/A' when missing

for x in range(70):          # pagination loop over listing pages
    for i in range(1, 57):   # rows on the current listing page
        # Rows that do not exist on this page are skipped instead of
        # crashing the whole run.
        try:
            more_details = driver.find_element_by_xpath(f'//*[@id="content"]/div/div[2]/div[{i}]/div/h5/a')
            links.append(more_details.get_attribute('href'))
            more_details.click()
        except NoSuchElementException:
            continue

        # Detail pages that show an error banner carry no exam data:
        # go back and record 'N/A' so the four lists stay aligned.
        try:
            if driver.find_element_by_xpath('/html/body/div[4]/div').get_attribute('class') == 'alert alert-danger':
                # BUG FIX: the original called 'drier.back()' — a NameError
                # that crashed the script the first time this branch ran.
                driver.back()
                exam_code.append('N/A')
                exam_name.append('N/A')
                total_q.append('N/A')
                continue
        except NoSuchElementException:
            pass

        # Append 'N/A' (instead of only printing) on each missing field so
        # every appended link has a matching entry in all three lists and
        # zip() below does not silently drop rows.
        try:
            code = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[2]/div[2]')
            exam_code.append(code.text)
        except NoSuchElementException:
            exam_code.append('N/A')
        try:
            name = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[3]/div[2]/a')
            exam_name.append(name.text)
        except NoSuchElementException:
            exam_name.append('N/A')
        try:
            question = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[4]/div[2]/strong')
            total_q.append(question.text)
        except NoSuchElementException:
            total_q.append('N/A')

        driver.back()
    try:
        next_page = driver.find_element_by_xpath('//*[@id="yw0"]/li[13]')
        next_page.click()
    except NoSuchElementException:
        driver.refresh()

# BUG FIX: the original zipped exam_name twice and omitted exam_code.
all_info = list(zip(links, exam_code, exam_name, total_q))
print(all_info)

df = pd.DataFrame(all_info, columns=["Links", "Exam Code", "Exam Name", "Total Question"])
# The original built the DataFrame but never wrote it out; persist it like
# the first version of the script did.
df.to_csv("data.csv", encoding='utf-8')

driver.close()

有人在这里帮助吗?请编辑您的问题,包括您得到的错误和堆栈跟踪。嗨,@4rigener我尝试了您的代码,但它不起作用。请重新检查一下好吗?@Awais如果你添加了错误,那么它会有帮助。好吧,如果你运行脚本,它会工作得很好,但是当脚本正在刮取的页面没有“代码”、“名称”或“问题”时,脚本会停在那里,不会回到列表页面刮取下一个页面。如果你运行脚本,你就会明白我的意思:)@Awais我编辑代码,这个怎么样?我以前测试过代码,但我不能,因为网站也会有同样的问题。我给你做了一个视频。这将更好地解释你:如果脚本没有看到任何内容,我希望它跳过。嗨,格雷格,我尝试了你的代码,但它不起作用。你能再检查一下吗?这仍然是同一个问题。