Python script doesn't work if the element isn't located

I'm trying to automate a website, but when the script goes to a page that doesn't have certain elements, it stops running. For example, on this page: what I want is that if any of the details aren't present, the script should go back and then continue with the next one. Thanks for your help. Here is my code:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
option = Options()
option.add_argument("--disable-infobars")
option.add_argument("start-maximized")
option.add_argument("--disable-extensions")
option.add_experimental_option("excludeSwitches", ['enable-automation'])
# Pass the argument 1 to allow and 2 to block
# option.add_experimental_option("prefs", {
# "profile.default_content_setting_values.notifications": 1
# })
driver = webdriver.Chrome(chrome_options=option, executable_path='C:\\Users\\Awais\\Desktop\\web crawling\\chromedriver.exe')
driver.implicitly_wait(100)
url = "https://www.marks4sure.com/allexams.html"
driver.get(url)
links = []
exam_code = []
exam_name = []
total_q = []
for x in range(70):
    for i in range(1, 57):
        more_details = driver.find_element_by_xpath(f'//*[@id="content"]/div/div[2]/div[{i}]/div/h5/a')
        links.append(more_details.get_attribute('href'))
        more_details.click()
        try:
            code = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[2]/div[2]')
            exam_code.append(code.text)
        except:
            print('N/A')
        try:
            name = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[3]/div[2]/a')
            exam_name.append(name.text)
        except:
            print('N/A')
        try:
            question = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[4]/div[2]/strong')
            total_q.append(question.text)
        except:
            print('N/A')
        driver.back()
    next_page = driver.find_element_by_xpath('//*[@id="yw0"]/li[13]')
    next_page.click()

all_info = list(zip(links, exam_code, exam_name, total_q))
print(all_info)

df = pd.DataFrame(all_info, columns=["Links", "Exam Code", "Exam Name", "Total Question"])
df.to_csv("data.csv", encoding='utf-8')
driver.close()
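One detail worth flagging in the code above: `driver.implicitly_wait(100)` makes every `find_element` call that fails poll for up to 100 seconds before raising, which is why the script seems to freeze on pages that are missing a detail. As a rough, library-free sketch of how an implicit wait behaves (the helper name `poll_until` is illustrative, not a Selenium API):

```python
import time

def poll_until(check, timeout=0.5, interval=0.1):
    """Re-run check() until it returns a truthy value or `timeout` seconds
    pass; return the value, or None on timeout. This mirrors an implicit
    wait: a long timeout means every *missing* element costs the full
    timeout before the exception is raised."""
    deadline = time.monotonic() + timeout
    while True:
        result = check()
        if result:
            return result
        if time.monotonic() >= deadline:
            return None
        time.sleep(interval)
```

With `timeout=100`, a single page missing three fields would stall for roughly five minutes, which looks exactly like "the script stops running".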
You need to catch the NoSuchElementException when looking up more_details, go back, and continue with the next iteration of the loop:
for x in range(70):
    for i in range(1, 57):
        more_details = None
        try:
            more_details = driver.find_element_by_xpath(f'//*[@id="content"]/div/div[2]/div[{i}]/div/h5/a')
        except NoSuchElementException:
            driver.back()
            continue
        links.append(more_details.get_attribute('href'))
        # rest of the code...
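The same try/except-and-fall-back pattern repeats for every lookup, so it can be factored into a tiny helper. This is only a sketch, and `find_or_default` is a made-up name, not part of the original answer; with Selenium you would pass `exceptions=(NoSuchElementException,)` and a lambda wrapping the `find_element_by_xpath` call:

```python
def find_or_default(lookup, default=None, exceptions=(Exception,)):
    """Run the zero-argument callable `lookup` and return its result;
    if it raises one of `exceptions`, return `default` instead."""
    try:
        return lookup()
    except exceptions:
        return default
```

Usage would look like `find_or_default(lambda: driver.find_element_by_xpath('...'), exceptions=(NoSuchElementException,))`, keeping the loop body free of repeated try/except blocks.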
You were not checking the more details element. Here is a version that does:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time
option = Options()
option.add_argument("--disable-infobars")
option.add_argument("start-maximized")
option.add_argument("--disable-extensions")
option.add_experimental_option("excludeSwitches", ['enable-automation'])
# Pass the argument 1 to allow and 2 to block
# option.add_experimental_option("prefs", {
# "profile.default_content_setting_values.notifications": 1
# })
driver = webdriver.Chrome(chrome_options=option, executable_path='C:\\Users\\Awais\\Desktop\\web crawling\\chromedriver.exe')
driver.implicitly_wait(0.5)
url = "https://www.marks4sure.com/allexams.html"
driver.get(url)
links = []
exam_code = []
exam_name = []
total_q = []
for x in range(70):
    for i in range(1, 57):
        try:
            more_details = driver.find_element_by_xpath(f'//*[@id="content"]/div/div[2]/div[{i}]/div/h5/a')
            links.append(more_details.get_attribute('href'))
            more_details.click()
        except NoSuchElementException:
            continue
        try:
            if driver.find_element_by_xpath('/html/body/div[4]/div').get_attribute('class') == 'alert alert-danger':
                driver.back()
                continue
        except NoSuchElementException:
            pass
        try:
            code = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[2]/div[2]')
            exam_code.append(code.text)
        except:
            print('N/A')
        try:
            name = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[3]/div[2]/a')
            exam_name.append(name.text)
        except:
            print('N/A')
        try:
            question = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[4]/div[2]/strong')
            total_q.append(question.text)
        except:
            print('N/A')
        driver.back()
    try:
        next_page = driver.find_element_by_xpath('//*[@id="yw0"]/li[13]')
        next_page.click()
    except NoSuchElementException:
        driver.refresh()

all_info = list(zip(links, exam_code, exam_name, total_q))
print(all_info)

df = pd.DataFrame(all_info, columns=["Links", "Exam Code", "Exam Name", "Total Question"])
driver.close()
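One caveat with both versions above: the `except` branches only `print('N/A')` and never append anything, so whenever a field is missing the three lists end up with different lengths, and `zip` then silently drops the trailing rows and misaligns the columns. A hedged sketch of a fix (`append_text_or_placeholder` is a hypothetical helper, not from the original answer):

```python
def append_text_or_placeholder(target, lookup, placeholder='N/A'):
    """Append the .text of lookup()'s result to the list `target`;
    append `placeholder` if the lookup raises, so every column list
    stays the same length and zip() keeps the rows aligned."""
    try:
        target.append(lookup().text)
    except Exception:
        target.append(placeholder)
```

In the scraping loop this would be called as, e.g., `append_text_or_placeholder(exam_code, lambda: driver.find_element_by_xpath('...'))` in place of each try/except block.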
Comments:
– Can anyone help here? Please edit your question to include the error you get and the stack trace.
– Hi @4rigener, I tried your code but it doesn't work. Could you check it again?
– @Awais it would help if you added the error.
– Well, if you run the script it works fine, but when the page being scraped has no "code", "name" or "question", the script stops there and doesn't go back to the listing page to scrape the next one. If you run the script you'll see what I mean :)
– @Awais I edited the code, how about this one? I tested the code before, but I couldn't reproduce it because the site gives me the same problem. I made you a video, that should explain it better: if the script doesn't see anything, I want it to skip.
– Hi Greg, I tried your code but it doesn't work. Could you check it again? It's still the same problem.