Python 如何用 Selenium 点击“下一步”(Next)来抓取所有页面?

Python 如何用 Selenium 点击“下一步”(Next)来抓取所有页面?,python,selenium-webdriver,Python,Selenium Webdriver,我有以下代码来抓取网站: from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException from selenium im

我有以下代码来抓取网站:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium import webdriver
from functools import reduce

def stats_canada():
    """Scrape one page of the Statistics Canada data-table listing.

    Opens the listing in Chrome, clicks the element with id ``tables-lnk``
    and then reads, from the ``#all`` container, the text and ``href`` of
    every result link, every release-date span and every product-id div.

    Returns:
        pandas.DataFrame: columns ``Name``, ``Link``, ``Release Date`` and
        ``Table ID``, joined on row position (inner join on the index).

    Raises:
        selenium.common.exceptions.TimeoutException: if an awaited element
            does not become clickable/visible within its wait period.
    """
    # pandas is used below but was never imported by the snippet; import it
    # locally so the function is runnable on its own.
    import pandas as pd

    driver = webdriver.Chrome('/Users/wwds/Desktop/chromedriver')
    try:
        driver.get('https://www150.statcan.gc.ca/n1/en/type/data?count=100&p=0-data/tables%2C-All#tables')
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "tables-lnk"))).click()

        def visible(css):
            # Block (up to 30 s) until every element matching `css` is visible.
            return WebDriverWait(driver, 30).until(
                EC.visibility_of_all_elements_located((By.CSS_SELECTOR, css))
            )

        anchors = visible("#all a[target='_self']")
        link_titles = pd.DataFrame({'Name': [a.text for a in anchors]})
        links = pd.DataFrame({'Link': [a.get_attribute("href") for a in anchors]})

        dates = visible("#all span[class='ndm-result-date']")
        release_date = pd.DataFrame({'Release Date': [d.text for d in dates]})

        ids = visible("#all div[class='ndm-result-productid']")
        # Strip the literal "Table: " label so only the identifier remains.
        table_id = pd.DataFrame(
            {'Table ID': [t.text.replace("Table: ", "") for t in ids]}
        )

        # Index-on-index inner merges: rows are aligned purely by position.
        return reduce(
            lambda left, right: pd.merge(left, right, left_index=True, right_index=True),
            [link_titles, links, release_date, table_id],
        )
    finally:
        # Always shut the browser down, even when a wait times out.
        driver.quit()


# Run the scraper defined above; the DataFrame it returns is not stored.
stats_canada()
我还可以导航到下一页:

# Click the first link whose visible text contains 'Next'.
# NOTE(review): `driver` must be a live WebDriver in scope here; in the
# function above it is a local variable, so this line cannot run as-is.
driver.find_element_by_partial_link_text('Next').click()

但是,我想知道如何把这一步整合进我的函数,使其抓取所有页面,并把结果追加到同一个 DataFrame 中?

您只需要获得页面数并对其进行迭代:

import re
import time

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver


def stats_canada():
    """Load every page of the Statistics Canada data-table listing in turn.

    Opens page 0, clicks the element with id ``tables-lnk``, reads the page
    count from the second-to-last pagination list item (whose text is
    expected to contain ``to page N of``) and then navigates to pages
    ``1 .. N-1`` via the ``p=`` URL parameter. Nothing is extracted from the
    pages here; the loop body is the place to add scraping.

    Returns:
        None

    Raises:
        selenium.common.exceptions.TimeoutException: if ``tables-lnk`` does
            not become clickable within 20 seconds.
    """
    driver = webdriver.Chrome()
    url = 'https://www150.statcan.gc.ca/n1/en/type/data?count=100&p={}-data/tables%2C-All#tables'
    try:
        driver.get(url.format(0))
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, "tables-lnk"))).click()

        items = driver.find_elements_by_css_selector('#tables .tgl-panel .pagination.pagination-sm li')
        if len(items) < 2:
            # No usable pagination bar: only the already-loaded page exists.
            return

        # NOTE(review): presumably the last item is the "Next" control, so
        # the one before it names the final page; confirm against the site.
        match = re.search(r'to page (\d+) of', items[-2].text)
        if match is None:
            # Pagination text not in the expected form; nothing more to visit.
            return
        count_pages = int(match.group(1))

        # Page 0 is already loaded, and the URL parameter is zero-based.
        for page in range(1, count_pages):
            driver.get(url.format(page))
    finally:
        # Always shut the browser down, even when a wait or lookup fails.
        driver.quit()

# Run the page walk defined above.
stats_canada()

谢谢你的回复!看起来它确实遍历了所有页面,但我该如何让它与函数的其余部分配合,从每一页提取信息呢?