Python 3.x 在 python selenium 中使用 send_keys() 函数时出现错误 "ElementNotInteractableException: Message: element not interactable"

Python 3.x 在 python selenium 中使用 send_keys() 函数时出现错误 "ElementNotInteractableException: Message: element not interactable"，标签：python-3.x、selenium-webdriver、beautifulsoup。我尝试了 CSS 选择器，在搜索框中使用 send_keys() 函数发送值，然后提交，这样我就可以得到特定年份的医生列表，但出现了错误 "ElementNotInteractableException: Message: element not interactable"。下面是我编写的代码（从 from selenium import webdriver 开始，完整代码见下文）。

我尝试了 CSS 选择器，在搜索框中使用 send_keys() 函数发送值，然后提交，这样我就可以得到特定年份的医生列表，但出现了错误 "ElementNotInteractableException: Message: element not interactable"。

下面是我编写的代码:

from selenium import webdriver
from bs4 import BeautifulSoup as bs
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time

# Original (failing) script: opens the Indian Medical Register site,
# switches to the "Year of Registration" search, submits year 2015, then
# walks the paginated result table with BeautifulSoup.
# NOTE(review): this is the version that raises
# ElementNotInteractableException on send_keys() — the text input is
# located before it is visible/interactable (no explicit wait).
url = "https://www.mciindia.org/CMS/information-desk/indian-medical-register"
driver = webdriver.Chrome(r"C:\chromedriver.exe")
driver.get(url)

# Open the "Year of Registration" search tab.
driver.find_element_by_xpath("//a[contains(text(),'Year of Registration')]").click()
# Fails here: the last text input on the page is not yet interactable.
driver.find_elements_by_css_selector("input[type='text']")[-1].send_keys("2015")
driver.find_element_by_css_selector("input[value='Submit']").click()

next_page = True
while next_page == True:
    # Re-parse the full page source after every pagination click.
    soup = bs(driver.page_source, 'html.parser')
    table1 = soup.find('table',{'id':'doct_info2'})
    try:
        rows = table1.find_all('tr')

        for row in rows:
            if len(row.find_all('td')) == 7:
                data = row.find_all('td')

                # NOTE(review): data[6] is a BeautifulSoup Tag, which has no
                # .click() method — this raises AttributeError, which the
                # bare except below silently swallows, so the per-doctor
                # detail parsing never runs.
                link = data[6].click()

                soup2 = bs(link, 'html.parser')
                table2 = soup2.find('table',{'id':'doctorBiodata'})
                rows = table2.find_all('tr')
                print(rows)
    except:
         pass

    time.sleep(5)
    try:
        # Advance to the next results page; when the 'Next' link is gone,
        # the except branch ends the loop.
        driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
    except:
        print ('No more pages')
        next_page=False

driver.close()

要在文本框中输入值，您需要导入 WebDriverWait，等待 element_to_be_clickable()，然后使用 send_keys()。

要获取表格，您需要导入 WebDriverWait，并使用 visibility_of_element_located() 等待表格可见。

代码

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from bs4 import BeautifulSoup as bs
import time

# Working version: same scraper, but every interaction is guarded by
# WebDriverWait + expected_conditions, so the control is actually
# clickable before send_keys()/click() runs — this is what fixes the
# ElementNotInteractableException from the question.
url = "https://www.mciindia.org/CMS/information-desk/indian-medical-register"
driver = webdriver.Chrome(r"C:\chromedriver.exe")
driver.get(url)
# Wait (up to 10 s each) for the tab link, the year input, and the submit
# button to become clickable instead of interacting immediately.
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//a[text()='Year of Registration']"))).click()
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//input[@id='doctor_year']"))).send_keys("2015")
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//button[@id='doctor_year_details']"))).click()
# The results table is rendered asynchronously; wait for it to be visible
# before handing the page source to BeautifulSoup.
WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CSS_SELECTOR,"table.table.table-bordered.dataTable.no-footer")))

next_page = True
while next_page == True:
    soup = bs(driver.page_source, 'html.parser')
    table1 = soup.find('table',{'id':'doct_info2'})
    try:
        rows = table1.find_all('tr')

        for row in rows:
            if len(row.find_all('td')) == 7:
                data = row.find_all('td')

                # NOTE(review): data[6] is still a BeautifulSoup Tag with no
                # .click() method — this AttributeError is swallowed by the
                # bare except below; the per-doctor detail block never runs.
                link = data[6].click()

                soup2 = bs(link, 'html.parser')
                table2 = soup2.find('table',{'id':'doctorBiodata'})
                rows = table2.find_all('tr')
                print(rows)
    except:
         pass

    time.sleep(5)
    try:
        # Click 'Next' until it no longer exists, then stop paginating.
        driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
    except:
        print ('No more pages')
        next_page=False

driver.close()

你可以通过简单的请求更快地完成整个事情。更改 draw 参数以获得不同的页面。您可以动态添加或删除时间戳参数 "_"。将 year 更改为不同的年份。初始 json 响应提供了一个记录计数，因此很容易计算按 500 条一批获取所有结果时的循环结束计数。使用 Session 对象可以提高循环中多个请求的 tcp 连接重用效率。

import requests
import pandas as pd

# Fetch one 500-row page of the Indian Medical Register "Year of
# Registration" search directly from its backing XHR endpoint — no browser
# needed — and print the result as a DataFrame.
#
# The endpoint speaks the jQuery DataTables server-side protocol, which
# requires an identical 6-field descriptor per column. Build the 7 column
# descriptors in a loop instead of hand-writing 42 tuples.
params = [
    ('service', 'getPaginatedDoctor'),
    ('draw', '1'),  # DataTables request counter; change to page through results
]
for col in range(7):
    params += [
        (f'columns[{col}][data]', str(col)),
        (f'columns[{col}][name]', ''),
        (f'columns[{col}][searchable]', 'true'),
        (f'columns[{col}][orderable]', 'true'),
        (f'columns[{col}][search][value]', ''),
        (f'columns[{col}][search][regex]', 'false'),
    ]
params += [
    ('order[0][column]', '0'),
    ('order[0][dir]', 'asc'),
    ('start', '0'),     # offset of the first record in this batch
    ('length', '500'),  # batch size; total record count comes back in the JSON
    ('search[value]', ''),
    ('search[regex]', 'false'),
    ('name', ''),
    ('registrationNo', ''),
    ('smcId', ''),
    ('year', '2015'),        # change for other registration years
    ('_', '1577634512046'),  # cache-busting timestamp; exact value is arbitrary
]

table_headers = ['Sl. No.','Year of Info','Registration Number','State Medical Councils','Name','Father Name','Action']
# timeout= so a stalled server cannot hang the script forever;
# raise_for_status() surfaces HTTP errors instead of a confusing KeyError
# on the 'data' lookup below.
r = requests.get('https://mciindia.org/MCIRest/open/getPaginatedData',  params=params, timeout=30)
r.raise_for_status()
df = pd.DataFrame(r.json()['data'], columns = table_headers)
print(df)

以下代码用于前 10 条结果。您可以将 length 从 10 更改为 42354（这是 2015 年的最大值），即可在同一秒内获得 out.csv 文件。第二个文件是 data.csv，它将包含每位医生的内部详细信息。

页面是通过
JavaScript
呈现的,因此我从浏览器
Developer Tools
Network Table
下找到了
XHR
请求。这是
JS

import pandas as pd
import csv
import re
import requests


def Table():
    """Download the first page of 'Year of Registration' results (year 2015)
    from the DataTables XHR endpoint, write the visible columns to out.csv,
    and return the IDs needed to fetch each doctor's detail record.

    Returns:
        list[list[str]]: [[doctorId, regdNoValue], ...] extracted from the
        openDoctorDetailsnew('<id>', '<regdNo>') call in each row's Action
        cell; rows whose Action cell lacks that call are skipped.
    """
    # 'data' is a list of 7-element rows:
    # [serial, year, regno, council, name, father, action-html].
    # Change length=10 in the URL to fetch more rows per request.
    table = pd.read_json("https://mciindia.org/MCIRest/open/getPaginatedData?service=getPaginatedDoctor&draw=1&columns[0][data]=0&columns[0][name]=&columns[0][searchable]=true&columns[0][orderable]=true&columns[0][search][value]=&columns[0][search][regex]=false&columns[1][data]=1&columns[1][name]=&columns[1][searchable]=true&columns[1][orderable]=true&columns[1][search][value]=&columns[1][search][regex]=false&columns[2][data]=2&columns[2][name]=&columns[2][searchable]=true&columns[2][orderable]=true&columns[2][search][value]=&columns[2][search][regex]=false&columns[3][data]=3&columns[3][name]=&columns[3][searchable]=true&columns[3][orderable]=true&columns[3][search][value]=&columns[3][search][regex]=false&columns[4][data]=4&columns[4][name]=&columns[4][searchable]=true&columns[4][orderable]=true&columns[4][search][value]=&columns[4][search][regex]=false&columns[5][data]=5&columns[5][name]=&columns[5][searchable]=true&columns[5][orderable]=true&columns[5][search][value]=&columns[5][search][regex]=false&columns[6][data]=6&columns[6][name]=&columns[6][searchable]=true&columns[6][orderable]=true&columns[6][search][value]=&columns[6][search][regex]=false&order[0][column]=0&order[0][dir]=asc&start=0&length=10&search[value]=&search[regex]=false&year=2015&_=1577626804003")['data']
    # Compile once outside the loop; the two groups capture the JS call's
    # doctorId and regdNoValue arguments.
    action_re = re.compile(r"openDoctorDetailsnew\('([^']*)', '([^']*)'")
    data = []
    with open('out.csv', 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(
            ['Year Of The Info', 'Registration#', 'State Medical Councils', 'Name', 'FatherName'])
        for item in table:
            writer.writerow(item[1:6])  # skip serial no. and the Action cell
            match = action_re.search(item[6])
            # Original code re-parsed the matched text with
            # match.group().split("'")[1:4:2] — fragile, and it raised
            # AttributeError when the cell had no JS call. Use the capture
            # groups directly and skip non-matching rows.
            if match:
                data.append([match.group(1), match.group(2)])
        print("Data Saved Into out.csv")
    return data


def Details():
    # Fetch the full biodata record for every (doctorId, regdNoValue) pair
    # produced by Table(), via the site's POST XHR endpoint.
    # Returns (names, items): one dict-keys view per *distinct* header set
    # and one dict-values view per doctor, which Save() writes as CSV rows.
    names = []
    items = []
    for doc, val in Table():
        print(f"Extracting DoctorID# {doc}, RegValue# {val}")
        # NOTE: local name shadows the stdlib 'json' module (harmless here).
        json = {'doctorId': doc, 'regdNoValue': val}
        r = requests.post(
            "https://mciindia.org/MCIRest/open/getDataFromService?service=getDoctorDetailsByIdImr", json=json).json()
        # NOTE(review): dict_keys views compare like sets, so this only
        # skips header sets already collected; if responses ever differ in
        # schema, extra header rows are appended — assumes all responses
        # share one schema, confirm against the API.
        if r.keys() not in names:
            names.append(r.keys())
        items.append(r.values())
    print("Done")
    return names, items


def Save():
    """Write the scraped doctor details to data.csv.

    Emits the header row(s) first, then one row of values per doctor,
    exactly as produced by Details().
    """
    with open('data.csv', 'w', newline="") as out_file:
        csv_writer = csv.writer(out_file)
        header_rows, value_rows = Details()
        csv_writer.writerows(header_rows)
        csv_writer.writerows(value_rows)


Save()
检查输出样本。

注意:如果你需要大量的内部数据,你必须查看一下concurrent.futures


是否要查看
2015
的整个搜索结果?以及每个
医生的全部数据
?检查我的回答我大约在2小时前完成了代码,但正在检查
TCP
转储,以便一次性访问内部医生详细信息。实际上,
OP
正在对内部医生详细信息进行整轮操作,您可以通过单击
view
按钮查看这些详细信息。tbh我没有费心玩params,但看起来您可能可以删除相当多的值空值可以关闭。但是对于内部表。不一样@NiteshRao:)欢迎你。对于mecan来说,这是一项非常有趣的任务,请在上面的代码中添加注释,这样我就可以理解您的代码,因为我不知道json在python中是如何工作的,也不知道从哪里获取“table=pd.read_json(在链接上方)”和post link获取此错误“解码“string”时UTF-8序列中的无效八位字节”如何解决此问题。