Python 3.x: getting the error "ElementNotInteractableException: Message: element not interactable" when using the send_keys() function in Python Selenium
I tried using a CSS selector to send a value into the search box with the send_keys() function and then submit, so that I can get the list of doctors for a particular year, but I am getting the error "ElementNotInteractableException: Message: element not interactable". Below is the code I wrote:
from selenium import webdriver
from bs4 import BeautifulSoup as bs
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
url = "https://www.mciindia.org/CMS/information-desk/indian-medical-register"
driver = webdriver.Chrome(r"C:\chromedriver.exe")
driver.get(url)
driver.find_element_by_xpath("//a[contains(text(),'Year of Registration')]").click()
driver.find_elements_by_css_selector("input[type='text']")[-1].send_keys("2015")
driver.find_element_by_css_selector("input[value='Submit']").click()
next_page = True
while next_page == True:
    soup = bs(driver.page_source, 'html.parser')
    table1 = soup.find('table',{'id':'doct_info2'})
    try:
        rows = table1.find_all('tr')
        for row in rows:
            if len(row.find_all('td')) == 7:
                data = row.find_all('td')
                link = data[6].click()
                soup2 = bs(link, 'html.parser')
                table2 = soup2.find('table',{'id':'doctorBiodata'})
                rows = table2.find_all('tr')
                print(rows)
    except:
        pass
    time.sleep(5)
    try:
        driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
    except:
        print ('No more pages')
        next_page=False
driver.close()
To enter a value into the text box you need to import WebDriverWait, wait for the element with element_to_be_clickable(), and then use send_keys(). To grab the table you likewise need WebDriverWait and a wait on visibility_of_element_located().
Code:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from bs4 import BeautifulSoup as bs
import time
url = "https://www.mciindia.org/CMS/information-desk/indian-medical-register"
driver = webdriver.Chrome(r"C:\chromedriver.exe")
driver.get(url)
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//a[text()='Year of Registration']"))).click()
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//input[@id='doctor_year']"))).send_keys("2015")
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//button[@id='doctor_year_details']"))).click()
WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CSS_SELECTOR,"table.table.table-bordered.dataTable.no-footer")))
next_page = True
while next_page == True:
    soup = bs(driver.page_source, 'html.parser')
    table1 = soup.find('table',{'id':'doct_info2'})
    try:
        rows = table1.find_all('tr')
        for row in rows:
            if len(row.find_all('td')) == 7:
                data = row.find_all('td')
                link = data[6].click()
                soup2 = bs(link, 'html.parser')
                table2 = soup2.find('table',{'id':'doctorBiodata'})
                rows = table2.find_all('tr')
                print(rows)
    except:
        pass
    time.sleep(5)
    try:
        driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
    except:
        print ('No more pages')
        next_page=False
driver.close()
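As a side note on the pagination loop above, the same explicit-wait idea can replace the fixed time.sleep(5): wait for the Next link to become clickable before clicking it. A minimal sketch, assuming the Next link disappears (or stops being clickable) on the last page:

from selenium.common.exceptions import TimeoutException

try:
    # wait up to 10 seconds for the paginator's Next link instead of sleeping a fixed 5 seconds
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//a[contains(text(),'Next')]"))
    ).click()
except TimeoutException:
    print('No more pages')
    next_page = False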
You can do the whole thing much faster with simple requests. Change the draw parameter to get different pages. You can add or drop the timestamp parameter '_' as needed. Change year to a different year. The initial JSON response gives a record count, so it is easy to calculate the end point of a loop over all the results in batches of 500 (a sketch of that loop follows the code below). Use a Session object for efficient TCP connection re-use across the multiple requests in the loop.
import requests
import pandas as pd
params = (
('service', 'getPaginatedDoctor'),
('draw', '1'),
('columns[0][data]', '0'),
('columns[0][name]', ''),
('columns[0][searchable]', 'true'),
('columns[0][orderable]', 'true'),
('columns[0][search][value]', ''),
('columns[0][search][regex]', 'false'),
('columns[1][data]', '1'),
('columns[1][name]', ''),
('columns[1][searchable]', 'true'),
('columns[1][orderable]', 'true'),
('columns[1][search][value]', ''),
('columns[1][search][regex]', 'false'),
('columns[2][data]', '2'),
('columns[2][name]', ''),
('columns[2][searchable]', 'true'),
('columns[2][orderable]', 'true'),
('columns[2][search][value]', ''),
('columns[2][search][regex]', 'false'),
('columns[3][data]', '3'),
('columns[3][name]', ''),
('columns[3][searchable]', 'true'),
('columns[3][orderable]', 'true'),
('columns[3][search][value]', ''),
('columns[3][search][regex]', 'false'),
('columns[4][data]', '4'),
('columns[4][name]', ''),
('columns[4][searchable]', 'true'),
('columns[4][orderable]', 'true'),
('columns[4][search][value]', ''),
('columns[4][search][regex]', 'false'),
('columns[5][data]', '5'),
('columns[5][name]', ''),
('columns[5][searchable]', 'true'),
('columns[5][orderable]', 'true'),
('columns[5][search][value]', ''),
('columns[5][search][regex]', 'false'),
('columns[6][data]', '6'),
('columns[6][name]', ''),
('columns[6][searchable]', 'true'),
('columns[6][orderable]', 'true'),
('columns[6][search][value]', ''),
('columns[6][search][regex]', 'false'),
('order[0][column]', '0'),
('order[0][dir]', 'asc'),
('start', '0'),
('length', '500'),
('search[value]', ''),
('search[regex]', 'false'),
('name', ''),
('registrationNo', ''),
('smcId', ''),
('year', '2015'),
('_', '1577634512046'),
)
table_headers = ['Sl. No.','Year of Info','Registration Number','State Medical Councils','Name','Father Name','Action']
r = requests.get('https://mciindia.org/MCIRest/open/getPaginatedData', params=params)
df = pd.DataFrame(r.json()['data'], columns = table_headers)
print(df)
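If you want every record for the year rather than a single page, the record count mentioned above makes the batch loop straightforward. Below is a minimal sketch reusing the params and table_headers defined above; it assumes the response exposes the total under the usual DataTables key recordsTotal, so check the actual JSON before relying on it:

import requests
import pandas as pd

with requests.Session() as s:  # re-use one TCP connection for all the batch requests
    first = s.get('https://mciindia.org/MCIRest/open/getPaginatedData', params=params).json()
    total = int(first['recordsTotal'])  # assumed key name; standard DataTables server-side response
    frames = [pd.DataFrame(first['data'], columns=table_headers)]
    for start in range(500, total, 500):  # remaining batches of 500 rows each
        batch = dict(params, start=str(start))  # same query string, new offset
        r = s.get('https://mciindia.org/MCIRest/open/getPaginatedData', params=batch)
        frames.append(pd.DataFrame(r.json()['data'], columns=table_headers))

df_all = pd.concat(frames, ignore_index=True)
print(df_all.shape)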
The following code handles the first 10 results. You can change the length from 10 to 42354, which is the maximum for 2015, and you will get the out.csv file within seconds. The second file is data.csv, which will contain the inner doctor details. The page is rendered via JavaScript, so I picked the XHR request out of the browser Developer Tools under the Network tab, since the data is loaded by JS.
import pandas as pd
import csv
import re
import requests


def Table():
    # download one page of the paginated doctor list as JSON and keep the 'data' rows
    table = pd.read_json("https://mciindia.org/MCIRest/open/getPaginatedData?service=getPaginatedDoctor&draw=1&columns[0][data]=0&columns[0][name]=&columns[0][searchable]=true&columns[0][orderable]=true&columns[0][search][value]=&columns[0][search][regex]=false&columns[1][data]=1&columns[1][name]=&columns[1][searchable]=true&columns[1][orderable]=true&columns[1][search][value]=&columns[1][search][regex]=false&columns[2][data]=2&columns[2][name]=&columns[2][searchable]=true&columns[2][orderable]=true&columns[2][search][value]=&columns[2][search][regex]=false&columns[3][data]=3&columns[3][name]=&columns[3][searchable]=true&columns[3][orderable]=true&columns[3][search][value]=&columns[3][search][regex]=false&columns[4][data]=4&columns[4][name]=&columns[4][searchable]=true&columns[4][orderable]=true&columns[4][search][value]=&columns[4][search][regex]=false&columns[5][data]=5&columns[5][name]=&columns[5][searchable]=true&columns[5][orderable]=true&columns[5][search][value]=&columns[5][search][regex]=false&columns[6][data]=6&columns[6][name]=&columns[6][searchable]=true&columns[6][orderable]=true&columns[6][search][value]=&columns[6][search][regex]=false&order[0][column]=0&order[0][dir]=asc&start=0&length=10&search[value]=&search[regex]=false&year=2015&_=1577626804003")['data']
    with open('out.csv', 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(
            ['Year Of The Info', 'Registration#', 'State Medical Councils', 'Name', 'FatherName'])
        data = []
        for item in table:
            # columns 1-5 are the visible row values (year, registration no., council, name, father name)
            writer.writerow(item[1:6])
            # column 6 holds the "View" button markup; pull the doctorId and regdNoValue out of its onclick call
            required = item[6]
            match = re.search(
                r"openDoctorDetailsnew\('([^']*)', '([^']*)'", required)
            data.append(match.group().split("'")[1:4:2])
    print("Data Saved Into out.csv")
    return data


def Details():
    names = []
    items = []
    for doc, val in Table():
        print(f"Extracting DoctorID# {doc}, RegValue# {val}")
        # POST the ids extracted above to fetch each doctor's detail record
        json = {'doctorId': doc, 'regdNoValue': val}
        r = requests.post(
            "https://mciindia.org/MCIRest/open/getDataFromService?service=getDoctorDetailsByIdImr", json=json).json()
        if r.keys() not in names:
            names.append(r.keys())
        items.append(r.values())
    print("Done")
    return names, items


def Save():
    # write the detail field names and values collected by Details() into data.csv
    with open('data.csv', 'w', newline="") as d:
        writer = csv.writer(d)
        n, i = Details()
        writer.writerows(n)
        writer.writerows(i)


Save()
Check a sample of the output. Note: if you need a lot of the inner data, you should take a look at concurrent.futures (a sketch follows below).
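A minimal sketch of that concurrent.futures idea, reusing the (doctorId, regdNoValue) pairs returned by Table() above; the worker count is arbitrary, error handling is omitted, and the endpoint may throttle parallel requests:

import requests
from concurrent.futures import ThreadPoolExecutor

DETAILS_URL = "https://mciindia.org/MCIRest/open/getDataFromService?service=getDoctorDetailsByIdImr"

def fetch(pair):
    doc, val = pair
    # same per-doctor POST as in Details(), just issued from a worker thread
    return requests.post(DETAILS_URL, json={'doctorId': doc, 'regdNoValue': val}).json()

def details_concurrent(pairs, workers=10):
    # fan the per-doctor requests out over a thread pool instead of running them one by one
    with ThreadPoolExecutor(max_workers=workers) as pool:
        return list(pool.map(fetch, pairs))

# usage: records = details_concurrent(Table())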
From the comments: Do you want the entire search results for 2015, and the full data for every doctor? Check my answer. I finished the code about two hours ago but was still checking the TCP dump so the inner doctor details could be fetched in one go. Actually, the OP is also looping over the inner doctor details, the ones you see by clicking the View button. tbh I didn't bother playing with the params, but it looks like quite a few of the empty values could be dropped; the inner table is a different call, though. @NiteshRao :) you're welcome, it was a really interesting task for me. Could you add comments to the code above so I can follow it? I don't know how JSON works in Python, or where the pd.read_json URL and the POST link come from. Also, I'm getting the error "invalid octet in UTF-8 sequence when decoding 'string'"; how can I fix that?