Python 刮取隐藏帧JavaScript

Python 刮取隐藏帧JavaScript,python,selenium,google-chrome,Python,Selenium,Google Chrome,我试图在一个隐藏的框架内抓取数据;框架如下所示 <!-- Content of the details tabs here --> <div id="tabDetail_0" class="tab_content tab_detail" style="display: block;"><iframe id="iframe_0" src="https://www.tmdn.org/tmview/get- detail?st13=GB500

我试图在一个隐藏的框架内抓取数据;框架如下所示

<!-- Content of the details tabs here  -->
    <div id="tabDetail_0" class="tab_content tab_detail" style="display: block;">
        <iframe id="iframe_0" src="https://www.tmdn.org/tmview/get-detail?st13=GB500000003342197" width="100%" height="600px;" frameborder="0"></iframe>
    </div></div></div> <!-- resultTabs -->

您需要做的是使用 `driver.switch_to.frame` 切换到该 iframe:

# Locate the iframe element by its exact id and enter its browsing context.
frame_element = driver.find_element_by_xpath('//iframe[@id="iframe_0"]')
driver.switch_to.frame(frame_element)
# ... work inside the frame here, then return to the top-level document:
driver.switch_to.default_content()
编辑:

您问到如果 iframe 的 id 会变化该怎么办:可以在 XPath 中使用 `contains`,如下所示:

# Match any iframe whose id contains "iframe_", so the exact id does not
# have to be known in advance.
# You should check that there is only one such iframe; to do so, run
# `iframes = driver.find_elements_by_xpath('//iframe[contains(@id,"iframe_")]')`
# and then `print(len(iframes))` to see how many iframes matched.
frame_element = driver.find_element_by_xpath('//iframe[contains(@id,"iframe_")]')
driver.switch_to.frame(frame_element)
# ... work inside the frame here, then return to the top-level document:
driver.switch_to.default_content()
在你的代码中这样使用:

import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

# Scrape the detail view of the first TMview search result.
# Flow: open the site, fill in the advanced-search form, run the search,
# open the first hit and print the document shown inside its detail iframe.
# The fixed time.sleep() calls give the page time to update between steps.
url = "https://www.tmdn.org/tmview/welcome#"

driver = webdriver.Chrome(executable_path=r"D:\New Proj\chromedriver.exe")
driver.get(url)
time.sleep(3)
# Click the link inside the #buttonBox element of the welcome page.
driver.find_element_by_xpath('//*[@id="buttonBox"]/a').click()
time.sleep(3)

# Click advanced search
driver.find_element_by_name("lnkAdvancedSearch").click()
time.sleep(5)

# Designated territories: click the control, tick "United Kingdom", then
# click the control again (presumably to collapse the option list).
driver.find_element_by_id('DesignatedTerritories').click()
time.sleep(5)
TerritoryLabelElements = driver.find_elements_by_css_selector('div.optEUGroupContainer label')
for elem in TerritoryLabelElements:
    if elem.text == 'United Kingdom':
        elem.click()
time.sleep(5)
driver.find_element_by_id('DesignatedTerritories').click()
time.sleep(5)

# Trade mark offices: same click / tick / click sequence for the UKIPO entry.
driver.find_element_by_id('SelectedOffices').click()
time.sleep(5)
TerritoryLabelElements = driver.find_elements_by_css_selector('div.multiSelectOptions label')
for elem in TerritoryLabelElements:
    if elem.text == 'GB United Kingdom ( UKIPO )':
        elem.click()
time.sleep(5)
driver.find_element_by_id('SelectedOffices').click()

# Trade mark status: tick both "Filed" and "Registered".
driver.find_element_by_id('TradeMarkStatus').click()
time.sleep(5)
TerritoryLabelElements = driver.find_elements_by_css_selector('div.multiSelectOptions label')
for elem in TerritoryLabelElements:
    if elem.text in ('Filed', 'Registered'):
        elem.click()
time.sleep(5)
driver.find_element_by_id('TradeMarkStatus').click()

# Application date range: the same value is typed into both the "from" and
# "to" fields.
startdate = driver.find_element_by_id("ApplicationDateFrom")
startdate.clear()
startdate.send_keys('10-01-2018')
enddate = driver.find_element_by_id("ApplicationDateTo")
enddate.clear()
enddate.send_keys('10-01-2018')

# Click search
time.sleep(5)
driver.find_element_by_id("SearchCopy").click()
time.sleep(30)

# Click first link: move to the first trade-mark-name cell and click it.
el = driver.find_elements_by_class_name('cell_tmName_column')[0]
action = ActionChains(driver)
action.move_to_element_with_offset(el, 0, 0)
action.click()
action.perform()
time.sleep(10)

# Enter the detail iframe so that lookups resolve against the framed document.
iframe = driver.find_element_by_xpath('//iframe[@id="iframe_0"]')
driver.switch_to.frame(iframe)
# Do something here; this example prints the HTML of the framed document.
# The `iframe` element belongs to the parent document, so it must not be
# queried after the switch, and its innerHTML would not contain the framed
# page anyway. driver.page_source returns the source of the current context.
print(driver.page_source)
# Then switch back to the top-level document:
driver.switch_to.default_content()

希望这对你有帮助

嗨,Milah,如果我的答案中所示的 `switch_to.frame` 对你有帮助,请告诉我…
嗨,Moshe,它工作得很好,但是 iframe 的 id 一直在变化:有时是 iframe_0,有时是 iframe_1…
嗨,Milah,我已经编辑了我的答案,改用 `contains`,请参见"编辑"部分。如果你需要更多解释,请提问。
嗨,Moshe,谢谢你的帮助;我编辑了上面的代码,贴出了全部代码,包括抓取部分。第一个链接的代码运行正常,但处理第二个链接时代码会停止,似乎 `driver.switch_to.default_content()` 不起作用。
我现在不在工作,也不在电脑旁,但你可以试试 `switch_to_active_element()`……明天我会尽力提供更多帮助……
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

# Scrape the detail view of the first TMview search result.
# Flow: open the site, fill in the advanced-search form, run the search,
# open the first hit and print the document shown inside its detail iframe.
# The fixed time.sleep() calls give the page time to update between steps.
url = "https://www.tmdn.org/tmview/welcome#"

driver = webdriver.Chrome(executable_path=r"D:\New Proj\chromedriver.exe")
driver.get(url)
time.sleep(3)
# Click the link inside the #buttonBox element of the welcome page.
driver.find_element_by_xpath('//*[@id="buttonBox"]/a').click()
time.sleep(3)

# Click advanced search
driver.find_element_by_name("lnkAdvancedSearch").click()
time.sleep(5)

# Designated territories: click the control, tick "United Kingdom", then
# click the control again (presumably to collapse the option list).
driver.find_element_by_id('DesignatedTerritories').click()
time.sleep(5)
TerritoryLabelElements = driver.find_elements_by_css_selector('div.optEUGroupContainer label')
for elem in TerritoryLabelElements:
    if elem.text == 'United Kingdom':
        elem.click()
time.sleep(5)
driver.find_element_by_id('DesignatedTerritories').click()
time.sleep(5)

# Trade mark offices: same click / tick / click sequence for the UKIPO entry.
driver.find_element_by_id('SelectedOffices').click()
time.sleep(5)
TerritoryLabelElements = driver.find_elements_by_css_selector('div.multiSelectOptions label')
for elem in TerritoryLabelElements:
    if elem.text == 'GB United Kingdom ( UKIPO )':
        elem.click()
time.sleep(5)
driver.find_element_by_id('SelectedOffices').click()

# Trade mark status: tick both "Filed" and "Registered".
driver.find_element_by_id('TradeMarkStatus').click()
time.sleep(5)
TerritoryLabelElements = driver.find_elements_by_css_selector('div.multiSelectOptions label')
for elem in TerritoryLabelElements:
    if elem.text in ('Filed', 'Registered'):
        elem.click()
time.sleep(5)
driver.find_element_by_id('TradeMarkStatus').click()

# Application date range: the same value is typed into both the "from" and
# "to" fields.
startdate = driver.find_element_by_id("ApplicationDateFrom")
startdate.clear()
startdate.send_keys('10-01-2018')
enddate = driver.find_element_by_id("ApplicationDateTo")
enddate.clear()
enddate.send_keys('10-01-2018')

# Click search
time.sleep(5)
driver.find_element_by_id("SearchCopy").click()
time.sleep(30)

# Click first link: move to the first trade-mark-name cell and click it.
el = driver.find_elements_by_class_name('cell_tmName_column')[0]
action = ActionChains(driver)
action.move_to_element_with_offset(el, 0, 0)
action.click()
action.perform()
time.sleep(10)

# Enter the detail iframe so that lookups resolve against the framed document.
iframe = driver.find_element_by_xpath('//iframe[@id="iframe_0"]')
driver.switch_to.frame(iframe)
# Do something here; this example prints the HTML of the framed document.
# The `iframe` element belongs to the parent document, so it must not be
# queried after the switch, and its innerHTML would not contain the framed
# page anyway. driver.page_source returns the source of the current context.
print(driver.page_source)
# Then switch back to the top-level document:
driver.switch_to.default_content()