Python Selenium WebDriver 找不到某些类

Python Selenium WebDriver 找不到某些类,python,selenium,selenium-webdriver,css-selectors,selenium-chromedriver,Python,Selenium,Selenium Webdriver,Css Selectors,Selenium Chromedriver,抓取网站: 背景信息: Chrome WebDriver Python 3 当前最新版本的 ChromeDriver 和 Selenium(2019年11月22日) 我的目标: 从每个 vf-comment-thread 类的元素中提取评论。 HTML 的部分结构如下所示: <div class="vf-commenting vf-comments-widget"> ... <div class="vf-horizontal-list vf3-conversations-list vf3-conversations-

抓取网站

背景信息

  • Chrome WebDriver

  • Python 3

  • 当前最新版本的 ChromeDriver 和 Selenium(2019年11月22日)
我的目标

从每个 class 为 vf-comment-thread 的元素中提取评论。

HTML 的部分结构如下所示:

<div class="vf-commenting vf-comments-widget">
...
    <div class="vf-horizontal-list vf3-conversations-list vf3-conversations-list--comments">
        <div class="vf-comment-thread"> ... </div>
        <div class="vf-comment-thread"> ... </div>
        <div class="vf-comment-thread"> ... </div>
        ...
    </div>
    ...
</div>

您遇到的问题是:评论是由 JavaScript 动态生成的,因此需要先向下滚动页面,把它们加载出来。

from time import sleep
from selenium import webdriver
# Open a Chrome browser session; the module-level `driver` is the handle
# that ScrollDown() and the scraping code below operate on.
driver = webdriver.Chrome()

def ScrollDown(interal=3.5,looper=20):
    """Scroll the page to the bottom repeatedly so lazily loaded content appears.

    Operates on the module-level ``driver``. Each pass scrolls to the bottom,
    sleeps, and re-reads ``document.body.scrollHeight``; the loop ends after
    ``looper`` passes or as soon as a pass leaves the height unchanged.

    Args:
        interal: Seconds to sleep after each scroll before measuring again.
        looper: Maximum number of scroll passes.
    """
    height_script = "return document.body.scrollHeight"

    # Baseline height to compare against after the first scroll.
    previous_height = driver.execute_script(height_script)
    passes_done = 0

    while passes_done < looper:
        print('Scrolling down to bottom loop {}/{}'.format(passes_done+1,looper))
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Give the page time to load whatever the scroll triggered.
        print('sleeping {} secs'.format(interal))
        sleep(interal)

        current_height = driver.execute_script(height_script)
        if current_height == previous_height:
            # The page did not grow during this pass, so stop scrolling.
            break

        previous_height = current_height
        passes_done += 1

# Imported here so the snippet stays self-contained; By provides the locator
# strategy constants accepted by find_elements().
from selenium.webdriver.common.by import By

driver.get('https://www.cbc.ca/news/canada/new-brunswick/dieppe-newfoundland-mail-packages-1.5367640')

# The comment threads are injected by JavaScript as the page is scrolled, so
# scroll down first to get them loaded before querying the DOM.
ScrollDown()

# NOTE: the find_elements_by_xpath() helpers were deprecated in Selenium 4.0
# and removed in 4.3; find_elements(By.XPATH, ...) works on both 3.x and 4.x.

# Method 1: get all children of the comments list using *
childer_xpath = "//div[contains(@class, 'vf-horizontal-list') and contains(@class ,'conversations-list--comments')]/*"
all_children = driver.find_elements(By.XPATH, childer_xpath)
if all_children:
    print([i.get_attribute("class") for i in all_children])

# Method 2: get all children using the children's tag name
alt_childer_xpath = "//div[contains(@class, 'vf-horizontal-list') and contains(@class ,'conversations-list--comments')]/div"
comm = driver.find_elements(By.XPATH, alt_childer_xpath)
if comm:
    print([i.get_attribute("class") for i in comm])

# Method 3: locate the parent by XPath, then query its children relative to it
Parent_Cooments_xpath = "//div[contains(@class, 'vf-horizontal-list') and contains(@class ,'conversations-list--comments')]"
parent_tag = driver.find_elements(By.XPATH, Parent_Cooments_xpath)
if parent_tag:
    # './*' and '*' are both evaluated relative to the parent element.
    print([i.get_attribute("class") for i in parent_tag[0].find_elements(By.XPATH, './*')])
    print([i.get_attribute("class") for i in parent_tag[0].find_elements(By.XPATH, '*')])

"//div[@class=vvf-horizontal-list']" 缺少一个引号(那个 "v" 的位置本来应该是引号吧?)……另外,你可能想用 contains 而不是等号匹配? 哦,是的,我只是复制的时候不够仔细,我的原始代码是正确的。谢谢你的回答。 你好,谢谢你回答我的紧急问题。我试过你的建议,但它仍然返回一个空列表,而不是这个 div 中子元素的类名集合。你知道其他解决方案吗?或者有没有这方面的专家可以解决我的问题?非常感谢 :) @sherryyuan 我仔细看了一下,解决了这个问题。你现在可以试试了。 非常感谢你,我已经在这上面卡了很长时间,你帮我在几个小时内解决了它。再次感谢你! @sherryyuan 不客气。如果你觉得有用,就给它投票吧。
from time import sleep
from selenium import webdriver
# Open a Chrome browser session; the module-level `driver` is the handle
# that ScrollDown() and the scraping code below operate on.
driver = webdriver.Chrome()

def ScrollDown(interal=3.5,looper=20):
    """Scroll the page to the bottom repeatedly so lazily loaded content appears.

    Operates on the module-level ``driver``. Each pass scrolls to the bottom,
    sleeps, and re-reads ``document.body.scrollHeight``; the loop ends after
    ``looper`` passes or as soon as a pass leaves the height unchanged.

    Args:
        interal: Seconds to sleep after each scroll before measuring again.
        looper: Maximum number of scroll passes.
    """
    height_script = "return document.body.scrollHeight"

    # Baseline height to compare against after the first scroll.
    previous_height = driver.execute_script(height_script)
    passes_done = 0

    while passes_done < looper:
        print('Scrolling down to bottom loop {}/{}'.format(passes_done+1,looper))
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Give the page time to load whatever the scroll triggered.
        print('sleeping {} secs'.format(interal))
        sleep(interal)

        current_height = driver.execute_script(height_script)
        if current_height == previous_height:
            # The page did not grow during this pass, so stop scrolling.
            break

        previous_height = current_height
        passes_done += 1

# Imported here so the snippet stays self-contained; By provides the locator
# strategy constants accepted by find_elements().
from selenium.webdriver.common.by import By

driver.get('https://www.cbc.ca/news/canada/new-brunswick/dieppe-newfoundland-mail-packages-1.5367640')

# The comment threads are injected by JavaScript as the page is scrolled, so
# scroll down first to get them loaded before querying the DOM.
ScrollDown()

# NOTE: the find_elements_by_xpath() helpers were deprecated in Selenium 4.0
# and removed in 4.3; find_elements(By.XPATH, ...) works on both 3.x and 4.x.

# Method 1: get all children of the comments list using *
childer_xpath = "//div[contains(@class, 'vf-horizontal-list') and contains(@class ,'conversations-list--comments')]/*"
all_children = driver.find_elements(By.XPATH, childer_xpath)
if all_children:
    print([i.get_attribute("class") for i in all_children])

# Method 2: get all children using the children's tag name
alt_childer_xpath = "//div[contains(@class, 'vf-horizontal-list') and contains(@class ,'conversations-list--comments')]/div"
comm = driver.find_elements(By.XPATH, alt_childer_xpath)
if comm:
    print([i.get_attribute("class") for i in comm])

# Method 3: locate the parent by XPath, then query its children relative to it
Parent_Cooments_xpath = "//div[contains(@class, 'vf-horizontal-list') and contains(@class ,'conversations-list--comments')]"
parent_tag = driver.find_elements(By.XPATH, Parent_Cooments_xpath)
if parent_tag:
    # './*' and '*' are both evaluated relative to the parent element.
    print([i.get_attribute("class") for i in parent_tag[0].find_elements(By.XPATH, './*')])
    print([i.get_attribute("class") for i in parent_tag[0].find_elements(By.XPATH, '*')])
['vf-comment-thread', 'vf-comment-thread', 'vf-comment-thread']