Python 无限滚动不滚动
我的代码可以抓取 YouTube 视频的标题，但不会向下滚动以抓取所有标题。（标签：python、selenium）代码如下：
import time
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Launch Chrome through webdriver_manager and open the YouTube search page.
# `options` is instantiated here but is not passed to the driver.
options = Options()
driver = webdriver.Chrome(ChromeDriverManager().install())
url = 'https://www.youtube.com/results?search_query=doctor'
driver.get(url)

# The page markup is captured and parsed exactly once, before any scrolling;
# the loop below keeps querying this same parsed snapshot.
content = driver.page_source.encode('utf-8').strip()
soup = BeautifulSoup(content, 'lxml')

# Seconds to wait after each scroll request.
SCROLL_PAUSE_TIME = 0.5


def _body_scroll_height():
    """Return the value of ``document.body.scrollHeight`` from the browser."""
    return driver.execute_script("return document.body.scrollHeight")


last_height = _body_scroll_height()
reached_bottom = False
while not reached_bottom:
    # Ask the browser to jump to the bottom of the document, then wait.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(SCROLL_PAUSE_TIME)

    # Print every title anchor found in the snapshot parsed above.
    titles = soup.findAll('a', id='video-title')
    for title in titles:
        print(title.text)

    # Stop once the reported height is the same as on the previous pass.
    new_height = _body_scroll_height()
    reached_bottom = new_height == last_height
    last_height = new_height
当 Selenium 打开浏览器时，页面根本不会滚动。如何修改代码，使其不断向下滚动并最终抓取所有标题？
import time
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Chrome session setup; note that `options` is built but never given to the
# driver constructor.
options = Options()
driver = webdriver.Chrome(ChromeDriverManager().install())

url = 'https://www.youtube.com/results?search_query=doctor'
driver.get(url)

# One-off parse of the page as it looked right after loading. Nothing below
# refreshes `soup`, so each loop pass reads the same parsed document.
content = driver.page_source.encode('utf-8').strip()
soup = BeautifulSoup(content, 'lxml')

SCROLL_PAUSE_TIME = 0.5  # pause, in seconds, after each scroll command

# Height reported by the browser before the first scroll.
last_height = driver.execute_script("return document.body.scrollHeight")

while True:
    # Scroll to the bottom and give the page a moment.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(SCROLL_PAUSE_TIME)

    # Emit the text of each anchor whose id is 'video-title'.
    titles = soup.findAll('a', id='video-title')
    for title in titles:
        print(title.text)

    # Keep looping only while the reported height is still changing.
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height != last_height:
        last_height = new_height
        continue
    break
您正在使用 `scrollHeight` 获取当前高度，但滚动时该值不会改变。请查阅相关文档，确认是否应该改用 `scrollTop`。文档还指出，应使用下面的等式来检测是否已经滚动到底部：
element.scrollHeight - element.scrollTop === element.clientHeight
下面的代码对我来说运行正常：
import time
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Scroll a YouTube search-results page until its height stops growing and
# print each video title once.
#
# Changes from the earlier version of this snippet:
#   * the page source is re-parsed after every scroll, so titles loaded by
#     the scroll are actually seen (previously the markup was parsed once,
#     before the loop, and the same initial titles were printed each pass);
#   * titles already printed are skipped;
#   * the browser is always closed, even if an error interrupts the loop.

# `options` is created but not passed to the driver, as before.
options = Options()
# NOTE(review): recent Selenium 4 releases expect the driver path via a
# Service object rather than positionally - confirm against the installed
# Selenium version.
driver = webdriver.Chrome(ChromeDriverManager().install())
url = 'https://www.youtube.com/results?search_query=doctor'

# Seconds to wait after each scroll so the page can load more results.
SCROLL_PAUSE_TIME = 1

# Keys of the title links that have already been printed.
seen_titles = set()

try:
    driver.get(url)

    # Get scroll height
    last_height = driver.execute_script("return document.documentElement.scrollHeight")
    while True:
        # Scroll down to the last known bottom of the document.
        print('In While True')
        driver.execute_script("window.scrollTo(0, arguments[0]);", last_height)
        print('executed')

        # Wait to load page
        time.sleep(SCROLL_PAUSE_TIME)
        print('slept')

        # Re-read the live DOM: results appended by the scroll only show up
        # in a fresh copy of the page source.
        content = driver.page_source
        soup = BeautifulSoup(content, 'lxml')
        titles = soup.find_all('a', id='video-title')
        for title in titles:
            # Prefer the link target as the identity of a result; fall back
            # to the text when the anchor has no href.
            key = title.get('href') or title.text
            if key in seen_titles:
                continue
            seen_titles.add(key)
            print(title.text)

        # Calculate new scroll height and compare with last scroll height
        new_height = driver.execute_script("return document.documentElement.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
finally:
    # Release the browser and the chromedriver process.
    driver.quit()