Selenium 无法正常工作,除非我实际查看网页(可能是 JavaScript 的反爬虫机制?)

Selenium 无法正常工作,除非我实际查看网页(可能是 JavaScript 的反爬虫机制?),javascript,python,html,selenium,browser,Javascript,Python,Html,Selenium,Browser,下面的代码只有在我查看网页时才能正常工作(也就是由Selenium操作的Chrome选项卡) 有没有办法让它在我浏览另一个选项卡/窗口时也能正常工作? (我想知道网站怎么知道我是否在看网页……) 顺便说一句,我尝试添加一个用户代理,并使用以下代码向下滚动页面,希望该网页相信我正在“查看它”。但是,我失败了:( 因此,我认为您的问题的答案在于窗口句柄。每当我们打开一个新选项卡时,Selenium 会(显然地)改变窗口焦点。因为焦点在另一个页面上,我们需要使用 driver.switch_to.window(此处

下面的代码只有在我查看网页时才能正常工作(也就是由Selenium操作的Chrome选项卡)

有没有办法让它在我浏览另一个选项卡/窗口时也能正常工作?

(我想知道网站怎么知道我是否在看网页……)

顺便说一句,我尝试添加一个用户代理,并使用以下代码向下滚动页面,希望该网页相信我正在“查看它”。但是,我失败了:(


因此,我认为您的问题的答案在于窗口句柄(window handles)。每当我们打开一个新选项卡时,Selenium 会(显然地)改变窗口焦点。因为焦点在另一个页面上,我们需要使用 driver.switch_to.window(此处填句柄) 方法。这样,我们就可以切换到合适的选项卡。为了演示这一点,我找到了一个具有类似功能的网站(也有日语/汉字?),可能会对您有所帮助。

主程序-供参考

from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as DriverWait
from selenium.webdriver.support import expected_conditions as DriverConditions
from selenium.common.exceptions import WebDriverException
import time


def get_chrome_driver():
    """Create and return a Chrome WebDriver with a custom window size.

    Returns:
        The ``webdriver.Chrome`` instance started from the chromedriver
        executable at the hard-coded path below.
    """
    # Raw string: the Windows path contains backslashes (\S, \W, \c) that a
    # normal string literal would parse as invalid escape sequences.
    path_to_chrome = r"F:\Selenium_Drivers\Windows_Chrome85_Driver\chromedriver.exe"
    chrome_options = webdriver.ChromeOptions()

    # Browser is displayed in a custom window size
    chrome_options.add_argument("window-size=1500,1000")

    # NOTE(review): `executable_path` is kept as in the original call;
    # confirm it is still accepted by the installed Selenium version.
    return webdriver.Chrome(executable_path=path_to_chrome,
                            options=chrome_options)

    
def wait_displayed(driver : ChromeDriver, xpath: str, int = 5):
    """Wait until an element matching ``xpath`` is present on the page.

    The wait uses ``presence_of_element_located``, i.e. it checks for the
    element's presence rather than its visibility.

    Args:
        driver: The Chrome driver used for the wait.
        xpath: XPath expression identifying the element to wait for.
        int: Timeout handed to the wait (defaults to 5). The name shadows
            the builtin but is kept so existing callers are not broken.

    Raises:
        WebDriverException: If the wait fails; the original Selenium error
            is attached as the cause.
    """
    try:
        DriverWait(driver, int).until(
            DriverConditions.presence_of_element_located(locator=(By.XPATH, xpath))
        )
    # Only Selenium failures are translated; a bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit and hide programming errors.
    except WebDriverException as err:
        raise WebDriverException(f'Timeout: Failed to find {xpath}') from err


# Gets our chrome driver and opens our site
chrome_driver = get_chrome_driver()
try:
    chrome_driver.get("https://freelance.levtech.jp/project/search/?keyword=&srchbtn=top_search")
    wait_displayed(chrome_driver, "//div[@class='l-contentWrap']//ul[@class='asideCta']")
    wait_displayed(chrome_driver, "//div[@class='l-main']//ul[@class='prjList']")
    wait_displayed(chrome_driver, "//div[@class='l-main']//ul[@class='prjList']//li[contains(@class, 'prjList__item')][1]")

    # Click on the first item title link
    titleLinkXpath = "(//div[@class='l-main']//ul[@class='prjList']//li[contains(@class, 'prjList__item')][1]//a[contains(@href, '/project/detail/')])[1]"
    chrome_driver.find_element(By.XPATH, titleLinkXpath).click()
    # Pause before counting handles; the check below expects a second tab
    time.sleep(2)

    # Get the currently displayed window handles
    tabs_open = chrome_driver.window_handles
    if len(tabs_open) != 2:
        raise Exception("Failed to click on our Link's Header")
    print(f'You have: {len(tabs_open)} tabs open')

    # Switch to the 2nd tab and then close it
    chrome_driver.switch_to.window(tabs_open[1])
    chrome_driver.close()

    # Check how many tabs we have open
    tabs_open = chrome_driver.window_handles
    if len(tabs_open) != 1:
        raise Exception("Failed to close our 2nd tab")
    print(f'You have: {len(tabs_open)} tabs open')

    # Switch back to our main tab
    chrome_driver.switch_to.window(tabs_open[0])
finally:
    # Always shut the browser and driver service down, even when one of the
    # steps above raised, so no Chrome/chromedriver process is left behind.
    chrome_driver.quit()
    chrome_driver.service.stop()
对于滚动,可以使用此方法

def scroll_to_element(driver : ChromeDriver, xpath : str, int = 5):
    """Wait for the element matching ``xpath`` and scroll it into view.

    Args:
        driver: The Chrome driver used to wait and to run the script.
        xpath: XPath expression identifying the element to scroll to.
        int: Timeout handed to the wait (defaults to 5). The name shadows
            the builtin but is kept so existing callers are not broken.

    Raises:
        WebDriverException: If waiting for the element or running the
            scroll script fails; the original Selenium error is attached
            as the cause.
    """
    try:
        webElement = DriverWait(driver, int).until(
            DriverConditions.presence_of_element_located(locator=(By.XPATH, xpath))
        )
        # The located element is passed to the script as arguments[0]
        driver.execute_script("arguments[0].scrollIntoView();", webElement)
    # Only Selenium failures are translated; a bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit and hide programming errors.
    except WebDriverException as err:
        raise WebDriverException(f'Timeout: Failed to find element using xpath {xpath}\nResult: Could not scroll') from err

您好,Zvjezdan Veselinovic,感谢您告诉我如何使用
chrome_driver.switch_to.window(tabs_open[0]) 将浏览器切换到某个选项卡。
我希望让爬虫程序在后台工作,这样我可以同时查看其他内容;但如果这不可能,那么我就使用这个很棒的解决方法。非常感谢!
@AveryWu - 没问题。如果此解决方案对您有帮助,您可以将其标记为已解决。否则,您可以将此问题留给其他人尝试解答。
@AveryWu - Chrome 驱动程序应该可以在后台工作,因为当 selenium 打开浏览器时,它在我们的 Windows 任务管理器中有自己的实例。您不需要盯着 Chrome 窗口,它也会为您抓取数据。您可以打开 Microsoft Word 写论文,或者打开 Outlook 处理电子邮件,应该都没有问题。另外,研究一下 selenium 的并行化,您将看到如何打开多个 Chrome 窗口,它们都可以执行不同的任务。
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as DriverWait
from selenium.webdriver.support import expected_conditions as DriverConditions
from selenium.common.exceptions import WebDriverException
import time


def get_chrome_driver():
    """Create and return a Chrome WebDriver with a custom window size.

    Returns:
        The ``webdriver.Chrome`` instance started from the chromedriver
        executable at the hard-coded path below.
    """
    # Raw string: the Windows path contains backslashes (\S, \W, \c) that a
    # normal string literal would parse as invalid escape sequences.
    path_to_chrome = r"F:\Selenium_Drivers\Windows_Chrome85_Driver\chromedriver.exe"
    chrome_options = webdriver.ChromeOptions()

    # Browser is displayed in a custom window size
    chrome_options.add_argument("window-size=1500,1000")

    # NOTE(review): `executable_path` is kept as in the original call;
    # confirm it is still accepted by the installed Selenium version.
    return webdriver.Chrome(executable_path=path_to_chrome,
                            options=chrome_options)

    
def wait_displayed(driver : ChromeDriver, xpath: str, int = 5):
    """Wait until an element matching ``xpath`` is present on the page.

    The wait uses ``presence_of_element_located``, i.e. it checks for the
    element's presence rather than its visibility.

    Args:
        driver: The Chrome driver used for the wait.
        xpath: XPath expression identifying the element to wait for.
        int: Timeout handed to the wait (defaults to 5). The name shadows
            the builtin but is kept so existing callers are not broken.

    Raises:
        WebDriverException: If the wait fails; the original Selenium error
            is attached as the cause.
    """
    try:
        DriverWait(driver, int).until(
            DriverConditions.presence_of_element_located(locator=(By.XPATH, xpath))
        )
    # Only Selenium failures are translated; a bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit and hide programming errors.
    except WebDriverException as err:
        raise WebDriverException(f'Timeout: Failed to find {xpath}') from err


# Gets our chrome driver and opens our site
chrome_driver = get_chrome_driver()
try:
    chrome_driver.get("https://freelance.levtech.jp/project/search/?keyword=&srchbtn=top_search")
    wait_displayed(chrome_driver, "//div[@class='l-contentWrap']//ul[@class='asideCta']")
    wait_displayed(chrome_driver, "//div[@class='l-main']//ul[@class='prjList']")
    wait_displayed(chrome_driver, "//div[@class='l-main']//ul[@class='prjList']//li[contains(@class, 'prjList__item')][1]")

    # Click on the first item title link
    titleLinkXpath = "(//div[@class='l-main']//ul[@class='prjList']//li[contains(@class, 'prjList__item')][1]//a[contains(@href, '/project/detail/')])[1]"
    chrome_driver.find_element(By.XPATH, titleLinkXpath).click()
    # Pause before counting handles; the check below expects a second tab
    time.sleep(2)

    # Get the currently displayed window handles
    tabs_open = chrome_driver.window_handles
    if len(tabs_open) != 2:
        raise Exception("Failed to click on our Link's Header")
    print(f'You have: {len(tabs_open)} tabs open')

    # Switch to the 2nd tab and then close it
    chrome_driver.switch_to.window(tabs_open[1])
    chrome_driver.close()

    # Check how many tabs we have open
    tabs_open = chrome_driver.window_handles
    if len(tabs_open) != 1:
        raise Exception("Failed to close our 2nd tab")
    print(f'You have: {len(tabs_open)} tabs open')

    # Switch back to our main tab
    chrome_driver.switch_to.window(tabs_open[0])
finally:
    # Always shut the browser and driver service down, even when one of the
    # steps above raised, so no Chrome/chromedriver process is left behind.
    chrome_driver.quit()
    chrome_driver.service.stop()
def scroll_to_element(driver : ChromeDriver, xpath : str, int = 5):
    """Wait for the element matching ``xpath`` and scroll it into view.

    Args:
        driver: The Chrome driver used to wait and to run the script.
        xpath: XPath expression identifying the element to scroll to.
        int: Timeout handed to the wait (defaults to 5). The name shadows
            the builtin but is kept so existing callers are not broken.

    Raises:
        WebDriverException: If waiting for the element or running the
            scroll script fails; the original Selenium error is attached
            as the cause.
    """
    try:
        webElement = DriverWait(driver, int).until(
            DriverConditions.presence_of_element_located(locator=(By.XPATH, xpath))
        )
        # The located element is passed to the script as arguments[0]
        driver.execute_script("arguments[0].scrollIntoView();", webElement)
    # Only Selenium failures are translated; a bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit and hide programming errors.
    except WebDriverException as err:
        raise WebDriverException(f'Timeout: Failed to find element using xpath {xpath}\nResult: Could not scroll') from err