使用带chromedriver的Selenium Python拍摄完整页面的屏幕截图_Python_Selenium_Selenium Chromedriver_Webpage Screenshot

使用带chromedriver的Selenium Python拍摄完整页面的屏幕截图

python selenium

使用带chromedriver的Selenium Python拍摄完整页面的屏幕截图,python,selenium,selenium-chromedriver,webpage-screenshot,Python,Selenium,Selenium Chromedriver,Webpage Screenshot,在尝试了各种方法之后。。。我偶然发现了这个页面，并用chromedriver、selenium和python截取了整个页面的屏幕原始代码是。（我在下面的帖子中复制了代码）它使用了PIL，而且工作得很好！然而，有一个问题。。。也就是说，它捕获固定的标题并重复整个页面，并且在页面更改过程中遗漏页面的某些部分。截图示例url：如何使用此代码避免重复的标题。。。或者有没有更好的选择只使用python…（我不懂java，也不想使用java）。请参见下面当前结果的屏幕截图和示例代码 test.p

在尝试了各种方法之后。。。我偶然发现了这个页面，并用chromedriver、selenium和python截取了整个页面的屏幕

原始代码是。（我在下面的帖子中复制了代码）

它使用了PIL，而且效果很好！然而，有一个问题……它会把固定定位的页眉（fixed header）截进每一屏，导致页眉在整张拼接图中反复出现，并且在滚动切换的过程中会遗漏页面的某些部分。截图示例url：

如何修改这段代码以避免页眉（header）重复出现……或者有没有只使用python的更好方案？（我不懂java，也不想使用java）。

请参见下面当前结果的屏幕截图和示例代码

test.py

"""
This script uses a simplified version of the one here:
https://snipt.net/restrada/python-selenium-workaround-for-full-page-screenshot-using-chromedriver-2x/

It contains the *crucial* correction added in the comments by Jason Coutu.
"""

import sys

from selenium import webdriver
import unittest

import util

class Test(unittest.TestCase):
    """Demo test case: have Chrome produce a screenshot of the entire page."""

    def setUp(self):
        # Every test gets its own Chrome session.
        self.driver = webdriver.Chrome()

    def tearDown(self):
        # End the browser session once the test is over.
        self.driver.quit()

    def test_fullpage_screenshot(self):
        """Load the sample page and save a document-height screenshot."""
        # Alternative sample page: http://effbot.org/imagingbook/introduction.htm
        browser = self.driver
        browser.get("http://www.w3schools.com/js/default.asp")
        util.fullpage_screenshot(browser, "test.png")


if __name__ == "__main__":
    unittest.main(argv=[sys.argv[0]])

import os
import time

from PIL import Image

def fullpage_screenshot(driver, file):
    """Stitch a full-page screenshot together from viewport-sized captures.

    The driver only captures the visible viewport, so the document is split
    into viewport-sized tiles. Each tile is scrolled into view, captured in
    memory and pasted into a single image covering the whole document.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file: Destination passed to ``Image.save`` for the stitched image.

    Returns:
        True once the stitched image has been saved.

    Raises:
        ValueError: If the page reports a non-positive viewport size, which
            would otherwise make the tiling loops run forever.

    Note:
        Elements with ``position: fixed`` (e.g. a sticky header) are still
        captured in every tile; neutralise them before calling if needed.
    """
    from io import BytesIO  # function-scope import keeps the block self-contained

    print("Starting chrome full page screenshot workaround ...")

    total_width = driver.execute_script("return document.body.offsetWidth")
    total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
    viewport_width = driver.execute_script("return document.body.clientWidth")
    viewport_height = driver.execute_script("return window.innerHeight")
    # Screenshots are measured in device pixels, the values above in CSS
    # pixels; the ratio converts between the two (1 on standard displays).
    pixel_ratio = driver.execute_script("return window.devicePixelRatio") or 1
    print("Total: ({0}, {1}), Viewport: ({2},{3})".format(total_width, total_height,viewport_width,viewport_height))

    if viewport_width <= 0 or viewport_height <= 0:
        raise ValueError("viewport size must be positive, got ({0}, {1})".format(
            viewport_width, viewport_height))

    def to_device_px(css_px):
        # Pillow needs integers; the ratio may be fractional (e.g. 1.25).
        return int(round(css_px * pixel_ratio))

    rectangles = []
    top = 0
    while top < total_height:
        bottom = min(top + viewport_height, total_height)
        left = 0
        while left < total_width:
            right = min(left + viewport_width, total_width)
            print("Appending rectangle ({0},{1},{2},{3})".format(left, top, right, bottom))
            rectangles.append((left, top, right, bottom))
            left += viewport_width
        top += viewport_height

    # The browser cannot scroll past these positions, so the last row/column
    # is captured from here rather than from the requested tile origin.
    max_scroll_x = max(total_width - viewport_width, 0)
    max_scroll_y = max(total_height - viewport_height, 0)

    stitched_image = Image.new('RGB', (to_device_px(total_width), to_device_px(total_height)))

    for part, (left, top, _right, _bottom) in enumerate(rectangles):
        # Scroll for every tile, including the first, so the result does not
        # depend on where the page happened to be scrolled beforehand.
        driver.execute_script("window.scrollTo({0}, {1})".format(left, top))
        print("Scrolled To ({0},{1})".format(left, top))
        time.sleep(0.2)

        print("Capturing part {0} ...".format(part))
        # Capture in memory: no temporary files to clean up or leave behind.
        screenshot = Image.open(BytesIO(driver.get_screenshot_as_png()))

        offset = (min(left, max_scroll_x), min(top, max_scroll_y))
        print("Adding to stitched image with offset ({0}, {1})".format(offset[0],offset[1]))
        stitched_image.paste(screenshot, (to_device_px(offset[0]), to_device_px(offset[1])))

    stitched_image.save(file)
    print("Finishing chrome full page screenshot workaround...")
    return True

util.py

"""
This script uses a simplified version of the one here:
https://snipt.net/restrada/python-selenium-workaround-for-full-page-screenshot-using-chromedriver-2x/

It contains the *crucial* correction added in the comments by Jason Coutu.
"""

import sys

from selenium import webdriver
import unittest

import util

class Test(unittest.TestCase):
    """Demo test case: have Chrome produce a screenshot of the entire page."""

    def setUp(self):
        # Every test gets its own Chrome session.
        self.driver = webdriver.Chrome()

    def tearDown(self):
        # End the browser session once the test is over.
        self.driver.quit()

    def test_fullpage_screenshot(self):
        """Load the sample page and save a document-height screenshot."""
        # Alternative sample page: http://effbot.org/imagingbook/introduction.htm
        browser = self.driver
        browser.get("http://www.w3schools.com/js/default.asp")
        util.fullpage_screenshot(browser, "test.png")


if __name__ == "__main__":
    unittest.main(argv=[sys.argv[0]])

import os
import time

from PIL import Image

def fullpage_screenshot(driver, file):
    """Stitch a full-page screenshot together from viewport-sized captures.

    The driver only captures the visible viewport, so the document is split
    into viewport-sized tiles. Each tile is scrolled into view, captured in
    memory and pasted into a single image covering the whole document.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file: Destination passed to ``Image.save`` for the stitched image.

    Returns:
        True once the stitched image has been saved.

    Raises:
        ValueError: If the page reports a non-positive viewport size, which
            would otherwise make the tiling loops run forever.

    Note:
        Elements with ``position: fixed`` (e.g. a sticky header) are still
        captured in every tile; neutralise them before calling if needed.
    """
    from io import BytesIO  # function-scope import keeps the block self-contained

    print("Starting chrome full page screenshot workaround ...")

    total_width = driver.execute_script("return document.body.offsetWidth")
    total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
    viewport_width = driver.execute_script("return document.body.clientWidth")
    viewport_height = driver.execute_script("return window.innerHeight")
    # Screenshots are measured in device pixels, the values above in CSS
    # pixels; the ratio converts between the two (1 on standard displays).
    pixel_ratio = driver.execute_script("return window.devicePixelRatio") or 1
    print("Total: ({0}, {1}), Viewport: ({2},{3})".format(total_width, total_height,viewport_width,viewport_height))

    if viewport_width <= 0 or viewport_height <= 0:
        raise ValueError("viewport size must be positive, got ({0}, {1})".format(
            viewport_width, viewport_height))

    def to_device_px(css_px):
        # Pillow needs integers; the ratio may be fractional (e.g. 1.25).
        return int(round(css_px * pixel_ratio))

    rectangles = []
    top = 0
    while top < total_height:
        bottom = min(top + viewport_height, total_height)
        left = 0
        while left < total_width:
            right = min(left + viewport_width, total_width)
            print("Appending rectangle ({0},{1},{2},{3})".format(left, top, right, bottom))
            rectangles.append((left, top, right, bottom))
            left += viewport_width
        top += viewport_height

    # The browser cannot scroll past these positions, so the last row/column
    # is captured from here rather than from the requested tile origin.
    max_scroll_x = max(total_width - viewport_width, 0)
    max_scroll_y = max(total_height - viewport_height, 0)

    stitched_image = Image.new('RGB', (to_device_px(total_width), to_device_px(total_height)))

    for part, (left, top, _right, _bottom) in enumerate(rectangles):
        # Scroll for every tile, including the first, so the result does not
        # depend on where the page happened to be scrolled beforehand.
        driver.execute_script("window.scrollTo({0}, {1})".format(left, top))
        print("Scrolled To ({0},{1})".format(left, top))
        time.sleep(0.2)

        print("Capturing part {0} ...".format(part))
        # Capture in memory: no temporary files to clean up or leave behind.
        screenshot = Image.open(BytesIO(driver.get_screenshot_as_png()))

        offset = (min(left, max_scroll_x), min(top, max_scroll_y))
        print("Adding to stitched image with offset ({0}, {1})".format(offset[0],offset[1]))
        stitched_image.paste(screenshot, (to_device_px(offset[0]), to_device_px(offset[1])))

    stitched_image.save(file)
    print("Finishing chrome full page screenshot workaround...")
    return True

导入操作系统
导入时间
从PIL导入图像
def fullpage_屏幕截图（驱动程序、文件）：
打印（“启动chrome全页屏幕截图解决方案…”）
总宽度=驱动程序。执行脚本（“return document.body.offsetWidth”）
total\u height=driver.execute\u脚本（“return document.body.parentNode.scrollHeight”）
viewport\u width=driver.execute\u脚本（“return document.body.clientWidth”）
viewport\u height=driver.execute\u脚本（“return window.innerHeight”）
打印（“总计：（{0}，{1}），视口：（{2}，{3}）”。格式（总宽度，总高度，视口宽度，视口高度））
矩形=[]
i=0
当i<总高度：
ii=0
顶部高度=i+视口高度
如果顶部高度>总高度：
顶部高度=总高度
ii<总宽度：
顶部宽度=ii+视口宽度
如果顶部宽度>总宽度：
顶部宽度=总宽度
打印（“附加矩形（{0}，{1}，{2}，{3}）”。格式（二，一，顶宽，顶高）
矩形。附加（（ii，i，顶部宽度，顶部高度））
ii=ii+视口宽度
i=i+视口高度
缝合图像=图像。新建（'RGB'，（总宽度，总高度））
先前=无
部分=0
对于矩形中的矩形：
如果“非上一个”为“无”：
driver.execute_脚本（“window.scrollTo（{0}，{1}）”.format（矩形[0]，矩形[1]））
打印（“滚动到（{0}，{1}）”。格式（矩形[0]，矩形[1]））
睡眠时间（0.2）
file_name=“part_{0}.png”。格式（part）
打印（“捕获{0}…”格式（文件名））
驱动程序。获取屏幕截图作为文件（文件名）
屏幕截图=Image.open（文件名）
如果矩形[1]+视口高度>总高度：
偏移=（矩形[0]，总高度-视口高度）
其他：
偏移量=（矩形[0]，矩形[1]）
打印（“添加到具有偏移量（{0}，{1}）的缝合图像”。格式（偏移量[0]，偏移量[1]））
缝合的_图像。粘贴（截图，偏移）
德尔截图
删除（文件名）
零件=零件+1
上一个=矩形
缝合的_图像。保存（文件）
打印（“完成chrome全页屏幕截图解决方案…”）
返回真值

您可以在截图之前修改页眉（header）的CSS来实现这一点：

topnav = driver.find_element_by_id("topnav")
driver.execute_script("arguments[0].setAttribute('style', 'position: absolute; top: 0px;')", topnav)

编辑：将此行放在窗口滚动后：

driver.execute_script("document.getElementById('topnav').setAttribute('style', 'position: absolute; top: 0px;');")

因此，在您的util.py中：

driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
driver.execute_script("document.getElementById('topnav').setAttribute('style', 'position: absolute; top: 0px;');")

如果站点使用的是 <header> 标签，那么按照 @Moshisho 的方法，您可以改用 find_element_by_tag_name("header") 来查找该元素。

我的完整、可独立运行的脚本如下……（在每次滚动和每次修改定位之后都加了 time.sleep(0.2)）

导入系统从selenium导入webdriver 导入util 导入操作系统导入时间从PIL导入图像 def fullpage_屏幕截图（驱动程序、文件）：打印（“启动chrome全页屏幕截图解决方案…”）总宽度=驱动程序。执行脚本（“return document.body.offsetWidth”） total\u height=driver.execute\u脚本（“return document.body.parentNode.scrollHeight”） viewport\u width=driver.execute\u脚本（“return document.body.clientWidth”） viewport\u height=driver.execute\u脚本（“return window.innerHeight”）打印（“总计：（{0}，{1}），视口：（{2}，{3}）”。格式（总宽度，总高度，视口宽度，视口高度））矩形=[] i=0 当i<总高度： ii=0 顶部高度=i+视口高度如果顶部高度>总高度：顶部高度=总高度 ii<总宽度：顶部宽度=ii+视口宽度如果顶部宽度>总宽度：顶部宽度=总宽度打印（“附加矩形（{0}，{1}，{2}，{3}）”。格式（二，一，顶宽，顶高）矩形。附加（（ii，i，顶部宽度，顶部高度）） ii=ii+视口宽度 i=i+视口高度缝合图像=图像。新建（'RGB'，（总宽度，总高度））先前=无部分=0 对于矩形中的矩形：如果“非上一个”为“无”： driver.execute_脚本（“window.scrollTo（{0}，{1}）”.format（矩形[0]，矩形[1]））睡眠时间（0.2） driver.execute_脚本（“document.getElementById（'topnav'）.setAttribute（'style'，'position:absolute；top:0px；'）；”）睡眠时间（0.2）打印（“滚动到（{0}，{1}）”。格式（矩形[0]，矩形[1]））睡眠时间（0.2） file_name=“part_{0}.png”。格式（part）打印（“捕获{0}…”格式（文件名））驱动程序。获取屏幕截图作为文件（文件名）屏幕截图=Image.open（文件名）如果矩形[1]+视口高度>总高度：抵消

from selenium import webdriver

driver = webdriver.Firefox()
driver.get('https://developer.mozilla.org/')
element = driver.find_element_by_tag_name('body')
element_png = element.screenshot_as_png
with open("test2.png", "wb") as file:
    file.write(element_png)

element=driver.find_element_by_tag_name('body')
element_png = element.screenshot_as_png
with open("test2.png", "wb") as file:
    file.write(element_png)

# NOTE(review): this is a fragment of a larger function -- `driver` and
# `fname` are defined outside the lines shown here, and the indentation of
# the following lines belongs to that enclosing scope.
# Select every grandchild of <html> (i.e. the children of its child elements).
element=driver.find_elements_by_xpath("/html/child::*/child::*")
    # Collect the rounded height of each selected element.
    # NOTE(review): a set stores each distinct height only once, so elements
    # that share a height contribute a single time to the sum below --
    # confirm this is intended.
    eheight=set()
    for e in element:
        eheight.add(round(e.size["height"]))
    print (eheight)
    total_height = sum(eheight)
    # Force <html> to the computed height (this replaces its whole inline style).
    driver.execute_script("document.getElementsByTagName('html')[0].setAttribute('style', 'height:"+str(total_height)+"px')")
    # Capture <body> as PNG bytes and write them to `fname`.
    element=driver.find_element_by_tag_name('body')
    element_png = element.screenshot_as_png
    with open(fname, "wb") as file:
        file.write(element_png)

# 1. get dimensions
browser = webdriver.Chrome(chrome_options=options)
browser.set_window_size(default_width, default_height)
browser.get(url)
time.sleep(sometime)
total_height = browser.execute_script("return document.body.parentNode.scrollHeight")
browser.quit()

# 2. get screenshot
browser = webdriver.Chrome(chrome_options=options)
browser.set_window_size(default_width, total_height)
browser.get(url)  
browser.save_screenshot(screenshot_path)

import time
from PIL import Image
from io import BytesIO

def fullpage_screenshot(driver, file, scroll_delay=0.3):
    """Capture the whole page by scrolling vertically and stitching slices.

    Each viewport-high slice is scrolled into view, captured in memory and
    pasted straight into the final image, scaled by ``devicePixelRatio`` so
    the result is also correct on HiDPI displays.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file: Destination passed to ``Image.save`` for the stitched image.
        scroll_delay: Seconds to wait after each scroll before capturing.

    Raises:
        ValueError: If the page is wider than the viewport (horizontal
            scrolling is not implemented here) or the reported viewport
            height is not positive.
    """
    device_pixel_ratio = driver.execute_script('return window.devicePixelRatio')

    total_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    viewport_height = driver.execute_script('return window.innerHeight')
    total_width = driver.execute_script('return document.body.offsetWidth')
    viewport_width = driver.execute_script("return document.body.clientWidth")

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if viewport_width != total_width:
        raise ValueError(
            "this implementation assumes viewport_width == total_width, "
            "got {0} != {1}".format(viewport_width, total_width))
    if viewport_height <= 0:
        # A zero step would never advance the scroll loop below.
        raise ValueError("viewport height must be positive, got {0}".format(viewport_height))

    def to_device_px(css_px):
        # devicePixelRatio may be fractional (e.g. 1.25); Pillow only
        # accepts integer sizes and offsets.
        return int(round(css_px * device_pixel_ratio))

    stitched_image = Image.new('RGB', (to_device_px(total_width), to_device_px(total_height)))

    # Furthest position the page can scroll to; never negative, even when
    # the page is shorter than the viewport.
    last_offset = max(total_height - viewport_height, 0)

    offset = 0  # vertical scroll position
    while offset < total_height:
        # The final slice is taken from the bottom-aligned position.
        offset = min(offset, last_offset)

        driver.execute_script('window.scrollTo({0}, {1})'.format(0, offset))
        time.sleep(scroll_delay)

        img = Image.open(BytesIO(driver.get_screenshot_as_png()))
        # Paste immediately rather than keeping every slice in memory.
        stitched_image.paste(img, (0, to_device_px(offset)))

        offset = offset + viewport_height

    stitched_image.save(file)

fullpage_screenshot(driver, 'test.png')

browser = webdriver.Chrome(chrome_options=options)
browser.set_window_size(default_width, default_height)
browser.get(url)
height = browser.execute_script("return document.body.parentNode.scrollHeight")

# 2. get screenshot
browser.set_window_size(default_width, height)
browser.save_screenshot(screenshot_path)

browser.quit()

from selenium import webdriver

def save_screenshot(driver: webdriver.Chrome, path: str = '/tmp/screenshot.png') -> None:
    """Save a full-page screenshot by temporarily resizing the window.

    The window is enlarged to the document's scroll width/height, the
    ``<body>`` element is captured to *path*, and the original window size
    is restored afterwards -- also when the capture fails.

    Args:
        driver: Chrome WebDriver with the target page already loaded.
        path: File the PNG screenshot is written to.
    """
    # Ref: https://stackoverflow.com/a/52572919/
    original_size = driver.get_window_size()
    required_width = driver.execute_script('return document.body.parentNode.scrollWidth')
    required_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    try:
        driver.set_window_size(required_width, required_height)
        # Capturing <body> instead of the window keeps the scrollbar out of
        # the image. find_element(by, value) is used because the
        # find_element_by_* helpers no longer exist in current Selenium.
        driver.find_element('tag name', 'body').screenshot(path)
    finally:
        driver.set_window_size(original_size['width'], original_size['height'])

total_width = driver.execute_script("return document.body.offsetWidth")
total_height = driver.execute_script("return document.body.scrollHeight")
driver.set_window_size(total_width, total_height)
driver.save_screenshot("SomeName.png")

from selenium import webdriver

url = 'https://stackoverflow.com/'
path = '/path/to/save/in/scrape.png'

driver = webdriver.Chrome()
driver.get(url)
el = driver.find_element_by_tag_name('body')
el.screenshot(path)
driver.quit()

await driver.executeScript(`
      document.documentElement.style.display = "table";
      document.documentElement.style.width = "100%";
      document.body.style.display = "table-row";
`);

await driver.findElement(By.css('body')).takeScreenshot();

URL = 'http://www.w3schools.com/js/default.asp'

options = webdriver.ChromeOptions()
options.headless = True

driver = webdriver.Chrome(options=options)
driver.get(URL)

S = lambda X: driver.execute_script('return document.body.parentNode.scroll'+X)
driver.set_window_size(S('Width'),S('Height')) # May need manual adjustment
driver.find_element_by_tag_name('body').screenshot('web_screenshot.png')

driver.quit()

scheight = .1
while scheight < 9.9:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight/%s);" % scheight)
    scheight += .01

import os

from selenium import webdriver
from PIL import Image


def full_screenshot(driver: webdriver):
    """Return a stitched full-page screenshot as a PIL image.

    The page is scrolled to the top, split into viewport-sized tiles, and
    every tile is captured in memory and pasted into one image covering the
    whole document.

    Args:
        driver: Selenium WebDriver with the target page already loaded.

    Returns:
        The stitched ``PIL.Image.Image`` (not saved to disk).

    Raises:
        ValueError: If the page reports a non-positive viewport size, which
            would otherwise make the tiling loops run forever.
    """
    from io import BytesIO  # function-scope import keeps the block self-contained

    driver.execute_script("window.scrollTo(0, 0)")
    total_width = driver.execute_script("return document.body.offsetWidth")
    total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
    viewport_width = driver.execute_script("return document.body.clientWidth")
    viewport_height = driver.execute_script("return window.innerHeight")
    # Screenshots are measured in device pixels, the values above in CSS
    # pixels; the ratio converts between the two (1 on standard displays).
    pixel_ratio = driver.execute_script("return window.devicePixelRatio") or 1

    if viewport_width <= 0 or viewport_height <= 0:
        raise ValueError("viewport size must be positive, got ({0}, {1})".format(
            viewport_width, viewport_height))

    def to_device_px(css_px):
        # Pillow needs integers; the ratio may be fractional (e.g. 1.25).
        return int(round(css_px * pixel_ratio))

    # Top-left corner of every tile, row by row.
    origins = []
    top = 0
    while top < total_height:
        left = 0
        while left < total_width:
            origins.append((left, top))
            left += viewport_width
        top += viewport_height

    # The browser cannot scroll past these positions, so the last row/column
    # is captured from here rather than from the requested tile origin.
    max_scroll_x = max(total_width - viewport_width, 0)
    max_scroll_y = max(total_height - viewport_height, 0)

    stitched_image = Image.new('RGB', (to_device_px(total_width), to_device_px(total_height)))

    for index, (left, top) in enumerate(origins):
        if index:
            # The first tile is already in view after the initial scroll.
            driver.execute_script("window.scrollTo({0}, {1})".format(left, top))
        # Capture in memory: no temporary files to clean up or leave behind.
        screenshot = Image.open(BytesIO(driver.get_screenshot_as_png()))
        offset = (to_device_px(min(left, max_scroll_x)), to_device_px(min(top, max_scroll_y)))
        stitched_image.paste(screenshot, offset)
    return stitched_image

from PIL import Image
from io import BytesIO

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def open_url(url):
    """Open *url* in headless Chrome and save a screenshot to ``screen.png``.

    The browser session is always shut down afterwards, even when loading
    the page or taking the screenshot fails.

    Args:
        url: Address of the page to capture.
    """
    options = Options()
    # Command-line switch instead of the `headless` property setter.
    options.add_argument('--headless')

    driver = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver.get(url)
        save_screenshot(driver, 'screen.png')
    finally:
        # Without quit() every call would leave a Chrome process behind.
        driver.quit()

def save_screenshot(driver, file_name):
    """Resize the window to the full page size and save a screenshot.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file_name: Path the captured image is written to.
    """
    # scroll_down() returns the page size as (height, width).
    page_height, page_width = scroll_down(driver)
    driver.set_window_size(page_width, page_height)
    png_bytes = driver.get_screenshot_as_png()
    Image.open(BytesIO(png_bytes)).save(file_name)
    print(" screenshot saved ")


def scroll_down(driver, scroll_delay=0.5):
    """Scroll through the page one viewport at a time and return its size.

    The page is walked tile by tile (left to right, top to bottom). The
    first tile is taken to be in view already, so scrolling starts with the
    second one; after every scroll the function pauses for *scroll_delay*
    seconds.

    Args:
        driver: Object exposing ``execute_script`` (a Selenium WebDriver)
            with the target page already loaded.
        scroll_delay: Seconds to wait after each scroll. Defaults to 0.5.

    Returns:
        ``(total_height, total_width)`` as reported by the page.
    """
    # Function-scope import: the imports accompanying this function do not
    # bring `time` into scope.
    import time

    total_width = driver.execute_script("return document.body.offsetWidth")
    total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
    viewport_width = driver.execute_script("return document.body.clientWidth")
    viewport_height = driver.execute_script("return window.innerHeight")

    # A non-positive viewport would never advance the loops below, so the
    # walk is skipped instead of hanging.
    if viewport_width > 0 and viewport_height > 0:
        first_tile = True
        top = 0
        while top < total_height:
            left = 0
            while left < total_width:
                if first_tile:
                    first_tile = False
                else:
                    driver.execute_script("window.scrollTo({0}, {1})".format(left, top))
                    time.sleep(scroll_delay)
                left += viewport_width
            top += viewport_height

    return (total_height, total_width)

open_url("https://www.medium.com")

#coding=utf-8
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def test_fullpage_screenshot(self=None):
    """Save a full-page screenshot by making the headless window page-tall.

    Args:
        self: Unused. It defaults to None so the function can be called
            without arguments, as the ``__main__`` guard below does.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--start-maximized')
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get("yoururlxxx")
        time.sleep(2)

        #the element with longest height on page
        ele=driver.find_element("xpath", '//div[@class="react-grid-layout layout"]')
        # Extra 1000px of headroom on top of the element's own height.
        total_height = ele.size["height"]+1000

        driver.set_window_size(1920, total_height)      #the trick
        time.sleep(2)
        driver.save_screenshot("screenshot1.png")
    finally:
        # End the session even when a step above fails.
        driver.quit()

if __name__ == "__main__":
    test_fullpage_screenshot()

cd "enter the directory"

pip install Selenium-Screenshot

from Screenshot import Screenshot_Clipping
from selenium import webdriver

ob = Screenshot_Clipping.Screenshot()
driver = webdriver.Chrome()
url = "https://github.com/sam4u3/Selenium_Screenshot/tree/master/test"
driver.get(url)

# the line below makes taking & saving screenshots very easy.

img_url=ob.full_Screenshot(driver, save_path=r'.', image_name='Myimage.png')
print(img_url)
driver.close()

driver.quit()

from Screenshot import Screenshot_Clipping
from selenium import webdriver
import time
ob = Screenshot_Clipping.Screenshot()
driver = webdriver.Chrome()
url = "https://www.bbc.com/news/world-asia-china-51108726"
driver.get(url)
time.sleep(1)
img_url = ob.full_Screenshot(driver, save_path=r'.', image_name='Myimage.png')
driver.close()

driver.quit()

from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options

# Headless Firefox session driven through an explicit geckodriver path.
options = Options()
# Command-line switch instead of the `headless` property setter.
options.add_argument('-headless')
service = Service('/your/path/to/geckodriver')
driver = webdriver.Firefox(options=options, service=service)

try:
    driver.get('https://www.nytimes.com/')
    # Full-page capture is built into the Firefox driver.
    driver.get_full_page_screenshot_as_file('example.png')
finally:
    # quit() ends the whole session; close() only closes the current window.
    driver.quit()

driver.get_full_page_screenshot_as_file
driver.save_full_page_screenshot
driver.get_full_page_screenshot_as_png
driver.get_full_page_screenshot_as_base64

import base64
...
        page_rect = browser.driver.execute_cdp_cmd("Page.getLayoutMetrics", {})
        screenshot = browser.driver.execute_cdp_cmd(
            "Page.captureScreenshot",
            {
                "format": "png",
                "captureBeyondViewport": True,
                "clip": {
                    "width": page_rect["contentSize"]["width"],
                    "height": page_rect["contentSize"]["height"],
                    "x": 0,
                    "y": 0,
                    "scale": 1
                }
            })

        with open(path, "wb") as file:
            file.write(base64.urlsafe_b64decode(screenshot["data"]))