使用带chromedriver的Selenium Python拍摄完整页面的屏幕截图
在尝试了各种方法之后。。。我偶然发现了这个页面,并用chromedriver、selenium和python截取了整个页面的屏幕 原始代码是。(我在下面的帖子中复制了代码) 它使用了PIL,而且工作得很好!然而,有一个问题。。。也就是说,它捕获固定的标题并重复整个页面,并且在页面更改过程中遗漏页面的某些部分。截图示例url: 如何使用此代码避免重复的标题。。。或者有没有更好的选择只使用python…(我不懂java,也不想使用java)。 请参见下面当前结果的屏幕截图和示例代码 test.py使用带chromedriver的Selenium Python拍摄完整页面的屏幕截图,python,selenium,selenium-chromedriver,webpage-screenshot,Python,Selenium,Selenium Chromedriver,Webpage Screenshot,在尝试了各种方法之后。。。我偶然发现了这个页面,并用chromedriver、selenium和python截取了整个页面的屏幕 原始代码是。(我在下面的帖子中复制了代码) 它使用了PIL,而且工作得很好!然而,有一个问题。。。也就是说,它捕获固定的标题并重复整个页面,并且在页面更改过程中遗漏页面的某些部分。截图示例url: 如何使用此代码避免重复的标题。。。或者有没有更好的选择只使用python…(我不懂java,也不想使用java)。 请参见下面当前结果的屏幕截图和示例代码 test.p
"""
This script uses a simplified version of the one here:
https://snipt.net/restrada/python-selenium-workaround-for-full-page-screenshot-using-chromedriver-2x/
It contains the *crucial* correction added in the comments by Jason Coutu.
"""
import sys
from selenium import webdriver
import unittest
import util
class Test(unittest.TestCase):
""" Demonstration: Get Chrome to generate fullscreen screenshot """
def setUp(self):
self.driver = webdriver.Chrome()
def tearDown(self):
self.driver.quit()
def test_fullpage_screenshot(self):
''' Generate document-height screenshot '''
#url = "http://effbot.org/imagingbook/introduction.htm"
url = "http://www.w3schools.com/js/default.asp"
self.driver.get(url)
util.fullpage_screenshot(self.driver, "test.png")
if __name__ == "__main__":
unittest.main(argv=[sys.argv[0]])
import os
import time
from PIL import Image
def fullpage_screenshot(driver, file):
print("Starting chrome full page screenshot workaround ...")
total_width = driver.execute_script("return document.body.offsetWidth")
total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
viewport_width = driver.execute_script("return document.body.clientWidth")
viewport_height = driver.execute_script("return window.innerHeight")
print("Total: ({0}, {1}), Viewport: ({2},{3})".format(total_width, total_height,viewport_width,viewport_height))
rectangles = []
i = 0
while i < total_height:
ii = 0
top_height = i + viewport_height
if top_height > total_height:
top_height = total_height
while ii < total_width:
top_width = ii + viewport_width
if top_width > total_width:
top_width = total_width
print("Appending rectangle ({0},{1},{2},{3})".format(ii, i, top_width, top_height))
rectangles.append((ii, i, top_width,top_height))
ii = ii + viewport_width
i = i + viewport_height
stitched_image = Image.new('RGB', (total_width, total_height))
previous = None
part = 0
for rectangle in rectangles:
if not previous is None:
driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
print("Scrolled To ({0},{1})".format(rectangle[0], rectangle[1]))
time.sleep(0.2)
file_name = "part_{0}.png".format(part)
print("Capturing {0} ...".format(file_name))
driver.get_screenshot_as_file(file_name)
screenshot = Image.open(file_name)
if rectangle[1] + viewport_height > total_height:
offset = (rectangle[0], total_height - viewport_height)
else:
offset = (rectangle[0], rectangle[1])
print("Adding to stitched image with offset ({0}, {1})".format(offset[0],offset[1]))
stitched_image.paste(screenshot, offset)
del screenshot
os.remove(file_name)
part = part + 1
previous = rectangle
stitched_image.save(file)
print("Finishing chrome full page screenshot workaround...")
return True
util.py
"""
This script uses a simplified version of the one here:
https://snipt.net/restrada/python-selenium-workaround-for-full-page-screenshot-using-chromedriver-2x/
It contains the *crucial* correction added in the comments by Jason Coutu.
"""
import sys
from selenium import webdriver
import unittest
import util
class Test(unittest.TestCase):
""" Demonstration: Get Chrome to generate fullscreen screenshot """
def setUp(self):
self.driver = webdriver.Chrome()
def tearDown(self):
self.driver.quit()
def test_fullpage_screenshot(self):
''' Generate document-height screenshot '''
#url = "http://effbot.org/imagingbook/introduction.htm"
url = "http://www.w3schools.com/js/default.asp"
self.driver.get(url)
util.fullpage_screenshot(self.driver, "test.png")
if __name__ == "__main__":
unittest.main(argv=[sys.argv[0]])
import os
import time
from PIL import Image
def fullpage_screenshot(driver, file):
print("Starting chrome full page screenshot workaround ...")
total_width = driver.execute_script("return document.body.offsetWidth")
total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
viewport_width = driver.execute_script("return document.body.clientWidth")
viewport_height = driver.execute_script("return window.innerHeight")
print("Total: ({0}, {1}), Viewport: ({2},{3})".format(total_width, total_height,viewport_width,viewport_height))
rectangles = []
i = 0
while i < total_height:
ii = 0
top_height = i + viewport_height
if top_height > total_height:
top_height = total_height
while ii < total_width:
top_width = ii + viewport_width
if top_width > total_width:
top_width = total_width
print("Appending rectangle ({0},{1},{2},{3})".format(ii, i, top_width, top_height))
rectangles.append((ii, i, top_width,top_height))
ii = ii + viewport_width
i = i + viewport_height
stitched_image = Image.new('RGB', (total_width, total_height))
previous = None
part = 0
for rectangle in rectangles:
if not previous is None:
driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
print("Scrolled To ({0},{1})".format(rectangle[0], rectangle[1]))
time.sleep(0.2)
file_name = "part_{0}.png".format(part)
print("Capturing {0} ...".format(file_name))
driver.get_screenshot_as_file(file_name)
screenshot = Image.open(file_name)
if rectangle[1] + viewport_height > total_height:
offset = (rectangle[0], total_height - viewport_height)
else:
offset = (rectangle[0], rectangle[1])
print("Adding to stitched image with offset ({0}, {1})".format(offset[0],offset[1]))
stitched_image.paste(screenshot, offset)
del screenshot
os.remove(file_name)
part = part + 1
previous = rectangle
stitched_image.save(file)
print("Finishing chrome full page screenshot workaround...")
return True
导入操作系统
导入时间
从PIL导入图像
def fullpage_屏幕截图(驱动程序、文件):
打印(“启动chrome全页屏幕截图解决方案…”)
总宽度=驱动程序。执行脚本(“return document.body.offsetWidth”)
total\u height=driver.execute\u脚本(“return document.body.parentNode.scrollHeight”)
viewport\u width=driver.execute\u脚本(“return document.body.clientWidth”)
viewport\u height=driver.execute\u脚本(“return window.innerHeight”)
打印(“总计:({0},{1}),视口:({2},{3})”。格式(总宽度,总高度,视口宽度,视口高度))
矩形=[]
i=0
当i<总高度:
ii=0
顶部高度=i+视口高度
如果顶部高度>总高度:
顶部高度=总高度
ii<总宽度:
顶部宽度=ii+视口宽度
如果顶部宽度>总宽度:
顶部宽度=总宽度
打印(“附加矩形({0},{1},{2},{3})”。格式(二,一,顶宽,顶高)
矩形。附加((ii,i,顶部宽度,顶部高度))
ii=ii+视口宽度
i=i+视口高度
缝合图像=图像。新建('RGB',(总宽度,总高度))
先前=无
部分=0
对于矩形中的矩形:
如果“非上一个”为“无”:
driver.execute_脚本(“window.scrollTo({0},{1})”.format(矩形[0],矩形[1]))
打印(“滚动到({0},{1})”。格式(矩形[0],矩形[1]))
睡眠时间(0.2)
file_name=“part_{0}.png”。格式(part)
打印(“捕获{0}…”格式(文件名))
驱动程序。获取屏幕截图作为文件(文件名)
屏幕截图=Image.open(文件名)
如果矩形[1]+视口高度>总高度:
偏移=(矩形[0],总高度-视口高度)
其他:
偏移量=(矩形[0],矩形[1])
打印(“添加到具有偏移量({0},{1})的缝合图像”。格式(偏移量[0],偏移量[1]))
缝合的_图像。粘贴(截图,偏移)
德尔截图
删除(文件名)
零件=零件+1
上一个=矩形
缝合的_图像。保存(文件)
打印(“完成chrome全页屏幕截图解决方案…”)
返回真值
您可以通过在屏幕截图之前更改标题的CSS来实现这一点:
topnav = driver.find_element_by_id("topnav")
driver.execute_script("arguments[0].setAttribute('style', 'position: absolute; top: 0px;')", topnav)
编辑:将此行放在窗口滚动后:
driver.execute_script("document.getElementById('topnav').setAttribute('style', 'position: absolute; top: 0px;');")
因此,在您的util.py中:
driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
driver.execute_script("document.getElementById('topnav').setAttribute('style', 'position: absolute; top: 0px;');")
如果站点正在使用
标题
标记,在了解@Moshisho的方法后,您可以使用通过标签名称(“标题”)查找元素
我的完整独立工作脚本是。。。(在每个滚动和位置后添加睡眠0.2)
导入系统
从selenium导入webdriver
导入util
导入操作系统
导入时间
从PIL导入图像
def fullpage_屏幕截图(驱动程序、文件):
打印(“启动chrome全页屏幕截图解决方案…”)
总宽度=驱动程序。执行脚本(“return document.body.offsetWidth”)
total\u height=driver.execute\u脚本(“return document.body.parentNode.scrollHeight”)
viewport\u width=driver.execute\u脚本(“return document.body.clientWidth”)
viewport\u height=driver.execute\u脚本(“return window.innerHeight”)
打印(“总计:({0},{1}),视口:({2},{3})”。格式(总宽度,总高度,视口宽度,视口高度))
矩形=[]
i=0
当i<总高度:
ii=0
顶部高度=i+视口高度
如果顶部高度>总高度:
顶部高度=总高度
ii<总宽度:
顶部宽度=ii+视口宽度
如果顶部宽度>总宽度:
顶部宽度=总宽度
打印(“附加矩形({0},{1},{2},{3})”。格式(二,一,顶宽,顶高)
矩形。附加((ii,i,顶部宽度,顶部高度))
ii=ii+视口宽度
i=i+视口高度
缝合图像=图像。新建('RGB',(总宽度,总高度))
先前=无
部分=0
对于矩形中的矩形:
如果“非上一个”为“无”:
driver.execute_脚本(“window.scrollTo({0},{1})”.format(矩形[0],矩形[1]))
睡眠时间(0.2)
driver.execute_脚本(“document.getElementById('topnav').setAttribute('style','position:absolute;top:0px;');”)
睡眠时间(0.2)
打印(“滚动到({0},{1})”。格式(矩形[0],矩形[1]))
睡眠时间(0.2)
file_name=“part_{0}.png”。格式(part)
打印(“捕获{0}…”格式(文件名))
驱动程序。获取屏幕截图作为文件(文件名)
屏幕截图=Image.open(文件名)
如果矩形[1]+视口高度>总高度:
抵消
from selenium import webdriver
driver = webdriver.Firefox()
driver.get('https://developer.mozilla.org/')
element = driver.find_element_by_tag_name('body')
element_png = element.screenshot_as_png
with open("test2.png", "wb") as file:
file.write(element_png)
element=driver.find_element_by_tag_name('body')
element_png = element.screenshot_as_png
with open("test2.png", "wb") as file:
file.write(element_png)
element=driver.find_elements_by_xpath("/html/child::*/child::*")
eheight=set()
for e in element:
eheight.add(round(e.size["height"]))
print (eheight)
total_height = sum(eheight)
driver.execute_script("document.getElementsByTagName('html')[0].setAttribute('style', 'height:"+str(total_height)+"px')")
element=driver.find_element_by_tag_name('body')
element_png = element.screenshot_as_png
with open(fname, "wb") as file:
file.write(element_png)
# 1. get dimensions
browser = webdriver.Chrome(chrome_options=options)
browser.set_window_size(default_width, default_height)
browser.get(url)
time.sleep(sometime)
total_height = browser.execute_script("return document.body.parentNode.scrollHeight")
browser.quit()
# 2. get screenshot
browser = webdriver.Chrome(chrome_options=options)
browser.set_window_size(default_width, total_height)
browser.get(url)
browser.save_screenshot(screenshot_path)
import time
from PIL import Image
from io import BytesIO
def fullpage_screenshot(driver, file, scroll_delay=0.3):
device_pixel_ratio = driver.execute_script('return window.devicePixelRatio')
total_height = driver.execute_script('return document.body.parentNode.scrollHeight')
viewport_height = driver.execute_script('return window.innerHeight')
total_width = driver.execute_script('return document.body.offsetWidth')
viewport_width = driver.execute_script("return document.body.clientWidth")
# this implementation assume (viewport_width == total_width)
assert(viewport_width == total_width)
# scroll the page, take screenshots and save screenshots to slices
offset = 0 # height
slices = {}
while offset < total_height:
if offset + viewport_height > total_height:
offset = total_height - viewport_height
driver.execute_script('window.scrollTo({0}, {1})'.format(0, offset))
time.sleep(scroll_delay)
img = Image.open(BytesIO(driver.get_screenshot_as_png()))
slices[offset] = img
offset = offset + viewport_height
# combine image slices
stitched_image = Image.new('RGB', (total_width * device_pixel_ratio, total_height * device_pixel_ratio))
for offset, image in slices.items():
stitched_image.paste(image, (0, offset * device_pixel_ratio))
stitched_image.save(file)
fullpage_screenshot(driver, 'test.png')
browser = webdriver.Chrome(chrome_options=options)
browser.set_window_size(default_width, default_height)
browser.get(url)
height = browser.execute_script("return document.body.parentNode.scrollHeight")
# 2. get screenshot
browser.set_window_size(default_width, height)
browser.save_screenshot(screenshot_path)
browser.quit()
from selenium import webdriver
def save_screenshot(driver: webdriver.Chrome, path: str = '/tmp/screenshot.png') -> None:
# Ref: https://stackoverflow.com/a/52572919/
original_size = driver.get_window_size()
required_width = driver.execute_script('return document.body.parentNode.scrollWidth')
required_height = driver.execute_script('return document.body.parentNode.scrollHeight')
driver.set_window_size(required_width, required_height)
# driver.save_screenshot(path) # has scrollbar
driver.find_element_by_tag_name('body').screenshot(path) # avoids scrollbar
driver.set_window_size(original_size['width'], original_size['height'])
total_width = driver.execute_script("return document.body.offsetWidth")
total_height = driver.execute_script("return document.body.scrollHeight")
driver.set_window_size(total_width, total_height)
driver.save_screenshot("SomeName.png")
from selenium import webdriver
url = 'https://stackoverflow.com/'
path = '/path/to/save/in/scrape.png'
driver = webdriver.Chrome()
driver.get(url)
el = driver.find_element_by_tag_name('body')
el.screenshot(path)
driver.quit()
await driver.executeScript(`
document.documentElement.style.display = "table";
document.documentElement.style.width = "100%";
document.body.style.display = "table-row";
`);
await driver.findElement(By.css('body')).takeScreenshot();
URL = 'http://www.w3schools.com/js/default.asp'
options = webdriver.ChromeOptions()
options.headless = True
driver = webdriver.Chrome(options=options)
driver.get(URL)
S = lambda X: driver.execute_script('return document.body.parentNode.scroll'+X)
driver.set_window_size(S('Width'),S('Height')) # May need manual adjustment
driver.find_element_by_tag_name('body').screenshot('web_screenshot.png')
driver.quit()
scheight = .1
while scheight < 9.9:
driver.execute_script("window.scrollTo(0, document.body.scrollHeight/%s);" % scheight)
scheight += .01
import os
from selenium import webdriver
from PIL import Image
def full_screenshot(driver: webdriver):
driver.execute_script(f"window.scrollTo({0}, {0})")
total_width = driver.execute_script("return document.body.offsetWidth")
total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
viewport_width = driver.execute_script("return document.body.clientWidth")
viewport_height = driver.execute_script("return window.innerHeight")
rectangles = []
i = 0
while i < total_height:
ii = 0
top_height = i + viewport_height
if top_height > total_height:
top_height = total_height
while ii < total_width:
top_width = ii + viewport_width
if top_width > total_width:
top_width = total_width
rectangles.append((ii, i, top_width, top_height))
ii = ii + viewport_width
i = i + viewport_height
stitched_image = Image.new('RGB', (total_width, total_height))
previous = None
part = 0
for rectangle in rectangles:
if not previous is None:
driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
file_name = "part_{0}.png".format(part)
driver.get_screenshot_as_file(file_name)
screenshot = Image.open(file_name)
if rectangle[1] + viewport_height > total_height:
offset = (rectangle[0], total_height - viewport_height)
else:
offset = (rectangle[0], rectangle[1])
stitched_image.paste(screenshot, offset)
del screenshot
os.remove(file_name)
part = part + 1
previous = rectangle
return stitched_image
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def open_url(url):
options = Options()
options.headless = True
driver = webdriver.Chrome(chrome_options=options)
driver.maximize_window()
driver.get(url)
save_screenshot(driver, 'screen.png')
def save_screenshot(driver, file_name):
height, width = scroll_down(driver)
driver.set_window_size(width, height)
img_binary = driver.get_screenshot_as_png()
img = Image.open(BytesIO(img_binary))
img.save(file_name)
# print(file_name)
print(" screenshot saved ")
def scroll_down(driver):
total_width = driver.execute_script("return document.body.offsetWidth")
total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
viewport_width = driver.execute_script("return document.body.clientWidth")
viewport_height = driver.execute_script("return window.innerHeight")
rectangles = []
i = 0
while i < total_height:
ii = 0
top_height = i + viewport_height
if top_height > total_height:
top_height = total_height
while ii < total_width:
top_width = ii + viewport_width
if top_width > total_width:
top_width = total_width
rectangles.append((ii, i, top_width, top_height))
ii = ii + viewport_width
i = i + viewport_height
previous = None
part = 0
for rectangle in rectangles:
if not previous is None:
driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
time.sleep(0.5)
# time.sleep(0.2)
if rectangle[1] + viewport_height > total_height:
offset = (rectangle[0], total_height - viewport_height)
else:
offset = (rectangle[0], rectangle[1])
previous = rectangle
return (total_height, total_width)
open_url("https://www.medium.com")
#coding=utf-8
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def test_fullpage_screenshot(self):
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--start-maximized')
driver = webdriver.Chrome(chrome_options=chrome_options)
driver.get("yoururlxxx")
time.sleep(2)
#the element with longest height on page
ele=driver.find_element("xpath", '//div[@class="react-grid-layout layout"]')
total_height = ele.size["height"]+1000
driver.set_window_size(1920, total_height) #the trick
time.sleep(2)
driver.save_screenshot("screenshot1.png")
driver.quit()
if __name__ == "__main__":
test_fullpage_screenshot()
cd "enter the directory"
pip install Selenium-Screenshot
from Screenshot import Screenshot_Clipping
from selenium import webdriver
ob = Screenshot_Clipping.Screenshot()
driver = webdriver.Chrome()
url = "https://github.com/sam4u3/Selenium_Screenshot/tree/master/test"
driver.get(url)
# the line below makes taking & saving screenshots very easy.
img_url=ob.full_Screenshot(driver, save_path=r'.', image_name='Myimage.png')
print(img_url)
driver.close()
driver.quit()
from Screenshot import Screenshot_Clipping
from selenium import webdriver
import time
ob = Screenshot_Clipping.Screenshot()
driver = webdriver.Chrome()
url = "https://www.bbc.com/news/world-asia-china-51108726"
driver.get(url)
time.sleep(1)
img_url = ob.full_Screenshot(driver, save_path=r'.', image_name='Myimage.png')
driver.close()
driver.quit()
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
options = Options()
options.headless = True
service = Service('/your/path/to/geckodriver')
driver = webdriver.Firefox(options=options, service=service)
driver.get('https://www.nytimes.com/')
driver.get_full_page_screenshot_as_file('example.png')
driver.close()
driver.get_full_page_screenshot_as_file
driver.save_full_page_screenshot
driver.get_full_page_screenshot_as_png
driver.get_full_page_screenshot_as_base64
import base64
...
page_rect = browser.driver.execute_cdp_cmd("Page.getLayoutMetrics", {})
screenshot = browser.driver.execute_cdp_cmd(
"Page.captureScreenshot",
{
"format": "png",
"captureBeyondViewport": True,
"clip": {
"width": page_rect["contentSize"]["width"],
"height": page_rect["contentSize"]["height"],
"x": 0,
"y": 0,
"scale": 1
}
})
with open(path, "wb") as file:
file.write(base64.urlsafe_b64decode(screenshot["data"]))