Python 使用Selenium获取BLOB url时出错
(标签:python、selenium)我尝试通过脚本注入,在 Python 中使用 Selenium 获取存储在内存中的 blob 内容。代码如下:
from selenium import webdriver
import base64
from bs4 import BeautifulSoup
def download_blob(driver, uri):
    """Download the resource at *uri* from inside the browser page and return its bytes.

    A script is injected into the current page through
    ``driver.execute_async_script``. It requests *uri* with an
    ``XMLHttpRequest`` (``responseType = 'arraybuffer'``), base64-encodes the
    response in the page and hands the text back through the async-script
    callback; the text is then decoded to ``bytes`` here.

    Args:
        driver: Object exposing ``execute_async_script(script, *args)``
            (a Selenium WebDriver in the surrounding script).
        uri: URL to request from within the page, e.g. a ``blob:`` URL.

    Returns:
        The decoded response body as ``bytes``.

    Raises:
        Exception: If the XHR ``onerror`` handler fired (the script then
            reports ``xhr.status`` as an integer), or if the script returned
            anything other than a base64 string.
    """
    result = driver.execute_async_script("""
        var uri = arguments[0];
        var callback = arguments[arguments.length-1];
        var toBase64 = function(buffer){for(var r,n=new Uint8Array(buffer),t=n.length,a=new Uint8Array(4*Math.ceil(t/3)),i=new Uint8Array(64),o=0,c=0;64>c;++c)i[c]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".charCodeAt(c);for(c=0;t-t%3>c;c+=3,o+=4)r=n[c]<<16|n[c+1]<<8|n[c+2],a[o]=i[r>>18],a[o+1]=i[r>>12&63],a[o+2]=i[r>>6&63],a[o+3]=i[63&r];return t%3===1?(r=n[t-1],a[o]=i[r>>2],a[o+1]=i[r<<4&63],a[o+2]=61,a[o+3]=61):t%3===2&&(r=(n[t-2]<<8)+n[t-1],a[o]=i[r>>10],a[o+1]=i[r>>4&63],a[o+2]=i[r<<2&63],a[o+3]=61),new TextDecoder("ascii").decode(a)};
        var xhr = new XMLHttpRequest();
        xhr.responseType = 'arraybuffer';
        xhr.onload = function(){ callback(toBase64(xhr.response)) };
        xhr.onerror = function(){ callback(xhr.status) };
        xhr.open('GET', uri);
        xhr.send();
        """, uri)
    # Debug trace kept from the original snippet (prints the URI and raw result).
    print(uri, result)
    # The injected script reports failure by passing the numeric xhr.status
    # to the callback instead of a base64 string.
    if isinstance(result, int):
        raise Exception("Request failed with status %s" % result)
    # Anything that is not text (e.g. None) cannot be a base64 payload; fail
    # with a clear message instead of an opaque error from b64decode.
    if not isinstance(result, str):
        raise Exception("Unexpected result from browser script: %r" % (result,))
    return base64.b64decode(result)
# Launch Chrome with a fixed desktop user-agent string.
options = webdriver.ChromeOptions()
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36')
driver = webdriver.Chrome(options=options)
try:
    url = 'https://www.youtube.com/watch?v=KBtk5FUeJbk'
    driver.get(url)
    # Parse the rendered page and locate the player's <video> element.
    html = driver.page_source
    soup = BeautifulSoup(html, 'html5lib')
    video = soup.find('video', attrs={'class': 'video-stream html5-main-video'})
    # Fail with a readable message instead of "'NoneType' is not subscriptable"
    # when the element or its src attribute is missing.
    if video is None or not video.get('src'):
        raise Exception("No <video> element with a src attribute found on %s" % url)
    blob_url = video['src']
    # NOTE(review): a player's blob: URL is presumably backed by a MediaSource
    # object rather than a Blob; such URLs cannot be read with XMLHttpRequest,
    # which would explain the reported status 0 — confirm against MDN.
    byte_stream = download_blob(driver, blob_url)
finally:
    # Always close the browser, even when the download raises.
    driver.quit()
我甚至尝试过换成其他网站,因为我认为 YouTube 可能对抓取内容有一些严格的限制政策,但仍然没有成功。其他所有网站都返回了同样的结果。
也欢迎大家提出 JavaScript 方面的替代方案。各位,有什么线索吗?
blob:https://www.youtube.com/5e3f1fab-3839-45a1-bb62-3582635b9e7d 0
Traceback (most recent call last):
File "C:\Users\*****\Desktop\blob-download.py", line 32, in <module>
byte_stream = download_blob(driver, blob_url)
File "C:\Users\*****\Desktop\blob-download.py", line 20, in download_blob
raise Exception("Request failed with status %s" % result)
Exception: Request failed with status 0
# Calls download_blob with a hard-coded blob: URL (the same one printed in the
# output above) instead of one read from the current page.
# NOTE(review): blob: URLs are presumably only valid inside the document that
# created them, so one copied from an earlier session would not resolve — confirm.
byte_stream = download_blob(driver, 'blob:https://www.youtube.com/5e3f1fab-3839-45a1-bb62-3582635b9e7d') # this would definitely not work