Python、Selenium、Firefox:强制下载PDF
标签:python、selenium、firefox。例如:使用Selenium,我单击第一个表单497的链接。在我平常使用的浏览器中,这会在新选项卡中打开PDF;而在Selenium中,似乎什么都没有发生。这是我的代码,其中有些部分经过了删改:
def _wait_for_new_pdf(download_dir, known_pdfs, timeout=30.0, poll=0.25):
    """Wait for a PDF that is not in *known_pdfs* to finish downloading.

    Args:
        download_dir: Directory Firefox was configured to download into.
        known_pdfs: Set of PDF paths that existed before the click.
        timeout: Maximum number of seconds to wait.
        poll: Seconds to sleep between directory scans.

    Returns:
        The path of the most recently created new PDF, or ``None`` if no new
        PDF was complete when the timeout expired.
    """
    import time

    deadline = time.monotonic() + timeout
    while True:
        # Firefox keeps a "<name>.part" file next to the target while a
        # download is in flight, so only accept a result once none remain.
        in_progress = glob.glob(os.path.join(download_dir, '*.part'))
        fresh = set(glob.glob(os.path.join(download_dir, '*.pdf'))) - known_pdfs
        if fresh and not in_progress:
            return max(fresh, key=os.path.getctime)
        if time.monotonic() >= deadline:
            return None
        time.sleep(poll)


def scrape(session_key=None):
    """Click every form link on the committee page and parse each PDF.

    A headless Firefox is configured to save downloads silently into
    ``<base_dir>/reports``. For each data row of the forms table, the link in
    the third cell is clicked, the resulting PDF is awaited, and its path is
    handed to the matching entry of ``parse_funcs``.

    Args:
        session_key: Unused by the visible code; kept for caller compatibility.
    """
    download_dir = os.path.join(base_dir, 'reports')
    os.makedirs(download_dir, exist_ok=True)

    options = Options()
    options.headless = True

    profile = webdriver.FirefoxProfile()
    profile.set_preference("browser.download.dir", download_dir)
    # 2 = use the custom directory given in browser.download.dir.
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.helperApps.alwaysAsk.force", False)
    profile.set_preference("browser.download.manager.showAlertOnComplete", False)
    profile.set_preference("browser.download.manager.showWhenStarting", False)
    # 'application/pdf' was missing from this list, so Firefox never saved the
    # PDF without prompting; it is prepended to the original MIME types.
    profile.set_preference(
        'browser.helperApps.neverAsk.saveToDisk',
        'application/pdf,'
        'application/zip,application/octet-stream,application/x-zip-compressed,multipart/x-zip,application/x-rar-compressed, application/octet-stream,application/msword,application/vnd.ms-word.document.macroEnabled.12,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/rtf,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.ms-excel,application/vnd.ms-word.document.macroEnabled.12,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/xls,application/msword,text/csv,application/vnd.ms-excel.sheet.binary.macroEnabled.12,text/plain,text/csv/xls/xlsb,application/csv,application/download,application/vnd.openxmlformats-officedocument.presentationml.presentation,application/octet-stream',
    )
    # Disable the built-in viewer so PDFs are downloaded instead of rendered.
    profile.set_preference("pdfjs.disabled", True)
    profile.set_preference("plugin.disable_full_page_plugin_for_types", "application/pdf")

    driver = webdriver.Firefox(firefox_profile=profile, options=options)
    try:
        driver.get(magic_url)
        committee_table = driver.find_elements_by_css_selector('table')[2]
        # NOTE(review): `links` is not used by the visible code; kept because
        # parts of the original function were edited out.
        links = [link.get_attribute('href') for link in committee_table.find_elements_by_tag_name('a')]

        driver.get('https://apps1.lavote.net/camp/comm.cfm?&cid=82')
        forms_table = driver.find_elements_by_css_selector('table')[1]
        forms_table_trs = forms_table.find_elements_by_css_selector('tr')

        # The first row is skipped, as in the original `if i > 0` check.
        for row in forms_table_trs[1:]:
            cells = row.find_elements_by_css_selector('td')
            try:
                link = cells[2].find_elements_by_tag_name('a')[0]
                # Snapshot before clicking so the new file can be told apart
                # from PDFs left over from earlier rows or runs.
                known_pdfs = set(glob.glob(os.path.join(download_dir, '*.pdf')))
                link.click()
                latest_pdf_file = _wait_for_new_pdf(download_dir, known_pdfs)
                if latest_pdf_file is None:
                    print('No PDF was downloaded to %s' % download_dir)
                    continue
                # NOTE(review): `form_type` is not defined in the visible
                # code; presumably set in an edited-out part - confirm.
                parse_funcs[form_type](latest_pdf_file)
            except Exception as e:
                # Best-effort per row: report the failure and keep going.
                print(e)
    finally:
        # Always shut the browser down so no Firefox process is leaked.
        driver.quit()
正如您可能已经猜到的,没有任何PDF被下载。这就是我来这里提问的原因。我怎样才能做到这一点?
回答:如果您只需要文件,而不需要测试实际的浏览器对话框流程,请直接用Python获取文件,而不是让Selenium去做。从页面获取PDF的URL,然后使用requests将文件下载到内存中,再用open().write()将其保存到文件系统:
# Fetch the PDF directly over HTTP instead of driving the browser.
# `url` is the PDF link taken from the page; `filename` is the local target.
r = requests.get(url, allow_redirects=True)
# Fail loudly on an HTTP error rather than saving an error page as the PDF.
r.raise_for_status()
# The original bound the response to `req` but read `r.content` (NameError)
# and never closed the file; `with` closes it even if the write fails.
with open(filename, 'wb') as f:
    f.write(r.content)
您也可以从响应r中获取文件名(通常位于Content-Disposition响应头中),但这有点麻烦。请在此处查看: