Download 使用HtmlUnit下载文件
我正在尝试下载一个网站的xls文件。当我点击链接下载文件时,我会看到一个javascript确认框。我是这样处理的Download 使用HtmlUnit下载文件,download,htmlunit,Download,Htmlunit,我正在尝试下载一个网站的xls文件。当我点击链接下载文件时,我会看到一个javascript确认框。我是这样处理的 ConfirmHandler okHandler = new ConfirmHandler(){ public boolean handleConfirm(Page page, String message) { return true; } }; webClient.s
// Handler that answers every JavaScript confirm() dialog with "OK".
final ConfirmHandler okHandler = new ConfirmHandler() {
    @Override
    public boolean handleConfirm(final Page page, final String message) {
        // Always accept, regardless of which page raised the dialog or what it says.
        return true;
    }
};
// Register the handler on the client before the link is clicked.
webClient.setConfirmHandler(okHandler);
有一个下载文件的链接
<a href="./my_file.php?mode=xls&w=d2hlcmUgc2VsbElkPSd3b3JsZGNvbScgYW5kIHN0YXR1cz0nV0FJVERFTEknIGFuZCBkYXRlIDw9IC0xMzQ4MTUzMjAwICBhbmQgZGF0ZSA%2BPSAtMTM1MDgzMTU5OSA%3D" target="actionFrame" onclick="return confirm('Do you want do download XLS file?')"><u>Download</u></a>
handleConfirm()方法已被执行。但我不知道如何保存服务器返回的文件流。我试图用下面的代码查看流:
// Click the download link and read the body of the resulting response as text.
// The returned string is discarded here.
anchor.click().getWebResponse().getContentAsString();
// Read the response of the page enclosed in the event's window as a raw stream.
// NOTE(review): `event` is not declared in this fragment; presumably it is the
// WebWindowEvent passed to a WebWindowListener callback. Confirm against the caller.
InputStream xls = event.getWebWindow().getEnclosedPage().getWebResponse().getContentAsStream();
但是,结果与第x页相同。有人知道如何从服务器捕获流吗?谢谢。我找到了一种使用WebWindowListener获取InputStream的方法。在webWindowContentChanged(WebWindowEvent事件)的内部,我将代码放在下面
// Click the download link and read the body of the resulting response as text.
// The returned string is discarded here.
anchor.click().getWebResponse().getContentAsString();
// Read the response of the page enclosed in the event's window as a raw stream.
// NOTE(review): `event` is not declared in this fragment; presumably it is the
// WebWindowEvent parameter of webWindowContentChanged. Confirm against the caller.
InputStream xls = event.getWebWindow().getEnclosedPage().getWebResponse().getContentAsStream();
在我得到xls后,我可以将文件保存到硬盘中。 我是根据你的帖子写的。注意:您可以更改内容类型条件,以便仅下载特定类型的文件。例如(application/octet-stream、application/pdf等)
package net.s4bdigital.export.main;
导入java.io.File;
导入java.io.FileOutputStream;
导入java.io.IOException;
导入java.io.InputStream;
导入java.io.OutputStream;
导入java.util.List;
导入org.junit.Before;
导入org.junit.Test;
导入org.openqa.selenium.By;
导入org.openqa.selenium.WebDriver;
导入org.openqa.selenium.htmlunit.HtmlUnitDriver;
导入com.gargoylesoftware.htmlunit.ConfirmHandler;
导入com.gargoylesoftware.htmlunit.Page;
导入com.gargoylesoftware.htmlunit.WebClient;
导入com.gargoylesoftware.htmlunit.WebResponse;
导入com.gargoylesoftware.htmlunit.WebWindowEvent;
导入com.gargoylesoftware.htmlunit.WebWindowListener;
导入com.gargoylesoftware.htmlunit.util.NameValuePair;
公共类HtmlUnitDownloadFile{
受保护的字符串baseUrl;
受保护的静态WebDriver;
@以前
public void openBrowser(){
baseUrl=”http://localhost/teste.html";
driver=新的CustomHtmlUnitDriver();
((HtmlUnitDriver)driver).setJavascriptEnabled(true);
}
@试验
public void downloadAFile()引发异常{
get(baseUrl);
driver.findElement(By.linkText(“单击以下载文件”))。单击();
}
公共类CustomHtmlUnitDriver扩展HtmlUnitDriver{
//这就是神奇之处。保留对客户端实例的引用
受保护的WebClient modifyWebClient(WebClient客户端){
ConfirmHandler-okHandler=新ConfirmHandler(){
公共布尔HandleConfig(页面,字符串消息){
返回true;
}
};
client.setConfirmHandler(okHandler);
client.addWebWindowListener(新的WebWindowListener(){
公共无效WebWindowOpen(WebWindowEvent事件){
//TODO自动生成的方法存根
}
public void webWindowContentChanged(WebWindowEvent事件){
WebResponse=event.getWebWindow().getEnclosedPage().getWebResponse();
System.out.println(response.getLoadTime());
System.out.println(response.getStatusCode());
System.out.println(response.getContentType());
List headers=response.getResponseHeaders();
for(NameValuePair标题:标题){
System.out.println(header.getName()+“:”+header.getValue());
}
//更改或添加所需内容类型的条件
//像文件一样接收。
if(response.getContentType().equals(“text/plain”)){
getFileResponse(响应,“target/testDownload.war”);
}
}
公共作废webWindowClosed(WebWindowEvent事件){
}
});
返回客户;
}
}
公共静态void getFileResponse(WebResponse响应,字符串文件名){
InputStream InputStream=null;
//将inputStream写入FileOutputStream
OutputStream OutputStream=null;
试试{
inputStream=response.getContentAsStream();
//将inputStream写入FileOutputStream
outputStream=新文件outputStream(新文件(文件名));
int read=0;
字节[]字节=新字节[1024];
而((read=inputStream.read(bytes))!=-1){
outputStream.write(字节,0,读取);
}
System.out.println(“完成!”);
}捕获(IOE异常){
e、 printStackTrace();
}最后{
如果(inputStream!=null){
试一试{
inputStream.close();
}捕获(IOE异常){
e、 printStackTrace();
}
}
if(outputStream!=null){
试一试{
//outputStream.flush();
outputStream.close();
}捕获(IOE异常){
e、 printStackTrace();
}
}
}
}
}
如果你不想用Selenium来包装HtmlUnit,有一种更简单的方法。只需为HtmlUnit的WebClient提供一个自定义的WebWindowListener
您还可以使用Apache Commons IO方便地进行流复制
// Client whose window-content changes are inspected for downloadable responses.
WebClient webClient = new WebClient();
webClient.addWebWindowListener(new WebWindowListener() {
    public void webWindowOpened(WebWindowEvent event) { }

    public void webWindowContentChanged(WebWindowEvent event) {
        // Response of the page now enclosed in the window that changed.
        WebResponse response = event.getWebWindow().getEnclosedPage().getWebResponse();
        // Change or add conditions for content-types that you would
        // like to receive as a file.
        if (response.getContentType().equals("text/plain")) {
            // try-with-resources closes both streams even if the copy fails;
            // IOUtils.copy itself does not close them.
            try (InputStream in = response.getContentAsStream();
                 OutputStream out = new FileOutputStream("downloaded_file")) {
                IOUtils.copy(in, out);
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public void webWindowClosed(WebWindowEvent event) {}
});
找出下载URL,并将其抓取到列表中。通过下载URL,我们可以使用以下代码获取整个文件
试试看{
String path=“您的目的地路径”;
List downloadfiles=(List)page.getByXPath(“要刮取的标记”);
if(downloadfiles.isEmpty()){
System.out.println(“未找到任何项!”);
}否则{
用于(HtmlElement htmlItem:downloadfiles){
String DownloadURL=htmlItem.getHrefAttribute();
PageInvoicePDF=client.getPage(下载URL);
if(invoicePdf.getWebResponse().getContentType().equals(“应用程序/pdf”)){
final WebClient webClient = new WebClient(BrowserVersion.CHROME);
webClient.getOptions().setTimeout(2000);
webClient.getOptions().setThrowExceptionOnScriptError(false);
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
//get General page
final HtmlPage page = webClient.getPage("http://your");
// Wait only after the page is loaded: before that, no background
// JavaScript has been started, so there is nothing to wait for.
webClient.waitForBackgroundJavaScript(2000);
//get Frame
final HtmlPage frame = ((HtmlPage)
page.getFrameByName("Frame").getEnclosedPage());
// Accept every JavaScript confirm() dialog so the click below is not cancelled.
webClient.setConfirmHandler(new ConfirmHandler() {
public boolean handleConfirm(Page page, String message) {
return true;
}
});
//get element file (looked up in the frame page obtained above)
final DomElement file = frame.getElementByName("File");
// Clicking the element yields the page whose response carries the file.
final InputStream xls = file.click().getWebResponse().getContentAsStream();
assertNotNull(xls);
}
// Collect the elements matched by the XPath and download each one whose
// href points at a PDF response.
try {
    String path = "your destination path";
    List<HtmlElement> downloadfiles = (List<HtmlElement>) page.getByXPath("the tag you want to scrape");
    if (downloadfiles.isEmpty()) {
        System.out.println("No items found !");
    } else {
        for (HtmlElement htmlItem : downloadfiles) {
            // HtmlElement has no getHrefAttribute(); read the attribute generically.
            // NOTE(review): a relative href is passed to getPage unchanged, as before.
            String downloadUrl = htmlItem.getAttribute("href");
            Page invoicePdf = client.getPage(downloadUrl);
            if (invoicePdf.getWebResponse().getContentType().equals("application/pdf")) {
                System.out.println("creatign PDF:");
                // Close the output stream after each copy; IOUtils.copy does not close it.
                // NOTE(review): every match is written to the same placeholder file name.
                try (FileOutputStream out = new FileOutputStream(path + "file name")) {
                    IOUtils.copy(invoicePdf.getWebResponse().getContentAsStream(), out);
                }
            }
        }
    }
} catch (Exception e) {
    e.printStackTrace();
}
/**
 * Registers a one-shot listener on {@code webClient} that saves the next
 * {@link UnexpectedPage} loaded into any window to {@code output}.
 *
 * <p>The listener removes itself after a successful copy. Failures are printed
 * to standard error and not rethrown, so the listener stays registered in that
 * case.
 *
 * @param webClient the client to listen on
 * @param output    the file the page content is written to
 */
public static void prepareForDownloadingFile(final WebClient webClient, final File output) {
    webClient.addWebWindowListener(new WebWindowListener() {
        public void webWindowOpened(WebWindowEvent event) {
        }

        public void webWindowContentChanged(WebWindowEvent event) {
            Page page = event.getNewPage();
            // instanceof is already false for null, so no separate null check is needed.
            if (!(page instanceof UnexpectedPage)) {
                return;
            }
            UnexpectedPage uPage = (UnexpectedPage) page;
            // The input is opened first so a failure to read does not truncate the
            // output file. try-with-resources closes both streams independently,
            // even if closing one of them throws.
            try (InputStream is = uPage.getInputStream();
                 FileOutputStream fos = new FileOutputStream(output)) {
                IOUtils.copy(is, fos);
                webClient.removeWebWindowListener(this);
            } catch (Exception e) {
                // Best effort: report the failure and keep the listener registered.
                e.printStackTrace();
            }
        }

        public void webWindowClosed(WebWindowEvent event) {
        }
    });
}