使用java从网站下载多个文件_Java_Html_File_Download

使用java从网站下载多个文件

java html file download

使用java从网站下载多个文件,java,html,file,download,Java,Html,File,Download,我正在使用代理登录到一个安全站点，希望能够将所有文件和文件夹下载到本地光盘上。这就是我目前所拥有的 EDIT-**当前，下面的代码将从给定的根目录开始，并下载所有子目录中的所有文件。。。很酷：）但是它没有复制我需要的文件夹结构。需要帮忙吗**编辑首先，我得到4个参数（因此可以在Linux上的cmd行上使用） 1）我要下载的目录的url 2）安全登录的用户名 3） psw 4）我希望文件保存在本地光盘上的目录 public class ApacheUrl4 { // this

我正在使用代理登录到一个安全站点，希望能够将所有文件和文件夹下载到本地光盘上。这就是我目前所拥有的

EDIT-**当前，下面的代码将从给定的根目录开始，并下载所有子目录中的所有文件。。。很酷：）但是它没有复制我需要的文件夹结构。需要帮忙吗**编辑

首先，我得到4个参数（因此可以在Linux上的cmd行上使用）

1）我要下载的目录的url 2）安全登录的用户名 3） psw 4）我希望文件保存在本地光盘上的目录

       public class ApacheUrl4
{
// this is the entry point for what I want the instase of the class to do
    public static void main(String args[]) throws Exception {

        String url  = args[0];
        final String username  = args[1];
        final String password1  = args[2];
        String directory  = args[3];

        checkArguments(args);

        ApacheUrl4 max = new ApacheUrl4();
        max.process(url, username, password1, directory);

    }
    public void process (String url, String username1, String password1, String directory) throws Exception {

        final char[] password  = password1.toCharArray();   
        final String username = username1;
         Authenticator.setDefault(new Authenticator(){
              protected  PasswordAuthentication  getPasswordAuthentication(){
               PasswordAuthentication p=new PasswordAuthentication(username , password);
               return p;
              }
             });


        BufferedInputStream in = null;
        BufferedInputStream in2 = null;
        FileOutputStream fout = null;
    // proxy 
        String proxyip = "000.000.000" ;
        int proxyport = 8080;
        Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyip, proxyport));
     // URL connection to file 
        URL file = new URL(url);
        URLConnection connection = file.openConnection(proxy);      
        ((HttpURLConnection)connection).getResponseCode();
        int reponsecode = ((HttpURLConnection)connection).getResponseCode();
        System.out.println("response code " + reponsecode);


        if (reponsecode == HttpURLConnection.HTTP_FORBIDDEN){
            System.out.println("Invalid username or psw");
            return;
        }
        if (reponsecode != HttpURLConnection.HTTP_OK){
            System.out.println("Unable to find response");
            return;
        }





        //Save the file into the chosen folder
        in = new BufferedInputStream(connection.getInputStream());

        //Create instance of DocumentBuilderFactory
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        //Get the DocumentBuilder
        DocumentBuilder docBuilder = factory.newDocumentBuilder();
        //Using existing XML Document
        Document doc = docBuilder.parse(in);

        //create the root element 
        Element root = doc.getDocumentElement();
        NodeList nodeList = root.getElementsByTagName("li"); 


        for(int i=0; i<nodeList.getLength(); i++){
          Node childNode = nodeList.item(i);
          if (childNode.getTextContent().contains("/")) {


            //  System.out.println(url + childNode.getTextContent());
                process(url + childNode.getTextContent(), username, password1, directory);                        

        }

    if (childNode.getTextContent().contains(".") && !childNode.getTextContent().contains("..")) {


            String textcon =  url + childNode.getTextContent();
            System.out.println("aaa " + textcon);

            if (url.endsWith("/")) {
                System.out.println("ends with a /");    
            }

            textcon = textcon.replace( " ", "%20");
            URL file2 = new URL(textcon);

            String[] urlparts = textcon.split("/");
            int urllength = urlparts.length;
            String lastarray = urlparts[urllength-2];
            System.out.println("last array " + lastarray);


            URLConnection connection2 = file2.openConnection(proxy);        
            in2 = new BufferedInputStream(connection2.getInputStream());
            String test2 = childNode.getTextContent();
            System.out.println("eeee " + childNode.getTextContent());

            String filename = (directory + test2 );
              File f=new File(filename);
                  if(f.isDirectory())
                  continue;





              //InputStream inputStream= new FileInputStream("InputStreamToFile.java");
              OutputStream out=new FileOutputStream(f);
              byte buf[]=new byte[1024];
              int len;
              while((len=in2.read(buf))>0)
              out.write(buf,0,len);
              out.close();
              in2.close();


        }
    }
}




    // this is part of the validation of arguments provided by user
    private static void checkArguments(String[] args) {
        while (args.length < 4 || args[0].isEmpty() || args.length > 4 ) {
                System.out.println("Please specify five arguments in the following format \n "  +
                " URL USERNAME PASWORD FILEPATH FILENAME " +
                "EG: \"java helloW http://www.google.com user_name password C:\\path/dir/ filename.exe\" ");
                System.exit(1);
         }
    }
}

公共类ApacheUrl4
{
//这是我希望类的instase执行的操作的入口点
公共静态void main（字符串args[]）引发异常{
字符串url=args[0]；
最终字符串username=args[1]；
最终字符串密码1=args[2]；
String directory=args[3]；
校验参数（args）；
ApacheUrl4 max=新的ApacheUrl4（）；
最大进程（url、用户名、密码1、目录）；
}
公共无效进程（字符串url、字符串用户名1、字符串密码1、字符串目录）引发异常{
final char[]password=password1.toCharArray（）；
最终字符串username=username1；
setDefault（新验证器（）{
受保护的密码身份验证getPasswordAuthentication（）{
PasswordAuthentication p=新的PasswordAuthentication（用户名、密码）；
返回p；
}
});
BufferedInputStream in=null；
BufferedInputStream in2=null；
FileOutputStream fout=null；
//代理
字符串proxyip=“000.000.000”；
int proxyport=8080；
代理代理=新代理（Proxy.Type.HTTP，新的InetSocketAddress（proxyip，proxyport））；
//文件的URL连接
URL文件=新URL（URL）；
URLConnection=file.openConnection（代理）；
（（HttpURLConnection）连接）；
int reponsecode=（（HttpURLConnection）connection.getResponseCode（）；
System.out.println（“响应代码”+响应代码）；
if（reponsecode==HttpURLConnection.HTTP\u禁止）{
System.out.println（“无效用户名或psw”）；
返回；
}
if（reponsecode！=HttpURLConnection.HTTP\u确定）{
System.out.println（“无法找到响应”）；
返回；
}
//将文件保存到所选文件夹中
in=新的BufferedInputStream（connection.getInputStream（））；
//创建DocumentBuilderFactory的实例
DocumentBuilderFactory工厂=DocumentBuilderFactory.newInstance（）；
//获取DocumentBuilder
DocumentBuilder docBuilder=factory.newDocumentBuilder（）；
//使用现有的XML文档
Document doc=docBuilder.parse（in）；
//创建根元素
元素根=doc.getDocumentElement（）；
NodeList NodeList=root.getElementsByTagName（“li”）；
对于（int i=0；i0）
out.write（buf，0，len）；
out.close（）；
in2.close（）；
}
}
}
//这是验证用户提供的参数的一部分
私有静态void checkArguments（字符串[]args）{
while（args.length<4 | | args[0].isEmpty（）| | args.length>4）{
System.out.println（“请按以下格式指定五个参数\n”+
“URL用户名PASWORD文件路径文件名”+
“例如：\”java helloWhttp://www.google.com 用户名密码C:\\path/dir/filename.exe\”；
系统出口（1）；
}
}
}

要下载目录中的文件，首先需要目录列表。如果允许，这将由服务器自动生成。首先，使用浏览器检查此特定服务器上是否存在这种情况

然后需要解析列表页面，并下载每个url。坏消息是这些页面没有标准。好消息是，互联网的大部分都托管在apache或IIS上，因此，如果您能够管理这两个，那么您已经了解了很好的部分

您可能只需要将文件解析为xml（xhtml）并使用xpath恢复所有URL就可以了。

您从中读取的服务器是否允许目录浏览？我的意思是，如果你用浏览器访问它，你会看到目录列表吗？谢谢Joeri，这帮我找到了正确的路径。我只需要一点帮助，了解如何在下载文件时创建文件夹。目前，下面的代码将从给定的根目录开始，并下载所有子目录中的所有文件。。。很酷：）但是它没有复制我需要的文件夹结构。有什么帮助吗？只需制作一个类似于

下载内容（URL源、文件目标）

的方法即可。如果在目录列表中需要子文件夹，请执行递归调用

下载内容（source+“/”+folderName，new File（target，folderName））

；