Java 如何逐个下载所有URL并保存在不同的文件夹中

Java 如何逐个下载所有URL并保存在不同的文件夹中,java,html,csv,selenium,download,Java,Html,Csv,Selenium,Download,我有一个html文件,其中保存了所有url(CSV文件的下载链接)。我想要一个工具/程序,必须逐个检查每个url并下载文件,然后将文件保存在指定文件夹中,该文件夹将写入同一html文件本身 html文件是一个包含3列的表 文件名、文件位置和下载URL Url将在打开新窗口(target=\u blank)后下载CSV文件。下载后,如果没有错误,它将自动关闭子窗口 我尝试过自动化(Selenium使用java) 但也存在以下挑战 它应该等到下载完成 有时url可能会显示错误,在这种情况下,它应该关

我有一个html文件,其中保存了所有url(CSV文件的下载链接)。我想要一个工具/程序,逐个访问每个url并下载文件,然后把文件保存到指定的文件夹中;目标文件夹同样写在这个html文件里。

html文件是一个包含3列的表 文件名、文件位置和下载URL

每个URL都会在新窗口(`target="_blank"`)中打开并下载CSV文件。下载完成后,如果没有错误,子窗口会自动关闭。

我尝试过自动化(Selenium使用java)

但也存在以下挑战

  • 它应该等到下载完成
  • 有时url可能会显示错误,在这种情况下,它应该关闭子窗口并返回到父窗口
  • 第一个问题我已经解决了:用一个观察者每秒检查一次文件是否已下载(通过统计文件夹中csv文件的数量)

    我可以切换到子窗口并检查是否有错误,但如果没有错误,我的驱动程序(driver)就会卡在那里

    如何解决这个问题

    用于检查子窗口中是否存在错误的代码

       /**
        * Checks whether the child (pop-up) window opened next to the parent window
        * shows any text, which this code treats as an error page.
        *
        * <p>If more than one window is open, the driver is switched to the window
        * whose handle differs from {@code parentHandle}. A non-empty {@code <body>}
        * text is treated as an error: the child window is closed and the driver is
        * switched back to the parent. In every other case the driver is switched
        * back to the parent and the child window is left open.
        *
        * <p>{@code browser} and {@code parentHandle} are declared outside this method.
        *
        * <p>NOTE(review): the author reports that the body lookup can block when the
        * child window only serves a download; a page-load timeout on the driver may
        * be needed so the {@code catch} branch is reached - confirm against the
        * Selenium version in use.
        *
        * @param driver the driver controlling both the parent and the child window
        * @return {@code true} if a child window with non-empty body text was found
        *         and closed; {@code false} otherwise, including when the lookup throws
        */
       public boolean foundError(FirefoxDriver driver) {
        System.out.println(browser.getWindowHandle() + "Parent" + parentHandle);

        // Take one snapshot of the handles instead of querying the driver twice.
        Set<String> handles = driver.getWindowHandles();
        if (handles.size() <= 1) {
            return false;
        }

        // The child is the (last) handle that is not the parent.
        String child = null;
        for (String winHandle : handles) {
            if (!parentHandle.equals(winHandle)) {
                child = winHandle;
                System.out.println("Child" + winHandle);
            }
        }
        if (child == null) {
            return false;
        }

        System.out.println("tostring1" + driver.toString());
        driver.switchTo().window(child);
        System.out.println("Switched to child");
        System.out.println("Number of windows=" + driver.getWindowHandles().size());

        try {
            // Read the body text once: this is the call that may block, so it
            // must not be issued a second time for the emptiness check.
            String bodyText = driver.findElementByTagName("body").getText();
            System.out.println("Body text" + bodyText);

            if (!bodyText.isEmpty()) {
                driver.close();
                driver.switchTo().window(parentHandle);
                return true;
            }
            System.out.println("No error");
            driver.switchTo().window(parentHandle);
            System.out.println("Switched to parent");
        } catch (Exception e) {
            System.out.println("Error Catch block page time out:" + e);
            driver.switchTo().window(parentHandle);
        }

        return false;

    }
    
    public boolean foundError(FirefoxDriver driver) {
        System.out.println(browser.getWindowHandle() + "Parent" + parentHandle);
        String child = "";
        int numberOfWindows = 0;
        //return true;
        if (driver.getWindowHandles().size() > 1) {
            for (String winHandle : driver.getWindowHandles()) {
                numberOfWindows++;
                if (!parentHandle.equals(winHandle)) {
                    child = winHandle;
                    System.out.println("Child" + winHandle);
                }
            }
        }
        if (numberOfWindows > 1) {
            System.out.println("tostring1" + driver.toString());
            if (!parentHandle.equals(child)) {
                driver.switchTo().window(child);
            }
            System.out.println("Switched to child");
            Set set = driver.getWindowHandles();
            System.out.println("Number of windows=" + set.size());
            //  System.out.println("Number of windows="+set.size()+"driver url"+driver.getCurrentUrl());
            //  System.out.println("tostring2"+driver.toString());
            try {
                // WebDriverWait wait1 = new WebDriverWait(driver, 5);
                System.out.println("Body text" + driver.findElementByTagName("body").getText());// Here driver will get stuck
                //System.out.println("text"+driver.findElementByClassName("body").toString());
                //  List<WebElement> elements=driver.findElementsByClassName("ErrorBody");elements.size()>0
                if (!driver.findElementByTagName("body").getText().equals("")) {
                    driver.close();
                    driver.switchTo().window(parentHandle);
                    return true;
                }
                System.out.println("No error");
                driver.switchTo().window(parentHandle);
                System.out.println("Switched to parent");
            } catch (Exception e) {
                System.out.println("Error Catch block page time out:" + e);
                driver.switchTo().window(parentHandle);
                return false;
                //  driver.switchTo().window(parentHandle);
            }
        }
        return false;
    }
    
    我换了一种方法:使用Jsoup解析html文件,然后直接下载

        import java.io.File;
    import java.io.IOException;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    /*
     * To change this license header, choose License Headers in Project Properties.
     * To change this template file, choose Tools | Templates
     * and open the template in the editor.
     */
    /**
     * Reads {@code Download.html}, walks every table row and downloads the file
     * each row links to.
     *
     * <p>Each row is read cell by cell: the first {@code <td>} is the file name,
     * the second is the folder location, and the cell at position
     * {@link #anchorCol} (or any later cell) supplies the download link through
     * the {@code href} of its first {@code <a>} element. Rows without a link,
     * such as header rows, are skipped. The download itself is delegated to
     * {@link Library}.
     *
     * @author nudanesh
     */
    public class URLDownload {

        private Document doc;
        String url = "", folder, file;
        private final File sourceFile;
        int i = 1;
        /** 1-based counters for the current row and the current cell. */
        int r = 1, c = 1;
        /** 1-based index of the first cell that may contain the download link. */
        int anchorCol = 3;
        Library lib;

        /**
         * Parses {@code Download.html} from the working directory and downloads
         * every linked file. If the HTML file cannot be read, the failure is
         * logged and nothing is downloaded.
         */
        URLDownload() {
            lib = new Library();
            sourceFile = new File("Download.html");
            try {
                doc = Jsoup.parse(sourceFile, "UTF-8");
            } catch (IOException ex) {
                // Without a parsed document there is nothing to iterate over;
                // continuing would only fail with a NullPointerException below.
                Logger.getLogger(URLDownload.class.getName()).log(Level.SEVERE, null, ex);
                return;
            }

            Elements rows = doc.select("tr");
            System.out.println("Size=" + rows.size());
            for (Element row : rows) {
                System.out.println("Row" + r);
                url = "";
                c = 1;
                for (Element col : row.getElementsByTag("td")) {
                    if (c == 1) {
                        file = col.text();
                    } else if (c == 2) {
                        folder = col.text();
                    } else if (c >= anchorCol) {
                        // attr() yields "" when the cell has no <a href>; keep a
                        // link found earlier instead of overwriting it with "".
                        String href = col.getElementsByTag("a").attr("href");
                        if (!href.isEmpty()) {
                            url = href;
                        }
                    }
                    c++;
                }

                if (!url.isEmpty()) {
                    lib.setLocation(file, folder);
                    lib.downloadFile(url);
                }

                i++;
                r++;
            }
        }

        /**
         * Entry point: runs the whole parse-and-download job.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            new URLDownload();
        }
    }
    
    下面是 Library 类的源文件

        import java.io.BufferedOutputStream;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.io.UnsupportedEncodingException;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLConnection;
    import java.nio.file.Files;
    import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Calendar;
    import java.util.Date;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import org.apache.poi.xssf.usermodel.XSSFCell;
    import org.apache.poi.xssf.usermodel.XSSFSheet;
    import org.apache.poi.xssf.usermodel.XSSFWorkbook;
    
    /*
     * To change this license header, choose License Headers in Project Properties.
     * To change this template file, choose Tools | Templates
     * and open the template in the editor.
     */
    /**
     * Downloads files from URLs into a folder tree rooted at a timestamped
     * directory and records the outcome of every attempt in an Excel workbook.
     *
     * <p>Typical use: call {@link #setLocation(String, String)} to choose the
     * target file name and folder, then {@link #downloadFile(String)}. Every
     * call to {@code downloadFile} appends one row (file name, folder, status)
     * to the workbook and rewrites the workbook file.
     *
     * <p>Instances keep per-download state in fields and are written for
     * single-threaded, sequential use.
     *
     * @author nudanesh
     */
    public class Library  {

        /** {@code true} only while a download is streaming data. */
        boolean downloaded = false;
        Thread t;
        int waitTime = 0;
        /** Root folder for downloads, always ending in "/". */
        String baseLoc = "";
        /** {@code size}: copy-buffer size in bytes; {@code ByteWritten}: bytes written by the latest download. */
        int size = 1024, ByteWritten = 0;
        URL url;
        URLConnection uCon = null;
        /** Target folder (ending in "/") and file name used by the next download. */
        String folderLoc = "", file = "firstFile.csv";
        File loc;
        /** Number of leading "/"-separated segments of a location that {@link #getFolderPath} skips. */
        private int FolderInUrl = 4;
        private File resultFile;
        private XSSFWorkbook workbookResult;
        private XSSFSheet sheetResult;
        /** Index of the last created workbook row; -1 before the first row. */
        private int updateExcelRowNum = -1;
        /** Index of the last written cell in the current row; -1 when the row is empty. */
        private int updateExcelColNum = -1;
        String date;
        /** Connect/read timeout in milliseconds (15 minutes). */
        private int waitLimit = 900000;

        /**
         * Creates the timestamped base folder and the report workbook, and
         * writes the workbook's header row.
         */
        Library() {
            date = new SimpleDateFormat("yyyy_MM_dd_HH_mm_ss").format(Calendar.getInstance().getTime());
            System.out.print(date);
            baseLoc = date + "/";
            // The workbook is created directly in the timestamped folder ...
            WriteDataToExcel();
            // ... while the downloads go one level deeper.
            baseLoc += "Business Reports/";
            createRowExcel(updateExcelRowNum);
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Report Name");
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Path");
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, "Status");
            updateExcel();
        }

        /**
         * Selects the target of the next {@link #downloadFile(String)} call.
         *
         * @param a file name without extension; ".csv" is appended
         * @param b "/"-separated location from which the sub-folder path is
         *          derived via {@link #getFolderPath(String)}
         */
        public void setLocation(String a, String b) {
            file = a + ".csv";
            folderLoc = baseLoc + getFolderPath(b);
        }

        /**
         * Derives a relative folder path from a "/"-separated location: the
         * first {@code FolderInUrl} segments and the last segment are dropped,
         * trailing spaces are removed from each remaining segment, and every
         * segment is followed by "/".
         *
         * @param b the location; may be {@code null}
         * @return the relative folder path, or "" if no segment remains or
         *         {@code b} is {@code null}
         */
        public String getFolderPath(String b) {
            System.out.println("path" + b);
            if (b == null) {
                // Returning null here would end up as a literal "null" folder name.
                return "";
            }
            String[] segments = b.split("/");
            StringBuilder path = new StringBuilder();
            for (int i = FolderInUrl; i < segments.length - 1; i++) {
                path.append(removeSpacesAtEnd(segments[i])).append('/');
            }
            return path.toString();
        }

        /**
         * Downloads {@code urlString} into {@code folderLoc + file} and appends
         * a result row to the workbook. Failures are reported in the row's
         * status column; no exception is thrown for them.
         *
         * @param urlString the URL to download
         */
        public void downloadFile(String urlString) {
            String status;
            ByteWritten = 0;
            try {
                // A malformed URL is a failure of this download; it must not
                // fall through and reuse the URL of a previous call.
                url = new URL(urlString);

                loc = new File(folderLoc);
                if (!loc.exists()) {
                    loc.mkdirs();
                }

                uCon = url.openConnection();
                uCon.setConnectTimeout(waitLimit);
                uCon.setReadTimeout(waitLimit);

                // Open the response first so a refused request does not leave
                // an empty file behind; both streams are closed on every path.
                try (InputStream in = uCon.getInputStream();
                        OutputStream out = new BufferedOutputStream(new FileOutputStream(folderLoc + file))) {
                    downloaded = true;
                    byte[] buffer = new byte[size];
                    int read;
                    while ((read = in.read(buffer)) != -1) {
                        out.write(buffer, 0, read);
                        ByteWritten += read;
                    }
                }
                status = "Downloaded ";
            } catch (java.net.SocketTimeoutException e) {
                System.out.println("error catch" + e);
                status = "Rejected the Download after waiting " + (waitLimit / 60000) + " minutes";
            } catch (Exception e) {
                System.out.println("error catch" + e);
                status = "Download failed: " + e;
            } finally {
                downloaded = false;
                waitTime = 0;
            }
            recordStatus(status);
        }

        /** Appends a row (file name, folder, status) to the workbook and saves it. */
        private void recordStatus(String status) {
            resetCounters();
            createRowExcel(updateExcelRowNum);
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, file);
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, folderLoc);
            updateRowColExcel(updateExcelRowNum, updateExcelColNum, status);
            updateExcel();
        }

        /**
         * Moves {@code reportName + ".csv"} from the current folder into the
         * sub-folder {@code path}, replacing an existing file. Failures are
         * printed and otherwise ignored.
         *
         * @param reportName file name without the ".csv" extension
         * @param path       sub-folder relative to the current folder
         */
        public void moveToFolder(String reportName, String path) {
            try {
                File source = new File(folderLoc + "/" + reportName + ".csv");
                File targetDir = new File(folderLoc + "/" + path);
                if (!targetDir.exists()) {
                    targetDir.mkdirs();
                }
                // Resolve against the directory so the result does not depend
                // on `path` ending with a separator.
                File target = new File(targetDir, reportName + ".csv").getAbsoluteFile();
                System.out.println("Path=" + target.getAbsolutePath() + "\nReport path=" + source.getAbsolutePath());
                System.out.println("Status" + Files.move(source.toPath(), target.toPath(), REPLACE_EXISTING));
            } catch (Exception e) {
                System.out.println("error while moving" + e);
            }
        }

        /**
         * Replaces every ':' in {@code report} with '_'.
         *
         * @param report the text to sanitise
         * @return the text with ':' replaced by '_'
         */
        public String changeSpecialCharacters(String report) {
            return report.replace(':', '_');
        }

        /**
         * Removes trailing space characters (' ' only) from {@code inputPath}.
         *
         * @param inputPath the text to trim
         * @return the text without trailing spaces
         */
        public String removeSpacesAtEnd(String inputPath) {
            int end = inputPath.length();
            while (end > 0 && inputPath.charAt(end - 1) == ' ') {
                end--;
            }
            return inputPath.substring(0, end);
        }

        /**
         * Creates the base folder, the (empty) report file inside it, and a new
         * in-memory workbook with one sheet. The workbook is written to the
         * file by {@link #updateExcel()}.
         */
        public void WriteDataToExcel() {
            try {
                File base = new File(baseLoc);
                if (!base.exists()) {
                    base.mkdirs();
                }
                resultFile = new File(baseLoc + "Reports info " + date + ".xlsx");
                System.out.println("Path" + resultFile.getAbsolutePath());
                // No stream is opened here: updateExcel() opens and closes its
                // own, so nothing stays open on the report file.
                resultFile.createNewFile();
            } catch (IOException | SecurityException e) {
                Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, e);
            }

            workbookResult = new XSSFWorkbook();
            sheetResult = workbookResult.createSheet();
        }

        /**
         * Writes the whole workbook to the report file, replacing its previous
         * content. Failures are logged and otherwise ignored.
         */
        public void updateExcel() {
            try (FileOutputStream out = new FileOutputStream(resultFile)) {
                workbookResult.write(out);
            } catch (IOException | RuntimeException e) {
                Logger.getLogger(Library.class.getName()).log(Level.SEVERE, null, e);
            }
        }

        /**
         * Appends a new row after the last created one.
         *
         * @param num ignored; the row index is always the internal row counter
         *            after incrementing it
         */
        public void createRowExcel(int num) {
            updateExcelRowNum++;
            sheetResult.createRow(updateExcelRowNum);
        }

        /**
         * Writes {@code value} into the next free cell of row {@code rnum}.
         *
         * @param rnum  index of an existing row
         * @param cnum  ignored; the column is always the internal column
         *              counter after incrementing it
         * @param value the text to store
         */
        public void updateRowColExcel(int rnum, int cnum, String value) {
            updateExcelColNum++;
            XSSFCell cell = sheetResult.getRow(rnum).createCell(updateExcelColNum);
            cell.setCellValue(value);
        }

        /**
         * Sets the text of the cell at ({@code rnum}, {@code cnum}), creating
         * the row or the cell if it does not exist yet.
         *
         * @param rnum  row index
         * @param cnum  column index
         * @param value the text to store
         */
        public void updateColumn(int rnum, int cnum, String value) {
            if (sheetResult.getRow(rnum) == null) {
                sheetResult.createRow(rnum);
            }
            XSSFCell cell = sheetResult.getRow(rnum).getCell(cnum);
            if (cell == null) {
                cell = sheetResult.getRow(rnum).createCell(cnum);
            }
            cell.setCellValue(value);
        }

        /** Resets the column counter so the next written cell is column 0. */
        public void resetCounters() {
            updateExcelColNum = -1;
        }

    }
    
    import java.io.BufferedOutputStream;
    导入java.io.File;
    导入java.io.FileNotFoundException;
    导入java.io.FileOutputStream;
    导入java.io.IOException;
    导入java.io.InputStream;
    导入java.io.OutputStream;
    导入java.io.UnsupportedEncodingException;
    导入java.net.MalformedURLException;
    导入java.net.URL;
    导入java.net.URLConnection;
    导入java.nio.file.Files;
    导入静态java.nio.file.StandardCopyOption.REPLACE_EXISTING;
    导入java.text.simpleDataFormat;
    导入java.util.ArrayList;
    导入java.util.Calendar;
    导入java.util.Date;
    导入java.util.logging.Level;
    导入java.util.logging.Logger;
    导入org.apache.poi.xssf.usermodel.XSSFCell;
    导入org.apache.poi.xssf.usermodel.xssfheet;
    导入org.apache.poi.xssf.usermodel.xssf工作簿;
    /*
    *要更改此许可证标题,请在“项目属性”中选择“许可证标题”。
    *要更改此模板文件,请选择工具|模板
    *然后在编辑器中打开模板。
    */
    /**
    *
    *@作者努达尼什
    */
    公共班级图书馆{
    布尔值=假;
    螺纹t;
    int waitTime=0;
    字符串baseLoc=“”;
    int size=1024,字节写入=0;
    网址;
    URLConnection=null;
    字符串folderLoc=“”,file=“firstFile.csv”;
    文件loc;
    私有输出流外流;
    私有InputStream为空;
    专用字节[]buf;
    私家车;
    私有int FolderInUrl=4;
    private boolean rootFolder=true;
    私有文件结果文件;
    私有文件输出流文件结果;
    私有XSSFWorkbook工作簿结果;
    私有XSSF表结果;
    private int updateExcelRowNum=-1;
    private int updateExcelColNum=-1;
    字符串日期;
    私人有限公司=900000;
    图书馆(){
    /*System.out.print(Calendar.getInstance().toString());
    日期d=新日期();
    字符串日期=d.toString();
    系统输出打印项次(日期)*/
    //t=新螺纹(本螺纹);
    //t.开始();
    日期=新的SimpleDataFormat(“yyyy\u MM\u dd\u HH\u MM\u ss”).格式(Calendar.getInstance().getTime());
    系统输出打印(日期);
    baseLoc=日期+“/”;
    WriteDataoExcel();
    baseLoc+=“业务报告/”;
    createRowExcel(updateExcelRowNum);
    updateRowColExcel(updateExcelRowNum,updateExcelColNum,“报告名称”);
    updateRowColExcel(updateExcelRowNum,updateExcelColNum,“路径”);
    updateRowColExcel(updateExcelRowNum,updateExcelColNum,“状态”);
    updateExcel();
    }
    公共场所