Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/401.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/5/excel/25.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/joomla/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Java 如何使用POI从excel导出嵌入文件?_Java_Excel_Apache Poi_Ole_Embedding - Fatal编程技术网

Java 如何使用POI从excel导出嵌入文件?

Java 如何使用POI从excel导出嵌入文件?,java,excel,apache-poi,ole,embedding,Java,Excel,Apache Poi,Ole,Embedding,我已经编写了一个java basic程序,它正在使用ApachePOI制作嵌入Excel工作表中的3种文件(ppt、doc、txt)。现在,我要以原始格式导出此文件。如何做到这一点 参考链接是。 我已经从这个链接制作了一个程序 简而言之,我想在嵌入式文件上导出功能 我使用下面的代码尝试了上述问题,但它不适用于导出excel工作表中的嵌入文件: 以下是试图解决的代码: public static void main(String[] args) throws IOException { S

我编写了一个基础的 Java 程序,使用 Apache POI 把 3 种文件(ppt、doc、txt)嵌入到 Excel 工作表中。现在我想把这些嵌入的文件按原始格式导出,该如何实现?

参考链接是。 我已经从这个链接制作了一个程序

简而言之,我想在嵌入式文件上导出功能

我使用下面的代码尝试了上述问题,但它不适用于导出excel工作表中的嵌入文件:

以下是试图解决的代码:

/**
 * Entry point: inspects the embedded objects of the sample workbook
 * {@code ole_ppt_in_xls.xls}.
 *
 * @param args unused
 * @throws IOException if the workbook cannot be read
 */
public static void main(String[] args) throws IOException {
    ReadExcel("ole_ppt_in_xls.xls");
}

 /**
  * Opens the given {@code .xls} file and walks over all embedded OLE objects,
  * printing the OLE2 class name of each one. Worksheet, Document and
  * Presentation objects are loaded into their POI models; for every other
  * object either its directory entries are listed or its raw data is read.
  *
  * @param fileName path of the Excel (HSSF) workbook to inspect
  * @throws IOException if the file cannot be opened or an embedded object cannot be parsed
  */
 public static void ReadExcel(String fileName) throws IOException {
    FileInputStream inputFileStream = new FileInputStream(fileName);
    try {
        POIFSFileSystem fs = new POIFSFileSystem(inputFileStream);
        HSSFWorkbook workbook = new HSSFWorkbook(fs);

        for (HSSFObjectData obj : workbook.getAllEmbeddedObjects()) {
            // the OLE2 Class Name of the object
            String oleName = obj.getOLE2ClassName();
            System.out.println(oleName);
            // Constant-first equals: an object without a class name falls through
            // to the generic branch instead of raising a NullPointerException.
            if ("Worksheet".equals(oleName)) {
                System.out.println("Worksheet");
                DirectoryNode dn = (DirectoryNode) obj.getDirectory();
                HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(dn, fs, false);

            } else if ("Document".equals(oleName)) {
                System.out.println("Document");
                DirectoryNode dn = (DirectoryNode) obj.getDirectory();
                HWPFDocument embeddedWordDocument = new HWPFDocument(dn, fs);
            } else if ("Presentation".equals(oleName)) {
                // The original contained this branch twice; the second copy was unreachable.
                System.out.println("Presentation");
                DirectoryNode dn = (DirectoryNode) obj.getDirectory();
                SlideShow embeddedPowerPointDocument = new SlideShow(
                        new HSLFSlideShow(dn, fs));
            } else {
                System.out.println("Else part ");
                if (obj.hasDirectoryEntry()) {
                    System.out.println("obj.hasDirectoryEntry()"+obj.hasDirectoryEntry());
                    // The DirectoryEntry is a DocumentNode. Examine its entries

                    DirectoryNode dn = (DirectoryNode) obj.getDirectory();
                    for (Iterator<Entry> entries = dn.getEntries(); entries.hasNext();) {
                        Entry entry = entries.next();
                        System.out.println(oleName + "." + entry.getName());
                    }
                } else {
                    System.out.println("Else part 22");
                    byte[] objectData = obj.getObjectData();
                }
            }
        }
    } finally {
        // Release the file handle even when parsing fails.
        inputFileStream.close();
    }

}
上述程序的输出屏幕:


那么,如何实现导出功能呢

这个问题与另一个问题(原文链接已丢失)部分重复,而那个问题的原始答案正是我写的。

应提问者的要求,我还补充了一个示例,说明如何借助辅助工具(原文此处的链接已丢失)添加并嵌入文件——与此同时,我已经把这部分代码加入了 POI,所以现在做起来更容易了。对于基于 OOXML 的文件,请参阅另一个示例(原文链接已丢失)。

因此,这段代码会遍历 DrawingPatriarch 中的所有形状,并提取其中的图片和嵌入文件。

我已经在这个答案中添加了完整的代码,而不是一个片段,因为我预计下一个“为什么我不能导出这种嵌入”很快就会出现

package poijartest;

import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.imageio.ImageIO;

import org.apache.poi.ddf.EscherComplexProperty;
import org.apache.poi.ddf.EscherOptRecord;
import org.apache.poi.ddf.EscherProperty;
import org.apache.poi.hpsf.ClassID;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hssf.usermodel.HSSFClientAnchor;
import org.apache.poi.hssf.usermodel.HSSFObjectData;
import org.apache.poi.hssf.usermodel.HSSFPatriarch;
import org.apache.poi.hssf.usermodel.HSSFPicture;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFShape;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFSimpleShape;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.Ole10Native;
import org.apache.poi.poifs.filesystem.Ole10NativeException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.sl.usermodel.AutoShape;
import org.apache.poi.sl.usermodel.ShapeType;
import org.apache.poi.sl.usermodel.Slide;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFPicture;
import org.apache.poi.xssf.usermodel.XSSFPictureData;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTPicture;

/**
 * Tested with POI 3.16-beta1
 * 
 * 17.12.2014: original version for
 *    http://apache-poi.1045710.n5.nabble.com/How-to-get-the-full-file-name-of-a-picture-in-xls-file-td5717205.html
 * 
 * 17.12.2016: added sample/dummy data for
 *    https://stackoverflow.com/questions/41101012/how-to-export-embeded-file-which-from-excel-using-poi 
 */
public class EmbeddedReader {

    private File excel_file;
    private ImageReader image_reader;

    /**
     * Demo entry point: creates the sample workbook {@code bla.xls}, reads its
     * embedded objects back and writes each one to a file named after the
     * embedding in the current working directory.
     *
     * @param args unused
     * @throws Exception if the sample cannot be created, read or exported
     */
    public static void main(String[] args) throws Exception {
        File sample = new File("bla.xls");
        getSampleEmbedded(sample);

        ImageReader ir = new ImageReader(sample);
        try {
            for (EmbeddedData ed : ir.embeddings) {
                System.out.println(ed.filename);
                FileOutputStream fos = new FileOutputStream(ed.filename);
                try {
                    IOUtils.copy(ed.is, fos);
                } finally {
                    // Close the output file even if copying fails.
                    fos.close();
                }
            }
        } finally {
            // Always release the workbook and the embedded streams.
            ir.close();
        }
    }

    /**
     * Writes a sample {@code .xls} workbook containing one OLE package (a
     * generated PowerPoint file named {@code dummy.ppt}) with a PNG preview
     * picture, anchored on a newly created sheet.
     *
     * @param sample target file for the generated workbook
     * @throws IOException if the sample data cannot be generated or the file cannot be written
     */
    static void getSampleEmbedded(File sample) throws IOException {
        HSSFWorkbook wb = new HSSFWorkbook();
        try {
            int storageId = wb.addOlePackage(getSamplePPT(), "dummy.ppt", "dummy.ppt", "dummy.ppt");
            int picId = wb.addPicture(getSamplePng(), HSSFPicture.PICTURE_TYPE_PNG);
            HSSFSheet sheet = wb.createSheet();
            HSSFPatriarch pat = sheet.createDrawingPatriarch();
            HSSFClientAnchor anc = pat.createAnchor(0, 0, 0, 0, 1, 1, 3, 6);
            HSSFObjectData od = pat.createObjectData(anc, storageId, picId);
            od.setNoFill(true);
            wb.write(sample);
        } finally {
            // Close the workbook even when building or writing it fails.
            wb.close();
        }
    }

    /**
     * Loads a small icon from the class path and re-encodes it as PNG, to be
     * used as the preview picture of the sample embedding.
     *
     * @return the PNG-encoded image bytes
     * @throws IOException if the icon resource is missing, cannot be decoded,
     *         or no PNG writer is available
     */
    static byte[] getSamplePng() throws IOException {
        String resource = "javax/swing/plaf/metal/icons/ocean/directory.gif";
        // The context class loader may be null; fall back to the system class loader.
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        URL imgUrl = (cl != null) ? cl.getResource(resource) : ClassLoader.getSystemResource(resource);
        if (imgUrl == null) {
            // ImageIO.read(null) would otherwise fail with an unhelpful IllegalArgumentException.
            throw new IOException("Sample icon not found on class path: " + resource);
        }
        BufferedImage img = ImageIO.read(imgUrl);
        if (img == null) {
            // ImageIO.read returns null when no registered reader understands the data.
            throw new IOException("Sample icon could not be decoded: " + imgUrl);
        }
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        if (!ImageIO.write(img, "PNG", bos)) {
            throw new IOException("No PNG writer available");
        }
        return bos.toByteArray();
    }

    /**
     * Builds a one-slide PowerPoint file containing a red star shape, then
     * re-writes it through a POIFS file system whose root storage class id is
     * set to {@code ClassID.PPT_SHOW}.
     *
     * @return the bytes of the resulting OLE2 file
     * @throws IOException if the slide show cannot be serialized
     */
    static byte[] getSamplePPT() throws IOException {
        HSLFSlideShow slideShow = new HSLFSlideShow();
        Slide<?,?> firstSlide = slideShow.createSlide();

        AutoShape<?,?> star = firstSlide.createAutoShape();
        star.setShapeType(ShapeType.STAR_32);
        star.setAnchor(new java.awt.Rectangle(50, 50, 100, 200));
        star.setFillColor(Color.red);

        // First pass: serialize the plain slide show.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        slideShow.write(buffer);
        slideShow.close();
        byte[] plainPpt = buffer.toByteArray();

        // Second pass: reopen as a file system and stamp the root class id.
        POIFSFileSystem fileSystem = new POIFSFileSystem(new ByteArrayInputStream(plainPpt));
        fileSystem.getRoot().setStorageClsid(ClassID.PPT_SHOW);

        buffer.reset();
        fileSystem.writeFilesystem(buffer);
        fileSystem.close();

        return buffer.toByteArray();
    }

    /**
     * Creates a reader for the workbook at the given path and immediately
     * scans it through an {@code ImageReader}.
     *
     * @param excel_path path of the Excel file
     * @throws IOException declared by the {@code ImageReader} constructor
     */
    public EmbeddedReader(String excel_path) throws IOException {
        File workbookFile = new File(excel_path);
        this.excel_file = workbookFile;
        this.image_reader = new ImageReader(workbookFile);
    }

    /**
     * Returns the file names of all embeddings collected by the reader, in
     * the order in which they were collected.
     *
     * @return a new array of file names (empty when nothing was collected)
     */
    public String[] get_file_names() {
        List<EmbeddedData> collected = image_reader.embeddings;
        String[] names = new String[collected.size()];
        int next = 0;
        for (EmbeddedData item : collected) {
            names[next++] = item.filename;
        }
        return names;
    }

    /**
     * Looks up the content stream of the first embedding whose file name
     * equals the given name.
     *
     * @param file_name name to look for
     * @return the stream of the first match, or {@code null} if none matches
     */
    public InputStream get_stream(String file_name) {
        for (EmbeddedData candidate : image_reader.embeddings) {
            if (file_name.equals(candidate.filename)) {
                return candidate.is;
            }
        }
        return null;
    }

    static class ImageReader implements Closeable {
        EmbeddedExtractor extractors[] = {
            new Ole10Extractor(), new PdfExtractor(), new WordExtractor(), new ExcelExtractor(), new FsExtractor()
        };

        List<EmbeddedData> embeddings = new ArrayList<EmbeddedData>();
        Workbook wb;

        /**
         * Opens the workbook and collects its embeddings. For XSSF workbooks
         * the pictures of the sheet named "Receipt images" are collected; for
         * HSSF workbooks all embedded OLE objects of the workbook plus the
         * pictures of that sheet are collected.
         *
         * <p>A workbook that cannot be opened is reported as an
         * {@link IOException} instead of being silently ignored, because a
         * reader without a workbook would fail later in {@code close()}.
         * Failures during extraction are reported on {@code System.err} and
         * whatever was collected up to that point is kept.
         *
         * @param excelfile the Excel file to read
         * @throws IOException if the workbook cannot be opened
         */
        public ImageReader(File excelfile) throws IOException {
            try {
                wb = WorkbookFactory.create(excelfile);
            } catch (IOException e) {
                throw e;
            } catch (Exception e) {
                // e.g. unsupported or encrypted file formats
                throw new IOException("Cannot open workbook " + excelfile, e);
            }

            try {
                Sheet receiptImages = wb.getSheet("Receipt images");
                if (wb instanceof XSSFWorkbook) {
                    addSheetPicsAndEmbedds((XSSFSheet)receiptImages);
                } else {
                    addAllEmbedds((HSSFWorkbook)wb);
                    addSheetPics((HSSFSheet)receiptImages);
                }
            } catch (Exception e) {
                // Best effort: keep the embeddings found so far, but do not hide the problem.
                System.err.println("Extraction of embedded objects from " + excelfile
                        + " was incomplete: " + e);
            }
        }

        /**
         * Collects every picture shape of the given XSSF sheet as an embedding.
         * The file name is taken from the picture's non-visual properties and
         * falls back to the name of the package part holding the picture data.
         *
         * @param sheet the sheet to scan; {@code null} is ignored
         * @throws IOException if the picture data stream cannot be opened
         */
        protected void addSheetPicsAndEmbedds(XSSFSheet sheet) throws IOException {
            if (sheet == null) return;
            // Read-only lookup: createDrawingPatriarch() would add an empty drawing
            // to sheets that have none. This mirrors the HSSF path in addSheetPics.
            XSSFDrawing draw = sheet.getDrawingPatriarch();
            if (draw == null) return;
            for (XSSFShape shape : draw.getShapes()) {
                if (!(shape instanceof XSSFPicture)) continue;
                XSSFPicture picture = (XSSFPicture)shape;
                XSSFPictureData pd = picture.getPictureData();
                PackagePart pp = pd.getPackagePart();
                CTPicture ctPic = picture.getCTPicture();
                String filename = null;
                try {
                    filename = ctPic.getNvPicPr().getCNvPr().getName();
                } catch (Exception ignored) {
                    // Missing non-visual properties: fall back to the part name below.
                }
                if (filename == null || "".equals(filename)) {
                    filename = new File(pp.getPartName().toString()).getName();
                }
                EmbeddedData ed = new EmbeddedData();
                ed.filename = fileNameWithoutPath(filename);
                ed.is = pp.getInputStream();
                embeddings.add(ed);
            }
        }

        /**
         * Collects all embedded OLE objects of an HSSF workbook that have a
         * directory entry, using the first extractor that accepts the entry.
         *
         * <p>File name resolution: the shape's alternative name wins when one
         * exists; otherwise the name delivered by the extractor is kept; if
         * the extractor delivered none, the directory name is used. The
         * original overwrote the name with a {@code null} alternative name,
         * which made {@code fileNameWithoutPath} throw a
         * {@link NullPointerException}.
         *
         * @param hwb the workbook to scan
         * @throws IOException if an extractor fails to read an object
         */
        protected void addAllEmbedds(HSSFWorkbook hwb) throws IOException {
            for (HSSFObjectData od : hwb.getAllEmbeddedObjects()) {
                String alternativeName = getAlternativeName(od);
                if (od.hasDirectoryEntry()) {
                    DirectoryNode src = (DirectoryNode)od.getDirectory();
                    for (EmbeddedExtractor ee : extractors) {
                        if (ee.canExtract(src)) {
                            EmbeddedData ed = ee.extract(src);
                            if (alternativeName != null) {
                                ed.filename = alternativeName;
                            } else if (ed.filename == null) {
                                // Last resort so that the embedding always has a name.
                                ed.filename = src.getName();
                            }
                            ed.filename = fileNameWithoutPath(ed.filename);
                            ed.source = "object";
                            embeddings.add(ed);
                            // Only the first matching extractor handles the object.
                            break;
                        }
                    }
                }
            }
        }

        /**
         * Reads the shape name stored in the shape's Escher option record:
         * the first complex property called {@code groupshape.shapename},
         * decoded as UTF-16LE.
         *
         * @param shape the shape to inspect
         * @return the decoded name, or {@code null} if the option record or
         *         the property is not available
         */
        protected String getAlternativeName(HSSFShape shape) {
            EscherOptRecord options = reflectEscherOptRecord(shape);
            if (options != null) {
                for (EscherProperty property : options.getEscherProperties()) {
                    boolean isShapeName = "groupshape.shapename".equals(property.getName());
                    if (isShapeName && property.isComplex()) {
                        byte[] rawName = ((EscherComplexProperty)property).getComplexData();
                        return new String(rawName, Charset.forName("UTF-16LE"));
                    }
                }
            }
            return null;
        }

        /**
         * Collects the pictures of an HSSF sheet as embeddings.
         *
         * <p>EMF pictures get special treatment: if the EMF data contains a
         * PDF ({@code %PDF-} ... {@code %%EOF}), only that PDF slice is
         * exported. If it contains no PDF marker, the picture is treated as
         * the preview of an already collected embedded object, and its name
         * replaces a generated object name of the form {@code ARGF1234.pdf}.
         * A picture is only added if no embedding with the same file name
         * exists yet.
         *
         * @param sheet the sheet to scan; {@code null} is ignored
         */
        protected void addSheetPics(HSSFSheet sheet) {
            if (sheet == null) return;
            HSSFPatriarch patriarch = sheet.getDrawingPatriarch();
            if (patriarch == null) return;

            // Loop invariants, hoisted: the markers are plain ASCII, so do not
            // depend on the platform default charset.
            Charset ascii = Charset.forName("US-ASCII");
            byte[] pdfStart = "%PDF-".getBytes(ascii);
            byte[] pdfEnd = "%%EOF".getBytes(ascii);
            Pattern generatedPdfName = Pattern.compile("^[A-Z0-9]{8}\\.[pdfPDF]{3}$");

            int picIdx = 0;
            int emfIdx = 0;
            // Loop through the objects
            for (HSSFShape shape : patriarch.getChildren()) {
                if (!(shape instanceof HSSFPicture)) {
                    continue;
                }
                HSSFPicture picture = (HSSFPicture) shape;
                if (picture.getShapeType() != HSSFSimpleShape.OBJECT_TYPE_PICTURE) continue;
                HSSFPictureData pd = picture.getPictureData();
                // NOTE(review): presumably null for shapes without picture data - confirm against POI.
                if (pd == null) continue;
                byte pictureBytes[] = pd.getData();
                int pictureBytesOffset = 0;
                int pictureBytesLen = pictureBytes.length;
                String filename = picture.getFileName();
                // try to find an alternative name
                if (filename == null || "".equals(filename)) {
                    filename = getAlternativeName(picture);
                }
                // default to dummy name
                if (filename == null || "".equals(filename)) {
                    filename = "picture"+(picIdx++);
                }
                filename = filename.trim();

                // check for emf+ embedded pdf (poor mans style :( )
                // Mac Excel 2011 embeds pdf files with this method.
                boolean validFile = true;
                if (pd.getFormat() == Workbook.PICTURE_TYPE_EMF) {
                    validFile = false;
                    int idxStart = indexOf(pictureBytes, 0, pdfStart);
                    if (idxStart != -1) {
                        int idxEnd = indexOf(pictureBytes, idxStart, pdfEnd);
                        if (idxEnd != -1) {
                            pictureBytesOffset = idxStart;
                            // +6 = length of "%%EOF" plus one trailing byte;
                            // ByteArrayInputStream clamps the length to the array end.
                            pictureBytesLen = idxEnd-idxStart+6;
                            validFile = true;
                        }
                    } else {
                        // This shape was not a Mac Excel 2011 embedded pdf file.
                        // So this is a shape related to a regular embedded object
                        // Lets update the object filename with the shapes filename
                        // if the object filename is of format ARGF1234.pdf
                        // Guard the index: there may be more EMF previews than collected objects.
                        if (emfIdx < embeddings.size()) {
                            EmbeddedData ed_obj = embeddings.get(emfIdx);
                            if (ed_obj.filename != null
                                    && generatedPdfName.matcher(ed_obj.filename).matches()) {
                                ed_obj.filename = filename;
                            }
                        }
                        emfIdx += 1;
                    }
                }

                EmbeddedData ed = new EmbeddedData();
                ed.filename = fileNameWithoutPath(filename);
                ed.is = new ByteArrayInputStream(pictureBytes, pictureBytesOffset, pictureBytesLen);
                if(fileNotInEmbeddings(ed.filename) && validFile) {
                    embeddings.add(ed);
                }
            }
        }

        /**
         * Obtains the Escher option record of a shape by reflectively calling
         * the non-public {@code HSSFShape.getOptRecord()} method.
         *
         * @param shape the shape to query
         * @return the option record, or {@code null} if the reflective call fails
         */
        private static EscherOptRecord reflectEscherOptRecord(HSSFShape shape) {
            EscherOptRecord record;
            try {
                Method accessor = HSSFShape.class.getDeclaredMethod("getOptRecord");
                accessor.setAccessible(true);
                record = (EscherOptRecord)accessor.invoke(shape);
            } catch (Exception e) {
                // todo: log ... well actually "should not happen" ;)
                record = null;
            }
            return record;
        }

        /**
         * Strips any directory part from a file name taken from the workbook.
         * Both {@code \} and {@code /} are treated as separators, so a name
         * stored in the document can never point outside the directory the
         * caller writes to.
         *
         * @param filename the stored name; must not be {@code null}
         * @return the part after the last path separator (the whole name if there is none)
         */
        private String fileNameWithoutPath(String filename) {
            int last_index = Math.max(filename.lastIndexOf('\\'), filename.lastIndexOf('/'));
            return filename.substring(last_index + 1);
        }

        /**
         * Tells whether no collected embedding carries the given file name.
         * Every collected embedding is compared.
         *
         * @param filename the name to look for
         * @return {@code true} if no embedding has this name, {@code false} otherwise
         */
        private boolean fileNotInEmbeddings(String filename) {
            boolean absent = true;
            for (EmbeddedData candidate : embeddings) {
                // Non-short-circuit on purpose: all entries are visited.
                absent &= !candidate.filename.equals(filename);
            }
            return absent;
        }

        /**
         * Closes the streams of all collected embeddings and then the
         * workbook. Every resource is attempted even if closing an earlier
         * one fails; the first failure is rethrown at the end.
         *
         * @throws IOException the first error raised while closing
         */
        public void close() throws IOException {
            IOException firstFailure = null;
            for (EmbeddedData ed : embeddings) {
                if (ed.is == null) continue;
                try {
                    ed.is.close();
                } catch (IOException e) {
                    if (firstFailure == null) firstFailure = e;
                }
            }
            // wb is null when the workbook could not be opened.
            if (wb != null) {
                try {
                    wb.close();
                } catch (IOException e) {
                    if (firstFailure == null) firstFailure = e;
                }
            }
            if (firstFailure != null) {
                throw firstFailure;
            }
        }
    }

    /**
     * Plain holder for one piece of content extracted from a workbook.
     */
    static class EmbeddedData {
        /** File name under which the content is listed and exported. */
        String filename;
        /** Stream over the content; closed by {@code ImageReader.close()}. */
        InputStream is;
        /** Origin marker; {@code ImageReader.addAllEmbedds} sets it to {@code "object"}. */
        String source;
    }

    /**
     * Strategy for turning an embedded OLE directory node into
     * {@code EmbeddedData}. Implementations decide whether they can handle a
     * node and how to extract it.
     */
    static abstract class EmbeddedExtractor {
        /** @return whether this extractor handles the given directory node */
        abstract boolean canExtract(DirectoryNode dn);

        /**
         * Extracts the content of the given directory node.
         *
         * @throws IOException if the node cannot be read
         */
        abstract EmbeddedData extract(DirectoryNode dn) throws IOException;

        /**
         * Copies the whole directory node into a fresh POIFS file system and
         * returns its serialized bytes under the given file name.
         *
         * @param dn       the node to copy
         * @param filename the name to assign to the result
         * @throws IOException if copying or serializing fails
         */
        protected EmbeddedData extractFS(DirectoryNode dn, String filename) throws IOException {
            assert(canExtract(dn));

            POIFSFileSystem copy = new POIFSFileSystem();
            copyNodes(dn, copy.getRoot());

            ByteArrayOutputStream serialized = new ByteArrayOutputStream();
            copy.writeFilesystem(serialized);
            copy.close();

            EmbeddedData result = new EmbeddedData();
            result.filename = filename;
            result.is = new ByteArrayInputStream(serialized.toByteArray());
            return result;
        }
    }

    /**
     * Extractor for OLE 1.0 packages: accepts nodes whose storage class id is
     * {@code ClassID.OLE10_PACKAGE} and returns the packaged file's data.
     */
    static class Ole10Extractor extends EmbeddedExtractor {
        public boolean canExtract(DirectoryNode dn) {
            return ClassID.OLE10_PACKAGE.equals(dn.getStorageClsid());
        }

        /**
         * Reads the Ole10Native payload; the result is named after the last
         * path segment of the file name stored in the package.
         *
         * @throws IOException if the package is unreadable; an
         *         {@code Ole10NativeException} is wrapped as the cause
         */
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
            try {
                Ole10Native payload = Ole10Native.createFromEmbeddedOleObject(dn);
                EmbeddedData result = new EmbeddedData();
                String storedName = payload.getFileName();
                result.filename = new File(storedName).getName();
                result.is = new ByteArrayInputStream(payload.getDataBuffer());
                return result;
            } catch (Ole10NativeException e) {
                throw new IOException(e);
            }
        }
    }

    /**
     * Extractor for embedded PDF objects: accepts nodes with the PDF class id
     * or nodes that contain a {@code CONTENTS} entry, and exposes that entry
     * as the file content.
     */
    static class PdfExtractor extends EmbeddedExtractor {
        static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}");

        public boolean canExtract(DirectoryNode dn) {
            if (PdfClassID.equals(dn.getStorageClsid())) {
                return true;
            }
            return dn.hasEntry("CONTENTS");
        }

        /**
         * Opens the {@code CONTENTS} entry; the result is named after the
         * directory node with a {@code .pdf} suffix.
         */
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
            EmbeddedData result = new EmbeddedData();
            result.is = dn.createDocumentInputStream("CONTENTS");
            result.filename = dn.getName()+".pdf";
            return result;
        }
    }

    /**
     * Extractor for embedded Word documents: accepts nodes with a Word 95/97
     * class id or a {@code WordDocument} entry and copies the whole node as a
     * {@code .doc} file.
     */
    static class WordExtractor extends EmbeddedExtractor {
        public boolean canExtract(DirectoryNode dn) {
            ClassID storageId = dn.getStorageClsid();
            if (ClassID.WORD95.equals(storageId)) {
                return true;
            }
            if (ClassID.WORD97.equals(storageId)) {
                return true;
            }
            return dn.hasEntry("WordDocument");
        }

        public EmbeddedData extract(DirectoryNode dn) throws IOException {
            String targetName = dn.getName()+".doc";
            return extractFS(dn, targetName);
        }
    }

    /**
     * Extractor for embedded Excel workbooks: accepts nodes with an Excel
     * 95/97 class id or a {@code Workbook} entry and copies the whole node as
     * an {@code .xls} file.
     */
    static class ExcelExtractor extends EmbeddedExtractor {
        public boolean canExtract(DirectoryNode dn) {
            ClassID storageId = dn.getStorageClsid();
            if (ClassID.EXCEL95.equals(storageId)) {
                return true;
            }
            if (ClassID.EXCEL97.equals(storageId)) {
                return true;
            }
            return dn.hasEntry("Workbook") /*...*/;
        }

        public EmbeddedData extract(DirectoryNode dn) throws IOException {
            String targetName = dn.getName()+".xls";
            return extractFS(dn, targetName);
        }
    }

    /**
     * Catch-all extractor: accepts every node and copies it as a generic
     * {@code .dat} file.
     */
    static class FsExtractor extends EmbeddedExtractor {
        /** Always {@code true}: this extractor accepts any node. */
        public boolean canExtract(DirectoryNode dn) {
            return true;
        }

        public EmbeddedData extract(DirectoryNode dn) throws IOException {
            String targetName = dn.getName()+".dat";
            return extractFS(dn, targetName);
        }
    }

    /**
     * Recursively copies all entries of {@code src} into {@code dest}.
     * Sub-directories are re-created with the same name and storage class id;
     * documents are copied by streaming their content.
     *
     * @param src  directory to copy from
     * @param dest directory to copy into
     * @throws IOException if an entry cannot be read or created
     */
    private static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
        for (Entry e : src) {
            if (e instanceof DirectoryNode) {
                DirectoryNode srcDir = (DirectoryNode)e;
                DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName());
                destDir.setStorageClsid(srcDir.getStorageClsid());
                copyNodes(srcDir, destDir);
            } else {
                InputStream is = src.createDocumentInputStream(e);
                try {
                    dest.createDocument(e.getName(), is);
                } finally {
                    // Release the source stream even if creating the copy fails.
                    is.close();
                }
            }
        }
    }


    /**
     * Knuth-Morris-Pratt Algorithm for Pattern Matching
     * Finds the first occurrence of the pattern in the text.
     *
     * @param data    the bytes to search in
     * @param offset  index at which the search starts; negative values are treated as 0
     * @param pattern the bytes to search for
     * @return the index of the first occurrence at or after {@code offset},
     *         or -1 if there is none or if {@code data} or {@code pattern} is empty
     */
    private static int indexOf(byte[] data, int offset, byte[] pattern) {
        // An empty pattern previously caused an ArrayIndexOutOfBoundsException
        // on pattern[0]; there is nothing to find, so report "not found".
        if (data.length == 0 || pattern.length == 0) return -1;

        int[] failure = computeFailure(pattern);

        int j = 0;
        // Clamp the start: a negative offset would index before the array.
        for (int i = Math.max(offset, 0); i < data.length; i++) {
            while (j > 0 && pattern[j] != data[i]) {
                j = failure[j - 1];
            }
            if (pattern[j] == data[i]) { j++; }
            if (j == pattern.length) {
                return i - pattern.length + 1;
            }
        }
        return -1;
    }

    /**
     * Computes the failure function using a boot-strapping process,
     * where the pattern is matched against itself.
     *
     * @param pattern the pattern to analyse
     * @return for each position, the length of the longest proper prefix of
     *         the pattern that is also a suffix of the pattern up to that position
     */
    private static int[] computeFailure(byte[] pattern) {
        int[] failure = new int[pattern.length];

        int j = 0;
        for (int i = 1; i < pattern.length; i++) {
            while (j > 0 && pattern[j] != pattern[i]) {
                j = failure[j - 1];
            }
            if (pattern[j] == pattern[i]) {
                j++;
            }
            failure[i] = j;
        }

        return failure;
    }
}
package测试;
导入java.awt.Color;
导入java.awt.image.buffereImage;
导入java.io.ByteArrayInputStream;
导入java.io.ByteArrayOutputStream;
导入java.io.Closeable;
导入java.io.File;
导入java.io.FileOutputStream;
导入java.io.IOException;
导入java.io.InputStream;
导入java.lang.reflect.Method;
导入java.net.URL;
导入java.nio.charset.charset;
导入java.util.ArrayList;
导入java.util.Iterator;
导入java.util.List;
导入java.util.regex.Matcher;
导入java.util.regex.Pattern;
导入javax.imageio.imageio;
导入org.apache.poi.ddf.EscherComplexProperty;
导入org.apache.poi.ddf.EscherOptRecord;
导入org.apache.poi.ddf.EscherProperty;
导入org.apache.poi.hpsf.ClassID;
导入org.apache.poi.hslf.usermodel.HSLFSlideShow;
导入org.apache.poi.hssf.usermodel.HSSFClientAnchor;
导入org.apache.poi.hssf.usermodel.HSSFObjectData;
导入org.apache.poi.hssf.usermodel.HSSFPatriarch;
导入org.apache.poi.hssf.usermodel.HSSFPicture;
导入org.apache.poi.hssf.usermodel.HSSFPictureData;
导入org.apache.poi.hssf.usermodel.HSSFShape;
导入org.apache.poi.hssf.usermodel.HSSFSheet;
导入org.apache.poi.hssf.usermodel.HSSFSimpleShape;
导入org.apache.poi.hssf.usermodel.HSSFWorkbook;
导入org.apache.poi.openxml4j.opc.PackagePart;
导入org.apache.poi.poifs.filesystem.DirectoryNode;
导入org.apache.poi.poifs.filesystem.Entry;
导入org.apache.poi.poifs.filesystem.Ole10Native;
导入org.apache.poi.poifs.filesystem.Ole10NativeException;
导入org.apache.poi.poifs.filesystem.poifsfsystem;
导入org.apache.poi.sl.usermodel.AutoShape;
导入org.apache.poi.sl.usermodel.ShapeType;
导入org.apache.poi.sl.usermodel.Slide;
导入org.apache.poi.ss.usermodel.Sheet;
导入org.apache.poi.ss.usermodel.工作簿;
导入org.apache.poi.ss.usermodel.WorkbookFactory;
导入org.apache.poi.util.IOUtils;
导入org.apache.poi.xssf.usermodel.XSSFDrawing;
导入org.apache.poi.xssf.usermodel.XSSFPicture;
导入org.apache.poi.xssf.usermodel.XSSFPictureData;
导入org.apache.poi.xssf.usermodel.XSSFShape;
导入org.apache.poi.xssf.usermodel.xssfheet;
导入org.apache.poi.xssf.usermodel.xssf工作簿;
导入org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTPicture;
/**
*用POI 3.16-beta1测试
* 
*2014年12月17日:原始版本
*    http://apache-poi.1045710.n5.nabble.com/How-to-get-the-full-file-name-of-a-picture-in-xls-file-td5717205.html
* 
*2016年12月17日:增加了样品/虚拟数据
*    https://stackoverflow.com/questions/41101012/how-to-export-embeded-file-which-from-excel-using-poi 
*/
公共类嵌入阅读器{
专用文件excel_文件;
私人图像阅读器;
公共静态void main(字符串[]args)引发异常{
文件样本=新文件(“bla.xls”);
getSampleEmbedded(样本);
ImageReader ir=新的ImageReader(样本);
用于(嵌入数据:ir.嵌入){
System.out.println(ed.filename);
FileOutputStream fos=新的FileOutputStream(ed.filename);
IOUtils.副本(教育信息系统,fos);
fos.close();
}
ir.close();
}
静态void getSampleEmbedded(文件示例)引发IOException{
HSSFWorkbook wb=新的HSSFWorkbook();
int-storageId=wb.addOlePackage(getSamplePPT(),“dummy.ppt”,“dummy.ppt”,“dummy.ppt”);
int picId=wb.addPicture(getSamplePng(),HSSFPicture.PICTURE\u TYPE\u PNG);
HSSFSheet sheet=wb.createSheet();
HSSFPatriarch pat=sheet.createDrawingParhical();
HSSFClientAnchor anc=pat.createAnchor(0,0,0,0,1,1,3,6);
HSSFObjectData od=pat.createObjectData(anc、storageId、picId);
od.setNoFill(真);
wb.书写(样本);
wb.close();
}
静态字节[]getSamplePng()引发IOException{
ClassLoader cl=Thread.currentThread().getContextClassLoader();
URL imgUrl=cl.getResource(“javax/swing/plaf/metal/icons/ocean/directory.gif”);
BuffereImage img=ImageIO.read(imgUrl);
ByteArrayOutputStream bos=新建ByteArrayOutputStream();
ImageIO.write(img,“PNG”,bos);
返回bos.toByteArray();
}
静态字节[]getSamplePPT()引发IOException{
HSLFSlideShow ppt=新的HSLFSlideShow();
幻灯片=ppt.createSlide();
AutoShape sh1=slide.createAutoShape();
sh1.setShapeType(ShapeType.STAR_32);
setAnchor(新java.awt.Rectangle(50,50100200));
sh1.setFillColor(颜色:红色);
ByteArrayOutputStream bos=新建ByteArrayOutputStream();
ppt.write(bos);
ppt.close();
POIFSFileSystem poifs=new POIFSFileSystem(new ByteArrayInputStream(bos.toByteArray());
poifs.getRoot().setStorageClsid(ClassID.P
 import java.awt.Color;
 import java.awt.image.BufferedImage;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.Closeable;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.lang.reflect.Method;
 import java.net.URL;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import javax.imageio.ImageIO;
 import org.apache.poi.ddf.EscherComplexProperty;
 import org.apache.poi.ddf.EscherOptRecord;
 import org.apache.poi.ddf.EscherProperty;
 import org.apache.poi.hpsf.ClassID;
 import org.apache.poi.hslf.HSLFSlideShow;
 import org.apache.poi.hssf.usermodel.HSSFClientAnchor;
 import org.apache.poi.hssf.usermodel.HSSFObjectData;
 import org.apache.poi.hssf.usermodel.HSSFPatriarch;
 import org.apache.poi.hssf.usermodel.HSSFPicture;
 import org.apache.poi.hssf.usermodel.HSSFPictureData;
 import org.apache.poi.hssf.usermodel.HSSFShape;
 import org.apache.poi.hssf.usermodel.HSSFSheet;
 import org.apache.poi.hssf.usermodel.HSSFSimpleShape;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.openxml4j.opc.PackagePart;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.poifs.filesystem.Ole10Native;
 import org.apache.poi.poifs.filesystem.Ole10NativeException;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.sl.usermodel.AutoShape;
 import org.apache.poi.sl.usermodel.Slide;
 import org.apache.poi.ss.usermodel.Sheet;
 import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.ss.usermodel.WorkbookFactory;
 import org.apache.poi.util.IOUtils;
 import org.apache.poi.xssf.usermodel.XSSFDrawing;
 import org.apache.poi.xssf.usermodel.XSSFPicture;
 import org.apache.poi.xssf.usermodel.XSSFPictureData;
 import org.apache.poi.xssf.usermodel.XSSFShape;
 import org.apache.poi.xssf.usermodel.XSSFSheet;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTPicture;


 public class EmbeddedReader {

public static final OleType OLE10_PACKAGE = new OleType("{0003000C-0000-0000-C000-000000000046}");
public static final OleType PPT_SHOW = new OleType("{64818D10-4F9B-11CF-86EA-00AA00B929E8}");
public static final OleType XLS_WORKBOOK = new OleType("{00020841-0000-0000-C000-000000000046}");
public static final OleType TXT_ONLY = new OleType("{5e941d80-bf96-11cd-b579-08002b30bfeb}");
public static final OleType EXCEL97 = new OleType("{00020820-0000-0000-C000-000000000046}");
public static final OleType EXCEL95 = new OleType("{00020810-0000-0000-C000-000000000046}");
public static final OleType WORD97 = new OleType("{00020906-0000-0000-C000-000000000046}");
public static final OleType WORD95 = new OleType("{00020900-0000-0000-C000-000000000046}");
public static final OleType POWERPOINT97 = new OleType("{64818D10-4F9B-11CF-86EA-00AA00B929E8}");
public static final OleType POWERPOINT95 = new OleType("{EA7BAE70-FB3B-11CD-A903-00AA00510EA3}");
public static final OleType EQUATION30 = new OleType("{0002CE02-0000-0000-C000-000000000046}");
public static final OleType PdfClassID = new OleType("{B801CA65-A1FC-11D0-85AD-444553540000}");

private File excel_file;
private ImageReader image_reader;

/**
 * A class id given in its textual GUID form, e.g.
 * {@code {0003000C-0000-0000-C000-000000000046}}.
 */
static class OleType {
    /** The GUID text as passed to the constructor. */
    final String classId;

    OleType(String classId) {
        this.classId = classId;
    }

    /**
     * Converts the GUID text into a POI {@code ClassID}: braces and dashes
     * are dropped and each pair of hex digits becomes one byte, in textual
     * order.
     *
     * @return a new {@code ClassID} holding the 16 parsed bytes
     * @throws IllegalArgumentException if the text does not consist of
     *         exactly 32 hex digits (a non-hex digit is reported as
     *         {@link NumberFormatException})
     */
    ClassID getClassID() {
        String clsStr = classId.replaceAll("[{}-]", "");
        byte clsBytes[] = new byte[16];
        // Previously a wrong length either overflowed the array or left bytes at zero.
        if (clsStr.length() != 2 * clsBytes.length) {
            throw new IllegalArgumentException("Malformed class id: " + classId);
        }
        for (int i = 0; i < clsStr.length(); i += 2) {
            clsBytes[i / 2] = (byte) Integer.parseInt(
                    clsStr.substring(i, i + 2), 16);
        }
        ClassID cls = new ClassID();
        // Hand the bytes over explicitly instead of writing into the array
        // returned by getBytes(), which is only correct if that array is the
        // ClassID's internal storage.
        cls.setBytes(clsBytes);
        return cls;
    }
}

/**
 * Demo entry point: reads {@code D:\ole_ppt_in_xls.xls} and writes every
 * embedding into the {@code Desktop/sumit} folder below the user's home
 * directory.
 *
 * @param args unused
 * @throws Exception if the workbook cannot be read or a file cannot be written
 */
public static void main(String[] args) throws Exception {
    File sample = new File("D:\\ole_ppt_in_xls.xls");
    ImageReader ir = new ImageReader(sample);
    try {
        for (EmbeddedData ed : ir.embeddings) {
            FileOutputStream fos = new FileOutputStream(System.getProperty("user.home") + "/Desktop" + "/sumit/"+ ed.filename);
            try {
                IOUtils.copy(ed.is, fos);
            } finally {
                // Close the output file even if copying fails.
                fos.close();
            }
        }
    } finally {
        // Always release the workbook and the embedded streams.
        ir.close();
    }
}

/**
 * Loads the Swing "directory" icon from the class path and re-encodes it as PNG.
 *
 * @return the PNG-encoded bytes of the icon
 * @throws IOException if the icon resource is not visible to the class loader,
 *         cannot be decoded, or no PNG writer is available
 */
static byte[] getSamplePng() throws IOException {
    final String resource = "javax/swing/plaf/metal/icons/ocean/directory.gif";
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    if (cl == null) {
        // A thread may have no context class loader.
        cl = ClassLoader.getSystemClassLoader();
    }
    URL imgUrl = cl.getResource(resource);
    if (imgUrl == null) {
        // ImageIO.read(null) would fail with an IllegalArgumentException instead.
        throw new IOException("Sample icon not found: " + resource);
    }
    BufferedImage img = ImageIO.read(imgUrl);
    if (img == null) {
        // ImageIO.read returns null when no registered reader can decode the input.
        throw new IOException("Cannot decode sample icon: " + imgUrl);
    }
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    if (!ImageIO.write(img, "PNG", bos)) {
        // write() returns false when no suitable writer is found.
        throw new IOException("No PNG writer available");
    }
    return bos.toByteArray();
}


public EmbeddedReader(String excel_path) throws IOException {
    excel_file = new File(excel_path);
    image_reader = new ImageReader(excel_file);
}

/**
 * Lists the file names of all extracted items, in extraction order.
 *
 * @return one entry per element of the reader's embeddings list
 */
public String[] get_file_names() {
    final List<EmbeddedData> found = image_reader.embeddings;
    final String[] names = new String[found.size()];
    for (int i = 0; i < names.length; i++) {
        names[i] = found.get(i).filename;
    }
    return names;
}

/**
 * Looks up the content stream of the first extracted item with the given name.
 *
 * @param file_name name to search for
 * @return the stream of the first match, or null when no item has that name
 */
public InputStream get_stream(String file_name) {
    for (EmbeddedData candidate : image_reader.embeddings) {
        if (file_name.equals(candidate.filename)) {
            return candidate.is;
        }
    }
    return null;
}

/**
 * Opens an Excel workbook and collects its pictures and embedded objects as
 * {@link EmbeddedData} entries in {@link #embeddings}.
 * <p>
 * For XSSF workbooks only the pictures of the sheet named "Receipt images"
 * are collected. For HSSF workbooks all embedded objects of the workbook are
 * collected, followed by the pictures of that sheet.
 * <p>
 * Close the reader to release the collected streams and the workbook.
 */
static class ImageReader implements Closeable {
    /** Object names of the form ARGF1234.pdf, which get replaced by the shape's name. */
    private static final Pattern GENERATED_PDF_NAME =
            Pattern.compile("^[A-Z0-9]{8}\\.[pdfPDF]{3}$");

    /** Markers are plain ASCII; do not depend on the platform charset. */
    private static final Charset ASCII = Charset.forName("US-ASCII");
    private static final byte[] PDF_HEADER = "%PDF-".getBytes(ASCII);
    private static final byte[] PDF_TRAILER = "%%EOF".getBytes(ASCII);

    /** Tried in order; the first extractor accepting a directory wins. */
    EmbeddedExtractor[] extractors = { new Ole10Extractor(), new PdfExtractor(),
            new WordExtractor(), new ExcelExtractor(), new FsExtractor() };

    List<EmbeddedData> embeddings = new ArrayList<EmbeddedData>();
    Workbook wb;

    /**
     * Opens the workbook and extracts its content.
     * <p>
     * Failing to open the workbook is reported to the caller. Failures while
     * extracting are logged and leave the items collected so far in place.
     *
     * @param excelfile the workbook file
     * @throws IOException if the workbook cannot be opened
     */
    public ImageReader(File excelfile) throws IOException {
        try {
            wb = WorkbookFactory.create(excelfile);
        } catch (IOException e) {
            throw e;
        } catch (Exception e) {
            // Without a workbook the reader is unusable (close() would fail),
            // so surface the problem instead of swallowing it.
            throw new IOException("Cannot open workbook " + excelfile, e);
        }
        try {
            Sheet receiptImages = wb.getSheet("Receipt images");
            if (wb instanceof XSSFWorkbook) {
                addSheetPicsAndEmbedds((XSSFSheet) receiptImages);
            } else {
                addAllEmbedds((HSSFWorkbook) wb);
                addSheetPics((HSSFSheet) receiptImages);
            }
        } catch (Exception e) {
            // Best effort: keep whatever was extracted before the failure.
            e.printStackTrace();
        }
    }

    /**
     * Adds every picture of the given XSSF sheet to {@link #embeddings}.
     *
     * @param sheet the sheet to scan; null is ignored
     * @throws IOException if a picture part cannot be opened
     */
    protected void addSheetPicsAndEmbedds(XSSFSheet sheet) throws IOException {
        if (sheet == null) {
            return;
        }
        XSSFDrawing draw = sheet.createDrawingPatriarch();
        for (XSSFShape shape : draw.getShapes()) {
            if (!(shape instanceof XSSFPicture)) {
                continue;
            }
            XSSFPicture picture = (XSSFPicture) shape;
            XSSFPictureData pd = picture.getPictureData();
            PackagePart pp = pd.getPackagePart();
            CTPicture ctPic = picture.getCTPicture();
            String filename = null;
            try {
                filename = ctPic.getNvPicPr().getCNvPr().getName();
            } catch (Exception ignored) {
                // The picture name is optional; the part name is used below.
            }
            if (filename == null || "".equals(filename)) {
                filename = new File(pp.getPartName().toString()).getName();
            }
            EmbeddedData ed = new EmbeddedData();
            ed.filename = fileNameWithoutPath(filename);
            ed.is = pp.getInputStream();
            embeddings.add(ed);
        }
    }

    /**
     * Adds every embedded object of the HSSF workbook that has a directory
     * entry, using the first extractor that accepts it.
     *
     * @param hwb the workbook to scan
     * @throws IOException if an extractor fails
     */
    protected void addAllEmbedds(HSSFWorkbook hwb) throws IOException {
        for (HSSFObjectData od : hwb.getAllEmbeddedObjects()) {
            String alternativeName = getAlternativeName(od);
            if (!od.hasDirectoryEntry()) {
                continue;
            }
            // trim() mirrors addSheetPics and also strips control characters
            // such as a terminating NUL, should the raw shape name carry one.
            String shapeName = alternativeName == null ? "" : alternativeName.trim();
            DirectoryNode src = (DirectoryNode) od.getDirectory();
            for (EmbeddedExtractor ee : extractors) {
                if (!ee.canExtract(src)) {
                    continue;
                }
                EmbeddedData ed = ee.extract(src);
                // A usable shape name always wins over the extractor's name.
                if (shapeName.length() > 0) {
                    ed.filename = shapeName;
                }
                ed.filename = fileNameWithoutPath(ed.filename);
                ed.source = "object";
                embeddings.add(ed);
                break;
            }
        }
    }

    /**
     * Reads the complex "groupshape.shapename" property of a shape.
     *
     * @param shape the shape to inspect
     * @return the property data decoded as UTF-16LE, or null if unavailable
     */
    protected String getAlternativeName(HSSFShape shape) {
        EscherOptRecord eor = reflectEscherOptRecord(shape);
        if (eor == null) {
            return null;
        }
        for (EscherProperty ep : eor.getEscherProperties()) {
            if ("groupshape.shapename".equals(ep.getName()) && ep.isComplex()) {
                return new String(((EscherComplexProperty) ep).getComplexData(),
                        Charset.forName("UTF-16LE"));
            }
        }
        return null;
    }

    /**
     * Adds the pictures of the given HSSF sheet to {@link #embeddings}.
     * <p>
     * EMF pictures are only added when they contain a PDF (located by its
     * "%PDF-" and "%%EOF" markers). Other EMF pictures are matched by position
     * to the embedded objects collected so far, and their name replaces a
     * generated object name.
     *
     * @param sheet the sheet to scan; null is ignored
     */
    protected void addSheetPics(HSSFSheet sheet) {
        if (sheet == null) {
            return;
        }
        HSSFPatriarch patriarch = sheet.getDrawingPatriarch();
        if (patriarch == null) {
            return;
        }
        int picIdx = 0;
        int emfIdx = 0;
        for (HSSFShape shape : patriarch.getChildren()) {
            if (!(shape instanceof HSSFPicture)) {
                continue;
            }
            HSSFPicture picture = (HSSFPicture) shape;
            if (picture.getShapeType() != HSSFSimpleShape.OBJECT_TYPE_PICTURE) {
                continue;
            }
            HSSFPictureData pd = picture.getPictureData();
            if (pd == null) {
                // Defensive: a picture shape without picture data has nothing to export.
                continue;
            }
            byte[] pictureBytes = pd.getData();
            int pictureBytesOffset = 0;
            int pictureBytesLen = pictureBytes.length;

            String filename = picture.getFileName();
            // try to find an alternative name
            if (filename == null || "".equals(filename)) {
                filename = getAlternativeName(picture);
            }
            // default to dummy name
            if (filename == null || "".equals(filename)) {
                filename = "picture" + (picIdx++);
            }
            filename = filename.trim();

            // check for emf+ embedded pdf (poor mans style :( )
            // Mac Excel 2011 embeds pdf files with this method.
            boolean validFile = true;
            if (pd.getFormat() == Workbook.PICTURE_TYPE_EMF) {
                validFile = false;
                int idxStart = indexOf(pictureBytes, 0, PDF_HEADER);
                if (idxStart != -1) {
                    int idxEnd = indexOf(pictureBytes, idxStart, PDF_TRAILER);
                    if (idxEnd != -1) {
                        pictureBytesOffset = idxStart;
                        // NOTE(review): 5 marker bytes plus one following byte,
                        // presumably the line terminator; ByteArrayInputStream
                        // clamps the length to the end of the buffer.
                        pictureBytesLen = idxEnd - idxStart + 6;
                        validFile = true;
                    }
                } else {
                    // This shape was not a Mac Excel 2011 embedded pdf file.
                    // So this is a shape related to a regular embedded object.
                    // Update the object filename with the shape's filename
                    // if the object filename is of format ARGF1234.pdf.
                    // There may be more EMF shapes than collected objects.
                    if (emfIdx < embeddings.size()) {
                        EmbeddedData edObj = embeddings.get(emfIdx);
                        if (edObj.filename != null
                                && GENERATED_PDF_NAME.matcher(edObj.filename).matches()) {
                            edObj.filename = filename;
                        }
                    }
                    emfIdx += 1;
                }
            }

            EmbeddedData ed = new EmbeddedData();
            ed.filename = fileNameWithoutPath(filename);
            ed.is = new ByteArrayInputStream(pictureBytes, pictureBytesOffset, pictureBytesLen);
            if (fileNotInEmbeddings(ed.filename) && validFile) {
                embeddings.add(ed);
            }
        }
    }

    /**
     * Calls the non-public HSSFShape#getOptRecord() via reflection.
     *
     * @return the record, or null if the call fails (the failure is logged)
     */
    private static EscherOptRecord reflectEscherOptRecord(HSSFShape shape) {
        try {
            Method m = HSSFShape.class.getDeclaredMethod("getOptRecord");
            m.setAccessible(true);
            return (EscherOptRecord) m.invoke(shape);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Strips any directory part from a file name.
     * Both '\' and '/' count as separators, whatever the local platform uses.
     */
    private String fileNameWithoutPath(String filename) {
        int lastSeparator = Math.max(filename.lastIndexOf('\\'), filename.lastIndexOf('/'));
        return filename.substring(lastSeparator + 1);
    }

    /** Returns true when no collected item carries the given file name yet. */
    private boolean fileNotInEmbeddings(String filename) {
        for (EmbeddedData ed : embeddings) {
            if (filename.equals(ed.filename)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes all collected streams and then the workbook.
     * The workbook is closed even if closing a stream fails.
     */
    @Override
    public void close() throws IOException {
        try {
            for (EmbeddedData ed : embeddings) {
                if (ed.is != null) {
                    ed.is.close();
                }
            }
        } finally {
            wb.close();
        }
    }
}

// Plain holder describing one item extracted from the workbook.
static class EmbeddedData {
    // Name of the item; main() uses it to build the output file path.
    String filename;
    // Stream delivering the item's bytes; closed by ImageReader.close().
    InputStream is;
    // Origin marker; ImageReader.addAllEmbedds sets it to "object", otherwise it stays null.
    String source;
}

/**
 * Strategy for turning the OLE directory of an embedded object into an
 * {@link EmbeddedData} item.
 */
static abstract class EmbeddedExtractor {
    /** Tells whether this extractor accepts the given directory. */
    abstract boolean canExtract(DirectoryNode dn);

    /** Extracts the content of the given directory. */
    abstract EmbeddedData extract(DirectoryNode dn) throws IOException;

    /**
     * Copies the directory into a new POIFS file system and returns the
     * serialized file system as the item's content.
     *
     * @param dn       directory to copy
     * @param filename name to store in the resulting item
     * @return the item holding the name and an in-memory stream of the copy
     * @throws IOException if copying or serializing fails
     */
    protected EmbeddedData extractFS(DirectoryNode dn, String filename) throws IOException {
        assert (canExtract(dn));

        final POIFSFileSystem copy = new POIFSFileSystem();
        copyNodes(dn, copy.getRoot());

        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        copy.writeFilesystem(buffer);
        buffer.close();

        final EmbeddedData result = new EmbeddedData();
        result.filename = filename;
        result.is = new ByteArrayInputStream(buffer.toByteArray());
        return result;
    }
}

static class Ole10Extractor extends EmbeddedExtractor {
    public boolean canExtract(DirectoryNode dn) {
        ClassID clsId = dn.getStorageClsid();
        return OLE10_PACKAGE.equals(clsId);
    }

    public EmbeddedData extract(DirectoryNode dn) throws IOException {
        try {
            Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn);
            EmbeddedData ed = new EmbeddedData();
            ed.filename = new File(ole10.getFileName()).getName();
            ed.is = new ByteArrayInputStream(ole10.getDataBuffer());
            return ed;
        } catch (Ole10NativeException e) {
            e.printStackTrace();
            throw new IOException(e);
        }
    }
}

static class PdfExtractor extends EmbeddedExtractor {
    public boolean canExtract(DirectoryNode dn) {
        ClassID clsId = dn.getStorageClsid();
        return (PdfClassID.equals(clsId) || dn.hasEntry("CONTENTS"));
    }

    public EmbeddedData extract(DirectoryNode dn) throws IOException {
        EmbeddedData ed = new EmbeddedData();
        ed.is = dn.createDocumentInputStream("CONTENTS");
        ed.filename = dn.getName() + ".pdf";
        return ed;
    }
}

static class WordExtractor extends EmbeddedExtractor {
    public boolean canExtract(DirectoryNode dn) {
        ClassID clsId = dn.getStorageClsid();
        return (WORD95.equals(clsId) || WORD97.equals(clsId) || dn.hasEntry("WordDocument"));
    }

    public EmbeddedData extract(DirectoryNode dn) throws IOException {
        return extractFS(dn, dn.getName() + ".doc");
    }
}

static class ExcelExtractor extends EmbeddedExtractor {
    public boolean canExtract(DirectoryNode dn) {
        ClassID clsId = dn.getStorageClsid();
        return (EXCEL95.equals(clsId) || EXCEL97.equals(clsId) || dn
                .hasEntry("Workbook") /* ... */);
    }

    public EmbeddedData extract(DirectoryNode dn) throws IOException {
        return extractFS(dn, dn.getName() + ".xls");
    }
}

static class FsExtractor extends EmbeddedExtractor {

    public boolean canExtract(DirectoryNode dn) {
        return true;
    }

    public EmbeddedData extract(DirectoryNode dn) throws IOException {
        return extractFS(dn, dn.getName() + ".dat");
    }
}

/**
 * Recursively copies all entries of one directory into another.
 * Sub-directories are recreated with the source's storage class id;
 * documents are copied by content.
 *
 * @param src  directory to read from
 * @param dest directory to write into
 * @throws IOException if an entry cannot be read or created
 */
private static void copyNodes(DirectoryNode src, DirectoryNode dest)
        throws IOException {
    for (Entry e : src) {
        if (e instanceof DirectoryNode) {
            DirectoryNode srcDir = (DirectoryNode) e;
            DirectoryNode destDir = (DirectoryNode) dest
                    .createDirectory(srcDir.getName());
            destDir.setStorageClsid(srcDir.getStorageClsid());
            copyNodes(srcDir, destDir);
        } else {
            InputStream is = src.createDocumentInputStream(e);
            try {
                dest.createDocument(e.getName(), is);
            } finally {
                // Release the source stream even when the copy fails.
                is.close();
            }
        }
    }
}

/**
 * Knuth-Morris-Pratt search: returns the index of the first occurrence of
 * {@code pattern} in {@code data} at or after {@code offset}, or -1 if there
 * is none.
 */
private static int indexOf(byte[] data, int offset, byte[] pattern) {
    final int[] fallback = computeFailure(pattern);
    if (data.length == 0) {
        return -1;
    }

    // Number of pattern bytes matched by the text ending at the current position.
    int matched = 0;
    int pos = offset;
    while (pos < data.length) {
        final byte current = data[pos];
        // On a mismatch fall back to the longest shorter prefix that still fits.
        while (matched > 0 && pattern[matched] != current) {
            matched = fallback[matched - 1];
        }
        if (pattern[matched] == current) {
            matched++;
        }
        if (matched == pattern.length) {
            return pos + 1 - pattern.length;
        }
        pos++;
    }
    return -1;
}

/**
 * Builds the KMP failure table of a pattern by matching the pattern against
 * itself: entry {@code i} is the length of the longest proper prefix of
 * {@code pattern[0..i]} that is also a suffix of it.
 */
private static int[] computeFailure(byte[] pattern) {
    final int length = pattern.length;
    final int[] table = new int[length];

    int prefixLen = 0;
    int pos = 1;
    while (pos < length) {
        final byte current = pattern[pos];
        while (prefixLen > 0 && pattern[prefixLen] != current) {
            prefixLen = table[prefixLen - 1];
        }
        if (pattern[prefixLen] == current) {
            prefixLen++;
        }
        table[pos] = prefixLen;
        pos++;
    }

    return table;
}
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.poi.ss.extractor.EmbeddedData;
import org.apache.poi.ss.extractor.EmbeddedExtractor;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;

/**
 * Prints file name, content type and size of everything the POI
 * EmbeddedExtractor returns for each sheet of "bla.xlsx".
 */
public class BlaExtract {
    public static void main(String[] args) throws Exception {
        Workbook wb;
        InputStream fis = new FileInputStream("bla.xlsx");
        try {
            wb = WorkbookFactory.create(fis);
        } finally {
            // Close the file even when the workbook cannot be created.
            fis.close();
        }

        try {
            EmbeddedExtractor ee = new EmbeddedExtractor();
            for (Sheet s : wb) {
                for (EmbeddedData ed : ee.extractAll(s)) {
                    System.out.println(ed.getFilename()+" ("+ed.getContentType()+") - "+ed.getEmbeddedData().length+" bytes");
                }
            }
        } finally {
            // Release the workbook even when extraction fails.
            wb.close();
        }
    }
}
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;

/**
 * Demonstrates how you can extract embedded data from a .xlsx file
 */
public class GetEmbedded {

    public static void main(String[] args) throws Exception {
        String path = "SomeExcelFile.xlsx"
        XSSFWorkbook workbook = new XSSFWorkbook(new FileInputStream(new File(path)));

             for (PackagePart pPart : workbook.getAllEmbedds()) {
                            String contentType = pPart.getContentType();

                            if (contentType.equals("application/vnd.ms-excel")) { //This is to read xls workbook embedded to xlsx file
                                HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(pPart.getInputStream());
                                int countOfSheetXls=embeddedWorkbook.getNumberOfSheets();

                 }
                            else if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) { //This is to read xlsx workbook embedded to xlsx file
                                 if(pPart.getPartName().getName().equals("/xl/embeddings/Microsoft_Excel_Worksheet12.xlsx")){
                                 //"/xl/embeddings/Microsoft_Excel_Worksheet12.xlsx" - Can read an Excel from a particular sheet 
                                // This is the worksheet from the Parent Excel-sheet-12

                                     XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(pPart.getInputStream());
                                     int countOfSheetXlsx=embeddedWorkbook.getNumberOfSheets();
                                     ArrayList<String> sheetNames= new ArrayList<String>();
                                        for(int i=0;i<countOfSheetXlsx;i++){
                                        String name=workbook.getSheetName(i);
                                        sheetNames.add(name);
                                        }
                                }
                            }
                }
     }
}