Java 将DOCX转换为包含图像的HTML
我正在使用DOCX4J将DOCX转换为HTML。我已成功完成转换并得到了HTML,打算把它嵌入电子邮件正文中发送。(标签:java、html、docx、docx4j)但我遇到了以下问题:(1)图像无法在电子邮件正文中显示;(2)空格和项目符号丢失。以下是我编写的代码:
// Load the Word document located at resourcePath2.
WordprocessingMLPackage wordMLPackage = Docx4J.load(new java.io.File(resourcePath2));

// Build the HTML settings; both image locations are derived from imageFolder and resourcePath2.
HTMLSettings htmlSettings = Docx4J.createHTMLSettings();
String imageDirPath = imageFolder + resourcePath2 + "_files";
htmlSettings.setImageDirPath(imageDirPath);
// Only the part of resourcePath2 after the last "/" is used for the target URI.
int nameStart = resourcePath2.lastIndexOf("/") + 1;
String imageTargetUri = imageFolder + resourcePath2.substring(nameStart) + "_files";
htmlSettings.setImageTargetUri(imageTargetUri);
htmlSettings.setWmlPackage(wordMLPackage);

// Convert into an in-memory buffer and keep the result as a string in DOCX.
OutputStream os = new ByteArrayOutputStream();
Docx4jProperties.setProperty("docx4j.Convert.Out.HTML.OutputMethodXML", true);
Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_SAVE_FLAT_XML);
DOCX = ((ByteArrayOutputStream) os).toString();
要让图像在电子邮件正文中显示,我想您需要使用数据URI,或者将图像发布到可通过web访问的位置。无论哪种情况,您都需要编写以下接口的实现:
/**
 * Contract for handling an image and producing the URI that refers to it.
 */
public interface ConversionImageHandler {

    /**
     * Handles a single image.
     *
     * @param picture the picture being handled
     * @param relationship the relationship of the image
     * @param part the part of the image if it is an internal image; otherwise {@code null}
     * @return the URI for the image that was saved, or {@code null}
     * @throws Docx4JException this exception will be logged, but not propagated
     */
    String handleImage(AbstractWordXmlPicture picture, Relationship relationship, BinaryPart part)
            throws Docx4JException;
}
然后通过htmlSettings.setImageHandler将docx4j配置为使用该实现。
您可以查看docx4j源代码中已有的一些实现,并利用AbstractConversionImageHandler中的辅助方法(例如需要数据URI时可使用createEncodedImage)。要让图像在电子邮件正文中显示,我想您需要使用数据URI,或者将图像发布到可通过web访问的位置。无论哪种情况,您都需要编写以下接口的实现:
/**
 * Contract for handling an image and producing the URI that refers to it.
 */
public interface ConversionImageHandler {

    /**
     * Handles a single image.
     *
     * @param picture the picture being handled
     * @param relationship the relationship of the image
     * @param part the part of the image if it is an internal image; otherwise {@code null}
     * @return the URI for the image that was saved, or {@code null}
     * @throws Docx4JException this exception will be logged, but not propagated
     */
    String handleImage(AbstractWordXmlPicture picture, Relationship relationship, BinaryPart part)
            throws Docx4JException;
}
然后通过htmlSettings.setImageHandler将docx4j配置为使用该实现。
您可以查看docx4j源代码中已有的一些实现,并利用AbstractConversionImageHandler中的辅助方法(例如需要数据URI时可使用createEncodedImage)。您可以在代码中添加类似下面的内容:
package tcg.doc.web.managedBeans;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Component;
/**
 * Converts a fixed DOCX file ({@code d:/TestDocx.docx}) to XHTML ({@code d:/docHtml.html})
 * using the XWPF XHTML converter, extracting embedded images to a folder under
 * {@code target/images/}.
 */
@Component
@Scope("session")
@Qualifier("ConvertWord")
public class ConvertWord {
    private static final String docName = "TestDocx.docx";
    private static final String outputlFolderPath = "d:/";

    String htmlNamePath = "docHtml.html";
    String zipName = "_tmp.zip";
    File docFile = new File(outputlFolderPath + docName);
    File zipFile = new File(zipName);

    /**
     * Reads the DOCX file, writes the XHTML rendering to {@link #htmlPath()} and extracts the
     * document's images into {@code target/images/<document name>}.
     *
     * <p>I/O failures are reported on {@code System.err}; they are not propagated to the caller.
     */
    public void ConvertWordToHtml() {
        File sourceFile = new File(outputlFolderPath + docName);
        File htmlFile = new File(htmlPath());

        // The image folder is named after the document. Concatenating the InputStream object
        // here would produce a path such as "target/images/java.io.FileInputStream@1b6d3586".
        String root = "target";
        File imageFolder = new File(root + "/images/" + docName);

        // try-with-resources guarantees both streams are closed (and the output flushed)
        // even when the conversion fails part-way.
        try (InputStream doc = new FileInputStream(sourceFile)) {
            // 1) Load DOCX into XWPFDocument
            XWPFDocument document = new XWPFDocument(doc);

            // 2) Prepare XHTML options: extract images to imageFolder and resolve image URIs
            //    against the same folder.
            XHTMLOptions options = XHTMLOptions.create();
            options.setExtractor(new FileImageExtractor(imageFolder));
            options.URIResolver(new FileURIResolver(imageFolder));

            // 3) Convert. The output file is opened only after the document loaded successfully,
            //    so a failed load does not create or truncate it.
            try (OutputStream out = new FileOutputStream(htmlFile)) {
                XHTMLConverter.getInstance().convert(document, out, options);
            }
            System.out.println("Converted " + sourceFile + " to " + htmlFile);
        } catch (IOException ex) {
            // FileNotFoundException is an IOException, so missing files are reported here too.
            System.err.println("Failed to convert " + sourceFile + " to " + htmlFile + ": " + ex);
        }
    }

    /** Runs the conversion once with the built-in paths. */
    public static void main(String[] args) {
        ConvertWord cwoWord = new ConvertWord();
        cwoWord.ConvertWordToHtml();
        System.out.println();
    }

    /**
     * @return the output HTML path, i.e. the output folder followed by {@code htmlNamePath}
     *         (with the default field values: {@code d:/docHtml.html})
     */
    public String htmlPath() {
        return outputlFolderPath + htmlNamePath;
    }

    /**
     * @return the zip path, i.e. the output folder followed by {@code zipName}
     *         (with the default field values: {@code d:/_tmp.zip})
     */
    public String zipPath() {
        return outputlFolderPath + zipName;
    }
}
对于pom.xml上的maven依赖项
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.4</version>
</dependency>
fr.opensagres.xdocreport
您可以在代码中添加类似的内容
package tcg.doc.web.managedBeans;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Component;
/**
 * Converts a fixed DOCX file ({@code d:/TestDocx.docx}) to XHTML ({@code d:/docHtml.html})
 * using the XWPF XHTML converter, extracting embedded images to a folder under
 * {@code target/images/}.
 */
@Component
@Scope("session")
@Qualifier("ConvertWord")
public class ConvertWord {
    private static final String docName = "TestDocx.docx";
    private static final String outputlFolderPath = "d:/";

    String htmlNamePath = "docHtml.html";
    String zipName = "_tmp.zip";
    File docFile = new File(outputlFolderPath + docName);
    File zipFile = new File(zipName);

    /**
     * Reads the DOCX file, writes the XHTML rendering to {@link #htmlPath()} and extracts the
     * document's images into {@code target/images/<document name>}.
     *
     * <p>I/O failures are reported on {@code System.err}; they are not propagated to the caller.
     */
    public void ConvertWordToHtml() {
        File sourceFile = new File(outputlFolderPath + docName);
        File htmlFile = new File(htmlPath());

        // The image folder is named after the document. Concatenating the InputStream object
        // here would produce a path such as "target/images/java.io.FileInputStream@1b6d3586".
        String root = "target";
        File imageFolder = new File(root + "/images/" + docName);

        // try-with-resources guarantees both streams are closed (and the output flushed)
        // even when the conversion fails part-way.
        try (InputStream doc = new FileInputStream(sourceFile)) {
            // 1) Load DOCX into XWPFDocument
            XWPFDocument document = new XWPFDocument(doc);

            // 2) Prepare XHTML options: extract images to imageFolder and resolve image URIs
            //    against the same folder.
            XHTMLOptions options = XHTMLOptions.create();
            options.setExtractor(new FileImageExtractor(imageFolder));
            options.URIResolver(new FileURIResolver(imageFolder));

            // 3) Convert. The output file is opened only after the document loaded successfully,
            //    so a failed load does not create or truncate it.
            try (OutputStream out = new FileOutputStream(htmlFile)) {
                XHTMLConverter.getInstance().convert(document, out, options);
            }
            System.out.println("Converted " + sourceFile + " to " + htmlFile);
        } catch (IOException ex) {
            // FileNotFoundException is an IOException, so missing files are reported here too.
            System.err.println("Failed to convert " + sourceFile + " to " + htmlFile + ": " + ex);
        }
    }

    /** Runs the conversion once with the built-in paths. */
    public static void main(String[] args) {
        ConvertWord cwoWord = new ConvertWord();
        cwoWord.ConvertWordToHtml();
        System.out.println();
    }

    /**
     * @return the output HTML path, i.e. the output folder followed by {@code htmlNamePath}
     *         (with the default field values: {@code d:/docHtml.html})
     */
    public String htmlPath() {
        return outputlFolderPath + htmlNamePath;
    }

    /**
     * @return the zip path, i.e. the output folder followed by {@code zipName}
     *         (with the default field values: {@code d:/_tmp.zip})
     */
    public String zipPath() {
        return outputlFolderPath + zipName;
    }
}
对于pom.xml上的maven依赖项
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.4</version>
</dependency>
fr.opensagres.xdocreport
为什么不把docx作为附件添加到电子邮件中?——我们的用户不希望使用附件,他们需要把整个docx的内容放在电子邮件正文中。——空格和项目符号的问题可能需要另开一个问题单独讨论,例如OpenXML的处理方式可能与您预期的不同。——Apache官方的xwpf转换器在哪里?