Java 将XHTML和自定义标记读入DOM树
我正在使用Flying Saucer将XHTML转换为PDF,它工作得很好,但现在我想添加书签,根据fs文档,应该这样做:Java 将XHTML和自定义标记读入DOM树,java,xml,dom,flying-saucer,Java,Xml,Dom,Flying Saucer,我正在使用Flying Saucer将XHTML转换为PDF,它工作得很好,但现在我想添加书签,根据fs文档,应该这样做: <bookmarks> <bookmark name='1. Foo bar baz' href='#1'> <bookmark name='1.1 Baz quux' href='#1.2'> </bookmark> </bookmark> <bookmark
<!-- Bookmark outline in the form the surrounding text attributes to the Flying Saucer
     documentation. Nesting one bookmark element inside another expresses the hierarchy;
     presumably "name" is the label shown in the PDF outline and "href" is a fragment
     reference to an anchor in the XHTML body (confirm against the Flying Saucer docs). -->
<bookmarks>
<bookmark name='1. Foo bar baz' href='#1'>
<bookmark name='1.1 Baz quux' href='#1.2'>
</bookmark>
</bookmark>
<bookmark name='2. Foo bar baz' href='#2'>
<bookmark name='2.1 Baz quux' href='#2.2'>
</bookmark>
</bookmark>
</bookmarks>
编辑
以下是LocalEntityResolver:
/**
 * SAX {@link EntityResolver} that serves the XHTML 1.0 DTDs and their entity
 * sets from locally bundled copies.
 *
 * <p>Lookup is keyed on the public identifier only. When the identifier is
 * unknown, or no local copy can be obtained, {@code null} is returned; per the
 * SAX {@code EntityResolver} contract this asks the parser to fall back to its
 * default handling of the system identifier.
 */
class LocalEntityResolver implements EntityResolver {
    private static final Logger LOG = ESAPI.getLogger(LocalEntityResolver.class);

    /** Public identifier to canonical W3C system identifier; read-only after class init. */
    private static final Map<String, String> DTDS;

    static {
        final Map<String, String> dtds = new HashMap<String, String>();
        dtds.put("-//W3C//DTD XHTML 1.0 Strict//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
        dtds.put("-//W3C//DTD XHTML 1.0 Transitional//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd");
        dtds.put("-//W3C//ENTITIES Latin 1 for XHTML//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent");
        dtds.put("-//W3C//ENTITIES Symbols for XHTML//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent");
        dtds.put("-//W3C//ENTITIES Special for XHTML//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent");
        // Fully qualified so that no additional import is required.
        DTDS = java.util.Collections.unmodifiableMap(dtds);
    }

    /**
     * Resolves a known XHTML public identifier to a locally stored resource.
     *
     * @param publicId the public identifier of the entity, may be {@code null}
     * @param systemId the system identifier declared by the document (not used for lookup)
     * @return an input source backed by the local copy, or {@code null} when the
     *         public identifier is unknown or no local copy was found
     * @throws SAXException declared by the interface; not thrown directly here
     * @throws IOException declared by the interface; not thrown directly here
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException {
        // One lookup instead of containsKey + get: the table holds no null
        // values, so a null result can only mean "unknown identifier".
        final String dtdSystemId = (publicId == null) ? null : DTDS.get(publicId);
        if (dtdSystemId == null) {
            return null;
        }

        LOG.debug(Logger.EVENT_SUCCESS, "Looking for local copy of [" + publicId + "]");
        // The local resource is named after the last path segment of the W3C URL.
        final String fileName = dtdSystemId.substring(dtdSystemId.lastIndexOf('/') + 1);
        // NOTE(review): FileUtil is a project helper; presumably it returns null
        // when the resource is missing - confirm against its implementation.
        final InputStream inputStream = FileUtil.readStreamFromClasspath(
                fileName, "my/class/path",
                getClass().getClassLoader());
        if (inputStream == null) {
            return null;
        }

        LOG.debug(Logger.EVENT_SUCCESS, "Found local file [" + fileName + "]!");
        final InputSource inputSource = new InputSource(inputStream);
        // Identify the stream to the parser: the system identifier gives it a
        // base for relative references inside the DTD and a name to use in
        // error messages and warnings.
        inputSource.setPublicId(publicId);
        inputSource.setSystemId(dtdSystemId);
        return inputSource;
    }
}
class LocalEntityResolver implements EntityResolver {
private static final Logger LOG = ESAPI.getLogger(LocalEntityResolver.class);
private static final Map<String, String> DTDS;
static {
DTDS = new HashMap<String, String>();
DTDS.put("-//W3C//DTD XHTML 1.0 Strict//EN",
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
DTDS.put("-//W3C//DTD XHTML 1.0 Transitional//EN",
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd");
DTDS.put("-//W3C//ENTITIES Latin 1 for XHTML//EN",
"http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent");
DTDS.put("-//W3C//ENTITIES Symbols for XHTML//EN",
"http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent");
DTDS.put("-//W3C//ENTITIES Special for XHTML//EN",
"http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent");
}
@Override
public InputSource resolveEntity(String publicId, String systemId)
throws SAXException, IOException {
InputSource input_source = null;
if (publicId != null && DTDS.containsKey(publicId)) {
LOG.debug(Logger.EVENT_SUCCESS, "Looking for local copy of [" + publicId + "]");
final String dtd_system_id = DTDS.get(publicId);
final String file_name = dtd_system_id.substring(
dtd_system_id.lastIndexOf('/') + 1, dtd_system_id.length());
InputStream input_stream = FileUtil.readStreamFromClasspath(
file_name, "my/class/path",
getClass().getClassLoader());
if (input_stream != null) {
LOG.debug(Logger.EVENT_SUCCESS, "Found local file [" + file_name + "]!");
input_source = new InputSource(input_stream);
}
}
return input_source;
}
}
我的DocumentBuilderFactory实现是:
com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl
啊,我终于找到了问题。很抱歉让你们调试代码。问题在于:在DOM解析发生之前,我的代码中有一个对JTidy.parse的调用,它导致要解析的内容为空,而我完全没有注意到这一点。实际的错误是SAX抛出的“Premature end of file”(文件过早结束)。
多亏了Matt Gibson,当我在编写一个简短的输入文档时,我发现了这个bug
我的代码现在包括一个检查,看看内容是否为空
/**
 * Turns the given markup into a DOM {@link Document}.
 *
 * <p>The text is encoded with {@code CONTEXT.getEncoding()} and parsed by a
 * JAXP {@code DocumentBuilder} whose entity lookups are routed through
 * {@link LocalEntityResolver}. Any failure, including rejected input, is
 * logged and reported to the caller as {@code null} rather than as an exception.
 *
 * @param content the content to be parsed.
 * @return the parsed document, or {@code null} if parsing failed
 */
private static Document parse(final String content) {
    try {
        if (StringUtil.isNull(content)) {
            // Raised inside the try on purpose so that it is logged by the handler below.
            throw new IllegalArgumentException("cannot parse null "
                    + "content into a DOM object!");
        }
        final byte[] encoded = content.getBytes(CONTEXT.getEncoding());
        final DocumentBuilder parser =
                DocumentBuilderFactory.newInstance().newDocumentBuilder();
        parser.setEntityResolver(new LocalEntityResolver());
        return parser.parse(new ByteArrayInputStream(encoded));
    } catch (Exception failure) {
        LOG.error(Logger.EVENT_FAILURE, "parsing failed "
                + "for content[" + content + "]", failure);
        return null;
    }
}
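/**
* parses String content into a valid XML document.
* @param content the content to be parsed.
* @return the parsed document or <tt>null</tt>
*/
private static Document parse(final String content) {
Document document = null;
try {
if (StringUtil.isNull(content)) {
throw new IllegalArgumentException("cannot parse null "
+ "content into a DOM object!");
}
InputStream is = new ByteArrayInputStream(content
.getBytes(CONTEXT.getEncoding()));
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = dbf.newDocumentBuilder();
builder.setEntityResolver(new LocalEntityResolver());
document = builder.parse(is);
} catch (Exception ex) {
LOG.error(Logger.EVENT_FAILURE, "parsing failed "
+ "for content[" + content + "]", ex);
}
return document;
}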
我认为您需要提供更多详细信息。我或其他人如何重现这个问题?
基本上,我希望使用W3C Transitional DTD,将带有某些未知元素的有效XHTML解析到DOM树中。如果要重现,请取任意一份有效的XHTML,加入书签标记,然后尝试将其解析为DOM树。
那个LocalEntityResolver是什么?它来自哪里?我在Xerces源代码中找不到任何形如“{element} 无法识别”的消息。
@四十二:请查看我的编辑。
您能给我们一个能显示问题的、完整的小型示例输入文档吗?
还有另一个参考原因;-) 我确实试图重现您的问题,但遇到了困难(例如FileUtil来自哪个库……)。
不用担心。很高兴你找到了!
// Obtain a JAXP DocumentBuilder from the configured DocumentBuilderFactory implementation.
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = dbf.newDocumentBuilder();
// Route the parser's external entity (DTD / entity set) lookups through LocalEntityResolver.
builder.setEntityResolver(new LocalEntityResolver());
// `is` and `document` are declared outside this fragment; presumably `is` is the
// stream holding the XHTML to parse - confirm against the enclosing method.
document = builder.parse(is);
/**
 * SAX {@link EntityResolver} that serves the XHTML 1.0 DTDs and their entity
 * sets from locally bundled copies.
 *
 * <p>Lookup is keyed on the public identifier only. When the identifier is
 * unknown, or no local copy can be obtained, {@code null} is returned; per the
 * SAX {@code EntityResolver} contract this asks the parser to fall back to its
 * default handling of the system identifier.
 */
class LocalEntityResolver implements EntityResolver {
    private static final Logger LOG = ESAPI.getLogger(LocalEntityResolver.class);

    /** Public identifier to canonical W3C system identifier; read-only after class init. */
    private static final Map<String, String> DTDS;

    static {
        final Map<String, String> dtds = new HashMap<String, String>();
        dtds.put("-//W3C//DTD XHTML 1.0 Strict//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
        dtds.put("-//W3C//DTD XHTML 1.0 Transitional//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd");
        dtds.put("-//W3C//ENTITIES Latin 1 for XHTML//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent");
        dtds.put("-//W3C//ENTITIES Symbols for XHTML//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent");
        dtds.put("-//W3C//ENTITIES Special for XHTML//EN",
                "http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent");
        // Fully qualified so that no additional import is required.
        DTDS = java.util.Collections.unmodifiableMap(dtds);
    }

    /**
     * Resolves a known XHTML public identifier to a locally stored resource.
     *
     * @param publicId the public identifier of the entity, may be {@code null}
     * @param systemId the system identifier declared by the document (not used for lookup)
     * @return an input source backed by the local copy, or {@code null} when the
     *         public identifier is unknown or no local copy was found
     * @throws SAXException declared by the interface; not thrown directly here
     * @throws IOException declared by the interface; not thrown directly here
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException {
        // One lookup instead of containsKey + get: the table holds no null
        // values, so a null result can only mean "unknown identifier".
        final String dtdSystemId = (publicId == null) ? null : DTDS.get(publicId);
        if (dtdSystemId == null) {
            return null;
        }

        LOG.debug(Logger.EVENT_SUCCESS, "Looking for local copy of [" + publicId + "]");
        // The local resource is named after the last path segment of the W3C URL.
        final String fileName = dtdSystemId.substring(dtdSystemId.lastIndexOf('/') + 1);
        // NOTE(review): FileUtil is a project helper; presumably it returns null
        // when the resource is missing - confirm against its implementation.
        final InputStream inputStream = FileUtil.readStreamFromClasspath(
                fileName, "my/class/path",
                getClass().getClassLoader());
        if (inputStream == null) {
            return null;
        }

        LOG.debug(Logger.EVENT_SUCCESS, "Found local file [" + fileName + "]!");
        final InputSource inputSource = new InputSource(inputStream);
        // Identify the stream to the parser: the system identifier gives it a
        // base for relative references inside the DTD and a name to use in
        // error messages and warnings.
        inputSource.setPublicId(publicId);
        inputSource.setSystemId(dtdSystemId);
        return inputSource;
    }
}
/**
 * Turns the given markup into a DOM {@link Document}.
 *
 * <p>The text is encoded with {@code CONTEXT.getEncoding()} and parsed by a
 * JAXP {@code DocumentBuilder} whose entity lookups are routed through
 * {@link LocalEntityResolver}. Any failure, including rejected input, is
 * logged and reported to the caller as {@code null} rather than as an exception.
 *
 * @param content the content to be parsed.
 * @return the parsed document, or {@code null} if parsing failed
 */
private static Document parse(final String content) {
    try {
        if (StringUtil.isNull(content)) {
            // Raised inside the try on purpose so that it is logged by the handler below.
            throw new IllegalArgumentException("cannot parse null "
                    + "content into a DOM object!");
        }
        final byte[] encoded = content.getBytes(CONTEXT.getEncoding());
        final DocumentBuilder parser =
                DocumentBuilderFactory.newInstance().newDocumentBuilder();
        parser.setEntityResolver(new LocalEntityResolver());
        return parser.parse(new ByteArrayInputStream(encoded));
    } catch (Exception failure) {
        LOG.error(Logger.EVENT_FAILURE, "parsing failed "
                + "for content[" + content + "]", failure);
        return null;
    }
}