Java 从JAXP SAX ContentHandler发出XML的内存效率最高的方法是什么?
我遇到了与链接问题类似的情况。我正在 SAX ContentHandler 中分析数据,同时把这些数据序列化到一个流中。我怀疑链接问题中的解决方案——尽管它在 API 方面正是我想要的——内存效率不高,因为它涉及 XSLT 处理器的恒等转换(identity transform)。我希望程序的内存消耗是有界的,而不是随着输入大小而增长。
我怎样才能轻松地把传给 ContentHandler 各方法的参数原样转发给序列化器,而不必费力地做适配(例如 StAX 到 SAX),或者更糟糕的是,把 SAX 事件内容手工复制到输出流?
标签:java, xml-serialization, sax, jaxp
编辑:下面是我想要的最简单的例子。`thingIWant` 应该只是把内容写入传给它的 OutputStream。正如我所说,前面的问题中有一个 TransformerHandler,它提供了正确的 API,但它使用的是 XSLT 处理器,而不是单纯的序列化。
/**
 * Sketch of the desired design: a ContentHandler that runs its own logic on
 * each SAX event and then forwards the unchanged arguments to a second
 * ContentHandler ({@code thingIWant}).
 *
 * This is an illustrative fragment, not a complete class: only three of the
 * ContentHandler callbacks are shown (see the trailing "etc..." marker).
 */
public class MyHandler implements ContentHandler {
// Downstream handler that receives every event after the local logic has run.
ContentHandler thingIWant;
// NOTE(review): setup(...) is not defined in this fragment; presumably it is
// the factory the question asks for, returning a handler that serializes to
// outputStream - confirm against the chosen implementation.
MyHandler(OutputStream outputStream) {
thingIWant = setup(outputStream);
}
// Forwards the start-of-document event after the (elided) parsing logic.
public void startDocument() throws SAXException {
// parsing logic
thingIWant.startDocument();
}
// Forwards the element start with the same uri/localName/qName/attributes.
public void startElement(String uri, String localName, String qName,
Attributes atts) throws SAXException {
// parsing logic
thingIWant.startElement(uri, localName, qName, atts);
}
// Forwards the character data using the same array, offset and length.
public void characters(char[] ch, int start, int length) throws SAXException {
// parsing logic
thingIWant.characters(ch, start, length);
}
// etc...
}
编辑:已补充只使用默认 JDK 的版本。
最高效的做法是使用一个实现了 `ContentHandler` 的 `XMLWriter`。简而言之,就是从 IO 缓冲区读取,再写入 IO 缓冲区。DOM4J 中就有这样一个类,下面的代码用的就是它。您可以子类化 `XMLWriter`,也可以使用 `XMLFilter` 来做分析。在本例中我使用的是 `XMLFilter`。请注意,这里用到的 `XMLFilter` 实现(`XMLFilterImpl`)同时也是一个 `ContentHandler`。以下是完整代码:
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.dom4j.io.XMLWriter;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLFilterImpl;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * Streams an XML document from a SAX parser to an output stream while an
 * {@link XMLAnalyzer} filter observes the events on the way through.
 *
 * <p>Two variants are provided: one that serializes with the dom4j
 * {@code XMLWriter} and one that uses only the JDK (a {@code Transformer}
 * created without a stylesheet). The {@code *Noop} methods are the same
 * pipelines without the analyzing filter.
 */
public class XMLPipeline {

    /** Pipes {@code build.xml} to standard output using the dom4j variant. */
    public static void main(String[] args) throws Exception {
        String inputFile = "build.xml";
        PrintStream outputStream = System.out;
        new XMLPipeline().pipe(inputFile, outputStream);
    }

    /**
     * dom4j variant: parser -> XMLAnalyzer -> dom4j XMLWriter.
     *
     * @param inputFile    system identifier (file name or URI) of the XML to read
     * @param outputStream stream the XML is written to
     * @throws SAXException if the reader cannot be created or parsing fails
     * @throws ParserConfigurationException kept for backward compatibility of the signature
     * @throws IOException if reading the input or writing the output fails
     */
    public void pipe(String inputFile, OutputStream outputStream) throws
            SAXException, ParserConfigurationException, IOException {
        XMLWriter xwriter = new XMLWriter(outputStream);
        XMLAnalyzer analyzer = new XMLAnalyzer(newXmlReader());
        analyzer.setContentHandler(xwriter);
        analyzer.parse(inputFile);
        //do what you want with analyzer
        System.err.println(analyzer.elementCount);
    }

    /**
     * Default-JDK variant: the analyzer is handed to a Transformer as a SAXSource
     * and the Transformer writes to the stream.
     *
     * @param inputFile    system identifier (file name or URI) of the XML to read
     * @param outputStream stream the XML is written to
     * @throws SAXException if the reader cannot be created
     * @throws ParserConfigurationException kept for backward compatibility of the signature
     * @throws IOException kept for backward compatibility of the signature
     * @throws TransformerException if the transformation fails
     */
    public void pipeTrax(String inputFile, OutputStream outputStream) throws
            SAXException, ParserConfigurationException,
            IOException, TransformerException {
        StreamResult xwriter = new StreamResult(outputStream);
        XMLAnalyzer analyzer = new XMLAnalyzer(newXmlReader());
        // newInstance() is a static method of TransformerFactory; call it on the
        // declaring class rather than through the SAXTransformerFactory subclass.
        TransformerFactory stf = TransformerFactory.newInstance();
        SAXSource ss = new SAXSource(analyzer, new InputSource(inputFile));
        stf.newTransformer().transform(ss, xwriter);
        // Diagnostics go to stderr, as in pipe(): stdout may be the very stream
        // the XML is being written to (see main).
        System.err.println(analyzer.elementCount);
    }

    /**
     * Reads a file, runs it through the SAX parser and dumps it to the dom4j
     * writer, without any analysis.
     *
     * @param inputFile    system identifier (file name or URI) of the XML to read
     * @param outputStream stream the XML is written to
     * @throws IOException if reading the input or writing the output fails
     * @throws SAXException if the reader cannot be created or parsing fails
     */
    public void dom4jNoop(String inputFile, OutputStream outputStream) throws
            IOException, SAXException {
        XMLWriter xwriter = new XMLWriter(outputStream);
        XMLReader xreader = newXmlReader();
        xreader.setContentHandler(xwriter);
        xreader.parse(inputFile);
    }

    /**
     * Simplest way to read a file and write it back to an output stream using
     * only the JDK.
     *
     * @param inputFile    system identifier (file name or URI) of the XML to read
     * @param outputStream stream the XML is written to
     * @throws TransformerException if the transformation fails
     */
    public void traxNoop(String inputFile, OutputStream outputStream)
            throws TransformerException {
        TransformerFactory stf = TransformerFactory.newInstance();
        stf.newTransformer().transform(new StreamSource(inputFile),
                new StreamResult(outputStream));
    }

    /**
     * Creates a namespace-aware XMLReader through SAXParserFactory.
     *
     * <p>Replaces the deprecated XMLReaderFactory. A configuration failure is
     * reported as a SAXException, the same exception type the previous factory
     * call used, so callers see no new checked exception.
     */
    private static XMLReader newXmlReader() throws SAXException {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        // SAXParserFactory defaults to non-namespace-aware parsing; turn it on so
        // handlers keep receiving namespace URIs and prefix-mapping events.
        factory.setNamespaceAware(true);
        try {
            return factory.newSAXParser().getXMLReader();
        } catch (ParserConfigurationException e) {
            throw new SAXException(e);
        }
    }

    /** Filter that counts the elements in the SAX stream passing through it. */
    public static class XMLAnalyzer extends XMLFilterImpl {
        // Number of startElement events seen so far; read by the pipe methods.
        int elementCount = 0;

        /**
         * @param xmlReader parent reader whose events this filter observes
         */
        public XMLAnalyzer(XMLReader xmlReader) {
            super(xmlReader);
        }

        /** Forwards the event downstream first, then counts it. */
        @Override
        public void startElement(String uri, String localName, String qName,
                Attributes atts) throws SAXException {
            super.startElement(uri, localName, qName, atts);
            elementCount++;
        }
    }
}
第一:不必担心恒等转换(identity transform);它不会在内存中构建数据的完整表示。
要实现“tee”功能,您必须创建一个内容处理程序,用来监听解析器产生的事件流,并把这些事件传递给转换器提供给您的处理程序。不幸的是,这并不像听起来那么容易:解析器希望把事件发送给 `DefaultHandler`,而转换器希望从 `XMLReader` 中读取事件。前者是一个类,后者是一个接口。JDK 还提供了 `XMLFilterImpl` 类,它实现了 `DefaultHandler` 所实现的全部接口,但并不继承自 `DefaultHandler`……这就是把两个不同的项目合并成“参考实现”的结果。
因此,您需要在这两者之间编写一个桥接类:
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLFilterImpl;
/**
 * Uses a decorator ContentHandler to insert a "tee" into a SAX parse/serialize
 * stream.
 *
 * <p>The transformer is given {@link MyReader} as its XMLReader. When the
 * transformer calls {@code parse()}, MyReader runs a real SAXParser and routes
 * the parser's callbacks back into itself through {@link XMLFilterBridge}, so
 * that any method MyReader overrides sees the event before it is forwarded.
 */
public class SaxTeeExample
{
    /**
     * Parses a small in-memory document, prints each element start as it is
     * seen, then prints the serialized result.
     */
    public static void main(String[] argv)
    throws Exception
    {
        StringReader src = new StringReader("<root><child>text</child></root>");
        StringWriter dst = new StringWriter();
        Transformer xform = TransformerFactory.newInstance().newTransformer();
        XMLReader reader = new MyReader(SAXParserFactory.newInstance().newSAXParser());
        xform.transform(new SAXSource(reader, new InputSource(src)),
                        new StreamResult(dst));
        System.out.println(dst.toString());
    }


    /**
     * XMLReader handed to the transformer. Inherits the handler setters and the
     * forwarding callbacks from XMLFilterImpl; only parsing and the "tee" are
     * overridden here.
     */
    private static class MyReader
    extends XMLFilterImpl
    {
        private final SAXParser _parser;

        public MyReader(SAXParser parser)
        {
            _parser = parser;
        }

        /** Runs the wrapped parser, feeding its events back into this filter. */
        @Override
        public void parse(InputSource input)
        throws SAXException, IOException
        {
            _parser.parse(input, new XMLFilterBridge(this));
        }

        // this is an example of a "tee" function
        @Override
        public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException
        {
            System.out.println("startElement: " + name);
            super.startElement(uri, localName, name, atts);
        }
    }


    /**
     * Adapts an XMLFilterImpl to the DefaultHandler type that SAXParser.parse()
     * requires. Every callback is passed straight to the filter; without the
     * complete set, the filter (and the handlers registered on it) would only
     * ever see the callbacks that happen to be overridden here.
     */
    private static class XMLFilterBridge
    extends DefaultHandler
    {
        private final XMLFilterImpl _filter;

        public XMLFilterBridge(XMLFilterImpl myFilter)
        {
            _filter = myFilter;
        }

        // ----- EntityResolver -----

        @Override
        public InputSource resolveEntity(String publicId, String systemId)
        throws IOException, SAXException
        {
            return _filter.resolveEntity(publicId, systemId);
        }

        // ----- DTDHandler -----

        @Override
        public void notationDecl(String name, String publicId, String systemId)
        throws SAXException
        {
            _filter.notationDecl(name, publicId, systemId);
        }

        @Override
        public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName)
        throws SAXException
        {
            _filter.unparsedEntityDecl(name, publicId, systemId, notationName);
        }

        // ----- ContentHandler -----

        @Override
        public void setDocumentLocator(Locator locator)
        {
            _filter.setDocumentLocator(locator);
        }

        @Override
        public void startDocument()
        throws SAXException
        {
            _filter.startDocument();
        }

        @Override
        public void endDocument()
        throws SAXException
        {
            _filter.endDocument();
        }

        @Override
        public void startPrefixMapping(String prefix, String uri)
        throws SAXException
        {
            _filter.startPrefixMapping(prefix, uri);
        }

        @Override
        public void endPrefixMapping(String prefix)
        throws SAXException
        {
            _filter.endPrefixMapping(prefix);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes atts)
        throws SAXException
        {
            _filter.startElement(uri, localName, qName, atts);
        }

        @Override
        public void endElement(String uri, String localName, String qName)
        throws SAXException
        {
            _filter.endElement(uri, localName, qName);
        }

        @Override
        public void characters(char[] ch, int start, int length)
        throws SAXException
        {
            _filter.characters(ch, start, length);
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length)
        throws SAXException
        {
            _filter.ignorableWhitespace(ch, start, length);
        }

        @Override
        public void processingInstruction(String target, String data)
        throws SAXException
        {
            _filter.processingInstruction(target, data);
        }

        @Override
        public void skippedEntity(String name)
        throws SAXException
        {
            _filter.skippedEntity(name);
        }

        // ----- ErrorHandler -----

        @Override
        public void warning(SAXParseException e)
        throws SAXException
        {
            _filter.warning(e);
        }

        @Override
        public void error(SAXParseException e)
        throws SAXException
        {
            _filter.error(e);
        }

        @Override
        public void fatalError(SAXParseException e)
        throws SAXException
        {
            _filter.fatalError(e);
        }
    }
}
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLFilterImpl;
/**
* Uses a decorator ContentHandler to insert a "tee" into a SAX parse/serialize
* stream.
*/
public class SaxTeeExample
{
public static void main(String[] argv)
throws Exception
{
StringReader src = new StringReader("<root><child>text</child></root>");
StringWriter dst = new StringWriter();
Transformer xform = TransformerFactory.newInstance().newTransformer();
XMLReader reader = new MyReader(SAXParserFactory.newInstance().newSAXParser());
xform.transform(new SAXSource(reader, new InputSource(src)),
new StreamResult(dst));
System.out.println(dst.toString());
}
private static class MyReader
extends XMLFilterImpl
{
private SAXParser _parser;
public MyReader(SAXParser parser)
{
_parser = parser;
}
@Override
public void parse(InputSource input)
throws SAXException, IOException
{
_parser.parse(input, new XMLFilterBridge(this));
}
// this is an example of a "tee" function
@Override
public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException
{
System.out.println("startElement: " + name);
super.startElement(uri, localName, name, atts);
}
}
private static class XMLFilterBridge
extends DefaultHandler
{
private XMLFilterImpl _filter;
public XMLFilterBridge(XMLFilterImpl myFilter)
{
_filter = myFilter;
}
@Override
public void characters(char[] ch, int start, int length)
throws SAXException
{
_filter.characters(ch, start, length);
}
// override all other methods of DefaultHandler
// ...
}
}
`main` 方法负责设置转换器(Transformer)。值得注意的是,`SAXSource` 是围绕 `MyReader` 构建的。当转换器准备好处理事件时,它会调用该对象的 `parse()` 方法,并把指定的 `InputSource` 传给它。
下一部分不那么直观:`XMLFilterImpl` 遵循装饰器(Decorator)模式。在开始转换之前,转换器会调用该对象的各种 setter 方法,把它自己的处理程序传进来。凡是我没有重写的方法(例如 `startDocument()`),都只是简单地调用委托对象。作为重写的示例,我在 `startElement()` 中做了“分析”(只是一个 println)。您很可能还需要重写其他 `ContentHandler` 方法。
最后,`XMLFilterBridge` 是 `DefaultHandler` 和 `XMLReader` 之间的桥梁;它也是一个装饰器,每个方法都只是调用委托对象。这里我只展示了一个重写方法,但您必须把所有方法都重写一遍。

我最近遇到了类似的问题。下面是我写的一个类,可以得到您想要的东西:
import java.io.OutputStream;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerException;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import org.xml.sax.*;
/**
 * ContentHandler that writes the SAX events it receives to an OutputStream.
 *
 * <p>The constructor runs a stylesheet-less Transformer against a stub
 * XMLReader whose {@code parse()} does nothing. Whatever ContentHandler the
 * Transformer registers on that stub is kept, and every callback of this
 * class is forwarded to it.
 */
public class XMLSerializer implements ContentHandler {

    private static final TransformerFactory FACTORY = TransformerFactory.newInstance();

    // Handler obtained from the Transformer; target of every forwarded event.
    private final ContentHandler delegate;

    /**
     * @param os stream handed to the Transformer as its StreamResult
     * @throws SAXException if the Transformer cannot be created or run, or if
     *         it never registered a ContentHandler on the stub reader
     */
    public XMLSerializer(OutputStream os) throws SAXException {
        final HandlerCapture capture = new HandlerCapture();
        try {
            final Transformer transformer = FACTORY.newTransformer();
            final SAXSource source = new SAXSource(capture, new InputSource());
            transformer.transform(source, new StreamResult(os));
        }
        catch (TransformerException e) {
            throw new SAXException(e);
        }
        final ContentHandler captured = capture.handler;
        if (captured == null) {
            throw new SAXException("Transformer didn't set ContentHandler");
        }
        delegate = captured;
    }

    /**
     * Stub XMLReader: remembers the ContentHandler it is given, never parses
     * anything, and ignores every other setting.
     */
    private static final class HandlerCapture implements XMLReader {
        private ContentHandler handler;

        @Override
        public ContentHandler getContentHandler() { return handler; }

        @Override
        public void setContentHandler(ContentHandler handler) { this.handler = handler; }

        @Override
        public void parse(InputSource input) { }

        @Override
        public void parse(String systemId) { }

        @Override
        public DTDHandler getDTDHandler() { return null; }

        @Override
        public void setDTDHandler(DTDHandler handler) { }

        @Override
        public EntityResolver getEntityResolver() { return null; }

        @Override
        public void setEntityResolver(EntityResolver resolver) { }

        @Override
        public ErrorHandler getErrorHandler() { return null; }

        @Override
        public void setErrorHandler(ErrorHandler handler) { }

        @Override
        public boolean getFeature(String name) { return false; }

        @Override
        public void setFeature(String name, boolean value) { }

        @Override
        public Object getProperty(String name) { return null; }

        @Override
        public void setProperty(String name, Object value) { }
    }

    // ----- ContentHandler: each callback is passed through unchanged -----

    @Override
    public void setDocumentLocator(Locator locator) {
        delegate.setDocumentLocator(locator);
    }

    @Override
    public void startDocument() throws SAXException {
        delegate.startDocument();
    }

    @Override
    public void endDocument() throws SAXException {
        delegate.endDocument();
    }

    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        delegate.startPrefixMapping(prefix, uri);
    }

    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
        delegate.endPrefixMapping(prefix);
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts)
            throws SAXException {
        delegate.startElement(uri, localName, qName, atts);
    }

    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        delegate.endElement(uri, localName, qName);
    }

    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        delegate.characters(ch, start, length);
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
        delegate.ignorableWhitespace(ch, start, length);
    }

    @Override
    public void processingInstruction(String target, String data)
            throws SAXException {
        delegate.processingInstruction(target, data);
    }

    @Override
    public void skippedEntity(String name) throws SAXException {
        delegate.skippedEntity(name);
    }
}
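基本上,它拦截了转换器对 parse() 的调用:转换器会先通过 setContentHandler() 把自己的 ContentHandler 交给这个“假的” XMLReader,而 parse() 本身什么也不做;这个类把该处理程序保存下来,之后把收到的所有 SAX 事件都转发给它。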