Java JDOM SAXBuilder-假定特定标记为CDATA
我使用org.jdom.input.SAXBuilder将输入流(XML文件)解析为jdom文档(标签:java, xml, cdata, jdom):
// Create the builder, passing the fully qualified class name of JavaScriptParser as its String argument.
// NOTE(review): presumably this selects the SAX driver class the builder uses — confirm against the JDOM version in use.
SAXBuilder builder = new SAXBuilder(JavaScriptParser.class.getName());
// Parse the input stream into a JDOM document.
org.jdom.Document document = builder.build(stream);
我的XML流中有一些特定的标记,这些标记中的内容必须视为CDATA
例如:
<text><![CDATA[ Any text... ]]></text>
<javascript><![CDATA[ function doSomething(){} ]]></javascript>
<text>Any text...</text>
<javascript>function doSomething(){}</javascript>
您不应该在 else 分支中调用 return super.scanContent() 吗?
(即 if ("javascript".equals(fCurrentElement.rawname)) {…} else return super.scanContent();)
否则它可能会做两倍的工作。不过我一直使用默认的XMLReader,所以这只是我的大胆猜测。
/**
 * Document scanner that reports the content of {@code <javascript>} elements
 * to the document handler wrapped in startCDATA/endCDATA events, even when the
 * input does not use an explicit CDATA section.
 *
 * NOTE(review): this class relies on protected members inherited from the
 * Xerces scanner (fCurrentElement, fEntityScanner, fDocumentHandler,
 * fMarkupDepth); their exact semantics are not visible here and should be
 * confirmed against the Xerces version in use.
 */
public class JavaScriptScanner extends org.apache.xerces.impl.XMLNSDocumentScannerImpl
{
// Scratch buffer used by scanCDATASection() to collect character data before
// handing it to the document handler.
private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
/** Creates the scanner; only delegates to the superclass constructor. */
public JavaScriptScanner()
{
super();
}
/**
 * Scans element content. When the current element's raw name is
 * "javascript", the content is first consumed by {@link #scanCDATASection()}
 * and the scanner state is reset to SCANNER_STATE_CONTENT; in every case the
 * method then returns the result of the superclass scan.
 *
 * @return whatever {@code super.scanContent()} returns
 * @throws IOException propagated from the underlying scan
 * @throws XNIException propagated from the underlying scan
 */
@Override
protected int scanContent() throws IOException, XNIException
{
if ("javascript".equals(fCurrentElement.rawname))
{
// The boolean result of scanCDATASection() is ignored here.
scanCDATASection();
setScannerState(SCANNER_STATE_CONTENT);
}
// NOTE(review): super.scanContent() also runs after the custom scan above;
// confirm this is intended rather than an if/else.
return super.scanContent();
}
/**
 * Reads character data using "</javascript>" as the scan delimiter and
 * forwards it to the document handler between startCDATA and endCDATA calls.
 *
 * NOTE(review): the ']' counting and the '>' / "]]" handling below look like
 * they were carried over from a scanner for "]]>"-terminated CDATA sections;
 * verify they are still meaningful with the "</javascript>" delimiter.
 *
 * @return always {@code true}
 * @throws IOException propagated from the entity scanner
 * @throws XNIException propagated from the handler or error reporting
 */
protected boolean scanCDATASection() throws IOException, XNIException
{
// Tell the handler that a CDATA section starts.
if (fDocumentHandler != null) {
fDocumentHandler.startCDATA(null);
}
while (true) {
fStringBuffer.clear();
// First branch: scanData returned false, or the collected text itself
// contains "</javascript".
// NOTE(review): presumably a false return means the delimiter was reached — confirm.
if (!fEntityScanner.scanData("</javascript>", fStringBuffer) ||
fStringBuffer.toString().contains("</javascript")) {
// Forward whatever text was collected, if any.
if (fDocumentHandler != null && fStringBuffer.length > 0) {
fDocumentHandler.characters(fStringBuffer, null);
}
// Consume and count any run of ']' characters that follows.
int brackets = 0;
while (fEntityScanner.skipChar(']')) {
brackets++;
}
// Re-emit the skipped ']' characters to the handler as character data.
if (fDocumentHandler != null && brackets > 0) {
fStringBuffer.clear();
if (brackets > XMLEntityManager.DEFAULT_BUFFER_SIZE) {
// Long run of ']': fill the buffer once with DEFAULT_BUFFER_SIZE brackets,
// send it once per full chunk, then send the remainder by shortening
// the buffer length.
int chunks = brackets / XMLEntityManager.DEFAULT_BUFFER_SIZE;
int remainder = brackets % XMLEntityManager.DEFAULT_BUFFER_SIZE;
for (int i = 0; i < XMLEntityManager.DEFAULT_BUFFER_SIZE; i++) {
fStringBuffer.append(']');
}
for (int i = 0; i < chunks; i++) {
fDocumentHandler.characters(fStringBuffer, null);
}
if (remainder != 0) {
fStringBuffer.length = remainder;
fDocumentHandler.characters(fStringBuffer, null);
}
}
else {
// Short run: build exactly `brackets` characters and send them in one call.
for (int i = 0; i < brackets; i++) {
fStringBuffer.append(']');
}
fDocumentHandler.characters(fStringBuffer, null);
}
}
// A following '>' is consumed and ends the scan loop.
if (fEntityScanner.skipChar('>')) {
break;
}
// Otherwise emit a literal "]]" as character data and keep scanning.
if (fDocumentHandler != null) {
fStringBuffer.clear();
fStringBuffer.append("]]");
fDocumentHandler.characters(fStringBuffer, null);
}
}
else {
// Second branch: forward the collected text, then inspect the next
// character without consuming it.
if (fDocumentHandler != null) {
fDocumentHandler.characters(fStringBuffer, null);
}
int c = fEntityScanner.peekChar();
if (c != -1 && isInvalidLiteral(c)) {
if (XMLChar.isHighSurrogate(c)) {
// High surrogate: scan the surrogate sequence into the buffer and forward it.
fStringBuffer.clear();
scanSurrogates(fStringBuffer);
if (fDocumentHandler != null) {
fDocumentHandler.characters(fStringBuffer, null);
}
}
else {
// Any other invalid character: report it (code point in hex) and
// consume it so the loop moves on.
reportFatalError("InvalidCharInCDSect",
new Object[]{Integer.toString(c,16)});
fEntityScanner.scanChar();
}
}
}
}
// NOTE(review): no matching fMarkupDepth increment is visible in this class —
// confirm the depth stays balanced.
fMarkupDepth--;
// Tell the handler that the CDATA section ends.
if (fDocumentHandler != null) {
fDocumentHandler.endCDATA(null);
}
return true;
}
}