Java 使用字符串错误解析xml提要

Java 使用字符串错误解析xml提要,java,xml,parsing,sax,feed,Java,Xml,Parsing,Sax,Feed,我想解析rss提要,但它在某些字符上失败,例如“>”字符和该字符之前的所有字符 例如: <title>[Maths I &gt; Theory] Maths I, T1.pdf: One file added.</title> 这是我的Rshandler: public class RSSHandler extends DefaultHandler { final int state_unknown = 0; final int state_title = 1

我想解析rss提要,但它在某些字符上失败,例如“>”字符和该字符之前的所有字符

例如:

<title>[Maths I &gt; Theory] Maths I, T1.pdf: One file added.</title>
这是我的 RSSHandler:

/**
 * SAX handler that fills an RSSFeed (and one RSSItem per "item" element)
 * with the text of the title, description, link and pubdate elements.
 *
 * NOTE(review): characters() stores only the chunk it is given and then
 * resets currentState, so when the parser delivers one element's text in
 * several characters() calls, only the first chunk reaches the setter.
 */
public class RSSHandler extends DefaultHandler {

// Element kinds tracked by the handler; currentState holds one of these.
final int state_unknown = 0;
final int state_title = 1;
final int state_description = 2;
final int state_link = 3;
final int state_pubdate = 4;
int currentState = state_unknown;

// Feed being built and the item currently being filled.
RSSFeed feed;
RSSItem item;

// Set to true at the first "item" element; never set back to false in this class.
boolean itemFound = false;

RSSHandler(){
}

/** Returns the feed object created in startDocument(). */
RSSFeed getFeed(){
return feed;
}

/** Creates a fresh feed and a placeholder item at the start of the document. */
@Override
public void startDocument() throws SAXException {
feed = new RSSFeed();
item = new RSSItem();

}

/** No work is done at the end of the document. */
@Override
public void endDocument() throws SAXException {
}

/**
 * Selects the state matching the element that just opened; an "item"
 * element additionally starts a new RSSItem.
 */
@Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
// Matching is done on localName only, ignoring case.

if (localName.equalsIgnoreCase("item")){
itemFound = true;
item = new RSSItem();
currentState = state_unknown;
}
else if (localName.equalsIgnoreCase("title")){
currentState = state_title;
}
else if (localName.equalsIgnoreCase("description")){
currentState = state_description;
}
else if (localName.equalsIgnoreCase("link")){
currentState = state_link;
}
else if (localName.equalsIgnoreCase("pubdate")){
currentState = state_pubdate;
}
else{
currentState = state_unknown;
}

}

/** Adds the current item to the feed when an "item" element closes. */
@Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
if (localName.equalsIgnoreCase("item")){
feed.addItem(item);
}
}

/**
 * Passes the received text to the setter selected by currentState, on
 * the item once an "item" element has been seen, otherwise on the feed.
 */
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
// Only the slice [start, start + length) of ch belongs to this callback.

String strCharacters = new String(ch,start,length);

if (itemFound==true){
// An "item" element has been seen: the text belongs to the current item.
switch(currentState){
case state_title:
 item.setTitle(strCharacters);
 break;
case state_description:
 item.setDescription(strCharacters);
 break;
case state_link:
 item.setLink(strCharacters);
 break;
case state_pubdate:
 item.setPubdate(strCharacters);
 break;
default:
 break;
}
}
else{
// No "item" element seen yet: the text belongs to the feed itself.
switch(currentState){
case state_title:
 feed.setTitle(strCharacters);
 break;
case state_description:
 feed.setDescription(strCharacters);
 break;
case state_link:
 feed.setLink(strCharacters);
 break;
case state_pubdate:
 feed.setPubdate(strCharacters);
 break;
default:
 break;
}
}

// NOTE(review): after this reset, any further characters() call for the
// same element falls into the default branch above and is discarded.
currentState = state_unknown;
}


}

您至少会收到5次对
characters
方法的回调:

1st: [Maths I 
2nd: >
3rd: Theory
4th: ]
5th: Maths I, T1.pdf: One file added.

您不应该在 characters 方法的最后一行更改 currentState;您必须缓冲每次回调收到的字符串,然后将它们连接起来。

这里有一个稍微修改的版本,可以很好地解析RSS文件。我希望有帮助

首先,定义一个 State 枚举:

/**
 * Kind of RSS element whose text the handler is currently collecting.
 */
public enum State {

    /** Any element that is not one of the tracked ones below. */
    unknown,

    /** A "title" element. */
    title,

    /** A "description" element. */
    description,

    /** A "link" element. */
    link,

    /** A "pubdate" element (matched case-insensitively by the handler). */
    pubdate

}
然后,处理程序类:

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX content handler that collects the title, description, link and
 * publication date of an RSS channel, and of each of its items, into an
 * {@link RSSFeed}.
 *
 * <p>A SAX parser may deliver the text of a single element through several
 * {@code characters} callbacks (for example around entity references such
 * as {@code &gt;}), so the text is buffered and only handed to the feed or
 * item when the element closes.
 *
 * <p>Instances keep parsing state in fields and are therefore meant to be
 * used by one parse at a time.
 */
public class RSSHandler extends DefaultHandler {

    /** Tracked element that is currently open, or {@code State.unknown}. */
    private State currentState = State.unknown;

    private RSSFeed feed;
    private RSSItem item;

    /** True only while the parser is inside an "item" element. */
    private boolean itemFound = false;

    /** Text accumulated for the currently open tracked element. */
    private final StringBuilder tagContent = new StringBuilder();

    public RSSHandler() {
    }

    /** Resets all parsing state so the handler can be reused for a new document. */
    @Override
    public void startDocument() throws SAXException {
        feed = new RSSFeed();
        item = new RSSItem();
        itemFound = false;
        currentState = State.unknown;
        tagContent.setLength(0);
    }

    /**
     * Starts buffering for a tracked element, or opens a new item.
     *
     * @param uri namespace URI of the element (unused)
     * @param localName local name; may be empty when the parser is not
     *        namespace-aware
     * @param qName qualified name, used when {@code localName} is empty
     * @param attributes element attributes (unused)
     */
    @Override
    public void startElement(final String uri, final String localName,
            final String qName, final Attributes attributes)
            throws SAXException {
        final String name = elementName(localName, qName);
        tagContent.setLength(0);
        if ("item".equalsIgnoreCase(name)) {
            itemFound = true;
            item = new RSSItem();
            currentState = State.unknown;
        } else {
            currentState = stateFor(name);
        }
    }

    /**
     * Stores the buffered text of the element that just closed and, when an
     * "item" element closes, adds the finished item to the feed.
     *
     * @param uri namespace URI of the element (unused)
     * @param localName local name; may be empty when the parser is not
     *        namespace-aware
     * @param qName qualified name, used when {@code localName} is empty
     */
    @Override
    public void endElement(final String uri, final String localName,
            final String qName) throws SAXException {
        storeContent(tagContent.toString());
        // Reset so that whitespace following this element, and the closing
        // tags of enclosing elements, cannot trigger the same setter again.
        currentState = State.unknown;
        tagContent.setLength(0);

        if ("item".equalsIgnoreCase(elementName(localName, qName))) {
            feed.addItem(item);
            // Elements after the item belong to the channel again.
            itemFound = false;
        }
    }

    /** Appends one chunk of text to the buffer of the open tracked element. */
    @Override
    public void characters(final char[] ch, final int start, final int length)
            throws SAXException {
        // Text outside tracked elements is never stored, so it is not buffered.
        if (currentState != State.unknown) {
            tagContent.append(ch, start, length);
        }
    }

    /**
     * Returns the feed built by the most recent parse.
     *
     * @return the feed, or {@code null} if no document has been started yet
     */
    public RSSFeed getFeed() {
        return feed;
    }

    /** Picks the local name, falling back to the qualified name when it is absent. */
    private static String elementName(final String localName, final String qName) {
        return (localName == null || localName.isEmpty()) ? qName : localName;
    }

    /** Maps an element name (case-insensitively) to the state that tracks it. */
    private static State stateFor(final String name) {
        if ("title".equalsIgnoreCase(name)) {
            return State.title;
        }
        if ("description".equalsIgnoreCase(name)) {
            return State.description;
        }
        if ("link".equalsIgnoreCase(name)) {
            return State.link;
        }
        if ("pubdate".equalsIgnoreCase(name)) {
            return State.pubdate;
        }
        return State.unknown;
    }

    /** Hands the text to the current item when inside one, otherwise to the feed. */
    private void storeContent(final String value) {
        if (itemFound) {
            switch (currentState) {
                case title:
                    item.setTitle(value);
                    break;
                case description:
                    item.setDescription(value);
                    break;
                case link:
                    item.setLink(value);
                    break;
                case pubdate:
                    item.setPubdate(value);
                    break;
                default:
                    break;
            }
        } else {
            switch (currentState) {
                case title:
                    feed.setTitle(value);
                    break;
                case description:
                    feed.setDescription(value);
                    break;
                case link:
                    feed.setLink(value);
                    break;
                case pubdate:
                    feed.setPubdate(value);
                    break;
                default:
                    break;
            }
        }
    }

}

虽然有效的XML不应该包含原始的“>”字符,但我不知道如何进行连接,也不确定会出现什么问题。请提供帮助。请使用 StringBuilder。也许这会有所帮助:您知道任何支持 &gt; 等实体的RSS提要阅读器示例吗?非常感谢你,你是最棒的!!谢谢
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX content handler that collects the title, description, link and
 * publication date of an RSS channel, and of each of its items, into an
 * {@link RSSFeed}.
 *
 * <p>A SAX parser may deliver the text of a single element through several
 * {@code characters} callbacks (for example around entity references such
 * as {@code &gt;}), so the text is buffered and only handed to the feed or
 * item when the element closes.
 *
 * <p>Instances keep parsing state in fields and are therefore meant to be
 * used by one parse at a time.
 */
public class RSSHandler extends DefaultHandler {

    /** Tracked element that is currently open, or {@code State.unknown}. */
    private State currentState = State.unknown;

    private RSSFeed feed;
    private RSSItem item;

    /** True only while the parser is inside an "item" element. */
    private boolean itemFound = false;

    /** Text accumulated for the currently open tracked element. */
    private final StringBuilder tagContent = new StringBuilder();

    public RSSHandler() {
    }

    /** Resets all parsing state so the handler can be reused for a new document. */
    @Override
    public void startDocument() throws SAXException {
        feed = new RSSFeed();
        item = new RSSItem();
        itemFound = false;
        currentState = State.unknown;
        tagContent.setLength(0);
    }

    /**
     * Starts buffering for a tracked element, or opens a new item.
     *
     * @param uri namespace URI of the element (unused)
     * @param localName local name; may be empty when the parser is not
     *        namespace-aware
     * @param qName qualified name, used when {@code localName} is empty
     * @param attributes element attributes (unused)
     */
    @Override
    public void startElement(final String uri, final String localName,
            final String qName, final Attributes attributes)
            throws SAXException {
        final String name = elementName(localName, qName);
        tagContent.setLength(0);
        if ("item".equalsIgnoreCase(name)) {
            itemFound = true;
            item = new RSSItem();
            currentState = State.unknown;
        } else {
            currentState = stateFor(name);
        }
    }

    /**
     * Stores the buffered text of the element that just closed and, when an
     * "item" element closes, adds the finished item to the feed.
     *
     * @param uri namespace URI of the element (unused)
     * @param localName local name; may be empty when the parser is not
     *        namespace-aware
     * @param qName qualified name, used when {@code localName} is empty
     */
    @Override
    public void endElement(final String uri, final String localName,
            final String qName) throws SAXException {
        storeContent(tagContent.toString());
        // Reset so that whitespace following this element, and the closing
        // tags of enclosing elements, cannot trigger the same setter again.
        currentState = State.unknown;
        tagContent.setLength(0);

        if ("item".equalsIgnoreCase(elementName(localName, qName))) {
            feed.addItem(item);
            // Elements after the item belong to the channel again.
            itemFound = false;
        }
    }

    /** Appends one chunk of text to the buffer of the open tracked element. */
    @Override
    public void characters(final char[] ch, final int start, final int length)
            throws SAXException {
        // Text outside tracked elements is never stored, so it is not buffered.
        if (currentState != State.unknown) {
            tagContent.append(ch, start, length);
        }
    }

    /**
     * Returns the feed built by the most recent parse.
     *
     * @return the feed, or {@code null} if no document has been started yet
     */
    public RSSFeed getFeed() {
        return feed;
    }

    /** Picks the local name, falling back to the qualified name when it is absent. */
    private static String elementName(final String localName, final String qName) {
        return (localName == null || localName.isEmpty()) ? qName : localName;
    }

    /** Maps an element name (case-insensitively) to the state that tracks it. */
    private static State stateFor(final String name) {
        if ("title".equalsIgnoreCase(name)) {
            return State.title;
        }
        if ("description".equalsIgnoreCase(name)) {
            return State.description;
        }
        if ("link".equalsIgnoreCase(name)) {
            return State.link;
        }
        if ("pubdate".equalsIgnoreCase(name)) {
            return State.pubdate;
        }
        return State.unknown;
    }

    /** Hands the text to the current item when inside one, otherwise to the feed. */
    private void storeContent(final String value) {
        if (itemFound) {
            switch (currentState) {
                case title:
                    item.setTitle(value);
                    break;
                case description:
                    item.setDescription(value);
                    break;
                case link:
                    item.setLink(value);
                    break;
                case pubdate:
                    item.setPubdate(value);
                    break;
                default:
                    break;
            }
        } else {
            switch (currentState) {
                case title:
                    feed.setTitle(value);
                    break;
                case description:
                    feed.setDescription(value);
                    break;
                case link:
                    feed.setLink(value);
                    break;
                case pubdate:
                    feed.setPubdate(value);
                    break;
                default:
                    break;
            }
        }
    }

}