在Java中解析XML以仅获取文本
我有以下XML在Java中解析XML以仅获取文本,java,xml,Java,Xml,我有以下XML <?xml version="1.0" encoding="UTF-8"?> <wddxPacket version="1.0"> <header /> <data> <string> <char code="0d" /> <char code="0a" /> Provider: HERO - 2.xx
<?xml version="1.0" encoding="UTF-8"?>
<wddxPacket version="1.0">
<header />
<data>
<string>
<char code="0d" />
<char code="0a" />
Provider: HERO - 2.xx
<char code="0d" />
<char code="0a" />
<char code="0d" />
<char code="0a" />
<char code="0d" />
<char code="0a" />
DBvendor=EPA
<char code="0d" />
<char code="0a" />
Text-encoding=UTF-8
<char code="0d" />
<char code="0a" />
<char code="0d" />
<char code="0a" />
TY - RPRT
<char code="0d" />
<char code="0a" />
LB - 94742
<char code="0d" />
<char code="0a" />
AU - IARC,
<char code="0d" />
<char code="0a" />
LU - International Agency for Research on Cancer
<char code="0d" />
<char code="0a" />
PY - 1985
<char code="0d" />
<char code="0a" />
TY - JOUR
<char code="0d" />
<char code="0a" />
LB - 94743
<char code="0d" />
<char code="0a" />
AU - Shamilov, T. A.
<char code="0d" />
<char code="0a" />
AU - Abasov, D. M.
<char code="0d" />
<char code="0a" />
PY - 1973
<char code="0d" />
<char code="0a" />
J2 - Med Tr Prom Ekol
<char code="0d" />
<char code="0a" />
T2 - Meditsina Truda i Promyshlennaya Ekologiya
<char code="0d" />
<char code="0a" />
JF - Meditsina Truda i Promyshlennaya Ekologiya
<char code="0d" />
<char code="0a" />
SP - 12-15
<char code="0d" />
<char code="0a" />
SN - ISSN 1026-9428
<char code="0d" />
<char code="0a" />
TI - Effect of allyl chloride on animals under experimental conditions
<char code="0d" />
<char code="0a" />
VL - 8
<char code="0d" />
<char code="0a" />
ER -
<char code="0d" />
<char code="0a" />
<char code="0d" />
<char code="0a" />
TY - JOUR
<char code="0d" />
<char code="0a" />
</string>
</data>
</wddxPacket>
我需要从 TY 开始的文本(这是一个 RIS 格式的文件),不过只要能拿到全部文本,剩下的我可以自己处理。我在网上搜索过,但没有找到有用的内容。我需要用 Java 来实现。
我试过了
// Parse the input file into a DOM Document.
Document doc = null;
DocumentBuilderFactory dbf = null;
DocumentBuilder docBuild = null;
dbf = DocumentBuilderFactory.newInstance();
docBuild = dbf.newDocumentBuilder();
doc = docBuild.parse(file);
// Root element of the parsed document; not referenced again in this snippet.
Node node = doc.getDocumentElement();
XPathFactory xfact = XPathFactory.newInstance();
XPath xpath = xfact.newXPath();
// NOTE(review): this path targets <header>, which is an empty element in the sample XML,
// and it ends with a trailing '/', which does not look like a complete location path — TODO confirm.
String xpathStr = "/wddxPacket/header/";
// Evaluate the expression against the document, requesting a node set as the result.
Object res = xpath.evaluate(xpathStr, doc, XPathConstants.NODESET);
NodeList nodeList = (NodeList) res;
但是我什么都没有得到。
您需要使用如下XPath:
//string/text()
来获取文本值。
下面的java代码将为您提供文本值列表
// Parse the XML file into a DOM tree.
Document doc = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .parse( new File( file ) );

// Select every text node that is a child of a <string> element.
XPath xpath = XPathFactory.newInstance().newXPath();
Object eval = xpath.compile( "//string/text()" ).evaluate( doc, XPathConstants.NODESET );

// Print and collect each text value that is non-empty after trimming.
List<String> textValues = new ArrayList<String>();
if ( eval instanceof NodeList )
{
    NodeList textNodes = (NodeList) eval;
    int count = textNodes.getLength();
    for ( int idx = 0; idx < count; idx++ )
    {
        String text = textNodes.item( idx ).getNodeValue().trim();
        if ( text.isEmpty() )
        {
            continue;
        }
        System.out.println( text );
        textValues.add( text );
    }
}
DocumentBuilderFactory dbf=DocumentBuilderFactory.newInstance();
DocumentBuilder db=dbf.newDocumentBuilder();
Document doc=db.parse(新文件(File));
XPathFactory XPathFactory=XPathFactory.newInstance();
XPath=xPathFactory.newXPath();
XPathExpression expr=xpath.compile(“//string/text()”);
Object eval=expr.evaluate(doc,XPathConstants.NODESET);
List textValues=new ArrayList();
if(eval!=null&&eval节点列表实例)
{
节点列表=(节点列表)评估;
对于(int i=0;i
文本值收集在变量
textValues
中。
您需要使用如下XPath:
//string/text()
来获取文本值。
下面的java代码将为您提供文本值列表
// Parse the XML file into a DOM tree.
Document doc = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .parse( new File( file ) );

// Select every text node that is a child of a <string> element.
XPath xpath = XPathFactory.newInstance().newXPath();
Object eval = xpath.compile( "//string/text()" ).evaluate( doc, XPathConstants.NODESET );

// Print and collect each text value that is non-empty after trimming.
List<String> textValues = new ArrayList<String>();
if ( eval instanceof NodeList )
{
    NodeList textNodes = (NodeList) eval;
    int count = textNodes.getLength();
    for ( int idx = 0; idx < count; idx++ )
    {
        String text = textNodes.item( idx ).getNodeValue().trim();
        if ( text.isEmpty() )
        {
            continue;
        }
        System.out.println( text );
        textValues.add( text );
    }
}
DocumentBuilderFactory dbf=DocumentBuilderFactory.newInstance();
DocumentBuilder db=dbf.newDocumentBuilder();
Document doc=db.parse(新文件(File));
XPathFactory XPathFactory=XPathFactory.newInstance();
XPath=xPathFactory.newXPath();
XPathExpression expr=xpath.compile(“//string/text()”);
Object eval=expr.evaluate(doc,XPathConstants.NODESET);
List textValues=new ArrayList();
if(eval!=null&&eval节点列表实例)
{
节点列表=(节点列表)评估;
对于(int i=0;i
文本值收集在变量
textValues
中。
您可以使用StAX进行此操作
/**
 * Prints the text content of the sample XML document with blank lines removed.
 *
 * <p>Loads {@code 44167076.xml} through the thread's context class loader, appends the text of
 * every {@code CHARACTERS} event to a buffer, and prints the buffer to standard output after
 * removing lines that contain only spaces or tabs.
 *
 * @throws RuntimeException if the resource is missing, or if reading, parsing or closing fails;
 *     the underlying exception is preserved as the cause
 */
public void getText() {
    String yourSampleFile = "44167076.xml";
    StringBuilder result = new StringBuilder();
    try (InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream(yourSampleFile)) {
        if (in == null) {
            // getResourceAsStream reports a missing resource by returning null, so fail with a clear message.
            throw new IllegalStateException("Resource not found on classpath: " + yourSampleFile);
        }
        XMLStreamReader r = XMLInputFactory.newInstance().createXMLStreamReader(in);
        // XMLStreamReader is not AutoCloseable. Adapting close() lets try-with-resources attach a
        // close failure as a suppressed exception instead of replacing the primary one.
        try (AutoCloseable closeReader = r::close) {
            while (r.hasNext()) {
                // Handle the current event first, then advance; only character data is collected.
                if (r.getEventType() == XMLStreamConstants.CHARACTERS) {
                    result.append(r.getText());
                }
                r.next();
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // Drop lines that consist solely of spaces/tabs followed by a line break.
    System.out.println(result.toString().replaceAll("(?m)^[ \t]*\r?\n", ""));
}
输出:
Provider: HERO - 2.xx
DBvendor=EPA
Text-encoding=UTF-8
TY - RPRT
LB - 94742
AU - IARC,
LU - International Agency for Research on Cancer
PY - 1985
TY - JOUR
LB - 94743
AU - Shamilov, T. A.
AU - Abasov, D. M.
PY - 1973
J2 - Med Tr Prom Ekol
T2 - Meditsina Truda i Promyshlennaya Ekologiya
JF - Meditsina Truda i Promyshlennaya Ekologiya
SP - 12-15
SN - ISSN 1026-9428
TI - Effect of allyl chloride on animals under experimental conditions
VL - 8
ER -
TY - JOUR
你可以用StAX来做
/**
 * Prints the text content of the sample XML document with blank lines removed.
 *
 * <p>Loads {@code 44167076.xml} through the thread's context class loader, appends the text of
 * every {@code CHARACTERS} event to a buffer, and prints the buffer to standard output after
 * removing lines that contain only spaces or tabs.
 *
 * @throws RuntimeException if the resource is missing, or if reading, parsing or closing fails;
 *     the underlying exception is preserved as the cause
 */
public void getText() {
    String yourSampleFile = "44167076.xml";
    StringBuilder result = new StringBuilder();
    try (InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream(yourSampleFile)) {
        if (in == null) {
            // getResourceAsStream reports a missing resource by returning null, so fail with a clear message.
            throw new IllegalStateException("Resource not found on classpath: " + yourSampleFile);
        }
        XMLStreamReader r = XMLInputFactory.newInstance().createXMLStreamReader(in);
        // XMLStreamReader is not AutoCloseable. Adapting close() lets try-with-resources attach a
        // close failure as a suppressed exception instead of replacing the primary one.
        try (AutoCloseable closeReader = r::close) {
            while (r.hasNext()) {
                // Handle the current event first, then advance; only character data is collected.
                if (r.getEventType() == XMLStreamConstants.CHARACTERS) {
                    result.append(r.getText());
                }
                r.next();
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // Drop lines that consist solely of spaces/tabs followed by a line break.
    System.out.println(result.toString().replaceAll("(?m)^[ \t]*\r?\n", ""));
}
输出:
Provider: HERO - 2.xx
DBvendor=EPA
Text-encoding=UTF-8
TY - RPRT
LB - 94742
AU - IARC,
LU - International Agency for Research on Cancer
PY - 1985
TY - JOUR
LB - 94743
AU - Shamilov, T. A.
AU - Abasov, D. M.
PY - 1973
J2 - Med Tr Prom Ekol
T2 - Meditsina Truda i Promyshlennaya Ekologiya
JF - Meditsina Truda i Promyshlennaya Ekologiya
SP - 12-15
SN - ISSN 1026-9428
TI - Effect of allyl chloride on animals under experimental conditions
VL - 8
ER -
TY - JOUR
以字符串形式返回结果的 xpath.evaluate 方法会自动连接匹配元素的文本内容,无需显式遍历节点列表。
// Evaluate the expression against the file; this overload returns the result as a String.
String xpathStr = "/wddxPacket/data";
XPath xpath = XPathFactory.newInstance().newXPath();
String text;
try (Reader reader = Files.newBufferedReader(Paths.get(filename))) {
    InputSource source = new InputSource(reader);
    text = xpath.evaluate(xpathStr, source);
}

// Print every line that is non-empty after trimming surrounding whitespace.
String[] rawLines = text.split("\\r?\\n");
for (String rawLine : rawLines) {
    String line = rawLine.trim();
    if (line.isEmpty()) {
        continue;
    }
    System.out.println(line);
}
以字符串形式返回结果的 xpath.evaluate 方法会自动连接匹配元素的文本内容,无需显式遍历节点列表。
// Evaluate the expression against the file; this overload returns the result as a String.
String xpathStr = "/wddxPacket/data";
XPath xpath = XPathFactory.newInstance().newXPath();
String text;
try (Reader reader = Files.newBufferedReader(Paths.get(filename))) {
    InputSource source = new InputSource(reader);
    text = xpath.evaluate(xpathStr, source);
}

// Print every line that is non-empty after trimming surrounding whitespace.
String[] rawLines = text.split("\\r?\\n");
for (String rawLine : rawLines) {
    String line = rawLine.trim();
    if (line.isEmpty()) {
        continue;
    }
    System.out.println(line);
}
你能直接贴出XML而不只是给出链接吗?
已贴出XML并更新了问题。
到目前为止你尝试了什么?
你能直接贴出XML而不只是给出链接吗?
已贴出XML并更新了问题。
到目前为止你尝试了什么?