Java解析XML以获取子元素值
标签:java, xml, parsing, dom
我有一个Java字符串,其中包含以下XML代码:
<?xml version="1.0" encoding="utf-8"?>
<Chart>
<request>
<zip>12345</zip>
<city>Miami</city>
</request>
</Chart>
12345
迈阿密
解析此字符串以提取 <zip> 元素的值(在本例中为12345)的最简单方法是什么?
无需深入使用Java解析xml的黑暗世界,您可以使用正则表达式:
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
 * Small demo that pulls the numeric content of a {@code <zip>} element out of
 * an XML string with a regular expression instead of an XML parser.
 */
public class FindZip {

    /**
     * Scans a hard-coded XML document for {@code <zip>digits</zip>} and prints
     * every captured digit run together with the start and end offsets of the
     * capture. Prints a fallback message when nothing matches.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Sample document, assembled piece by piece into a single string.
        String document =
                "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
                + "<Chart>"
                + " <request>"
                + " <zip>12345</zip>"
                + " <city>Miami</city>"
                + " </request>"
                + "</Chart>";

        // Group 1 captures only the digits between the opening and closing tag.
        Matcher zipMatcher = Pattern.compile("<zip>(\\d+)</zip>").matcher(document);

        // Nothing matched at all: report it and stop.
        if (!zipMatcher.find()) {
            System.out.println("No match found.");
            return;
        }

        // The first match is already positioned; report it, then keep scanning.
        do {
            String zipCode = zipMatcher.group(1);
            int from = zipMatcher.start(1);
            int to = zipMatcher.end(1);
            System.out.printf(
                    "I found the zip code \"%s\" starting at index %d and ending at index %d.%n",
                    zipCode, from, to);
        } while (zipMatcher.find());
    }
}
import java.util.regex.Pattern;
导入java.util.regex.Matcher;
公共类FindZip{
公共静态void main(字符串[]args){
图案图案=
模式编译(“\\d+”);
字符串邮政编码;
Matcher Matcher=pattern.Matcher(
"" +
"" +
" " +
" 12345" +
“迈阿密”+
" " +
""
);
布尔值=false;
while(matcher.find()){
邮政编码=matcher.group(1);
System.out.printf(
“我发现邮政编码\%s\”从索引%d开始,在索引%d结束。%n”,
邮政编码,
匹配器启动(1),
匹配器。结束(1)
);
发现=真;
}
如果(!找到){
System.out.println(“未找到匹配项”);
}
}
}
这种方法有明显的缺点和局限性,但至少您能得到邮政编码。
如果您手上的是XML,最好将其作为XML来解析,然后直接使用XPath:
import java.io.StringReader;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
// Extracts the text of the first <zip> element from an in-memory XML string
// using DOM parsing plus XPath, and prints it as "element=<value>".
// NOTE: these statements throw checked exceptions (parser configuration,
// SAX, I/O, XPath), so they must live in a method that declares or handles them.
String xml="<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n" +
" <Chart>\r\n" +
" <request>\r\n" +
" <zip>12345</zip>\r\n" +
" <city>Miami</city>\r\n" +
" </request>\r\n" +
" </Chart>";
// Build a namespace-aware DOM parser.
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
builderFactory.setNamespaceAware(true);
DocumentBuilder builder = builderFactory.newDocumentBuilder();
// Parse straight from the string; the StringReader avoids any byte/charset round trip.
Document document = builder.parse(new InputSource(new StringReader(xml)));
// Path to the element whose text is wanted.
XPath xPath = XPathFactory.newInstance().newXPath();
String expression = "//Chart/request/zip";
// Only the first match is needed, so ask XPath for a single NODE instead of
// fetching a NODESET and breaking out of a loop after the first element.
Node zipNode = (Node) xPath.evaluate(expression, document, XPathConstants.NODE);
// A null result means the path matched nothing; in that case print nothing.
if (zipNode != null) {
    System.out.println("element=" + zipNode.getTextContent());
}
import javax.xml.namespace.NamespaceContext;
导入javax.xml.parsers.DocumentBuilder;
导入javax.xml.parsers.DocumentBuilderFactory;
导入javax.xml.transform.OutputKeys;
导入javax.xml.transform.Transformer;
导入javax.xml.transform.TransformerFactory;
导入javax.xml.transform.dom.DOMSource;
导入javax.xml.transform.stream.StreamResult;
导入javax.xml.xpath.XPathConstants;
导入javax.xml.xpath.XPathExpression;
导入javax.xml.xpath.xpath;
导入javax.xml.xpath.XPathFactory;
字符串xml=“\r\n”+
“\r\n”+
“\r\n”+
“12345\r\n”+
“迈阿密\r\n”+
“\r\n”+
" ";
DocumentBuilderFactory builderFactory=DocumentBuilderFactory.newInstance();
setNamespaceAware(true);
DocumentBuilder=builderFactory.newDocumentBuilder();
//解析XML
documentdocument=builder.parse(新的InputSource(新的StringReader(xml));
//XPATH
XPath=XPathFactory.newInstance().newXPath();
//你的道路
字符串表达式=“//Chart/request/zip”;
NodeList节点=(NodeList)xPath.compile(expression).evaluate(document,xpathcontents.NODESET);
for(int i=0; i<nodes.getLength(); i++) ……(此处的代码副本被截断,完整代码见上文)
参见:(原文链接已丢失)。这种情况下的XPath表达式可以简化为:"//zip"
感谢您的回复。XPath是否需要额外的库?
使用正则表达式时如何解决"找不到符号"(cannot find symbol)的错误?如何修改您提供的代码,把zip值存入一个字符串?
您可以使用 String zip_code = matcher.group(1); 。我已经修改了答案。如果要从XML中提取许多不同的字段,可能更适合使用另一个答案。
不过,回答您的问题:用于捕获货币的正则表达式会因格式而异。货币值是否始终为整数?是否包含美分?是否包含货币符号?会有负值吗?您最好先捕获数值和货币类型,然后使用格式化程序(原文链接已丢失)。
感谢您的帮助。如果标记中包含的是url,我应该把"\\d+"改成什么?
下面是一个匹配url的正则表达式示例:(https?:\\/\\/)?([\\da-z\\.-]+)\\.([a-z\\.]{2,6})([\\/\\w\\.-]*)*\\/? 。它并不完美,也不完整,但正如您所见,它很复杂。或者,您可以使用非常通用的模式,如 .* ,然后像这样解析取到的值:String url=new URL(matcher..find()) 。不过,到了这一步,正确解析XML并使用XPath访问所需字段会更容易,也更好。