根据同级 XML 元素构建数据框(data frame)
(标签:xml、r)
我有若干个 XML 元素 `Series`,其结构如下:
<Series>
<SeriesKey>
<Value concept="LOCATION" value="NOR"/>
<Value concept="TRANSACT" value="B1_GA"/>
<Value concept="MEASURE" value="CXC"/>
</SeriesKey>
<Attributes>
<Value concept="TIME_FORMAT" value="P1Y"/>
<Value concept="UNIT" value="USD"/>
<Value concept="POWERCODE" value="6"/>
</Attributes>
<Obs>
<Time>1970</Time>
<ObsValue value="12729.77490809"/>
</Obs>
<Obs>
<Time>1971</Time>
<ObsValue value="14505.3225330031"/>
</Obs>
<Obs>
<Time>1972</Time>
<ObsValue value="17219.5954919744"/>
</Obs>
</Series>
<Series>
<SeriesKey>
<Value concept="LOCATION" value="USA"/>
<Value concept="TRANSACT" value="B1_GA"/>
<Value concept="MEASURE" value="CXC"/>
</SeriesKey>
<Attributes>
<Value concept="TIME_FORMAT" value="P1Y"/>
<Value concept="UNIT" value="USD"/>
<Value concept="POWERCODE" value="6"/>
</Attributes>
...
</Series>
这些元素来自存储在以下地址的原始文档:
# Address of the source XML document: a GetData request for SNA_TABLE1 on
# stats.oecd.org; the query string limits it to startTime=1950 .. endTime=2013.
url <- "http://stats.oecd.org/restsdmx/sdmx.ashx/GetData/SNA_TABLE1/NOR+CAN+FRA+DEU+GBR+USA+ITA+JAP.B1_GA+B1G_P119+B1G+B1GVA+B1GVB_E+B1GVC+B1GVF+B1GVG_I+B1GVJ+B1GVK+B1GVL+B1GVM_N+B1GVO_Q+B1GVR_U+D21_D31+D21S1+D31S1+DB1_GA.CXC/all?startTime=1950&endTime=2013"
但是我找不到简单的方法,把每个 `Obs` 对应到它所属的序列键(`SeriesKey`)。
下面的做法远谈不上优雅,但仍然可行:
# Build `df`: one row per observation, each tagged with the LOCATION, TRANSACT
# and MEASURE key values of the Series it belongs to.
# Requires `xml` (the parsed document) and `ns` (namespace prefixes, including
# "message" and "def") to exist already.
series_nodes <- getNodeSet(
  xml, "//message:MessageGroup/def:DataSet/def:Series", namespaces = ns
)

# Read one SeriesKey concept (e.g. "LOCATION") relative to a single Series node.
series_key_value <- function(node, concept) {
  path <- paste0("./def:SeriesKey/def:Value[@concept='", concept, "']/@value")
  # Drop the attribute name so it cannot leak into the data frame's row names.
  unname(xpathSApply(node, path, namespaces = ns))
}

# One data frame per Series. Every XPath is evaluated relative to the node, so
# the document is not re-scanned from the root for each Series, and an empty
# node set simply yields no iterations (unlike 1:length(...)).
series_frames <- lapply(series_nodes, function(node) {
  time <- xpathSApply(node, "./def:Obs/def:Time", xmlValue, namespaces = ns)
  n_obs <- length(time)
  if (n_obs == 0L) {
    return(NULL) # a Series without observations contributes no rows
  }
  value <- xpathSApply(node, "./def:Obs/def:ObsValue/@value", namespaces = ns)
  data.frame(
    location = rep(series_key_value(node, "LOCATION"), n_obs),
    transact = rep(series_key_value(node, "TRANSACT"), n_obs),
    measure = rep(series_key_value(node, "MEASURE"), n_obs),
    time = time,
    value = unname(value)
  )
})
series_frames <- Filter(Negate(is.null), series_frames)

# Bind once at the end instead of growing `df` with rbind() on every iteration.
df <- if (length(series_frames) > 0L) {
  do.call(rbind, series_frames)
} else {
  data.frame()
}
df
这不算是很好的办法,但是……先获取数据和命名空间:
# Parse the document found at `url`.
xml <- xmlParse(url)
# Collect the namespace definitions in simplified form and label the first
# entry "def" so that XPath expressions can use it as a prefix.
ns <- xmlNamespaceDefinitions(xml, simplify = TRUE)
names(ns)[1] <- "def"
xml
您在示例 XML 中没有显示命名空间,但从您的代码来看,实际上似乎存在命名空间。您能否更新问题,以便更好地展示 XML?
@RichardScriven 整个文件
是的,我明白了。我认为,如果对根文档(即 xmlRoot)进行操作……
# Count the LOCATION key values matched across all Series.
length(
  xpathSApply(
    xml,
    "//message:MessageGroup/def:DataSet/def:Series/def:SeriesKey/def:Value[@concept='LOCATION']/@value",
    namespaces = ns
  )
)
# [1] 123
# Count the Obs elements matched across all Series.
length(
  xpathSApply(
    xml,
    "//message:MessageGroup/def:DataSet/def:Series/def:Obs",
    namespaces = ns
  )
)
# [1] 3959
# Build `df`: one row per observation, each tagged with the LOCATION, TRANSACT
# and MEASURE key values of the Series it belongs to.
# Requires `xml` (the parsed document) and `ns` (namespace prefixes, including
# "message" and "def") to exist already.
series_nodes <- getNodeSet(
  xml, "//message:MessageGroup/def:DataSet/def:Series", namespaces = ns
)

# Read one SeriesKey concept (e.g. "LOCATION") relative to a single Series node.
series_key_value <- function(node, concept) {
  path <- paste0("./def:SeriesKey/def:Value[@concept='", concept, "']/@value")
  # Drop the attribute name so it cannot leak into the data frame's row names.
  unname(xpathSApply(node, path, namespaces = ns))
}

# One data frame per Series. Every XPath is evaluated relative to the node, so
# the document is not re-scanned from the root for each Series, and an empty
# node set simply yields no iterations (unlike 1:length(...)).
series_frames <- lapply(series_nodes, function(node) {
  time <- xpathSApply(node, "./def:Obs/def:Time", xmlValue, namespaces = ns)
  n_obs <- length(time)
  if (n_obs == 0L) {
    return(NULL) # a Series without observations contributes no rows
  }
  value <- xpathSApply(node, "./def:Obs/def:ObsValue/@value", namespaces = ns)
  data.frame(
    location = rep(series_key_value(node, "LOCATION"), n_obs),
    transact = rep(series_key_value(node, "TRANSACT"), n_obs),
    measure = rep(series_key_value(node, "MEASURE"), n_obs),
    time = time,
    value = unname(value)
  )
})
series_frames <- Filter(Negate(is.null), series_frames)

# Bind once at the end instead of growing `df` with rbind() on every iteration.
df <- if (length(series_frames) > 0L) {
  do.call(rbind, series_frames)
} else {
  data.frame()
}
# Parse the document found at `url`.
xml <- xmlParse(url)
# Collect the namespace definitions in simplified form and label the first
# entry "def" so that XPath expressions can use it as a prefix.
ns <- xmlNamespaceDefinitions(xml, simplify = TRUE)
names(ns)[1] <- "def"
# Vectorised construction of `df`: query each Series node once per field, then
# repeat the per-Series LOCATION as many times as that Series has observations.
# Requires `xml` (parsed document) and `ns` (namespace map with a "def" prefix).
series <- getNodeSet(xml, "//def:Series", namespaces = ns)

# LOCATION key of each Series. vapply() enforces exactly one character value
# per Series, so a missing or repeated key fails here instead of silently
# changing the result type and misaligning rows later.
q <- ".//def:SeriesKey/def:Value[@concept='LOCATION']/@value"
LOCATION <- vapply(
  series,
  function(elt) as.character(xpathSApply(elt, q, namespaces = ns)),
  character(1)
)

# Observation times, kept as one vector per Series so they can be counted.
q <- ".//def:Obs/def:Time"
TIME <- lapply(series, xpathSApply, q, xmlValue, namespaces = ns)

# Observation values (the `value` attribute of each ObsValue), per Series.
q <- ".//def:Obs/def:ObsValue/@value"
VALUE <- lapply(series, xpathSApply, q, namespaces = ns)

# Number of observations in each Series; drives the repetition of LOCATION.
geom <- vapply(TIME, length, integer(1))

df <- data.frame(
  LOCATION = rep(unname(LOCATION), geom),
  TIME = unlist(TIME, use.names = FALSE),
  VALUE = unlist(VALUE, use.names = FALSE)
)
# Alternative: count observations per Series with XPath count(), then pull
# LOCATION and Time for the whole document in single queries.
series <- getNodeSet(xml, "//def:Series", namespaces = ns)

# Number of Obs descendants under each Series node.
geom <- sapply(series, function(node) {
  xpathSApply(node, "count(.//def:Obs)", namespaces = ns)
})

# Every LOCATION key value in the document, in document order.
LOCATION <- xpathSApply(
  xml, "//def:Value[@concept='LOCATION']/@value", namespaces = ns
)

# Text content of every Time element in the document.
TIME <- xpathSApply(xml, "//def:Time/text()", xmlValue, namespaces = ns)