使用R从csv文件创建xml文件_R_Xml_Apply_Lapply

使用R从csv文件创建xml文件

r xml

使用R从csv文件创建xml文件,r,xml,apply,lapply,R,Xml,Apply,Lapply,我正在尝试使用CSV文件中的xml包创建一个xml文件。我的CSV文件如下所示： >head(patient) Source Target weight 1 Bacteroides Lachnospiraceae 3.80735493 2 Bacteroides Klebsiella -1.61890983 3 Bacteroides Lachnoclostridium 3.80735493 4 Bacteroides

我正在尝试使用CSV文件中的xml包创建一个xml文件。我的CSV文件如下所示：

>head(patient)
  Source         Target             weight
1 Bacteroides   Lachnospiraceae  3.80735493
2 Bacteroides        Klebsiella -1.61890983
3 Bacteroides Lachnoclostridium  3.80735493
4 Bacteroides     Streptococcus -1.77760758
5 Streptococcus   Clostridium    1.19264508
6 Streptococcus [Eubacterium]    5.58496251

<?xml version="1.0" encoding="iso-8859-1"?>
<gxl>
    <graph id="graph id= ExtendedCallGraph edgeids=true edgemode=undirected">
        <node id="1">
            <attr name="Bacteroides">
            </attr>
        </node>
        <edge from="Bacteroides" to="Lachnospiraceae" isdirected="False" id="1--2">
        </edge>
        <edge from="Bacteroides" to=" Klebsiella" isdirected="False" id="1--2">
        </edge>
        <edge from="Bacteroides" to="Lachnoclostridium" isdirected="False" id="1--3">
        </edge>
        <edge from="Bacteroides" to=" Streptococcus" isdirected="False" id="1--4">
        </edge>
        <node id="2">
            <attr name="Streptococcus">
            </attr>
        </node>
        <edge from="Streptococcus" to="Clostridium" isdirected="False" id="2--3">
        </edge>
          <edge from="Streptococcus" to="Eubacterium" isdirected="False" id="2--4">
        </edge>
        :
        :
        :
        :
    </graph>
 </gxl>

我的愿望输出xml应如下所示：

>head(patient)
  Source         Target             weight
1 Bacteroides   Lachnospiraceae  3.80735493
2 Bacteroides        Klebsiella -1.61890983
3 Bacteroides Lachnoclostridium  3.80735493
4 Bacteroides     Streptococcus -1.77760758
5 Streptococcus   Clostridium    1.19264508
6 Streptococcus [Eubacterium]    5.58496251

<?xml version="1.0" encoding="iso-8859-1"?>
<gxl>
    <graph id="graph id= ExtendedCallGraph edgeids=true edgemode=undirected">
        <node id="1">
            <attr name="Bacteroides">
            </attr>
        </node>
        <edge from="Bacteroides" to="Lachnospiraceae" isdirected="False" id="1--2">
        </edge>
        <edge from="Bacteroides" to=" Klebsiella" isdirected="False" id="1--2">
        </edge>
        <edge from="Bacteroides" to="Lachnoclostridium" isdirected="False" id="1--3">
        </edge>
        <edge from="Bacteroides" to=" Streptococcus" isdirected="False" id="1--4">
        </edge>
        <node id="2">
            <attr name="Streptococcus">
            </attr>
        </node>
        <edge from="Streptococcus" to="Clostridium" isdirected="False" id="2--3">
        </edge>
          <edge from="Streptococcus" to="Eubacterium" isdirected="False" id="2--4">
        </edge>
        :
        :
        :
        :
    </graph>
 </gxl>


:
:
:
:

我尝试了以下代码：

DD = xmlHashTree()
top1<-addNode(xmlNode("gxl"), character(), DD)
addNode(xmlNode("graph id= ExtendedCallGraph edgeids=true edgemode=directed"),top1,DD,close=FALSE)

lapply(unique(patient_1$Source),function(x){
  b=addNode(xmlNode("node",attrs = c('id' = as.integer(x))),top1,DD)
  c=addNode(xmlNode("attr",attrs = c('name' = as.character(x))),b,DD)})

   #####I am trying to add edge node from source to Target########
    apply(unique(patient_1[,1:2]),1,function(x){
  e=addNode(xmlNode("edge",attrs = c("from"= as.character(patient_1$Source[1]), 
                    "to"=as.character(patient_1$target[1]), isdirected="false")),top1,DD)})

DD=xmlHashTree（）
top1考虑一个更简单的嵌套for
循环，该循环遍历每个唯一的源及其子集合观测值。与应用族解决方案不同，您可以保留所需的@id
属性的迭代编号，并增长XML树。此外，考虑使用<代码> NeXMLNobs/COD>方法来构建元素及其<代码> Atts< /Calp>参数，其中的属性是用<代码>（）（< /代码> ）传递给命名向量的。
#创建XML文件
doc=newXMLDoc（）
root=newXMLNode（“gxl”，doc=doc）
graph=newXMLNode（“graph”，父节点=root，
attrs=c（id=“ExtendedCallGraph”，edgeids=“true”，edgemode=“directed”））
#编写XML节点和数据
源考虑一个更简单的嵌套for
循环，它沿着每个唯一的源及其子集合观察值进行遍历。与应用族解决方案不同，您可以保留所需的@id
属性的迭代编号，并增长XML树。此外，考虑使用<代码> NeXMLNobs/COD>方法来构建元素及其<代码> Atts< /Calp>参数，其中的属性是用<代码>（）（< /代码> ）传递给命名向量的。
#创建XML文件
doc=newXMLDoc（）
root=newXMLNode（“gxl”，doc=doc）
graph=newXMLNode（“graph”，父节点=root，
attrs=c（id=“ExtendedCallGraph”，edgeids=“true”，edgemode=“directed”））
#编写XML节点和数据
源I也使用foreach函数实现。但这需要相当长的时间
Fin_Doc = newXMLDoc()
root = newXMLNode("gxl", doc = Fin_Doc)

graph = newXMLNode("graph", parent = root,
                   attrs = c(id="Co-occurance Network", edgeids="true", edgemode="undirected"))

##########adding the node id and attribute_name##########

foreach(w=as.vector(unique(patient_1$Otu_1)),y = as.vector(unique(patient_1$taxonomy.y)), x=as.vector(patient_1$taxonomy.x)) %do%{
  (grp_node = newXMLNode("node", parent = graph, attrs=c(id= "_")))
  (attr_name = newXMLNode("attr",parent = grp_node, text="", attrs=c(name="OTU")))
  (otu_id=newXMLNode("int",parent = attr_name, text="",w ))
  (bacteria=newXMLNode("attr",parent = grp_node, text="", attrs=c(name="Bacteria")))
  (string_name=newXMLNode("string",parent = bacteria, text="",y))
}

####################edge from otuids########################## 
foreach(w=as.vector(patient_1$Otu_1),q=as.vector(patient_1$Otu_2), z=as.vector(patient_1$patient1))%do% {   (edge_node1 = newXMLNode("edge", parent=graph,text="\n", attrs=c(from= w, to=q)))
 (attrs_node1=newXMLNode("attr", parent=edge_node1, text=" ", attrs=c("logratio")))   
(weight_node1= newXMLNode("float", as.character(z), parent=attrs_node1, text=" ")) }

我还实现了使用foreach函数。但这需要相当长的时间
Fin_Doc = newXMLDoc()
root = newXMLNode("gxl", doc = Fin_Doc)

graph = newXMLNode("graph", parent = root,
                   attrs = c(id="Co-occurance Network", edgeids="true", edgemode="undirected"))

##########adding the node id and attribute_name##########

foreach(w=as.vector(unique(patient_1$Otu_1)),y = as.vector(unique(patient_1$taxonomy.y)), x=as.vector(patient_1$taxonomy.x)) %do%{
  (grp_node = newXMLNode("node", parent = graph, attrs=c(id= "_")))
  (attr_name = newXMLNode("attr",parent = grp_node, text="", attrs=c(name="OTU")))
  (otu_id=newXMLNode("int",parent = attr_name, text="",w ))
  (bacteria=newXMLNode("attr",parent = grp_node, text="", attrs=c(name="Bacteria")))
  (string_name=newXMLNode("string",parent = bacteria, text="",y))
}

####################edge from otuids########################## 
foreach(w=as.vector(patient_1$Otu_1),q=as.vector(patient_1$Otu_2), z=as.vector(patient_1$patient1))%do% {   (edge_node1 = newXMLNode("edge", parent=graph,text="\n", attrs=c(from= w, to=q)))
 (attrs_node1=newXMLNode("attr", parent=edge_node1, text=" ", attrs=c("logratio")))   
(weight_node1= newXMLNode("float", as.character(z), parent=attrs_node1, text=" ")) }

@冻糕。抱歉弄得一团糟。我编辑了这些问题。taxonomy.x、taxonomy.y是源和目标。对于id属性，我想给出每个条目的编号（删除重复项）。例如：对于类杆菌：id为1，对于链球菌，id为2，然后继续。这是一个无向图。那是y@Parfait.. 我看到一个gxl文件的例子，它的格式是相同的。@Parfait，@Parfait。抱歉弄得一团糟。我编辑了这些问题。taxonomy.x、taxonomy.y是源和目标。对于id属性，我想给出每个条目的编号（删除重复项）。例如：对于类杆菌：id为1，对于链球菌，id为2，然后继续。这是一个无向图。那是y@Parfait.. 我看到一个gxl文件的例子，它有相同的格式。@Parfait，它工作得很好。但不会添加到每个边缘标记的末尾。由于边缘
不包含文本，因此它是一个自动关闭标记，与在末尾添加
完全同义。如果您真的需要结束标记，只需在下面添加一个换行符作为文本参数：edge\u node=newXMLNode（“edge”，parent=graph，text=“\n”，…）
。它工作正常。但不会添加到每个边缘标记的末尾。由于边缘
不包含文本，因此它是一个自动关闭标记，与在末尾添加
完全同义。如果您确实需要结束标记，只需在下面添加一个换行符作为文本参数：edge\u node=newXMLNode（“edge”，parent=graph，text=“\n”，…）
。