Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/79.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
使用R解析xml内容以提取标题信息_R_Xml - Fatal编程技术网

使用R解析xml内容以提取标题信息

使用R解析xml内容以提取标题信息,r,xml,R,Xml,我有一个xml数据 <?xml version="1.0" encoding="UTF-8"?> <ClinVarResult-Set> <ClinVarSet ID="95075"> <RecordStatus>not current</RecordStatus> <Title>MPV17, 26-BP DEL, NT116 AND Navajo neurohepatopathy</Title&g

我有一个xml数据

<?xml version="1.0" encoding="UTF-8"?>
<ClinVarResult-Set>
  <ClinVarSet ID="95075">
    <RecordStatus>not current</RecordStatus>
    <Title>MPV17, 26-BP DEL, NT116 AND Navajo neurohepatopathy</Title>
    <ReferenceClinVarAssertion DateCreated="2012-08-13" DateLastUpdated="2013-04-03" ID="75049">
      <ClinVarAccession Acc="RCV000017546" Version="1" Type="RCV" DateUpdated="2013-04-08"/>
      <RecordStatus>current</RecordStatus>
      <ClinicalSignificance DateLastEvaluated="2011-11-17">
        <ReviewStatus>classified by single submitter</ReviewStatus>
        <Description>pathogenic</Description>
      </ClinicalSignificance>
      <Assertion Type="variation to disease"/>
      <ExternalID DB="NCBI"/>
      <ObservedIn>
        <Sample>
          <Origin>germline</Origin>
          <Species TaxonomyId="9606">human</Species>
          <AffectedStatus>not provided</AffectedStatus>
        </Sample>
        <Method>
          <MethodType>curation</MethodType>
        </Method>
        <ObservedData ID="208542">
          <Attribute Type="Description">See 137960.0003 and Spinazzola et al. (2006).</Attribute>
          <Citation Type="general">
            <ID Source="PubMed">16582910</ID>
          </Citation>
        </ObservedData>
      </ObservedIn>
      <MeasureSet Type="Variant" ID="16163">
        <Measure Type="Deletion" ID="31202">
          <Name>
            <ElementValue Type="Alternate">MPV17, 26-BP DEL, NT116</ElementValue>
            <XRef Type="Allelic variant" ID="137960.0004" DB="OMIM"/>
          </Name>
          <AttributeSet>
            <Attribute Type="nucleotide change">26-BP DEL, NT116</Attribute>
            <XRef Type="Allelic variant" ID="137960.0004" DB="OMIM"/>
          </AttributeSet>
          <MeasureRelationship Type="variant in gene">
            <Name>
              <ElementValue Type="Preferred">MpV17 mitochondrial inner membrane protein</ElementValue>
            </Name>
            <Symbol>
              <ElementValue Type="Preferred">MPV17</ElementValue>
            </Symbol>
            <XRef ID="4358" DB="Gene"/>
            <XRef ID="137960" DB="OMIM" Type="MIM"/>
          </MeasureRelationship>
          <XRef Type="Allelic variant" ID="137960.0004" DB="OMIM"/>
        </Measure>
      </MeasureSet>
      <TraitSet Type="Disease" ID="5245">
        <Trait ID="3439" Type="Disease">
          <Name>
            <ElementValue Type="Preferred">Navajo neurohepatopathy</ElementValue>
            <XRef ID="3972" DB="Office of Rare Diseases"/>
          </Name>
          <Name>
            <ElementValue Type="Alternate">Navajo neuropathy</ElementValue>
          </Name>
          <Name>
            <ElementValue Type="Alternate">MITOCHONDRIAL DNA DEPLETION SYNDROME 6 (HEPATOCEREBRAL TYPE)</ElementValue>
            <XRef Type="MIM" ID="256810" DB="OMIM"/>
            <XRef Type="Allelic variant" ID="137960.0002" DB="OMIM"/>
            <XRef Type="Allelic variant" ID="137960.0003" DB="OMIM"/>
            <XRef Type="Allelic variant" ID="137960.0005" DB="OMIM"/>
            <XRef Type="Allelic variant" ID="137960.0004" DB="OMIM"/>
            <XRef Type="Allelic variant" ID="137960.0001" DB="OMIM"/>
            <XRef Type="Allelic variant" ID="137960.0006" DB="OMIM"/>
            <XRef Type="Allelic variant" ID="137960.0007" DB="OMIM"/>
          </Name>
          <Name>
            <ElementValue Type="Alternate">MPV17- Related Hepatocerebral Mitochondrial DNA Depletion Syndrome</ElementValue>
            <XRef ID="NBK92947" DB="GeneReviews"/>
          </Name>
          <Symbol>
            <ElementValue Type="Preferred">MTDPS6</ElementValue>
            <XRef Type="MIM" ID="256810" DB="OMIM"/>
          </Symbol>
          <Symbol>
            <ElementValue Type="Alternate">NN</ElementValue>
            <XRef Type="MIM" ID="256810" DB="OMIM"/>
            <XRef ID="3972" DB="Office of Rare Diseases"/>
          </Symbol>
          <Symbol>
            <ElementValue Type="Alternate">NNH</ElementValue>
            <XRef Type="MIM" ID="256810" DB="OMIM"/>
          </Symbol>
          <AttributeSet>
            <Attribute Type="age of onset">Childhood</Attribute>
          </AttributeSet>
          <Citation Type="review" Abbrev="GeneReviews">
            <ID Source="PubMed">22593919</ID>
          </Citation>
          <XRef ID="255229" DB="Orphanet"/>
          <XRef ID="C1850406" DB="MedGen"/>
          <XRef ID="NBK92947" DB="GeneReviews"/>
          <XRef Type="MIM" ID="256810" DB="OMIM"/>
        </Trait>
      </TraitSet>
    </ReferenceClinVarAssertion>
    <ClinVarAssertion ID="37818">
      <ClinVarSubmissionID localKey="137960.0004_MITOCHONDRIAL DNA DEPLETION SYNDROME 6 (HEPATOCEREBRAL TYPE)" title="MPV17, 26-BP DEL, NT116 _MITOCHONDRIAL DNA DEPLETION SYNDROME 6 (HEPATOCEREBRAL TYPE)" submitterDate="2011-11-17" submitter="OMIM"/>
      <ClinVarAccession Acc="SCV000037818" OrgID="3" Version="1" Type="SCV" DateUpdated="2013-04-08"/>
      <RecordStatus>current</RecordStatus>
      <ClinicalSignificance DateLastEvaluated="2011-11-17">
        <Description>pathogenic</Description>
      </ClinicalSignificance>
      <Assertion Type="variation to disease"/>
      <ObservedIn>
        <Sample>
          <Origin>germline</Origin>
          <Species>human</Species>
          <AffectedStatus>not provided</AffectedStatus>
        </Sample>
        <Method>
          <MethodType>curation</MethodType>
        </Method>
        <ObservedData>
          <Attribute Type="Description">See 137960.0003 and Spinazzola et al. (2006).</Attribute>
          <Citation>
            <ID Source="PubMed">16582910</ID>
          </Citation>
        </ObservedData>
      </ObservedIn>
      <MeasureSet Type="Variant">
        <Measure Type="Variation">
          <Name>
            <ElementValue Type="Preferred">MPV17, 26-BP DEL, NT116 </ElementValue>
          </Name>
          <AttributeSet>
            <Attribute Type="NonHGVS">26-BP DEL, NT116</Attribute>
          </AttributeSet>
          <MeasureRelationship Type="variant in gene">
            <Symbol>
              <ElementValue Type="Preferred">MPV17</ElementValue>
            </Symbol>
          </MeasureRelationship>
          <XRef DB="OMIM" Type="Allelic variant" ID="137960.0004"/>
        </Measure>
      </MeasureSet>
      <TraitSet Type="Disease">
        <Trait Type="Disease">
          <Name>
            <ElementValue Type="Preferred">MITOCHONDRIAL DNA DEPLETION SYNDROME 6 (HEPATOCEREBRAL TYPE)</ElementValue>
          </Name>
        </Trait>
      </TraitSet>
    </ClinVarAssertion>
  </ClinVarSet>
</ClinVarResult-Set>

但是这些都让我得到了标题的全部内容,当我只需要登录号的时候,我怎么才能得到这个号码呢?我尝试了多种方法,检查了许多例子,但我就是找不到答案

通过以下方式检索 rcv_data:

library(XML)
library(httr)
# Fetch one ClinVar set from NCBI E-utilities and parse it with the XML package.
# Leaves `rcv_doc` (the httr response) and `rcv_data` (the parsed XML document)
# in the workspace for the XPath queries that follow.

# User-agent string sent with the request.
UA <- "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
# ClinVar set ID to retrieve.
id <- 95075
# efetch URL for that ID; paste0() replaces paste(..., sep = "").
rcv_search <- paste0("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=clinvar&rettype=clinvarset&id=", id)
rcv_doc <- GET(rcv_search, user_agent(UA))
# Stop on an HTTP error rather than handing an error page to the XML parser.
stop_for_status(rcv_doc)
# The record declares encoding="UTF-8", so say so explicitly instead of letting
# content() guess; asText = TRUE makes xmlParse() treat the string as XML
# content, never as a file name or URL.
rcv_data <- xmlParse(
  content(rcv_doc, as = "text", encoding = "UTF-8"),
  asText = TRUE
)
library(XML)
library(httr)

UA这是我的目标。。。这有用吗

library( xml2 )
# ClinVar set identifier to look up
id <- 95075
# Build the E-utilities efetch request for that identifier
rcv_search <- paste0(
  "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=clinvar&rettype=clinvarset&id=",
  id
)
# Download and parse the XML record straight from the URL
rcv_data <- xml2::read_xml(x = rcv_search)

# Select every <ClinVarAccession> element below the document root
xml2::xml_find_all(x = rcv_data, xpath = ".//ClinVarAccession")

# Printed result:
# {xml_nodeset (2)}
# [1] <ClinVarAccession Acc="RCV000017546" Version="1" Type="RCV" DateUpdated="2013-04-08"/>
# [2] <ClinVarAccession Acc="SCV000037818" OrgID="3" Version="1" Type="SCV" DateUpdated="2013-04-08"/>
库(xml2)
id=95075
rcv_search=paste0(“https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=clinvar&rettype=clinvarset&id=“,id)
#读取数据

rcv_data下面是两组代码(一组使用dplyr,另一组不使用),它们将返回以“R”开头的登录号(因为您提供的代码返回两个代码,一个以“R”开头,另一个以“S”开头,并且您指定要以“R”开头的代码):

或者,如果没有安装 dplyr 这个 R 库,您可以执行以下操作:

## Pipe-free version: extract the accession numbers that begin with "R".
## `doc` is expected to be a document parsed by the XML package
## (e.g. the result of xmlParse()); it is not defined in this snippet.
## The original ended the two assignment lines with `%>%`, which fused the
## three statements into one invalid pipeline; each step now stands alone.

## get the referenced nodes
nodes <- getNodeSet(doc, "//ClinVarAccession")
## get accession numbers from nodes; vapply() always returns a character
## vector (NA where a node has no Acc attribute), unlike sapply()
accs <- vapply(
  nodes, xmlGetAttr, character(1),
  name = "Acc", default = NA_character_
)
## return accession numbers that begin with "R" (grepl() is FALSE for NA)
accs[grepl("^R", accs)]
##获取引用的节点
节点%
##从节点获取登录号
accs%
##返回以“R”开头的登录号
accs[grep(“^[R].*”,accs)]

我希望这有帮助

请包括示例代码中使用的包。另外,在读取数据时,我会得到两个相关节点,
还有:您的输出应该是什么样的?我的预期输出应该是“RCV00017546”。我可能弄错了数字,它应该是“RCV000017546”。我已经更新了我使用的包。
library( xml2 )
# ClinVar set identifier to look up
id <- 95075
# Build the E-utilities efetch request for that identifier
rcv_search <- paste0(
  "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=clinvar&rettype=clinvarset&id=",
  id
)
# Download and parse the XML record straight from the URL
rcv_data <- xml2::read_xml(x = rcv_search)

# Select every <ClinVarAccession> element below the document root
xml2::xml_find_all(x = rcv_data, xpath = ".//ClinVarAccession")

# Printed result:
# {xml_nodeset (2)}
# [1] <ClinVarAccession Acc="RCV000017546" Version="1" Type="RCV" DateUpdated="2013-04-08"/>
# [2] <ClinVarAccession Acc="SCV000037818" OrgID="3" Version="1" Type="SCV" DateUpdated="2013-04-08"/>
## Piped version: print the accession numbers that begin with "R".
## dplyr is attached only because it makes the %>% operator available.
library(dplyr)
## `doc` is the parsed document (not defined in this snippet). The chain is:
##   1. collect the ClinVarAccession nodes,
##   2. read the "Acc" attribute of each node,
##   3. keep the values that start with "R".
doc %>%
  getNodeSet("//ClinVarAccession") %>%
  sapply(xmlGetAttr, "Acc") %>%
  .[grep("^[R].*", .)]
## Pipe-free version: extract the accession numbers that begin with "R".
## `doc` is expected to be a document parsed by the XML package
## (e.g. the result of xmlParse()); it is not defined in this snippet.
## The original ended the two assignment lines with `%>%`, which fused the
## three statements into one invalid pipeline; each step now stands alone.

## get the referenced nodes
nodes <- getNodeSet(doc, "//ClinVarAccession")
## get accession numbers from nodes; vapply() always returns a character
## vector (NA where a node has no Acc attribute), unlike sapply()
accs <- vapply(
  nodes, xmlGetAttr, character(1),
  name = "Acc", default = NA_character_
)
## return accession numbers that begin with "R" (grepl() is FALSE for NA)
accs[grepl("^R", accs)]