Xquery 高阶函数返回存储_Xquery_Exist Db

Xquery 高阶函数返回存储

xquery

Xquery 高阶函数返回存储,xquery,exist-db,Xquery,Exist Db,我希望在existdb（v2.2）中转换并存储~450k xml片段。每15k左右记录一次转换，输出一个无效（但格式正确）的结果。在此MWE中，第二个人的生日不是有效日期1225年2月29日。由于尝试查找和修复这些错误非常耗时，我希望在将每个转换函数的输出存储到数据库中时对其进行验证，因此我不必每次查找异常值时都运行完全验证。我意识到有效片段和有效最终项目之间的区别，但这只是为了避免我喝咖啡过量我提出了两种解决方案，都能达到预期的效果：基于示例输入，我希望验证三个tei:xml片段并将其存储

我希望在 eXist-db（v2.2）中转换并存储约 45 万个 XML 片段。大约每 1.5 万条记录，转换就会输出一个无效（但格式良好，即 well-formed）的结果。在此 MWE 中，第二个人的生日 1225 年 2 月 29 日不是一个有效日期。由于查找并修复这些错误非常耗时，我希望在把每个转换函数的输出存入数据库时就对其进行验证，这样就不必在每次查找异常值时都运行一次完整的验证。我知道“有效的片段”与“有效的最终文档”之间是有区别的，但这样做只是为了避免我咖啡喝过量。

我想出了两种解决方案，都能达到预期的效果：基于示例输入，我希望验证三个 TEI XML 片段并将其存储在数据库中；对于无效的第二条记录，还应在数据库中额外存储一份验证报告。这两种方案都会产生输出，但我想了解两者在行为上的差异。

情景1 我在最终的 return 子句中调用

local:validate-fragment

，并把

local:biog

的结果作为其参数，整个调用是

xmldb:store

的一部分

xquery version "3.0";

import module namespace xmldb="http://exist-db.org/xquery/xmldb";

 declare namespace tei="http://www.tei-c.org/ns/1.0";
 declare namespace no="http://none";

 declare default element namespace "http://www.tei-c.org/ns/1.0";

 (:~
  : Builds one person element for every node passed in.
  :
  : Each incoming node is expected to be an id element whose parent row also
  : holds no:c_name, no:c_birthyear, no:c_by_month and no:c_by_day. The id's
  : text is prefixed with 'BIO' to form xml:id, and the three date parts are
  : joined with '-' into birth/@when. The date string is NOT checked here,
  : so an impossible date such as 1225-02-29 is emitted as-is.
  :
  : Unprefixed element names resolve through the default element namespace
  : declared in the prolog.
  :
  : @param $persons id nodes, one per person to convert
  : @return one person element per input node, in input order
  :)
 declare function local:biog ($persons as node()*) as item()* {

 for $id-node in $persons
 (: the sibling columns all hang off the id node's parent row :)
 let $row := $id-node/..
 let $birth-date := concat($row/no:c_birthyear, '-', $row/no:c_by_month, '-', $row/no:c_by_day)
 return
     element person {
         attribute ana {'historical'},
         attribute xml:id {concat('BIO', $id-node/text())},
         element persName {$row/no:c_name/text()},
         element birth {
             attribute when {$birth-date}
         }
     }
 };

 (:~
  : Validates a fragment by wrapping it in a minimal TEI document and running
  : that document through Jing against ../templates/tei/tei_all.rng.
  :
  : When the wrapped document is invalid, a Jing report is stored as a side
  : effect in ../reports/ under the name report-{xml:id}.xml.
  :
  : @param $frag the fragment(s) to check
  : @param $loc  selects the wrapper placed inside body: 'person', 'org',
  :              'place' or 'bibl'; any other value (including the empty
  :              sequence) puts the atomized fragment inside a p element
  : @return $frag alone when the wrapped document is valid; otherwise $frag
  :         followed by whatever xmldb:store returned for the report. Callers
  :         that only want the fragment therefore select item [1].
  :)
 declare function local:validate-fragment($frag as node()*, $loc as xs:string?) as item()* {

 (: $frag may hold several nodes; string-join keeps the report name a single
    string in that case, where concat() on a multi-item sequence would raise
    a type error. For zero or one id the name is unchanged. :)
 let $id := string-join(data($frag/@xml:id), '_')

 (: bind the grammar once; it is needed for both the check and the report :)
 let $grammar := doc('../templates/tei/tei_all.rng')

 let $mini := 
 <TEI xmlns="http://www.tei-c.org/ns/1.0">
  <teiHeader>
      <fileDesc>
         <titleStmt>
            <title>TEI-mini</title>
         </titleStmt>
         <publicationStmt>
            <p>testing ouput of individual functions using this mini tei document </p>
         </publicationStmt>
         <sourceDesc>
            <p>cannot replace proper validation of final output</p>
         </sourceDesc>
      </fileDesc>      
  </teiHeader>
  <text>
      <body>       
         {
         (: pick the list wrapper that matches the kind of fragment :)
         switch ($loc)
         case 'person' return <listPerson ana="chunk"><listPerson ana="block">{$frag}</listPerson></listPerson>
         case 'org' return <listOrg>{$frag}</listOrg>
         case 'place' return <listPlace>{$frag}</listPlace>
         case 'bibl' return <listBibl>{$frag}</listBibl>
         default return (<p>some text here {data($frag)}</p>)
         }         
      </body>
  </text>
 </TEI>

 return 
    if (validation:jing($mini, $grammar))
    then $frag
    else (
        $frag,
        (: only invalid fragments pay for the detailed report :)
        xmldb:store('../reports/', concat('report-', $id, '.xml'),
            validation:jing-report($mini, $grammar))
    )
 };

(: Three sample rows; the middle one carries the impossible date 1225-02-29. :)
let $data :=
<root xmlns="http://none">
    <row>
        <c_personid>12907</c_personid>
        <c_name>Huang Zhong</c_name>
        <c_birthyear>1226</c_birthyear>
        <c_by_month>08</c_by_month>
        <c_by_day>26</c_by_day>
    </row>
    <row>
        <c_personid>12908</c_personid>
        <c_name>Fang Yifu</c_name>
        <c_birthyear>1225</c_birthyear>
        <c_by_month>02</c_by_month>
        <c_by_day>29</c_by_day>
    </row>
    <row>
        <c_personid>12909</c_personid>
        <c_name>Fang Linsun</c_name>
        <c_birthyear>1215</c_birthyear>
        <c_by_month>11</c_by_month>
        <c_by_day>06</c_by_day>
    </row>
 </root>

 (: Convert each selected id, validate the result, and store it under
    ../samples/ as cbdb-{id}.xml. Only item [1] of the validation result is
    stored, since the validator may return further items after the fragment. :)
 for $pid in $data//no:c_personid[. > 12906 and . < 12910]
 let $resource-name := concat('cbdb-', data($pid), '.xml')
 return
     xmldb:store(
         '../samples/',
         $resource-name,
         local:validate-fragment(local:biog($pid), 'person')[1]
     )

所以我的问题是：为什么上面的版本会在数据库中对同一个文档反复写入和删除？有没有更好的方法来获取

$frag

，而不必使用蹩脚的

[1]

谓词？如果去掉这个谓词，当写入操作遇到既要写入 TEI XML 数据文件、又要写入错误报告的记录时，整个写入操作就会因错误而中止（此时验证函数返回的是片段和报告路径两个项，而不是单个片段）。

另一方面，情景1会立即向数据库写入（并删除）文档，这一点与情景2不同。

情景2 从转换函数内部调用验证函数，如下所示：

(:~
 : Scenario 2 variant: builds a person element for every node passed in and
 : hands each one straight to local:validate-fragment with location 'person'.
 :
 : The id node's text, prefixed with 'BIO', becomes xml:id; the name and the
 : three date parts are read from the id node's parent row. The date parts
 : are joined with '-' without any check of their own.
 :
 : @param $persons id nodes, one per person to convert
 : @return the concatenated results of local:validate-fragment, one call per
 :         input node
 :)
declare function local:biog ($persons as node()*) as item()* {

 for $id-node in $persons
 (: the sibling columns all hang off the id node's parent row :)
 let $row := $id-node/..
 let $person :=
    element person {
        attribute ana {'historical'},
        attribute xml:id {concat('BIO', $id-node/text())},
        element persName {$row/no:c_name/text()},
        element birth {
            attribute when {concat($row/no:c_birthyear, '-', $row/no:c_by_month, '-', $row/no:c_by_day)}
        }
    }
 return
    local:validate-fragment($person, 'person')

};

(: Store the first item produced for each selected id under ../samples/
   as cbdb-{id}.xml. $data is the sample root element bound earlier. :)
for $pid in $data//no:c_personid[. > 12906 and . < 12910]
let $resource-name := concat('cbdb-', data($pid), '.xml')
return
    xmldb:store('../samples/', $resource-name, local:biog($pid)[1])

……并使用如下的最终 return 子句：

(:~
 : Builds a person element for every node passed in and passes each one to
 : local:validate-fragment with location 'person'.
 :
 : xml:id is 'BIO' followed by the id node's text; persName and birth/@when
 : are filled from the sibling elements under the id node's parent row. The
 : date parts are joined with '-' and not checked here.
 :
 : @param $persons id nodes, one per person to convert
 : @return the concatenated results of local:validate-fragment, one call per
 :         input node
 :)
declare function local:biog ($persons as node()*) as item()* {

 for $id-node in $persons
 (: name and date columns live next to the id, under the same parent :)
 let $row := $id-node/..
 let $person :=
    element person {
        attribute ana {'historical'},
        attribute xml:id {concat('BIO', $id-node/text())},
        element persName {$row/no:c_name/text()},
        element birth {
            attribute when {concat($row/no:c_birthyear, '-', $row/no:c_by_month, '-', $row/no:c_by_day)}
        }
    }
 return
    local:validate-fragment($person, 'person')

};

(: Final clause: for each selected id, store item [1] of the converted and
   validated output under ../samples/ as cbdb-{id}.xml. :)
for $pid in $data//no:c_personid[. > 12906 and . < 12910]
let $resource-name := concat('cbdb-', data($pid), '.xml')
return
    xmldb:store('../samples/', $resource-name, local:biog($pid)[1])

这对于小块数据来说没有问题，但应用到整个数据集时，我担心这会成为一个问题。

最重要的是，我不理解在这两种情景中 xmldb:store 分别是如何被处理的。

有没有更聪明的办法，例如利用 XQuery 的高阶函数，来实现我想要的结果？

您是否尝试过使用 monex（或旧版管理应用）中的查询分析工具，查看查询的哪些部分耗时最多？另外，您提到在日志中看到了对同一文档进行存储和删除的证据，能把那条日志贴出来吗？同样，您提到了处理时间上的差异——具体是多少？如果您能进一步说明所看到的瓶颈，会很有帮助——您的帖子里细节很多，但我目前还看不清楚问题所在，也不明白您为什么要使用高阶函数。@joewiz 感谢您的快速回复。很抱歉我发帖时过于仓促，现已更新描述，使其更加清晰。根据查询分析器，

xmldb:store

被调用 10 次，耗时 4.2 毫秒；

local:biog

和

local:validate-fragment

各被调用 8 次，耗时 2.5 毫秒；validation:jing 也被调用 10 次，耗时 2.5 毫秒。在较大的数据集上运行时，我可以看到：在一种情景中，集合会在运行过程中逐步填充记录，而在另一种情景中则不会。但这很难用 3 条记录来演示。