XQuery如何生成相似性矩阵?

XQuery如何生成相似性矩阵?,xquery,marklogic,Xquery,Marklogic,让我们假设我们有n个记录。我想计算每个记录和所有其他记录之间的相似性。我想做一个相似矩阵。我不熟悉XQuery,但我正在尽最大努力。我附上了一个屏幕截图,显示了一对记录之间的相似性 它是一个csv字符串。我使用以下for循环生成此示例: for $item1 at $index in /rec:Record let $records:= /rec:Record for $item2 in $records[$index + 1] (: here I call the similarity

让我们假设我们有n个记录。我想计算每个记录和所有其他记录之间的相似性。我想做一个相似矩阵。我不熟悉XQuery,但我正在尽最大努力。我附上了一个屏幕截图,显示了一对记录之间的相似性

它是一个csv字符串。我使用以下for循环生成此示例:

(: Outer loop: visit every rec:Record and bind its 1-based position to $index. :)
for $item1 at $index in /rec:Record 
let $records:= /rec:Record 
(: The numeric predicate [$index + 1] selects only the single record that
   immediately follows $item1, so each record is paired with just its successor
   rather than with all remaining records. :)
for $item2 in $records[$index + 1]

(: here I call the similarity functions :)

return 
(: csv output :)
我需要修改这个 for 循环,以便为数据集中的每一对记录生成相似性矩阵。该怎么做?


注意:相似度函数已经准备好了,我的问题不在于相似度本身的计算。

您可以这样做。我不确定您的csv是什么样子,也不确定解析器如何加载它。我还模拟了一些你已经指出的函数

(:~
 : Mock stand-in for the real similarity routine.
 :
 : The arguments are ignored; a fixed CSV string is returned so the
 : surrounding pairing loop can be exercised.
 :
 : @param $listA values of the first record
 : @param $listB values of the second record
 : @return the constant string "6,7,10,3"
 :
 : NOTE(review): the caller below passes <row> elements whose text is "1,1,1";
 : that text is not a valid xs:integer, so a strict processor may reject the
 : call - confirm against the real data shape.
 :)
declare function local:somefn ($listA as xs:integer*, $listB as xs:integer*) as xs:string { "6,7,10,3" };

(: Hard-coded sample standing in for a parsed CSV file: one <row> per record. :)
let $data :=
    <csv>
        <row>1,1,1</row>
        <row>2,2,2</row>
        <row>3,3,3</row>
        <row>4,4,4</row>
    </csv>
let $rows := $data/row

(: Pair every row with each row that comes after it, so every unordered
   pair is compared exactly once and no row is compared with itself. :)
for $left at $i in $rows
for $right in subsequence($rows, $i + 1)
return local:somefn($left, $right)

你可以这样做。我不确定您的csv是什么样子,也不确定解析器如何加载它。我还模拟了一些你已经指出的函数

(:~
 : Mock stand-in for the real similarity routine.
 :
 : The arguments are ignored; a fixed CSV string is returned so the
 : surrounding pairing loop can be exercised.
 :
 : @param $listA values of the first record
 : @param $listB values of the second record
 : @return the constant string "6,7,10,3"
 :
 : NOTE(review): the caller below passes <row> elements whose text is "1,1,1";
 : that text is not a valid xs:integer, so a strict processor may reject the
 : call - confirm against the real data shape.
 :)
declare function local:somefn ($listA as xs:integer*, $listB as xs:integer*) as xs:string { "6,7,10,3" };

(: Hard-coded sample standing in for a parsed CSV file: one <row> per record. :)
let $data :=
    <csv>
        <row>1,1,1</row>
        <row>2,2,2</row>
        <row>3,3,3</row>
        <row>4,4,4</row>
    </csv>
let $rows := $data/row

(: Pair every row with each row that comes after it, so every unordered
   pair is compared exactly once and no row is compared with itself. :)
for $left at $i in $rows
for $right in subsequence($rows, $i + 1)
return local:somefn($left, $right)

编辑:将CSV输出作为文本节点添加到结尾:

考虑一下MarkLogic中映射的威力

下面是在 MarkLogic 中用映射(map)表示矩阵的示例。我还加入了两样东西:一个作为公式占位符的函数(同时传入原始序列,以备分析时需要),以及一个演示如何访问“映射的映射”(map of maps)的小函数

xquery version "1.0-ml";

(:~
 : Serialises a map-of-maps matrix as CSV text.
 :
 : Outer keys are rows and inner keys are columns; both are ordered
 : numerically via xs:int. Each row is terminated by a newline.
 :
 : The rows are concatenated explicitly with fn:string-join. Handing a
 : sequence of strings straight to text{ } would separate the items with a
 : single space, which puts a stray leading blank on every row after the first.
 :
 : @param $matrix map whose values are maps of cell values
 : @return a text node holding the CSV, or the empty sequence for an empty matrix
 :)
declare function local:csv($matrix){
  let $nl := "&#10;"
  let $lines :=
    for $x in map:keys($matrix)
      let $row := map:get($matrix, $x)
      order by xs:int($x)
      return fn:string-join(for $y in map:keys($row)
        order by xs:int($y)
        return xs:string(map:get($row, $y))
      , ",") || $nl
  return
    (: keep the empty-matrix result an empty sequence, as before :)
    if (fn:empty($lines)) then ()
    else text{ fn:string-join($lines, "") }
};

(:~
 : Placeholder for the real similarity formula.
 :
 : @param $x   row key
 : @param $y   column key
 : @param $seq the full key sequence, passed through in case the formula needs it (unused here)
 : @return a label string naming the pair ($x, $y)
 :)
declare function local:my-formula($x, $y, $seq){
  (: real computation goes here; the placeholder only labels the pair :)
  fn:concat("your-formula for ", xs:string($x), " and ", xs:string($y))
};

(:~
 : Renders a map-of-maps matrix as XML.
 :
 : Produces <matrix> containing one <row> per outer key and one <cell> per
 : inner key, both ordered numerically via xs:int. Each cell carries its
 : row key in @x and its column key in @y.
 :
 : @param $matrix map whose values are maps of cell values
 : @return a <matrix> element
 :)
declare function local:pretty($matrix){
  let $rows :=
    for $rowKey in map:keys($matrix)
    let $cells := map:get($matrix, $rowKey)
    order by xs:int($rowKey)
    return
      <row>{
        for $colKey in map:keys($cells)
        order by xs:int($colKey)
        return <cell x="{$rowKey}" y="{$colKey}">{map:get($cells, $colKey)}</cell>
      }</row>
  return <matrix>{$rows}</matrix>
};

(: Outer map: one entry per row key, each holding that row's inner map. :)
let $matrix := map:map()
(: Sample keys; note that "5" appears twice in the list. :)
let $numbers := "1,2,3,4,5,5,6,7,8"
let $seq := fn:tokenize($numbers, ",")

(: Fill the matrix. The map:put calls are run for their side effects, so the
   results are bound to the throwaway variable $_. :)
let $_ := for $x in $seq
    (: fresh inner map for row $x :)
    let $map := map:map()
    (: one cell per column key $y, computed by the placeholder formula :)
    let $_ := for $y in $seq
       return  map:put($map, $y, local:my-formula($x, $y, $seq))
    (: store the row under key $x :)
    return map:put($matrix, $x, $map)

(: Emit the XML rendering. NOTE(review): local:csv is declared above but not
   called here - presumably it can be swapped in for CSV output. :)
return local:pretty($matrix)

编辑:将CSV输出作为文本节点添加到结尾:

考虑一下MarkLogic中映射的威力

下面是在 MarkLogic 中用映射(map)表示矩阵的示例。我还加入了两样东西:一个作为公式占位符的函数(同时传入原始序列,以备分析时需要),以及一个演示如何访问“映射的映射”(map of maps)的小函数

xquery version "1.0-ml";

(:~
 : Serialises a map-of-maps matrix as CSV text.
 :
 : Outer keys are rows and inner keys are columns; both are ordered
 : numerically via xs:int. Each row is terminated by a newline.
 :
 : The rows are concatenated explicitly with fn:string-join. Handing a
 : sequence of strings straight to text{ } would separate the items with a
 : single space, which puts a stray leading blank on every row after the first.
 :
 : @param $matrix map whose values are maps of cell values
 : @return a text node holding the CSV, or the empty sequence for an empty matrix
 :)
declare function local:csv($matrix){
  let $nl := "&#10;"
  let $lines :=
    for $x in map:keys($matrix)
      let $row := map:get($matrix, $x)
      order by xs:int($x)
      return fn:string-join(for $y in map:keys($row)
        order by xs:int($y)
        return xs:string(map:get($row, $y))
      , ",") || $nl
  return
    (: keep the empty-matrix result an empty sequence, as before :)
    if (fn:empty($lines)) then ()
    else text{ fn:string-join($lines, "") }
};

(:~
 : Placeholder for the real similarity formula.
 :
 : @param $x   row key
 : @param $y   column key
 : @param $seq the full key sequence, passed through in case the formula needs it (unused here)
 : @return a label string naming the pair ($x, $y)
 :)
declare function local:my-formula($x, $y, $seq){
  (: real computation goes here; the placeholder only labels the pair :)
  fn:concat("your-formula for ", xs:string($x), " and ", xs:string($y))
};

(:~
 : Renders a map-of-maps matrix as XML.
 :
 : Produces <matrix> containing one <row> per outer key and one <cell> per
 : inner key, both ordered numerically via xs:int. Each cell carries its
 : row key in @x and its column key in @y.
 :
 : @param $matrix map whose values are maps of cell values
 : @return a <matrix> element
 :)
declare function local:pretty($matrix){
  let $rows :=
    for $rowKey in map:keys($matrix)
    let $cells := map:get($matrix, $rowKey)
    order by xs:int($rowKey)
    return
      <row>{
        for $colKey in map:keys($cells)
        order by xs:int($colKey)
        return <cell x="{$rowKey}" y="{$colKey}">{map:get($cells, $colKey)}</cell>
      }</row>
  return <matrix>{$rows}</matrix>
};

(: Outer map: one entry per row key, each holding that row's inner map. :)
let $matrix := map:map()
(: Sample keys; note that "5" appears twice in the list. :)
let $numbers := "1,2,3,4,5,5,6,7,8"
let $seq := fn:tokenize($numbers, ",")

(: Fill the matrix. The map:put calls are run for their side effects, so the
   results are bound to the throwaway variable $_. :)
let $_ := for $x in $seq
    (: fresh inner map for row $x :)
    let $map := map:map()
    (: one cell per column key $y, computed by the placeholder formula :)
    let $_ := for $y in $seq
       return  map:put($map, $y, local:my-formula($x, $y, $seq))
    (: store the row under key $x :)
    return map:put($matrix, $x, $map)

(: Emit the XML rendering. NOTE(review): local:csv is declared above but not
   called here - presumably it can be swapped in for CSV output. :)
return local:pretty($matrix)

$records[position()>$index]
怎么样?如果您能提供一个更完整的运行代码示例,并提供接收到的输出和预期的输出,那么会更容易提供帮助。
$records[position()>$index]
如何?如果您能提供一个更完整的可运行代码示例,并给出实际输出与预期输出,那么会更容易提供帮助。当我运行此代码时,您的查询返回了一个空序列。我已用在本地运行的示例更新了答案。我的 XQuery 引擎是 BaseX,因此我认为可能存在差异。我还硬编码了 CSV 解析成 XML 后可能的样子。我得到了一个结果。当我运行这段代码时,我得到了
您的查询返回了一个空序列
我用本地运行的一个示例进行了更新。我的xquery引擎使用baseX,因此我认为可能存在差异。我还硬编码了可能的CSV解析在xml中的样子。我得到一个结果。
 your-formula for 1 and 1,your-formula for 1 and 2,your-formula for 1 and 3,your-formula for 1 and 4,your-formula for 1 and 5,your-formula for 1 and 6,your-formula for 1 and 7,your-formula for 1 and 8
 your-formula for 2 and 1,your-formula for 2 and 2,your-formula for 2 and 3,your-formula for 2 and 4,your-formula for 2 and 5,your-formula for 2 and 6,your-formula for 2 and 7,your-formula for 2 and 8
 your-formula for 3 and 1,your-formula for 3 and 2,your-formula for 3 and 3,your-formula for 3 and 4,your-formula for 3 and 5,your-formula for 3 and 6,your-formula for 3 and 7,your-formula for 3 and 8
 your-formula for 4 and 1,your-formula for 4 and 2,your-formula for 4 and 3,your-formula for 4 and 4,your-formula for 4 and 5,your-formula for 4 and 6,your-formula for 4 and 7,your-formula for 4 and 8
 your-formula for 5 and 1,your-formula for 5 and 2,your-formula for 5 and 3,your-formula for 5 and 4,your-formula for 5 and 5,your-formula for 5 and 6,your-formula for 5 and 7,your-formula for 5 and 8
 your-formula for 6 and 1,your-formula for 6 and 2,your-formula for 6 and 3,your-formula for 6 and 4,your-formula for 6 and 5,your-formula for 6 and 6,your-formula for 6 and 7,your-formula for 6 and 8
 your-formula for 7 and 1,your-formula for 7 and 2,your-formula for 7 and 3,your-formula for 7 and 4,your-formula for 7 and 5,your-formula for 7 and 6,your-formula for 7 and 7,your-formula for 7 and 8
 your-formula for 8 and 1,your-formula for 8 and 2,your-formula for 8 and 3,your-formula for 8 and 4,your-formula for 8 and 5,your-formula for 8 and 6,your-formula for 8 and 7,your-formula for 8 and 8