Elasticsearch:使用 custom_score 按嵌套子对象的时间戳排序
我是 Elasticsearch 新手,一直在绞尽脑汁想让这个排序正常工作。大致思路是:搜索电子邮件消息线程,每个线程包含嵌套的消息和嵌套的参与者。目标是在线程级别显示搜索结果,并针对执行搜索的参与者,按“最近接收时间”(last_received_at)或“最近发送时间”(last_sent_at)列排序,具体取决于他们所在的邮箱。 我的理解是,无法在多个嵌套子对象中按其中某一个子对象的值进行排序。因此,我看到有人建议使用带脚本的 custom_score 查询,然后按 _score 排序。我的计划是动态切换排序列,然后运行嵌套的 custom_score 查询,把其中一个参与者的日期作为评分返回。我注意到返回的分数存在一些问题:格式很奇怪(例如结尾总是有 4 个零),而且可能并不是我期望的日期。 下面是相关索引和查询的简化版本。如果有人有任何建议,我将不胜感激。仅供参考:我使用的是 Elasticsearch 0.20.6。 索引:
elasticsearch 使用自定义_分数按嵌套子级的时间戳排序,
elasticsearch,
elasticsearch,我对elasticsearch是个新手,一直在绞尽脑汁试图让这个分类工作正常进行。一般的想法是使用嵌套消息和嵌套参与者搜索电子邮件消息线程。目标是在线程级别显示搜索结果,根据执行搜索的参与者和“上一次收到”或“上一次发送”列进行排序,具体取决于它们所在的邮箱 我的理解是,不能在许多嵌套的子对象中按单个子对象的值进行排序。因此,为了做到这一点,我看到了一些关于使用脚本自定义_分数,然后根据分数排序的建议。我的计划是动态更改排序列,然后运行嵌套的自定义评分查询,该查询将其中一个参与者的日期作为评分返
{
  "mappings": {
    "message_thread": {
      "properties": {
        "id": {
          "type": "long"
        },
        "subject": {
          "dynamic": true,
          "properties": {
            "id": {
              "type": "long"
            },
            "name": {
              "type": "string"
            }
          }
        },
        "participants": {
          "dynamic": true,
          "properties": {
            "id": {
              "type": "long"
            },
            "name": {
              "type": "string"
            },
            "last_sent_at": {
              "format": "dateOptionalTime",
              "type": "date"
            },
            "last_received_at": {
              "format": "dateOptionalTime",
              "type": "date"
            }
          }
        },
        "messages": {
          "dynamic": true,
          "properties": {
            "sender": {
              "dynamic": true,
              "properties": {
                "id": {
                  "type": "long"
                }
              }
            },
            "id": {
              "type": "long"
            },
            "body": {
              "type": "string"
            },
            "created_at": {
              "format": "dateOptionalTime",
              "type": "date"
            },
            "recipient": {
              "dynamic": true,
              "properties": {
                "id": {
                  "type": "long"
                }
              }
            }
          }
        },
        "version": {
          "type": "long"
        }
      }
    }
  }
}
查询:
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "participants.id": 3785
          }
        },
        {
          "custom_score": {
            "query": {
              "filtered": {
                "query": {
                  "match_all": {}
                },
                "filter": {
                  "term": {
                    "participants.id": 3785
                  }
                }
              }
            },
            "params": {
              "sort_column": "participants.last_received_at"
            },
            "script": "doc[sort_column].value"
          }
        }
      ]
    }
  },
  "filter": {
    "bool": {
      "must": [
        {
          "term": {
            "messages.recipient.id": 3785
          }
        }
      ]
    }
  },
  "sort": [
    "_score"
  ]
}
解决方案:
感谢 @imotov,以下是最终结果。参与者在索引中没有被正确地定义为嵌套对象,而消息并不需要嵌套。此外,为了简化查询,参与者使用了 include_in_root(参与者记录很小,不会带来真正的体积问题),不过 @imotov 也提供了一个不使用它的示例。然后,他重新组织了 JSON 请求,改用 dis_max 查询。
# Start from a clean slate: drop any existing "test-idx" index before recreating it.
curl -XDELETE "localhost:9200/test-idx"
# Create "test-idx" with the mapping for the "message_thread" type.
#   - "participants" is mapped as type "nested" with "include_in_root": true.
#   - "messages" and "subject" are plain (non-nested) objects.
#   - The '\'' sequences below close the single-quoted shell string, emit one
#     literal single quote, and reopen the string, so the date format sent to
#     the server is yyyy-MM-dd'T'HH:mm:ss'Z'.
curl -XPUT "localhost:9200/test-idx" -d '{
"mappings": {
"message_thread": {
"properties": {
"id": {
"type": "long"
},
"messages": {
"properties": {
"body": {
"type": "string",
"analyzer": "standard"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd'\''T'\''HH:mm:ss'\''Z'\''"
},
"id": {
"type": "long"
},
"recipient": {
"dynamic": "true",
"properties": {
"id": {
"type": "long"
}
}
},
"sender": {
"dynamic": "true",
"properties": {
"id": {
"type": "long"
}
}
}
}
},
"messages_count": {
"type": "long"
},
"participants": {
"type": "nested",
"include_in_root": true,
"properties": {
"id": {
"type": "long"
},
"last_received_at": {
"type": "date",
"format": "yyyy-MM-dd'\''T'\''HH:mm:ss'\''Z'\''"
},
"last_sent_at": {
"type": "date",
"format": "yyyy-MM-dd'\''T'\''HH:mm:ss'\''Z'\''"
},
"name": {
"type": "string",
"analyzer": "standard"
}
}
},
"subject": {
"properties": {
"id": {
"type": "long"
},
"name": {
"type": "string"
}
}
}
}
}
}
}'
# Test thread 1 ("Test Thread"): one message from 87793 to 3785.
# Participant 3785 has a last_received_at but a null last_sent_at;
# participant 87793 is the reverse.
curl -XPUT "localhost:9200/test-idx/message_thread/1" -d '{
"id" : 1,
"subject" : {"name": "Test Thread"},
"participants" : [
{"id" : 87793, "name" : "John Smith", "last_received_at" : null, "last_sent_at" : "2010-10-27T17:26:58Z"},
{"id" : 3785, "name" : "David Jones", "last_received_at" : "2010-10-27T17:26:58Z", "last_sent_at" : null}
],
"messages" : [{
"id" : 1,
"body" : "This is a test.",
"sender" : { "id" : 87793 },
"recipient" : { "id" : 3785},
"created_at" : "2010-10-27T17:26:58Z"
}]
}'
# Test thread 2 ("Elastic"): a message from 57834 to 3785 and a reply back.
# Both participants carry both timestamps.
curl -XPUT "localhost:9200/test-idx/message_thread/2" -d '{
"id" : 2,
"subject" : {"name": "Elastic"},
"participants" : [
{"id" : 57834, "name" : "Paul Johnson", "last_received_at" : "2010-11-25T17:26:58Z", "last_sent_at" : "2010-10-25T17:26:58Z"},
{"id" : 3785, "name" : "David Jones", "last_received_at" : "2010-10-25T17:26:58Z", "last_sent_at" : "2010-11-25T17:26:58Z"}
],
"messages" : [{
"id" : 2,
"body" : "More testing of elasticsearch.",
"sender" : { "id" : 57834 },
"recipient" : { "id" : 3785},
"created_at" : "2010-10-25T17:26:58Z"
},{
"id" : 3,
"body" : "Reply message.",
"sender" : { "id" : 3785 },
"recipient" : { "id" : 57834},
"created_at" : "2010-11-25T17:26:58Z"
}]
}'
# Explicitly refresh the index before searching (presumably so the two
# documents indexed above are visible to the queries that follow).
curl -XPOST localhost:9200/test-idx/_refresh
# Print a newline so the refresh response is separated from the next output.
echo
# Using include in root
# Searches message_thread documents for "test" and orders them by a
# participant timestamp carried in _score:
#   - The nested query on path "participants" keeps only the participant with
#     id 3785 and, via custom_score, uses doc[sort_column].value as the score,
#     where sort_column is the "participants.last_received_at" param.
#     "score_mode": "max" takes the highest score among matching participants.
#   - The outer filter is a multi_match for "test" over subject.name,
#     participants.name and messages.body. It references participants.name
#     outside any nested query, which relies on "include_in_root" being set in
#     the mapping.
#   - "fields": [] requests no stored fields in the hits.
# NOTE(review): _score is a float, which may not have enough precision to hold
# a millisecond timestamp exactly - confirm ordering for timestamps that are
# close together.
curl "localhost:9200/test-idx/message_thread/_search?pretty=true" -d '{
"query": {
"filtered": {
"query": {
"nested": {
"path": "participants",
"score_mode": "max",
"query": {
"custom_score": {
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"participants.id": 3785
}
}
}
},
"params": {
"sort_column": "participants.last_received_at"
},
"script": "doc[sort_column].value"
}
}
}
},
"filter": {
"query": {
"multi_match": {
"query": "test",
"fields": ["subject.name", "participants.name", "messages.body"],
"operator": "and",
"use_dis_max": true
}
}
}
}
},
"sort": ["_score"],
"fields": []
}
'
# Not using include in root
# Same search as the include_in_root variant, but without depending on
# participant fields being copied to the root document:
#   - Scoring is unchanged: a nested custom_score query on "participants"
#     restricted to id 3785, scored by doc[sort_column].value
#     (sort_column = "participants.last_received_at"), score_mode "max".
#   - The text filter is a bool/should of three clauses: a match on
#     subject.name, a match on messages.body, and a match on the participant
#     name wrapped in its own nested query.
#   - Inside the nested query the field is addressed by its full path,
#     "participants.name", consistent with "participants.id" above; the bare
#     leaf name "name" is ambiguous with subject.name.
# NOTE(review): _score is a float, which may not have enough precision to hold
# a millisecond timestamp exactly - confirm ordering for timestamps that are
# close together.
curl "localhost:9200/test-idx/message_thread/_search?pretty=true" -d '{
"query": {
"filtered": {
"query": {
"nested": {
"path": "participants",
"score_mode": "max",
"query": {
"custom_score": {
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"participants.id": 3785
}
}
}
},
"params": {
"sort_column": "participants.last_received_at"
},
"script": "doc[sort_column].value"
}
}
}
},
"filter": {
"query": {
"bool": {
"should": [{
"match": {
"subject.name":"test"
}
}, {
"nested" : {
"path": "participants",
"query": {
"match": {
"participants.name":"test"
}
}
}
}, {
"match": {
"messages.body":"test"
}
}
]
}
}
}
}
},
"sort": ["_score"],
"fields": []
}
'
这里有几个问题。您询问的是嵌套对象,但参与者在映射中并未定义为嵌套对象。第二个可能的问题是 _score 的类型是 float,因此它可能没有足够的精度来表示时间戳。如果您能想办法把这个值放进 float 中,可以参考相关示例。不过,如果您正在开发一个新系统,升级到 0.90.0.Beta1 可能是更明智的做法,它支持按嵌套字段排序。 感谢您的反馈。您正确地指出了缺少嵌套对象的问题,我已经更新了上面的映射和查询。不过我仍然卡在这里,如果您还有其他建议,请告诉我。等查询能正常工作之后,我再处理浮点精度的问题。 我会尝试这样做。但我无法测试,因为您的映射看起来不像真正的 JSON,所以我没法把它应用到我的索引上。 谢谢您的帮助。我又回来继续尝试让它工作了。我试用了您的 gist,但现在似乎一条结果也没有。下面是我实际使用的、格式正确的 JSON 映射……索引:查询: 您能添加一些测试数据吗? 您的测试数据格式不正确,并且与映射不匹配。解决这两个问题后,一切似乎都正常: