MongoDB:基于复合文本索引和时间戳范围的查询(文本搜索索引)
标签:mongodb, mongodb-query, full-text-search, query-optimization, compound-index
我有一个集合(collection),总共有6500万条这样的记录:
{
"_id" : ObjectId("5e0b814660da38d499ecf178"),
"brands" : null,
"client_id" : null,
"code_co_owner" : ",7359562, ",
"code_segment" : "7359562",
"core" : "",
"created" : "01-01-2020",
"created_full" : "01-01-2020 00:00:27",
"created_int" : NumberLong(1577811627),
"email" : ",phamthanhlam17_gmail_com, "
.....
}
我在(email,created_int)上建立了一个复合索引:{"email": "text", "created_int": -1},用于按 email 进行文本搜索,并按 created_int 的范围进行筛选
但我发现它的搜索性能很差
我尝试对查询使用 explain():
db.getCollection('profile_20201').explain().find({"$text":{"$search":"phamthanhlam17_gmail_com"},
"created_int":{"$lte":1585627013, "$gte":1583035013}}).count()
结果是:
{
"queryPlanner" : {
"plannerVersion" : 1,
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"created_int" : {
"$lte" : 1585627013.0
}
},
{
"created_int" : {
"$gte" : 1583035013.0
}
},
{
"$text" : {
"$search" : "phamthanhlam17_gmail_com",
"$language" : "english",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
}
]
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "TEXT",
"indexPrefix" : {},
"indexName" : "email_text_created_int_-1",
"parsedTextQuery" : {
"terms" : [
"phamthanhlam17_gmail_com"
],
"negatedTerms" : [],
"phrases" : [],
"negatedPhrases" : []
},
"textIndexVersion" : 3,
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "OR",
"filter" : {
"$and" : [
{
"created_int" : {
"$lte" : 1585627013.0
}
},
{
"created_int" : {
"$gte" : 1583035013.0
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1,
"created_int" : -1.0
},
"indexName" : "email_text_created_int_-1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {}
}
}
}
}
}
},
"rejectedPlans" : []
},
"serverInfo" : {
},
"ok" : 1.0
}
使用 explain("executionStats") 的输出是:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "namespace",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"created_int" : {
"$lte" : 1585627013.0
}
},
{
"created_int" : {
"$gte" : 1583035013.0
}
},
{
"$text" : {
"$search" : "phamthanhlam17_gmail_com",
"$language" : "english",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
}
]
},
"winningPlan" : {
"stage" : "COUNT",
"inputStage" : {
"stage" : "TEXT",
"indexPrefix" : {},
"indexName" : "email_text_created_int_-1",
"parsedTextQuery" : {
"terms" : [
"phamthanhlam17_gmail_com"
],
"negatedTerms" : [],
"phrases" : [],
"negatedPhrases" : []
},
"textIndexVersion" : 3,
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "OR",
"filter" : {
"$and" : [
{
"created_int" : {
"$lte" : 1585627013.0
}
},
{
"created_int" : {
"$gte" : 1583035013.0
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1,
"created_int" : -1.0
},
"indexName" : "email_text_created_int_-1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {}
}
}
}
}
}
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 1499057,
"totalKeysExamined" : 72544123,
"totalDocsExamined" : 39448083,
"executionStages" : {
"stage" : "COUNT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 1483861,
"works" : 72544124,
"advanced" : 0,
"needTime" : 72544123,
"needYield" : 0,
"saveState" : 578233,
"restoreState" : 578233,
"isEOF" : 1,
"invalidates" : 0,
"nCounted" : 39448083,
"nSkipped" : 0,
"inputStage" : {
"stage" : "TEXT",
"nReturned" : 39448083,
"executionTimeMillisEstimate" : 1475831,
"works" : 72544124,
"advanced" : 39448083,
"needTime" : 33096040,
"needYield" : 0,
"saveState" : 578233,
"restoreState" : 578233,
"isEOF" : 1,
"invalidates" : 0,
"indexPrefix" : {},
"indexName" : "email_text_created_int_-1",
"parsedTextQuery" : {
"terms" : [
"phamthanhlam17_gmail_com"
],
"negatedTerms" : [],
"phrases" : [],
"negatedPhrases" : []
},
"textIndexVersion" : 3,
"inputStage" : {
"stage" : "TEXT_MATCH",
"nReturned" : 39448083,
"executionTimeMillisEstimate" : 1473041,
"works" : 72544124,
"advanced" : 39448083,
"needTime" : 33096040,
"needYield" : 0,
"saveState" : 578233,
"restoreState" : 578233,
"isEOF" : 1,
"invalidates" : 0,
"docsRejected" : 0,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 39448083,
"executionTimeMillisEstimate" : 1465951,
"works" : 72544124,
"advanced" : 39448083,
"needTime" : 33096040,
"needYield" : 0,
"saveState" : 578233,
"restoreState" : 578233,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 39448083,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "OR",
"filter" : {
"$and" : [
{
"created_int" : {
"$lte" : 1585627013.0
}
},
{
"created_int" : {
"$gte" : 1583035013.0
}
}
]
},
"nReturned" : 39448083,
"executionTimeMillisEstimate" : 439664,
"works" : 72544124,
"advanced" : 39448083,
"needTime" : 33096040,
"needYield" : 0,
"saveState" : 578233,
"restoreState" : 578233,
"isEOF" : 1,
"invalidates" : 0,
"dupsTested" : 72544123,
"dupsDropped" : 0,
"recordIdsForgotten" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 72544123,
"executionTimeMillisEstimate" : 291188,
"works" : 72544124,
"advanced" : 72544123,
"needTime" : 0,
"needYield" : 0,
"saveState" : 578233,
"restoreState" : 578233,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"_fts" : "text",
"_ftsx" : 1,
"created_int" : -1.0
},
"indexName" : "email_text_created_int_-1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {},
"keysExamined" : 72544123,
"seeks" : 1,
"dupsTested" : 72544123,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
}
}
},
"serverInfo" : {
},
"ok" : 1.0
}
So, does the index cover the query?
Or which index will give me better performance for this problem?
Thank you.
看起来你创建了一个包含文本索引键的复合索引。但 MongoDB 官方文档中说:
复合索引可以包括文本索引键以及升序/降序索引键。但是,这些复合索引有以下限制:
- 复合文本索引不能包括任何其他特殊索引类型,例如多键索引或地理空间索引字段
- 如果复合文本索引包含文本索引键前面的键,则要执行$text搜索,查询谓词必须包含前面键上的相等匹配条件(您在这里使用的是范围查询)
- 创建复合文本索引时,必须在索引规范文档中相邻列出所有文本索引键
所以,这是第一个问题
接下来,我想让您看看这篇文档(原文此处的链接已丢失),它将帮助您理解查询是如何使用复合索引的
希望这有助于您理解问题:)
评论:
- 请使用 explain("executionStats") 添加查询输出。
- 抱歉,执行统计数据太长,无法添加到评论中,因此我已放在此链接中(原链接已丢失)。谢谢。
- 您可以更新您的问题。
- 谢谢,我刚刚更新了问题。
- 谢谢您的回答。所以,如果我必须在一个时间戳范围内使用文本索引进行搜索查询,文本索引就不支持这种用途吗?我还想问一下,对于这类索引,还有其他解决办法吗?