MongoDB 的 limit(限制)、索引和排序顺序
标签:mongodb,mongodb-query。我正在努力了解MongoDB在过滤、排序和限制(limit)方面的性能。我有一个包含3200万个文档、大小为5GB的集合。我有这样一个查询:
db.report_meter_device_audit.find({
"$and": [{ "receivedTime": { "$gt": new ISODate("2020-12-31T00:00:00Z") } },
{ "receivedTime": { "$lt": new ISODate("2021-05-31T00:00:00Z") } }]
}).sort({ meterid: 1 }).limit(100);
并建立了这个索引:
(meterid:1 , receivedTime:-1)
此查询返回速度非常快,explain.executionStats显示:
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"receivedTime" : {
"$lt" : ISODate("2021-05-31T03:00:00.000+03:00")
}
},
{
"receivedTime" : {
"$gt" : ISODate("2020-12-31T02:00:00.000+02:00")
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"meterid" : 1,
"receivedTime" : -1
},
"indexName" : "meterid_time_idx",
"isMultiKey" : false,
"multiKeyPaths" : {
"meterid" : [ ],
"receivedTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"meterid" : [
"[MinKey, MaxKey]"
],
"receivedTime" : [
"[MaxKey, MinKey]"
]
}
}
}
},
.
.
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 100,
"executionTimeMillis" : 3,
"totalKeysExamined" : 100,
"totalDocsExamined" : 100,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 100,
"executionTimeMillisEstimate" : 0,
"works" : 101,
"advanced" : 100,
"needTime" : 0,
"needYield" : 0,
"saveState" : 3,
"restoreState" : 3,
"isEOF" : 1,
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"receivedTime" : {
"$lt" : ISODate("2021-05-31T03:00:00.000+03:00")
}
},
{
"receivedTime" : {
"$gt" : ISODate("2020-12-31T02:00:00.000+02:00")
}
}
]
},
"nReturned" : 100,
"executionTimeMillisEstimate" : 0,
"works" : 100,
"advanced" : 100,
"needTime" : 0,
"needYield" : 0,
"saveState" : 3,
"restoreState" : 3,
"isEOF" : 0,
"docsExamined" : 100,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 100,
"executionTimeMillisEstimate" : 0,
"works" : 100,
"advanced" : 100,
"needTime" : 0,
"needYield" : 0,
"saveState" : 3,
"restoreState" : 3,
"isEOF" : 0,
"keyPattern" : {
"meterid" : 1,
"receivedTime" : -1
},
"indexName" : "meterid_time_idx",
"isMultiKey" : false,
"multiKeyPaths" : {
"meterid" : [ ],
"receivedTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"meterid" : [
"[MinKey, MaxKey]"
],
"receivedTime" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 100,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"indexDef" : {
"indexName" : "meterid_time_idx",
"isMultiKey" : false,
"multiKeyPaths" : {
"meterid" : [ ],
"receivedTime" : [ ]
},
"keyPattern" : {
"meterid" : 1,
"receivedTime" : -1
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"direction" : "forward"
}
}
}
}
},
但是,如果我颠倒排序顺序({meterid:-1}),查询速度非常慢,我可以看到使用了相同的索引,但是扫描的文档数量非常大:
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"receivedTime" : {
"$lt" : ISODate("2021-05-31T03:00:00.000+03:00")
}
},
{
"receivedTime" : {
"$gt" : ISODate("2020-12-31T02:00:00.000+02:00")
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"meterid" : 1,
"receivedTime" : -1
},
"indexName" : "meterid_time_idx",
"isMultiKey" : false,
"multiKeyPaths" : {
"meterid" : [ ],
"receivedTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"meterid" : [
"[MaxKey, MinKey]"
],
"receivedTime" : [
"[MinKey, MaxKey]"
]
}
}
}
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 100,
"executionTimeMillis" : 185544,
"totalKeysExamined" : 10292501,
"totalDocsExamined" : 10292501,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 100,
"executionTimeMillisEstimate" : 19783,
"works" : 10292502,
"advanced" : 100,
"needTime" : 10292401,
"needYield" : 0,
"saveState" : 229475,
"restoreState" : 229475,
"isEOF" : 1,
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"receivedTime" : {
"$lt" : ISODate("2021-05-31T03:00:00.000+03:00")
}
},
{
"receivedTime" : {
"$gt" : ISODate("2020-12-31T02:00:00.000+02:00")
}
}
]
},
"nReturned" : 100,
"executionTimeMillisEstimate" : 19698,
"works" : 10292501,
"advanced" : 100,
"needTime" : 10292401,
"needYield" : 0,
"saveState" : 229475,
"restoreState" : 229475,
"isEOF" : 0,
"docsExamined" : 10292501,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 10292501,
"executionTimeMillisEstimate" : 1945,
"works" : 10292501,
"advanced" : 10292501,
"needTime" : 0,
"needYield" : 0,
"saveState" : 229475,
"restoreState" : 229475,
"isEOF" : 0,
"keyPattern" : {
"meterid" : 1,
"receivedTime" : -1
},
"indexName" : "meterid_time_idx",
"isMultiKey" : false,
"multiKeyPaths" : {
"meterid" : [ ],
"receivedTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"meterid" : [
"[MaxKey, MinKey]"
],
"receivedTime" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 10292501,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
},
即使我尝试添加这样的索引:
(meterid:-1 , receivedTime:-1)
我可以看出它没有被使用。以前的索引仍在使用
因此,问题是:为什么颠倒排序顺序后,同一个索引下的查询会变得如此之慢?为什么新建的索引没有被使用?
不幸的是,我无法给出令人满意的答案。理论上,Mongo可以使用同一个索引进行升序和降序排序,因为它可以双向扫描索引树,这意味着“新”索引有些冗余。我怀疑实际情况是,您的数据并不像您想象的那样“随机”,而且“较大的”
meterid
具有某些潜在的行为特征(使用频率较低、已弃用……),
这意味着查找100个匹配的文档更加困难,mongo被迫扫描更多文档以找到它们
一个好的健全性检查是取出“最大的”那些meterid,验证我的推测是否正确,即这些meterid在该日期范围内没有(或只有很少)数据。
如果你能在这种情况下提供更新,我将不胜感激,因为我有兴趣了解更多
至于您可以做些什么来改进此特定查询,请构建一个新索引
(receivedTime:-1,meterid:-1)
或(receivedTime:-1,meterid:1)
,这将(假设我是对的)使查询需要检查的数据量更小,并且可能会提高“较慢”查询的性能。您可以使用 hint 强制Mongo使用特定索引,这将有助于证实任何可能的解释。什么是meterid
?它是随机生成的还是单调递增的?实际上它是给定的电表编号,所以可以说它是随机生成的。谢谢。我的思路与最后一段相同。由于receivedTime
成为前缀索引,mongodb将能够在给定的检查范围内优化提取文档。但是,出于同样的原因,meterid
上的sort
操作将需要更长的时间才能执行。但是,由于排序操作在较小的数据集上工作,因此总体性能可能会更快。使用反向索引的提示(hint)——(meterid:-1,receivedTime:-1)——可以提高降序排序({meterid:-1})的性能。我不喜欢使用提示,更不喜欢依靠查询计划器,所以查询计划器的这种行为(behaviour)我仍然不太明白。