PostgreSQL 和 MongoDB 中的 WHERE IN 条件
标签:mongodb,postgresql。对不起,我的英语不好。我在 Postgres 中有一个表,如下所示:
CREATE TABLE report
(
id serial primary key,
sensor_id integer,
datetime timestamp,
...(many other fields)
);
select *
from report
where sensor_id in (10234, 10236, 10250, 10251, 10253, 10255, 10262, 10263, 10264, 10265, 10267)
order by datetime desc
limit 100;
mongo中的集合存储相同的数据。
Postgres中的索引(我分别创建了所有索引,但结果相同):
Mongo中的索引:
...createIndex({"sensor_id" : 1, "datetime" : -1})
表/集合中有超过 1 亿条记录
所以,我的问题是为什么在postgres中进行查询,比如:
CREATE TABLE report
(
id serial primary key,
sensor_id integer,
datetime timestamp,
...(many other fields)
);
select *
from report
where sensor_id in (10234, 10236, 10250, 10251, 10253, 10255, 10262, 10263, 10264, 10265, 10267)
order by datetime desc
limit 100;
执行数秒(请参见下面的解释),但在mongo中有类似的请求:
db.getCollection('report').find({sensor_id: {$in: [
ObjectId("5f1833aa44be73ead6de88c9"),
ObjectId("5f1833ad44be73ead6de8977"),
ObjectId("5f18a0867c9195aa25ac0bb0"),
ObjectId("5f18a0867c9195aa25ac0bb5"),
ObjectId("5f18a0a47c9195aa25ac1241"),
ObjectId("5f1927b87c9195aa25b5203c"),
ObjectId("5f1a5f617c9195aa25e2d4df"),
ObjectId("5f1a5f647c9195aa25e2db3b"),
ObjectId("5f1a5f647c9195aa25e2db42"),
ObjectId("5f1a5f647c9195aa25e2db59"),
ObjectId("5f1a69027c9195aa25e4f84d")
]}}).sort({datetime:-1}).limit(100)
在几毫秒内就能返回结果
如何改进Postgres查询或索引以获得相同的结果?我不相信postgres不能像mongo那样快速完成这个查询。
Mongo解释:
db.getCollection('report').find({sensor_id: {$in: [
ObjectId("5f1833aa44be73ead6de88c9"),
ObjectId("5f1833ad44be73ead6de8977"),
ObjectId("5f18a0867c9195aa25ac0bb0"),
ObjectId("5f18a0867c9195aa25ac0bb5"),
ObjectId("5f18a0a47c9195aa25ac1241"),
ObjectId("5f1927b87c9195aa25b5203c"),
ObjectId("5f1a5f617c9195aa25e2d4df"),
ObjectId("5f1a5f647c9195aa25e2db3b"),
ObjectId("5f1a5f647c9195aa25e2db42"),
ObjectId("5f1a5f647c9195aa25e2db59"),
ObjectId("5f1a69027c9195aa25e4f84d")
]}}).sort({datetime:-1}).limit(100).explain()
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "patrol.report",
"indexFilterSet" : false,
"parsedQuery" : {
"sensor_id" : {
"$in" : [
ObjectId("5f1833aa44be73ead6de88c9"),
ObjectId("5f1833ad44be73ead6de8977"),
ObjectId("5f18a0867c9195aa25ac0bb0"),
ObjectId("5f18a0867c9195aa25ac0bb5"),
ObjectId("5f18a0a47c9195aa25ac1241"),
ObjectId("5f1927b87c9195aa25b5203c"),
ObjectId("5f1a5f617c9195aa25e2d4df"),
ObjectId("5f1a5f647c9195aa25e2db3b"),
ObjectId("5f1a5f647c9195aa25e2db42"),
ObjectId("5f1a5f647c9195aa25e2db59"),
ObjectId("5f1a69027c9195aa25e4f84d")
]
}
},
"queryHash" : "C4968B0D",
"planCacheKey" : "58DC1ACB",
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "SORT_MERGE",
"sortPattern" : {
"datetime" : -1.0
},
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f1833aa44be73ead6de88c9'), ObjectId('5f1833aa44be73ead6de88c9')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f1833ad44be73ead6de8977'), ObjectId('5f1833ad44be73ead6de8977')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f18a0867c9195aa25ac0bb0'), ObjectId('5f18a0867c9195aa25ac0bb0')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f18a0867c9195aa25ac0bb5'), ObjectId('5f18a0867c9195aa25ac0bb5')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f18a0a47c9195aa25ac1241'), ObjectId('5f18a0a47c9195aa25ac1241')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f1927b87c9195aa25b5203c'), ObjectId('5f1927b87c9195aa25b5203c')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f1a5f617c9195aa25e2d4df'), ObjectId('5f1a5f617c9195aa25e2d4df')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f1a5f647c9195aa25e2db3b'), ObjectId('5f1a5f647c9195aa25e2db3b')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f1a5f647c9195aa25e2db42'), ObjectId('5f1a5f647c9195aa25e2db42')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f1a5f647c9195aa25e2db59'), ObjectId('5f1a5f647c9195aa25e2db59')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : 1,
"datetime" : -1
},
"indexName" : "sensor_datetime",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : [],
"datetime" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f1a69027c9195aa25e4f84d'), ObjectId('5f1a69027c9195aa25e4f84d')]"
],
"datetime" : [
"[MaxKey, MinKey]"
]
}
}
]
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"datetime" : -1.0
},
"limitAmount" : 100,
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"sensor_id" : -1
},
"indexName" : "sensor_id",
"isMultiKey" : false,
"multiKeyPaths" : {
"sensor_id" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"sensor_id" : [
"[ObjectId('5f1a69027c9195aa25e4f84d'), ObjectId('5f1a69027c9195aa25e4f84d')]",
"[ObjectId('5f1a5f647c9195aa25e2db59'), ObjectId('5f1a5f647c9195aa25e2db59')]",
"[ObjectId('5f1a5f647c9195aa25e2db42'), ObjectId('5f1a5f647c9195aa25e2db42')]",
"[ObjectId('5f1a5f647c9195aa25e2db3b'), ObjectId('5f1a5f647c9195aa25e2db3b')]",
"[ObjectId('5f1a5f617c9195aa25e2d4df'), ObjectId('5f1a5f617c9195aa25e2d4df')]",
"[ObjectId('5f1927b87c9195aa25b5203c'), ObjectId('5f1927b87c9195aa25b5203c')]",
"[ObjectId('5f18a0a47c9195aa25ac1241'), ObjectId('5f18a0a47c9195aa25ac1241')]",
"[ObjectId('5f18a0867c9195aa25ac0bb5'), ObjectId('5f18a0867c9195aa25ac0bb5')]",
"[ObjectId('5f18a0867c9195aa25ac0bb0'), ObjectId('5f18a0867c9195aa25ac0bb0')]",
"[ObjectId('5f1833ad44be73ead6de8977'), ObjectId('5f1833ad44be73ead6de8977')]",
"[ObjectId('5f1833aa44be73ead6de88c9'), ObjectId('5f1833aa44be73ead6de88c9')]"
]
}
}
}
}
}
]
},
"serverInfo" : {
"host" : "mongo",
"port" : 27017,
"version" : "4.2.12",
"gitVersion" : "5593fd8e33b60c75802edab304e23998fa0ce8a5"
},
"ok" : 1.0
}
Postgres 解释:
explain (analyze, buffers)
select *
from report
where sensor_id in (10234, 10236, 10250, 10251, 10253, 10255, 10262, 10263, 10264, 10265, 10267)
order by datetime desc
limit 100;
Limit (cost=0.57..3984.27 rows=100 width=312) (actual time=147481.199..154733.025 rows=100 loops=1)
Buffers: shared hit=16330933 read=631496
-> Index Scan using report_datetime on report (cost=0.57..69625427.65 rows=1747758 width=312) (actual time=147481.195..154732.770 rows=100 loops=1)
Filter: (sensor_id = ANY ('{10234,10236,10250,10251,10253,10255,10262,10263,10264,10265,10267}'::integer[]))
Rows Removed by Filter: 17276154
Buffers: shared hit=16330933 read=631496
Planning:
Buffers: shared hit=128 read=3
Planning Time: 14.662 ms
Execution Time: 154733.156 ms
补充问题。当我使用相同的参数再次运行 Postgres 查询时,它执行得很快,但只要添加/删除/替换任何一个 sensor_id,查询就会再次变慢。Mongo 对任何参数都能快速响应。例如,第二次运行 Postgres 查询(但仍然太慢):
Postgres配置(Mongo有默认配置):
我认为关键的区别在于检查的行数。Mongod 使用索引 {"sensor_id": 1, "datetime": -1} 对 $in 数组中的每个不同值进行单独扫描;由于有 limit,每次扫描在找到 100 个匹配项后就可以停止,总共检查 1100 个键(您可以在 mongo 中运行 explain 来验证该数字)。这 11 个列表中的每一个都已按 datetime 排序,因此它会将结果合并在一起,并返回前 100 条。
Postgres 看起来是在使用 report 表上的索引 (datetime desc, sensor_id),我相信这意味着它正在按 datetime 顺序扫描所有值,并从中选出 sensor_id 与某个输入值匹配的行。解释显示它检查的行数为 rows=1747758,这比 mongo 可能检查的 1100 行多得多。
第二次运行检查了相同数量的行,但完成速度要快得多,这表明大部分所需数据已经在缓存中。查询执行时间的差异是因为第一次执行必须从磁盘读取更多 8kB 的块:请比较 Buffers 行中的 shared read=631496 和 shared read=30359。
PostgreSQL 决定不对 WHERE 条件使用索引,而是使用支持 ORDER BY 的索引。请注意,由于条件中使用了 IN,因此无法用同一个索引同时支持 WHERE 条件和 ORDER BY——这只适用于使用 = 作为比较运算符的 WHERE 条件。
因此,PostgreSQL 必须做出选择,而且它可能做出了错误的选择:因为它的统计数据告诉优化器有许多行满足 WHERE 条件,它决定按 ORDER BY 的顺序读取行,并丢弃与 WHERE 条件不匹配的行,直到找到 100 个结果行。不幸的是,匹配的行似乎并不靠近扫描的起始位置,PostgreSQL 必须扫描许多行(Rows Removed by Filter: 17276154)。
要使其对 WHERE 条件使用索引扫描,请修改 ORDER BY 子句,使 PostgreSQL 无法对其使用索引:
ORDER BY datetime + INTERVAL '0 seconds' DESC
因为这里没有多列索引,所以最好的索引是
CREATE INDEX ON report (sensor_id);
好的。数据已加载
对于查询“WHERE sensor_id=constant ORDER BY datetime DESC”,postgres可以选择最佳计划,即扫描(sensor_id,datetime DESC)上的索引,该索引以正确的顺序给出行,这意味着可以获取100行,然后由于LIMIT子句而停止,并且完成了,它不会获取更多行
但是对于查询“WHERE sensor_id IN(几个值)ORDER BY datetime DESC”,它不知道如何执行该操作,因此它将获取所需sensor_id的所有行,然后对它们进行排序,并应用限制。这意味着它将获取比需要多得多的行,这意味着大量的IO,再加上排序需要时间
因此,诀窍是向它提供一个可以应用第一个(快速)计划的查询。但这种查询只能处理 sensor_id 的一个值,因此让我们把它放进一个"循环"中,以处理列表中的所有值:
explain -- (analyze, buffers)
select *
from (VALUES (87),(116),(71),(68),(51),(70),(52),(80),(132),(84),(92),(101)) sids
CROSS JOIN LATERAL (SELECT * FROM report WHERE sensor_id=sids.column1 ORDER BY datetime DESC LIMIT 100) o
ORDER BY datetime DESC limit 100;
LATERAL 联接使被联接的子查询依赖于前面的表;这里前面的"表"只是一个 VALUES 列表,但也可以是任何表。这意味着将对 VALUES 中的每一行执行一次子查询,这正是这里所需要的。对于每个 sensor_id,它将按 datetime desc 获取 100 个最新条目。
注意,LIMIT 必须复制到这个依赖子查询中。Postgres 目前还不会自己做这件事。但是我们知道,如果整个结果有 "ORDER BY x LIMIT 100",那么每个子查询也可以使用 "ORDER BY x LIMIT 100",并且不会漏掉任何行。
因此,这给出了以下计划,这是非常快的:
Limit (cost=3842.00..3842.25 rows=100 width=81) (actual time=16.205..16.241 rows=100 loops=1)
Buffers: shared hit=163 read=1069
-> Sort (cost=3842.00..3845.00 rows=1200 width=81) (actual time=16.204..16.224 rows=100 loops=1)
Sort Key: report.datetime DESC
Sort Method: top-N heapsort Memory: 53kB
Buffers: shared hit=163 read=1069
-> Nested Loop (cost=0.57..3796.14 rows=1200 width=81) (actual time=0.059..15.051 rows=1200 loops=1)
Buffers: shared hit=163 read=1069
-> Values Scan on "*VALUES*" (cost=0.00..0.15 rows=12 width=4) (actual time=0.001..0.028 rows=12 loops=1)
-> Limit (cost=0.57..314.33 rows=100 width=77) (actual time=0.031..1.184 rows=100 loops=12)
Buffers: shared hit=163 read=1069
-> Index Scan using report_datetime_1 on report (cost=0.57..2575446.78 rows=820820 width=77) (actual time=0.029..1.151 rows=100 loops=12)
Index Cond: (sensor_id = "*VALUES*".column1)
Buffers: shared hit=163 read=1069
Planning Time: 0.238 ms
Execution Time: 16.321 ms
第一次执行查询时速度较慢(下一次需要3毫秒),因为缓存不是热的,所以它必须执行一些IO来获取堆页:“Buffers:shared hit=163 read=1069”--重要的是它必须读取1069个缓冲区,所以是1069个随机IO。在SSD上也可以,但在随机访问时iops为100的硬盘上,需要10秒钟
这有点浪费,因为它会扔掉大部分行。每个子查询返回100行,我为sensor_id设置了12个值,因此将获取1200行,但在最终排序之后只保留100行,这意味着1100个堆获取IO被浪费。最好只进行索引扫描
我认为(sensor_id,timestamp)或多或少是唯一的,所以让我们取而代之的是取回它,然后只在应用了最终限制之后取回整行
WITH p AS (
SELECT sensor_id, datetime FROM (
VALUES (87),(116),(71),(68),(51),(70),(52),(80),(132),(84),(92),(101)) sids
CROSS JOIN LATERAL (
SELECT sensor_id,datetime FROM report WHERE sensor_id=sids.column1 ORDER BY datetime DESC LIMIT 100) o
ORDER BY datetime DESC limit 100)
SELECT r.* FROM p JOIN report r USING (sensor_id,datetime)
ORDER BY datetime DESC limit 100;
这会在子查询中执行仅索引扫描(Index Only Scan),从而显著减少 IO 量。如果您以一种相当别扭的方式编写查询,则可以在 PostgreSQL 中获得 Merge Append 计划。如果能够直接这样写就好了:
WITH p AS (
SELECT sensor_id, datetime FROM (
VALUES (87),(116),(71),(68),(51),(70),(52),(80),(132),(84),(92),(101)) sids
CROSS JOIN LATERAL (
SELECT sensor_id,datetime FROM report WHERE sensor_id=sids.column1 ORDER BY datetime DESC LIMIT 100) o
ORDER BY datetime DESC limit 100)
SELECT r.* FROM p JOIN report r USING (sensor_id,datetime)
ORDER BY datetime DESC limit 100;
(select * from report where sensor_id=10234 order by datetime desc limit 100)
UNION ALL
(select * from report where sensor_id=10236 order by datetime desc limit 100)
UNION ALL
(select * from report where sensor_id=10250 order by datetime desc limit 100)
UNION ALL
(select * from report where sensor_id=10251 order by datetime desc limit 100)
/* ... */
order by datetime desc limit 100