PostgreSQL 未使用 GIN 索引,始终执行顺序扫描
标签:postgresql, indexing, full-text-search, tsvector
我已经创建了一个表来进行词元(token)搜索,表和索引的定义如下:
-- Search table: every column is nullable and no primary key or other
-- constraint is declared.
CREATE TABLE tsvector_business_objects (
    id             integer           NULL,
    "type"         character varying NULL,
    value          character varying NULL,
    label          character varying NULL,
    synonyms       text[]            NULL,
    -- Stored tsvector column; how it is populated is not shown here.
    label_tsvector tsvector          NULL
);

-- GIN index on the stored tsvector column.
CREATE INDEX label_tsvector_idx
    ON tsvector_business_objects
    USING gin (label_tsvector);

-- GIN index on the synonyms array (supports array operators such as @>).
CREATE INDEX lower_case_synonym_tsvector_business_objects
    ON tsvector_business_objects
    USING gin (synonyms);

-- Expression GIN indexes for full-text search on the lower-cased label and
-- value. A query must use the same expression for these to be applicable.
CREATE INDEX txt_label_tsvector_business_objects
    ON tsvector_business_objects
    USING gin (to_tsvector('english'::regconfig, lower((label)::text)));

CREATE INDEX txt_value_tsvector_business_objects
    ON tsvector_business_objects
    USING gin (to_tsvector('english'::regconfig, lower((value)::text)));

-- B-tree index on the raw type column.
CREATE INDEX type_tsvector_business_objects
    ON tsvector_business_objects
    USING btree (type);

-- Composite B-tree index on the lower-cased (type, value, label) triple.
CREATE INDEX type_value_label_lower_case_tsvector_business_objects
    ON tsvector_business_objects
    USING btree (
        lower((type)::text),
        lower((value)::text),
        lower((label)::text)
    );
我的数据中有一些类似 OID-0127820 的文本。当我使用 tsvector 搜索词元(token)OID 时,Postgres 总是使用顺序扫描,而不是使用 GIN 索引。
大约有 50 万条记录包含 OID 文本。但当我禁用顺序扫描时,索引就会被使用。
-- Reproduction of the slow search: sequential scans are left enabled, so the
-- planner is free to choose between a seq scan and the GIN indexes.
SET enable_seqscan = ON;

-- Each full-text predicate is paired with a numnode() guard: the @@ branch
-- applies when the parsed tsquery has at least one node, and the LIKE branch
-- applies when the tsquery is empty.
-- NOTE(review): lower(...) is compared against the upper-case literal 'OID'
-- (both in the LIKE patterns and in the equality tests); a lower-cased string
-- can never match it, so those branches look unreachable -- confirm intent.
EXPLAIN (ANALYZE, VERBOSE, BUFFERS, TIMING, COSTS)
SELECT
    type,
    value,
    label,
    synonyms,
    (
        (
            to_tsvector('english', lower(value)) @@ plainto_tsquery('english', 'OID')
            AND numnode(plainto_tsquery('english', 'OID')) > 0
        )
        OR (
            lower(value) LIKE '% OID %'
            AND numnode(plainto_tsquery('english', 'OID')) = 0
        )
        OR (
            (to_tsvector('english', lower(label))) @@ plainto_tsquery('english', 'OID')
            AND numnode(plainto_tsquery('english', 'OID')) > 0
        )
        OR (
            lower(label) LIKE '% OID %'
            AND numnode(plainto_tsquery('english', 'OID')) = 0
        )
    ) AS partial_value_label_match,
    (
        lower(value) = 'OID'
        OR lower(label) = 'OID'
    ) AS exact_value_label_match,
    (synonyms @> '{OID}') IS TRUE AS synonym_match
FROM tsvector_business_objects AS business_objects_alias
WHERE synonyms @> '{OID}'
    OR (
        to_tsvector('english', lower(value)) @@ plainto_tsquery('english', 'OID')
        AND numnode(plainto_tsquery('english', 'OID')) > 0
    )
    OR (
        lower(value) LIKE '% OID %'
        AND numnode(plainto_tsquery('english', 'OID')) = 0
    )
    OR (
        (to_tsvector('english', lower(label))) @@ plainto_tsquery('english', 'OID')
        AND numnode(plainto_tsquery('english', 'OID')) > 0
    )
    OR (
        lower(label) LIKE '% OID %'
        AND numnode(plainto_tsquery('english', 'OID')) = 0
    )
    OR (
        lower(value) = 'OID'
        OR lower(label) = 'OID'
    )
LIMIT 30;
QUERY PLAN |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
Limit (cost=0.00..41.91 rows=30 width=82) (actual time=3804.998..3805.163 rows=30 loops=1) |
Output: type, value, label, synonyms, (((to_tsvector('english'::regconfig, lower((value)::text)) @@ '''oid'''::tsquery) OR (to_tsvector('english'::regconfig, lower((label)::text)) @@ '''oid'''::tsquery))), (((lower((value)::text) = 'OID'::text) OR (lowe|
Buffers: shared hit=21217 |
-> Seq Scan on mdlz_performancebenchmarking.tsvector_business_objects business_objects_alias (cost=0.00..717278.98 rows=513425 width=82) (actual time=3804.997..3805.159 rows=30 loops=1) |
Output: type, value, label, synonyms, ((to_tsvector('english'::regconfig, lower((value)::text)) @@ '''oid'''::tsquery) OR (to_tsvector('english'::regconfig, lower((label)::text)) @@ '''oid'''::tsquery)), ((lower((value)::text) = 'OID'::text) OR (l|
Filter: ((business_objects_alias.synonyms @> '{OID}'::text[]) OR (to_tsvector('english'::regconfig, lower((business_objects_alias.value)::text)) @@ '''oid'''::tsquery) OR (to_tsvector('english'::regconfig, lower((business_objects_alias.label)::tex|
Rows Removed by Filter: 575042 |
Buffers: shared hit=21217 |
Planning time: 0.226 ms |
Execution time: 3805.210 ms
输出数据
type |value |label |synonyms|label_tsvector |partial_value_label_match|exact_value_label_match|synonym_match|
---------------------|-----------|-----------|--------|--------------------|-------------------------|-----------------------|-------------|
orderid_1621409737948|OID-0127820|OID-0127820|NULL |'-0127820':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0355880|OID-0355880|NULL |'-0355880':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0041048|OID-0041048|NULL |'-0041048':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0499716|OID-0499716|NULL |'-0499716':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0219268|OID-0219268|NULL |'-0219268':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0000560|OID-0000560|NULL |'-0000560':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0355656|OID-0355656|NULL |'-0355656':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0355628|OID-0355628|NULL |'-0355628':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0219380|OID-0219380|NULL |'-0219380':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0063896|OID-0063896|NULL |'-0063896':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0054740|OID-0054740|NULL |'-0054740':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0447020|OID-0447020|NULL |'-0447020':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0040964|OID-0040964|NULL |'-0040964':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0499744|OID-0499744|NULL |'-0499744':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0219800|OID-0219800|NULL |'-0219800':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0355180|OID-0355180|NULL |'-0355180':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0530348|OID-0530348|NULL |'-0530348':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0219996|OID-0219996|NULL |'-0219996':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0220024|OID-0220024|NULL |'-0220024':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0446936|OID-0446936|NULL |'-0446936':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0220108|OID-0220108|NULL |'-0220108':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0020692|OID-0020692|NULL |'-0020692':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0354872|OID-0354872|NULL |'-0354872':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0097496|OID-0097496|NULL |'-0097496':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0354648|OID-0354648|NULL |'-0354648':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0128268|OID-0128268|NULL |'-0128268':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0354536|OID-0354536|NULL |'-0354536':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0530432|OID-0530432|NULL |'-0530432':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0128324|OID-0128324|NULL |'-0128324':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0354256|OID-0354256|NULL |'-0354256':2 'oid':1|true |false |false |
random_page_cost 设置为 1.1。
我认为 Postgres 查询规划器判断使用顺序扫描会更快,但事实似乎并非如此。
回答:问题在于许多条件高度相关,因为它们测试的几乎是同一件事。优化器并不知道这一点,而是把它们当作统计上相互独立的条件,因此得出了错误的估计。
您可以尝试简化 WHERE 条件,或者升级到 PostgreSQL v14,该版本的一个提交引入了针对表达式的扩展统计信息。
另一个回答:一个根本问题是,规划器认为它在顺序扫描中很早就能找到 30 行,因此可以提前停止。这个判断是错误的,显然是因为满足条件的行很少出现在表的前面部分。仅靠改进选择性估计很难解决这个问题,因为即使行数估计完全准确,只要这些行在表中不是均匀分布的,规划器仍然会犯同样的错误。
评论:即使只有一个 tsvector 的 WHERE 子句,或者只有 (to_tsvector('english', lower(value)) @@ plainto_tsquery('english', 'OID') AND numnode(plainto_tsquery('english', 'OID')) > 0),无论用 AND 还是 OR,Postgres 都没有使用索引。
评论:我想我没有明白你想说什么。
评论:对不起,我的意思是:如果我只在 WHERE 子句中加入一个条件 (to_tsvector('english', lower(value)) @@ plainto_tsquery('english', 'OID') AND numnode(plainto_tsquery('english', 'OID')) > 0),仍然不会使用索引。
评论:我明白了。嗯,第二个条件是多余的,但这应该不是问题所在。也许 OID 在您的文本中是一个非常常见的值?
评论:是的,有超过 50 万条记录包含 OID。
type |value |label |synonyms|label_tsvector |partial_value_label_match|exact_value_label_match|synonym_match|
---------------------|-----------|-----------|--------|--------------------|-------------------------|-----------------------|-------------|
orderid_1621409737948|OID-0127820|OID-0127820|NULL |'-0127820':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0355880|OID-0355880|NULL |'-0355880':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0041048|OID-0041048|NULL |'-0041048':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0499716|OID-0499716|NULL |'-0499716':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0219268|OID-0219268|NULL |'-0219268':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0000560|OID-0000560|NULL |'-0000560':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0355656|OID-0355656|NULL |'-0355656':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0355628|OID-0355628|NULL |'-0355628':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0219380|OID-0219380|NULL |'-0219380':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0063896|OID-0063896|NULL |'-0063896':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0054740|OID-0054740|NULL |'-0054740':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0447020|OID-0447020|NULL |'-0447020':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0040964|OID-0040964|NULL |'-0040964':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0499744|OID-0499744|NULL |'-0499744':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0219800|OID-0219800|NULL |'-0219800':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0355180|OID-0355180|NULL |'-0355180':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0530348|OID-0530348|NULL |'-0530348':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0219996|OID-0219996|NULL |'-0219996':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0220024|OID-0220024|NULL |'-0220024':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0446936|OID-0446936|NULL |'-0446936':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0220108|OID-0220108|NULL |'-0220108':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0020692|OID-0020692|NULL |'-0020692':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0354872|OID-0354872|NULL |'-0354872':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0097496|OID-0097496|NULL |'-0097496':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0354648|OID-0354648|NULL |'-0354648':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0128268|OID-0128268|NULL |'-0128268':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0354536|OID-0354536|NULL |'-0354536':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0530432|OID-0530432|NULL |'-0530432':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0128324|OID-0128324|NULL |'-0128324':2 'oid':1|true |false |false |
orderid_1621409737948|OID-0354256|OID-0354256|NULL |'-0354256':2 'oid':1|true |false |false |