如何在sqlite中使用where子句在表上创建部分索引?

如何在sqlite中使用where子句在表上创建部分索引?,sqlite,group-by,distinct,rdbms,Sqlite,Group By,Distinct,Rdbms,我有一张200万行的桌子。我已经在(唯一列的表达式)上创建了索引。该表达式将列的值映射到较小的数字空间,这意味着该表达式的结果不是唯一的。现在我想在(该表达式的DISTINCT)上创建索引。我希望再次在表达式上创建索引的原因是DISTINCT和GROUP BY查询运行缓慢(~900ms) 我需要的是这个 CREATE INDEX idx ON mytable(DISTINCT myColumn|9223372036854775806); //invalid 我已经知道的是 CREA

我有一张包含200万行的表。我已经在(唯一列的表达式)上创建了索引。该表达式将列的值映射到较小的数值空间,这意味着该表达式的结果不是唯一的。现在我想在(
该表达式的DISTINCT
)上创建索引。我之所以想在该表达式上再建一个索引,是因为DISTINCT和GROUP BY查询运行缓慢(约900ms)。

我需要的是这个

CREATE INDEX idx ON mytable(DISTINCT myColumn|9223372036854775806);       -- invalid: DISTINCT inside an index definition is a syntax error
我已经知道的是

-- Expression index over the bitwise-OR-masked value of myColumn.
CREATE INDEX idx
    ON mytable (myColumn | 9223372036854775806);

我的最终目标是让针对该表达式的DISTINCT / GROUP BY查询运行得更快。

DISTINCT只能作为SELECT的一部分使用,即用于去除重复的行。

与DISTINCT相对的是ALL,它是默认值,因此您很少会看到显式写出的ALL。

索引必须为它所覆盖的每一行都包含一个条目,因此DISTINCT会违背索引的目的,因为所覆盖的某些行将被忽略。

CREATE INDEX idx ON mytable(myColumn | 9223372036854775806)
所做的,是把索引拆分为两个主要的组/分支,因为(对非负值而言)与
9223372036854775806
按位或的结果只会是
9223372036854775806
或
9223372036854775807
,因此DISTINCT(通常)只会得到两行,例如使用

-- DISTINCT applies to the whole selected row, so only the masked expression is
-- selected here; selecting mycolumn as well would keep one row per distinct mycolumn.
SELECT DISTINCT myColumn|9223372036854775806 AS x FROM mytable;
结果(根据下面的测试代码,从填充有100万行随机值的表中):-

  • 即只有2行,与表中的行数无关
    • (除非行数非常少,例如只有一行,或者
      myColumn | 9223372036854775806
      的所有值结果都相同)
部分索引:部分索引是通过WHERE子句减少被索引行数的索引;只有当查询的WHERE子句满足两条规则之一(即能够推出索引的WHERE条件)时,查询计划器才可能选用该索引。

从理论上讲,您可以使用(只需固定两行):-

  • 但是,部分索引的WHERE子句中不能包含子查询(例如 WHERE myColumn = (SELECT DISTINCT ...)),因此这种写法行不通。
因此,您似乎希望确定创建索引的方法,例如,您可以使用以下方法将索引拆分为10个部分索引:-

-- Ten partial indexes, each covering one range slice of width 9223372036854775806 / 10.
-- Every bound is a multiple of that width: dividing by 20, 30, ... would make the
-- upper bound smaller than the lower bound and leave the slice empty.
CREATE INDEX idx10 ON mytable(mycolumn) WHERE mycolumn < 9223372036854775806 / 10;
CREATE INDEX idx20 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 AND mycolumn < 9223372036854775806 / 10 * 2;
CREATE INDEX idx30 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 2 AND mycolumn < 9223372036854775806 / 10 * 3;
-- ...... and so on for idx40 through idx90
CREATE INDEX idx100 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 9;
在mytable(mycolumn)上创建索引idx10,其中mycolumn<9223372036854775806/10;
在mytable(mycolumn)上创建索引idx20,其中mycolumn>=9223372036854775806/10和mycolumn<9223372036854775806/20;
在mytable(mycolumn)上创建索引idx30,其中mycolumn>=9223372036854775806/20和mycolumn<9223372036854775806/30;
等等
在mytable(mycolumn)上创建索引idx100,其中mycolumn>=9223372036854775806/90;
在这种情况下:-

-- Ask the planner how it would run a range query on mycolumn.
EXPLAIN QUERY PLAN
SELECT mycolumn
FROM mytable
WHERE mycolumn < 9223372036854775806 / 10
LIMIT 100;
解释查询计划
从mytable中选择mycolumn,其中mycolumn<9223372036854775806/10 LIMIT 100;
结果:-

  • 即 idx10 已被查询计划器选中,因为它判定该SELECT的WHERE子句适合使用索引 idx10

  • 注意以上只是部分索引的一个示例,它可能不适合您的结果。

测试代码 以上部分内容是通过使用以下代码进行测试生成的,您可能会发现这些代码很有用:-

/*
 * Test script (SQLite): compares an expression index with a set of partial
 * indexes on mytable, loads 1,000,000 random rows, and inspects the plans
 * the query planner picks. The script drops everything it creates, so it
 * can be re-run as-is.
 */

/* Start from a clean slate */
DROP TABLE IF EXISTS mytable;
DROP INDEX IF EXISTS idx1;
DROP INDEX IF EXISTS idx2;
DROP INDEX IF EXISTS idx10;
DROP INDEX IF EXISTS idx20;
DROP INDEX IF EXISTS idx30;
DROP INDEX IF EXISTS idx40;
DROP INDEX IF EXISTS idx50;
DROP INDEX IF EXISTS idx60;
DROP INDEX IF EXISTS idx70;
DROP INDEX IF EXISTS idx80;
DROP INDEX IF EXISTS idx90;
DROP INDEX IF EXISTS idx100;

/* abs() keeps the random default non-negative */
CREATE TABLE IF NOT EXISTS mytable (
    id INTEGER PRIMARY KEY,
    mycolumn INTEGER DEFAULT (CAST(abs(random()) AS INTEGER))
);

/* Expression index on the masked column value */
CREATE INDEX idx1 ON mytable(myColumn|9223372036854775806);
-- CREATE INDEX idx2 ON mytable(DISTINCT myColumn|9223372036854775806); CANNOT BE USED AS DISTINCT is syntax error.
-- Left disabled: the WHERE clause of a partial index may not contain a subquery.
-- CREATE INDEX idx3 ON mytable(myColumn) WHERE myColumn =  (SELECT DISTINCT myColumn|9223372036854775806 FROM mytable);

/*
 * Ten partial indexes, one per range slice of width 9223372036854775806 / 10.
 * Every bound is a multiple of that width. Dividing by 20, 30, ... instead
 * would make each upper bound smaller than its lower bound (empty slice).
 */
CREATE INDEX idx10 ON mytable(mycolumn) WHERE mycolumn < 9223372036854775806 / 10;
CREATE INDEX idx20 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 AND mycolumn < 9223372036854775806 / 10 * 2;
CREATE INDEX idx30 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 2 AND mycolumn < 9223372036854775806 / 10 * 3;
CREATE INDEX idx40 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 3 AND mycolumn < 9223372036854775806 / 10 * 4;
CREATE INDEX idx50 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 4 AND mycolumn < 9223372036854775806 / 10 * 5;
CREATE INDEX idx60 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 5 AND mycolumn < 9223372036854775806 / 10 * 6;
CREATE INDEX idx70 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 6 AND mycolumn < 9223372036854775806 / 10 * 7;
CREATE INDEX idx80 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 7 AND mycolumn < 9223372036854775806 / 10 * 8;
CREATE INDEX idx90 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 8 AND mycolumn < 9223372036854775806 / 10 * 9;
CREATE INDEX idx100 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 9;

/* Load some data: the recursive CTE yields 1,000,000 rows of random values */
WITH RECURSIVE cte(x,y) AS (
    SELECT 1,CAST(abs(random()) AS INTEGER)
    UNION ALL SELECT x+1,CAST(abs(random()) AS INTEGER) FROM cte LIMIT 1000000
)
INSERT INTO mytable (mycolumn) SELECT y FROM cte;

/* Show some sample data */
SELECT mycolumn,myColumn|9223372036854775806 AS x FROM mytable LIMIT 100;

/* LOOK AT WHAT THE QUERY PLANNER COMES UP WITH */
-- Note: DISTINCT here applies to the (mycolumn, x) pair, not to x alone.
EXPLAIN QUERY PLAN
SELECT DISTINCT mycolumn,myColumn|9223372036854775806 AS x FROM mytable;
EXPLAIN QUERY PLAN
SELECT /*(SELECT count() FROM mytable) AS rows,*/*, mycolumn|9223372036854775806 AS x FROM mytable GROUP BY myColumn|9223372036854775806;


SELECT DISTINCT mycolumn,myColumn|9223372036854775806 AS x FROM mytable LIMIT 100;
-- One output row per distinct value of the masked expression.
SELECT /*(SELECT count() FROM mytable) AS rows,*/*, mycolumn|9223372036854775806 AS x FROM mytable GROUP BY myColumn|9223372036854775806;

/* The WHERE term below is identical to idx10's WHERE term */
EXPLAIN QUERY PLAN
SELECT mycolumn FROM mytable WHERE mycolumn < 9223372036854775806 / 10 LIMIT 100;

SELECT mycolumn FROM mytable WHERE mycolumn < 9223372036854775806 / 10 LIMIT 100;

/* CLEAN UP */
DROP TABLE IF EXISTS mytable;
DROP INDEX IF EXISTS idx1;
DROP INDEX IF EXISTS idx2;
DROP INDEX IF EXISTS idx10;
DROP INDEX IF EXISTS idx20;
DROP INDEX IF EXISTS idx30;
DROP INDEX IF EXISTS idx40;
DROP INDEX IF EXISTS idx50;
DROP INDEX IF EXISTS idx60;
DROP INDEX IF EXISTS idx70;
DROP INDEX IF EXISTS idx80;
DROP INDEX IF EXISTS idx90;
DROP INDEX IF EXISTS idx100;
如果mytable存在,则删除表;
如果存在idx1,则删除索引;
如果存在idx2,则删除索引;
如果存在,则删除索引idx10;
如果存在idx20,则删除索引;
如果存在idx30,则删除索引;
如果存在idx40,则删除索引;
如果存在idx50,则删除索引;
如果存在idx60,则删除索引;
如果存在idx70,则删除索引;
如果存在idx80,则删除索引;
如果存在idx90,则删除索引;
如果存在,则删除索引idx100;
如果不存在,则创建表mytable(id INTEGER主键,mycolumn INTEGER默认值(CAST(abs(random())AS INTEGER));
在mytable(myColumn | 9223372036854775806)上创建索引idx1;
--在mytable上创建索引idx2(DISTINCT myColumn | 9223372036854775806);不能使用,因为DISTINCT is语法错误。
--在mytable(myColumn)上创建索引idx3,其中myColumn=(从mytable中选择不同的myColumn | 9223372036854775806);
在mytable(mycolumn)上创建索引idx10,其中mycolumn<9223372036854775806/10;
在mytable(mycolumn)上创建索引idx20,其中mycolumn>=9223372036854775806/10和mycolumn<9223372036854775806/20;
在mytable(mycolumn)上创建索引idx30,其中mycolumn>=9223372036854775806/20和mycolumn<9223372036854775806/30;
在mytable(mycolumn)上创建索引idx40,其中mycolumn>=9223372036854775806/30和mycolumn<9223372036854775806/40;
在mytable(mycolumn)上创建索引idx50,其中mycolumn>=9223372036854775806/40和mycolumn<9223372036854775806/50;
在mytable(mycolumn)上创建索引idx60,其中mycolumn>=9223372036854775806/50和mycolumn<9223372036854775806/60;
在mytable(mycolumn)上创建索引idx70,其中mycolumn>=9223372036854775806/60和mycolumn<9223372036854775806/70;
在mytable(mycolumn)上创建索引idx80,其中mycolumn>=9223372036854775806/70和mycolumn<9223372036854775806/80;
在mytable(mycolumn)上创建索引idx90,其中mycolumn>=9223372036854775806/80和mycolumn<9223372036854775806/90;
在mytable(mycolumn)上创建索引idx100,其中mycolumn>=9223372036854775806/90;
/*加载一些数据*/
递归cte(x,y)为(
选择1,强制转换(abs(random())为整数)
UNION ALL选择x+1,从cte LIMIT 1000000中强制转换(abs(random())为整数)
)
插入mytable(mycolumn)中,从cte中选择y;
/*显示一些示例数据*/
从mytable LIMIT 100中选择mycolumn,mycolumn | 9223372036854775806作为x;
/*看看查询计划器得出的结果*/
解释查询计划
从mytable中选择不同的mycolumn,mycolumn | 9223372036854775806作为x;
解释查询计划
按mycolumn | 9223372036854775806从mytable组中选择/*(从mytable中选择count()作为行,*/*,mycolumn | 9223372036854775806作为x;
从mytable LIMIT 100中选择不同的mycolumn,mycolumn | 9223372036854775806作为x;
选择/*(从mytable中选择count())作为行,*/*,mycolumn | 922
-- Show the plan chosen for a range query on mycolumn.
EXPLAIN QUERY PLAN
SELECT mycolumn
FROM mytable
WHERE mycolumn < 9223372036854775806 / 10
LIMIT 100;
/*
 * Test script (SQLite): compares an expression index with a set of partial
 * indexes on mytable, loads 1,000,000 random rows, and inspects the plans
 * the query planner picks. The script drops everything it creates, so it
 * can be re-run as-is.
 */

/* Start from a clean slate */
DROP TABLE IF EXISTS mytable;
DROP INDEX IF EXISTS idx1;
DROP INDEX IF EXISTS idx2;
DROP INDEX IF EXISTS idx10;
DROP INDEX IF EXISTS idx20;
DROP INDEX IF EXISTS idx30;
DROP INDEX IF EXISTS idx40;
DROP INDEX IF EXISTS idx50;
DROP INDEX IF EXISTS idx60;
DROP INDEX IF EXISTS idx70;
DROP INDEX IF EXISTS idx80;
DROP INDEX IF EXISTS idx90;
DROP INDEX IF EXISTS idx100;

/* abs() keeps the random default non-negative */
CREATE TABLE IF NOT EXISTS mytable (
    id INTEGER PRIMARY KEY,
    mycolumn INTEGER DEFAULT (CAST(abs(random()) AS INTEGER))
);

/* Expression index on the masked column value */
CREATE INDEX idx1 ON mytable(myColumn|9223372036854775806);
-- CREATE INDEX idx2 ON mytable(DISTINCT myColumn|9223372036854775806); CANNOT BE USED AS DISTINCT is syntax error.
-- Left disabled: the WHERE clause of a partial index may not contain a subquery.
-- CREATE INDEX idx3 ON mytable(myColumn) WHERE myColumn =  (SELECT DISTINCT myColumn|9223372036854775806 FROM mytable);

/*
 * Ten partial indexes, one per range slice of width 9223372036854775806 / 10.
 * Every bound is a multiple of that width. Dividing by 20, 30, ... instead
 * would make each upper bound smaller than its lower bound (empty slice).
 */
CREATE INDEX idx10 ON mytable(mycolumn) WHERE mycolumn < 9223372036854775806 / 10;
CREATE INDEX idx20 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 AND mycolumn < 9223372036854775806 / 10 * 2;
CREATE INDEX idx30 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 2 AND mycolumn < 9223372036854775806 / 10 * 3;
CREATE INDEX idx40 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 3 AND mycolumn < 9223372036854775806 / 10 * 4;
CREATE INDEX idx50 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 4 AND mycolumn < 9223372036854775806 / 10 * 5;
CREATE INDEX idx60 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 5 AND mycolumn < 9223372036854775806 / 10 * 6;
CREATE INDEX idx70 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 6 AND mycolumn < 9223372036854775806 / 10 * 7;
CREATE INDEX idx80 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 7 AND mycolumn < 9223372036854775806 / 10 * 8;
CREATE INDEX idx90 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 8 AND mycolumn < 9223372036854775806 / 10 * 9;
CREATE INDEX idx100 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 * 9;

/* Load some data: the recursive CTE yields 1,000,000 rows of random values */
WITH RECURSIVE cte(x,y) AS (
    SELECT 1,CAST(abs(random()) AS INTEGER)
    UNION ALL SELECT x+1,CAST(abs(random()) AS INTEGER) FROM cte LIMIT 1000000
)
INSERT INTO mytable (mycolumn) SELECT y FROM cte;

/* Show some sample data */
SELECT mycolumn,myColumn|9223372036854775806 AS x FROM mytable LIMIT 100;

/* LOOK AT WHAT THE QUERY PLANNER COMES UP WITH */
-- Note: DISTINCT here applies to the (mycolumn, x) pair, not to x alone.
EXPLAIN QUERY PLAN
SELECT DISTINCT mycolumn,myColumn|9223372036854775806 AS x FROM mytable;
EXPLAIN QUERY PLAN
SELECT /*(SELECT count() FROM mytable) AS rows,*/*, mycolumn|9223372036854775806 AS x FROM mytable GROUP BY myColumn|9223372036854775806;


SELECT DISTINCT mycolumn,myColumn|9223372036854775806 AS x FROM mytable LIMIT 100;
-- One output row per distinct value of the masked expression.
SELECT /*(SELECT count() FROM mytable) AS rows,*/*, mycolumn|9223372036854775806 AS x FROM mytable GROUP BY myColumn|9223372036854775806;

/* The WHERE term below is identical to idx10's WHERE term */
EXPLAIN QUERY PLAN
SELECT mycolumn FROM mytable WHERE mycolumn < 9223372036854775806 / 10 LIMIT 100;

SELECT mycolumn FROM mytable WHERE mycolumn < 9223372036854775806 / 10 LIMIT 100;

/* CLEAN UP */
DROP TABLE IF EXISTS mytable;
DROP INDEX IF EXISTS idx1;
DROP INDEX IF EXISTS idx2;
DROP INDEX IF EXISTS idx10;
DROP INDEX IF EXISTS idx20;
DROP INDEX IF EXISTS idx30;
DROP INDEX IF EXISTS idx40;
DROP INDEX IF EXISTS idx50;
DROP INDEX IF EXISTS idx60;
DROP INDEX IF EXISTS idx70;
DROP INDEX IF EXISTS idx80;
DROP INDEX IF EXISTS idx90;
DROP INDEX IF EXISTS idx100;
DROP TABLE IF EXISTS mytable
> OK
> Time: 1.173s


DROP INDEX IF EXISTS idx1
> OK
> Time: 0s


DROP INDEX IF EXISTS idx2
> OK
> Time: 0s


DROP INDEX IF EXISTS idx10
> OK
> Time: 0s


DROP INDEX IF EXISTS idx20
> OK
> Time: 0s


DROP INDEX IF EXISTS idx30
> OK
> Time: 0s


DROP INDEX IF EXISTS idx40
> OK
> Time: 0s


DROP INDEX IF EXISTS idx50
> OK
> Time: 0s


DROP INDEX IF EXISTS idx60
> OK
> Time: 0s


DROP INDEX IF EXISTS idx70
> OK
> Time: 0s


DROP INDEX IF EXISTS idx80
> OK
> Time: 0s


DROP INDEX IF EXISTS idx90
> OK
> Time: 0s


DROP INDEX IF EXISTS idx100
> OK
> Time: 0s


CREATE TABLE IF NOT EXISTS mytable (id INTEGER PRIMARY KEY, mycolumn INTEGER DEFAULT (CAST(abs(random()) AS INTEGER)))
> OK
> Time: 0.056s


CREATE INDEX idx1 ON mytable(myColumn|9223372036854775806)
> OK
> Time: 0.024s


-- CREATE INDEX idx2 ON mytable(DISTINCT myColumn|9223372036854775806); CANNOT BE USED AS DISTINCT is syntax error.
-- CREATE INDEX idx3 ON mytable(myColumn) WHERE myColumn =  (SELECT DISTINCT myColumn|9223372036854775806 FROM mytable);
CREATE INDEX idx10 ON mytable(mycolumn) WHERE mycolumn < 9223372036854775806 / 10
> OK
> Time: 0.024s


CREATE INDEX idx20 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 AND mycolumn < 9223372036854775806 / 20
> OK
> Time: 0.024s


CREATE INDEX idx30 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 20 AND mycolumn < 9223372036854775806 / 30
> OK
> Time: 0.024s


CREATE INDEX idx40 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 30 AND mycolumn < 9223372036854775806 / 40
> OK
> Time: 0.024s


CREATE INDEX idx50 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 40 AND mycolumn < 9223372036854775806 / 50
> OK
> Time: 0.024s


CREATE INDEX idx60 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 50 AND mycolumn < 9223372036854775806 / 60
> OK
> Time: 0.024s


CREATE INDEX idx70 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 60 AND mycolumn < 9223372036854775806 / 70
> OK
> Time: 0.024s


CREATE INDEX idx80 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 70 AND mycolumn < 9223372036854775806 / 80
> OK
> Time: 0.024s


CREATE INDEX idx90 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 80 AND mycolumn < 9223372036854775806 / 90
> OK
> Time: 0.024s


CREATE INDEX idx100 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 90
> OK
> Time: 0.024s


/* Load some data */
WITH RECURSIVE cte(x,y) AS (
    SELECT 1,CAST(abs(random()) AS INTEGER)
    UNION ALL SELECT x+1,CAST(abs(random()) AS INTEGER) FROM cte LIMIT 1000000
)
INSERT INTO mytable (mycolumn) SELECT y FROM cte
> Affected rows: 1000000
> Time: 14.096s


/* Show some sample data */
SELECT mycolumn,myColumn|9223372036854775806 AS x FROM mytable LIMIT 100
> OK
> Time: 0.001s


/* LOOK AT WHAT THE QUERY PLANNER COMES UP WITH */
EXPLAIN QUERY PLAN
SELECT DISTINCT mycolumn,myColumn|9223372036854775806 AS x FROM mytable
> OK
> Time: 0s


EXPLAIN QUERY PLAN
SELECT /*(SELECT count() FROM mytable) AS rows,*/*, mycolumn|9223372036854775806 AS x FROM mytable GROUP BY myColumn|9223372036854775806
> OK
> Time: 0s


SELECT DISTINCT mycolumn,myColumn|9223372036854775806 AS x FROM mytable LIMIT 100
> OK
> Time: 0.001s


SELECT /*(SELECT count() FROM mytable) AS rows,*/*, mycolumn|9223372036854775806 AS x FROM mytable GROUP BY myColumn|9223372036854775806
> OK
> Time: 0.093s


EXPLAIN QUERY PLAN
SELECT mycolumn FROM mytable WHERE mycolumn < 9223372036854775806 / 10 LIMIT 100
> OK
> Time: 0s


SELECT mycolumn FROM mytable WHERE mycolumn < 9223372036854775806 / 10 LIMIT 100
> OK
> Time: 0s


/* CLEAN UP */
DROP TABLE IF EXISTS mytable
> OK
> Time: 0.793s


DROP INDEX IF EXISTS idx1
> OK
> Time: 0s


DROP INDEX IF EXISTS idx2
> OK
> Time: 0s


DROP INDEX IF EXISTS idx10
> OK
> Time: 0s


DROP INDEX IF EXISTS idx20
> OK
> Time: 0s


DROP INDEX IF EXISTS idx30
> OK
> Time: 0s


DROP INDEX IF EXISTS idx40
> OK
> Time: 0s


DROP INDEX IF EXISTS idx50
> OK
> Time: 0s


DROP INDEX IF EXISTS idx60
> OK
> Time: 0s


DROP INDEX IF EXISTS idx70
> OK
> Time: 0s


DROP INDEX IF EXISTS idx80
> OK
> Time: 0s


DROP INDEX IF EXISTS idx90
> OK
> Time: 0s


DROP INDEX IF EXISTS idx100
> OK
> Time: 0s