在不合并列的情况下高效查找聚合行的详细信息(MySQL)
我正在帮助的一个开源项目通过存储一个由值和行ID组合而成的列来查找聚合行的详细信息。这种做法几乎让我感到生理上的不适。然而,我始终无法在不损失性能的前提下把它改成其他实现。我在最新的 MySQL 8.0 上进行测试。
标签:mysql, performance, group-by
生成演示数据(在我的桌面上运行大约需要3分钟):
-- Demo source table: one row per observation, classified by three category
-- columns (cat_a, cat_b, cat_c) and carrying the numeric value that the
-- queries further down in this script minimise per category group.
DROP TABLE IF EXISTS demo_data;

CREATE TABLE demo_data (
    id      INT          NOT NULL AUTO_INCREMENT PRIMARY KEY,
    cat_a   INT          NOT NULL,
    cat_b   INT          NOT NULL,
    cat_c   INT          NOT NULL,
    value   INT          NOT NULL,
    -- Stands in for several columns, including connections to other tables for joins.
    details VARCHAR(100) NOT NULL DEFAULT ''
);
-- generate_data(): fills demo_data with 2,500,000 random rows.
--   * 2,200,000 rows with the fixed pair cat_b = 1, cat_c = 2
--   * 300,000 rows with cat_b and cat_c each drawn from ROUND(RAND() * 3)
-- In both batches cat_a is ROUND(RAND() * 50000) and value is
-- ROUND(RAND() * 120000).
-- Inserts run with autocommit switched off and are committed every 1000 rows.
-- The session's autocommit setting is captured on entry and restored on exit
-- rather than being forced to 1, so a caller that runs with autocommit = 0
-- keeps that setting after the call.
DROP PROCEDURE IF EXISTS generate_data;

DELIMITER $$

CREATE PROCEDURE generate_data()
BEGIN
    DECLARE i INT DEFAULT 0;
    -- Remember the caller's autocommit mode so it can be put back at the end.
    DECLARE prev_autocommit TINYINT DEFAULT @@autocommit;

    SET autocommit = 0;

    -- Batch 1: rows that all share cat_b = 1 and cat_c = 2.
    WHILE i < 2200000 DO
        INSERT INTO demo_data (cat_a, cat_b, cat_c, value, details)
        VALUES (
            ROUND(RAND() * 50000),
            1,
            2,
            ROUND(RAND() * 120000),
            'important details'
        );
        SET i = i + 1;
        IF i % 1000 = 0 THEN
            COMMIT;
        END IF;
    END WHILE;

    -- Batch 2: rows with randomised cat_b / cat_c.
    SET i = 0;
    WHILE i < 300000 DO
        INSERT INTO demo_data (cat_a, cat_b, cat_c, value, details)
        VALUES (
            ROUND(RAND() * 50000),
            ROUND(RAND() * 3),
            ROUND(RAND() * 3),
            ROUND(RAND() * 120000),
            'important details'
        );
        SET i = i + 1;
        IF i % 1000 = 0 THEN
            COMMIT;
        END IF;
    END WHILE;

    -- Flush whatever is still pending, then restore the caller's mode.
    COMMIT;
    SET autocommit = prev_autocommit;
END$$

DELIMITER ;

CALL generate_data();
-- Build demo_data_concise: for every (cat_a, cat_b, cat_c) combination keep the
-- demo_data row with the smallest positive value; among equal values the row
-- with the lowest id wins.
-- The packed key value * 1000000000 + id lets a single MIN() select both the
-- value and the row it came from; the id is recovered as key % 1000000000,
-- which relies on id staying below 1000000000.
DROP TABLE IF EXISTS demo_data_concise;

CREATE TABLE demo_data_concise AS
WITH packed_minimum AS (
    -- One packed key per category combination.
    SELECT MIN(src.value * 1000000000 + src.id) AS valueAndId
    FROM demo_data AS src
    WHERE src.value > 0
    GROUP BY src.cat_a, src.cat_b, src.cat_c
)
SELECT
    d.id,
    d.value,
    pm.valueAndId,
    d.cat_a,
    d.cat_b,
    d.cat_c
FROM packed_minimum AS pm
INNER JOIN demo_data AS d
    ON d.id = pm.valueAndId % 1000000000;
-- Top-N lookup against the concise table: for each cat_a that passes the
-- filters take the smallest packed key, keep the 100 smallest of those, then
-- fetch the full demo_data row for each one via the id packed into the key.
SELECT
    d.*
FROM (
    SELECT MIN(c.valueAndId) AS min_value_and_id
    FROM demo_data_concise AS c
    WHERE TRUE
        AND c.value > 0  -- some of the real values are negative, leaving it here just for the reference
        AND c.cat_b = 1  -- cat_b clause is optional
        AND c.cat_c = 2  -- cat_c clause is optional
    GROUP BY c.cat_a
    ORDER BY min_value_and_id
    LIMIT 100  -- limit can change
) AS top_keys
INNER JOIN demo_data AS d
    ON d.id = top_keys.min_value_and_id % 1000000000
ORDER BY d.value, d.cat_a;
使用精简表(demo_data_concise)进行查询:
在我的桌面上耗时不超过200毫秒。
-- Demo source table: one row per observation, classified by three category
-- columns (cat_a, cat_b, cat_c) and carrying the numeric value that the
-- queries further down in this script minimise per category group.
DROP TABLE IF EXISTS demo_data;

CREATE TABLE demo_data (
    id      INT          NOT NULL AUTO_INCREMENT PRIMARY KEY,
    cat_a   INT          NOT NULL,
    cat_b   INT          NOT NULL,
    cat_c   INT          NOT NULL,
    value   INT          NOT NULL,
    -- Stands in for several columns, including connections to other tables for joins.
    details VARCHAR(100) NOT NULL DEFAULT ''
);
-- generate_data(): fills demo_data with 2,500,000 random rows.
--   * 2,200,000 rows with the fixed pair cat_b = 1, cat_c = 2
--   * 300,000 rows with cat_b and cat_c each drawn from ROUND(RAND() * 3)
-- In both batches cat_a is ROUND(RAND() * 50000) and value is
-- ROUND(RAND() * 120000).
-- Inserts run with autocommit switched off and are committed every 1000 rows.
-- The session's autocommit setting is captured on entry and restored on exit
-- rather than being forced to 1, so a caller that runs with autocommit = 0
-- keeps that setting after the call.
DROP PROCEDURE IF EXISTS generate_data;

DELIMITER $$

CREATE PROCEDURE generate_data()
BEGIN
    DECLARE i INT DEFAULT 0;
    -- Remember the caller's autocommit mode so it can be put back at the end.
    DECLARE prev_autocommit TINYINT DEFAULT @@autocommit;

    SET autocommit = 0;

    -- Batch 1: rows that all share cat_b = 1 and cat_c = 2.
    WHILE i < 2200000 DO
        INSERT INTO demo_data (cat_a, cat_b, cat_c, value, details)
        VALUES (
            ROUND(RAND() * 50000),
            1,
            2,
            ROUND(RAND() * 120000),
            'important details'
        );
        SET i = i + 1;
        IF i % 1000 = 0 THEN
            COMMIT;
        END IF;
    END WHILE;

    -- Batch 2: rows with randomised cat_b / cat_c.
    SET i = 0;
    WHILE i < 300000 DO
        INSERT INTO demo_data (cat_a, cat_b, cat_c, value, details)
        VALUES (
            ROUND(RAND() * 50000),
            ROUND(RAND() * 3),
            ROUND(RAND() * 3),
            ROUND(RAND() * 120000),
            'important details'
        );
        SET i = i + 1;
        IF i % 1000 = 0 THEN
            COMMIT;
        END IF;
    END WHILE;

    -- Flush whatever is still pending, then restore the caller's mode.
    COMMIT;
    SET autocommit = prev_autocommit;
END$$

DELIMITER ;

CALL generate_data();
-- Build demo_data_concise: for every (cat_a, cat_b, cat_c) combination keep the
-- demo_data row with the smallest positive value; among equal values the row
-- with the lowest id wins.
-- The packed key value * 1000000000 + id lets a single MIN() select both the
-- value and the row it came from; the id is recovered as key % 1000000000,
-- which relies on id staying below 1000000000.
DROP TABLE IF EXISTS demo_data_concise;

CREATE TABLE demo_data_concise AS
WITH packed_minimum AS (
    -- One packed key per category combination.
    SELECT MIN(src.value * 1000000000 + src.id) AS valueAndId
    FROM demo_data AS src
    WHERE src.value > 0
    GROUP BY src.cat_a, src.cat_b, src.cat_c
)
SELECT
    d.id,
    d.value,
    pm.valueAndId,
    d.cat_a,
    d.cat_b,
    d.cat_c
FROM packed_minimum AS pm
INNER JOIN demo_data AS d
    ON d.id = pm.valueAndId % 1000000000;
-- Top-N lookup against the concise table: for each cat_a that passes the
-- filters take the smallest packed key, keep the 100 smallest of those, then
-- fetch the full demo_data row for each one via the id packed into the key.
SELECT
    d.*
FROM (
    SELECT MIN(c.valueAndId) AS min_value_and_id
    FROM demo_data_concise AS c
    WHERE TRUE
        AND c.value > 0  -- some of the real values are negative, leaving it here just for the reference
        AND c.cat_b = 1  -- cat_b clause is optional
        AND c.cat_c = 2  -- cat_c clause is optional
    GROUP BY c.cat_a
    ORDER BY min_value_and_id
    LIMIT 100  -- limit can change
) AS top_keys
INNER JOIN demo_data AS d
    ON d.id = top_keys.min_value_and_id % 1000000000
ORDER BY d.value, d.cat_a;
备选方案
我已经尝试过使用自连接、ROW_NUMBER() 窗口函数以及 LATERAL 派生表来生成精简表——但在性能方面似乎都无法与现有做法相比。
一定有办法以更好的方式实现这些结果。
使用联接(JOIN)时,请确保用表名(或别名)限定每一列。请提供 EXPLAIN SELECT…