Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/mysql/56.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
在不合并列的情况下高效查找聚合行的详细信息(MySQL)_Mysql_Performance_Group By - Fatal编程技术网

在不合并列的情况下高效查找聚合行的详细信息(MySQL)

在不合并列的情况下高效查找聚合行的详细信息(MySQL),mysql,performance,group-by,Mysql,Performance,Group By,我正在帮助的一个开源项目是通过存储一个列来查找聚合的详细信息,该列是值和行ID的组合。这几乎给我带来了身体上的痛苦 然而,我似乎无法在不影响性能的情况下将其更改为其他实现。 我正在最新的MySQL 8.0上进行测试 生成演示数据 在我的桌面上运行大约需要3分钟 DROP TABLE IF EXISTS `demo_data`; CREATE TABLE `demo_data` ( `id` INT NOT NULL AUTO_INCREMENT, `cat_a` INT NOT NULL

我参与的一个开源项目通过存储一个由值和行 ID 组合而成的列,来查找聚合结果对应的详细行。这种做法简直让我感到生理上的难受。

然而,我似乎无法在不影响性能的情况下将其更改为其他实现。 我正在最新的MySQL 8.0上进行测试

生成演示数据

在我的桌面上运行大约需要3分钟

-- Demo fixture table: an auto-increment id, three integer category columns,
-- an integer value, and a short text payload.
DROP TABLE IF EXISTS `demo_data`;
CREATE TABLE `demo_data` (
  `id`      INT          NOT NULL AUTO_INCREMENT PRIMARY KEY,
  `cat_a`   INT          NOT NULL,
  `cat_b`   INT          NOT NULL,
  `cat_c`   INT          NOT NULL,
  `value`   INT          NOT NULL,
  -- Stands in for several real columns, including keys used to join other tables.
  `details` VARCHAR(100) NOT NULL DEFAULT ''
);


-- generate_data(): fills `demo_data` with 2,500,000 random rows in two passes,
-- committing after every 1000 inserts.
-- Side effect: the session is left with autocommit = 1.
DROP PROCEDURE IF EXISTS generate_data;
DELIMITER $$
CREATE PROCEDURE generate_data()
BEGIN
  DECLARE row_no INT DEFAULT 0;

  -- Batch the inserts into explicit transactions instead of one commit per row.
  SET autocommit = 0;

  -- Pass 1: 2,200,000 rows, all with cat_b = 1 and cat_c = 2.
  fixed_slice: LOOP
    IF row_no >= 2200000 THEN
      LEAVE fixed_slice;
    END IF;

    INSERT INTO `demo_data` (`cat_a`, `cat_b`, `cat_c`, `value`, `details`)
    VALUES (
      ROUND(RAND() * 50000),
      1,
      2,
      ROUND(RAND() * 120000),
      'important details'
    );

    SET row_no = row_no + 1;
    IF MOD(row_no, 1000) = 0 THEN
      COMMIT;
    END IF;
  END LOOP fixed_slice;

  -- Pass 2: 300,000 rows with cat_b and cat_c drawn at random from 0..3.
  SET row_no = 0;
  random_slice: LOOP
    IF row_no >= 300000 THEN
      LEAVE random_slice;
    END IF;

    INSERT INTO `demo_data` (`cat_a`, `cat_b`, `cat_c`, `value`, `details`)
    VALUES (
      ROUND(RAND() * 50000),
      ROUND(RAND() * 3),
      ROUND(RAND() * 3),
      ROUND(RAND() * 120000),
      'important details'
    );

    SET row_no = row_no + 1;
    IF MOD(row_no, 1000) = 0 THEN
      COMMIT;
    END IF;
  END LOOP random_slice;

  -- Switch autocommit back on, then commit whatever is still pending.
  SET autocommit = 1;
  COMMIT;
END$$
DELIMITER ;

-- Populate demo_data, then rebuild the concise lookup table from it.
CALL generate_data();

DROP TABLE IF EXISTS demo_data_concise;

-- One row per (cat_a, cat_b, cat_c) group: the row with the smallest positive value.
-- valueAndId packs value and id into a single number (value * 1000000000 + id), so
-- MIN() picks the lowest value and the row id is recovered as the remainder modulo
-- 1000000000. This only works while id stays below 1000000000 and value is positive.
CREATE TABLE demo_data_concise AS
SELECT
  src.id,
  src.value,
  group_min.valueAndId,
  src.cat_a,
  src.cat_b,
  src.cat_c
FROM (
  SELECT MIN(d.value * 1000000000 + d.id) AS valueAndId
  FROM demo_data AS d
  WHERE d.value > 0
  GROUP BY d.cat_a, d.cat_b, d.cat_c
) AS group_min
INNER JOIN demo_data AS src
  ON src.id = MOD(group_min.valueAndId, 1000000000);
        -- Fetch the full demo_data rows for the 100 smallest packed minima, one per cat_a.
        -- The row id is recovered from valueAndId as the remainder modulo 1000000000.
        SELECT
          detail_row.*
        FROM (
          SELECT MIN(c.valueAndId) AS valueAndId
          FROM demo_data_concise AS c
          WHERE TRUE
            AND c.value > 0  -- some of the real values are negative; kept here for reference
            AND c.cat_b = 1  -- optional filter
            AND c.cat_c = 2  -- optional filter
          GROUP BY c.cat_a
          ORDER BY valueAndId
          LIMIT 100          -- the limit can change
        ) AS lowest_per_cat_a
        INNER JOIN demo_data AS detail_row
          ON detail_row.id = MOD(lowest_per_cat_a.valueAndId, 1000000000)
        ORDER BY detail_row.value, detail_row.cat_a;
简明表格的使用

在我的桌面上不超过200毫秒

-- Demo fixture table: an auto-increment id, three integer category columns,
-- an integer value, and a short text payload.
DROP TABLE IF EXISTS `demo_data`;
CREATE TABLE `demo_data` (
  `id`      INT          NOT NULL AUTO_INCREMENT PRIMARY KEY,
  `cat_a`   INT          NOT NULL,
  `cat_b`   INT          NOT NULL,
  `cat_c`   INT          NOT NULL,
  `value`   INT          NOT NULL,
  -- Stands in for several real columns, including keys used to join other tables.
  `details` VARCHAR(100) NOT NULL DEFAULT ''
);


-- generate_data(): fills `demo_data` with 2,500,000 random rows in two passes,
-- committing after every 1000 inserts.
-- Side effect: the session is left with autocommit = 1.
DROP PROCEDURE IF EXISTS generate_data;
DELIMITER $$
CREATE PROCEDURE generate_data()
BEGIN
  DECLARE row_no INT DEFAULT 0;

  -- Batch the inserts into explicit transactions instead of one commit per row.
  SET autocommit = 0;

  -- Pass 1: 2,200,000 rows, all with cat_b = 1 and cat_c = 2.
  fixed_slice: LOOP
    IF row_no >= 2200000 THEN
      LEAVE fixed_slice;
    END IF;

    INSERT INTO `demo_data` (`cat_a`, `cat_b`, `cat_c`, `value`, `details`)
    VALUES (
      ROUND(RAND() * 50000),
      1,
      2,
      ROUND(RAND() * 120000),
      'important details'
    );

    SET row_no = row_no + 1;
    IF MOD(row_no, 1000) = 0 THEN
      COMMIT;
    END IF;
  END LOOP fixed_slice;

  -- Pass 2: 300,000 rows with cat_b and cat_c drawn at random from 0..3.
  SET row_no = 0;
  random_slice: LOOP
    IF row_no >= 300000 THEN
      LEAVE random_slice;
    END IF;

    INSERT INTO `demo_data` (`cat_a`, `cat_b`, `cat_c`, `value`, `details`)
    VALUES (
      ROUND(RAND() * 50000),
      ROUND(RAND() * 3),
      ROUND(RAND() * 3),
      ROUND(RAND() * 120000),
      'important details'
    );

    SET row_no = row_no + 1;
    IF MOD(row_no, 1000) = 0 THEN
      COMMIT;
    END IF;
  END LOOP random_slice;

  -- Switch autocommit back on, then commit whatever is still pending.
  SET autocommit = 1;
  COMMIT;
END$$
DELIMITER ;

-- Populate demo_data, then rebuild the concise lookup table from it.
CALL generate_data();

DROP TABLE IF EXISTS demo_data_concise;

-- One row per (cat_a, cat_b, cat_c) group: the row with the smallest positive value.
-- valueAndId packs value and id into a single number (value * 1000000000 + id), so
-- MIN() picks the lowest value and the row id is recovered as the remainder modulo
-- 1000000000. This only works while id stays below 1000000000 and value is positive.
CREATE TABLE demo_data_concise AS
SELECT
  src.id,
  src.value,
  group_min.valueAndId,
  src.cat_a,
  src.cat_b,
  src.cat_c
FROM (
  SELECT MIN(d.value * 1000000000 + d.id) AS valueAndId
  FROM demo_data AS d
  WHERE d.value > 0
  GROUP BY d.cat_a, d.cat_b, d.cat_c
) AS group_min
INNER JOIN demo_data AS src
  ON src.id = MOD(group_min.valueAndId, 1000000000);
        -- Fetch the full demo_data rows for the 100 smallest packed minima, one per cat_a.
        -- The row id is recovered from valueAndId as the remainder modulo 1000000000.
        SELECT
          detail_row.*
        FROM (
          SELECT MIN(c.valueAndId) AS valueAndId
          FROM demo_data_concise AS c
          WHERE TRUE
            AND c.value > 0  -- some of the real values are negative; kept here for reference
            AND c.cat_b = 1  -- optional filter
            AND c.cat_c = 2  -- optional filter
          GROUP BY c.cat_a
          ORDER BY valueAndId
          LIMIT 100          -- the limit can change
        ) AS lowest_per_cat_a
        INNER JOIN demo_data AS detail_row
          ON detail_row.id = MOD(lowest_per_cat_a.valueAndId, 1000000000)
        ORDER BY detail_row.value, detail_row.cat_a;
备选方案

我已经尝试过使用自连接、ROW_NUMBER 和 LATERAL 来生成简明表——但在性能方面似乎都无法与之接近。
一定有更好的办法来得到同样的结果。

使用 JOIN 时,请确保用表名(或别名)限定每一列。请提供 EXPLAIN SELECT … 的输出。