Mysql 优化SQL重复搜索
我们有一张大约1300万行的表。我们的目标是只针对其中一家餐厅(约300000行)在该表中查找重复记录。我们判定重复的标准是:姓氏相同、名字的前两个字母相同,并且电话或电子邮件相同。这些字段各自是单独的一列。我们目前的策略是为该餐厅的所有行创建两个相同的临时表,按上述条件将它们连接起来,然后从第一个表返回id、名字、姓氏、电话和电子邮件。Mysql 优化SQL重复搜索,mysql,Mysql,我们有一张大约1300万行的表。我们的目标是只针对其中一家餐厅(约300000行)在该表中查找重复记录。我们判定重复的标准是:姓氏相同、名字的前两个字母相同,并且电话或电子邮件相同。这些字段各自是单独的一列。我们目前的策略是为该餐厅的所有行创建两个相同的临时表,按上述条件将它们连接起来,然后从第一个表返回id、名字、姓氏、电话和电子邮件 SELECT DISTINCT t1.id, t1.firstname, t1.lastname, t1.phone, t1.email FROM
-- Lists every customer row of restaurant 2296 that has at least one other row
-- with the same last name, the same first two letters of the first name, and
-- the same phone or the same email.
SELECT
-- DISTINCT: a row that matches several other rows is still returned only once.
DISTINCT t1.id, t1.firstname, t1.lastname, t1.phone, t1.email
FROM
(
-- Left side: the restaurant's rows, excluding last names that contain 'Tour'.
SELECT lmoc.id, lmoc.firstname, lmoc.lastname, lmoc.phone, lmoc.email
FROM loyalty_member_opentable_customer lmoc
WHERE lmoc.opentable_restaurant_id=2296
AND lmoc.lastname NOT LIKE '%Tour%'
) AS t1
INNER JOIN
(
-- Right side: the same row set again, so each row is compared with the others.
SELECT lmoc2.id, lmoc2.firstname, lmoc2.lastname, lmoc2.phone, lmoc2.email
FROM loyalty_member_opentable_customer lmoc2
WHERE lmoc2.opentable_restaurant_id=2296
AND lmoc2.lastname NOT LIKE '%Tour%'
) AS t2
-- NOTE(review): every comparison below goes through STRCMP()/LEFT() rather than a
-- plain column equality, which presumably prevents an indexed or hash join between
-- the two derived tables -- confirm with EXPLAIN.
ON STRCMP(t1.lastname,t2.lastname)=0
-- Different ids, so a row is never paired with itself.
AND t1.id!=t2.id
-- Same first two characters of the first name.
AND STRCMP(LEFT(t1.firstname,2),LEFT(t2.firstname,2))=0
-- Either the phone or the email has to match; matching both is not required.
AND (STRCMP(t1.phone,t2.phone)=0 OR STRCMP(t1.email,t2.email)=0)
ORDER BY t1.lastname, t1.firstname
问题是这个查询需要48小时才能跑完。有人能想出更高效的写法吗?我们需要找出所有的重复记录,这样餐厅就可以根据需要将它们合并。为什么不干脆这样做呢
-- Finds groups of customer rows of restaurant 2296 that share the last name,
-- the first two letters of the first name, the phone AND the email.
-- Limitation: phone and email are both grouping keys, so rows that agree on
-- only one of the two are not reported as duplicates.
SELECT
    lmoc.lastname,
    -- Only the two-letter prefix is a grouping key, so a bare lmoc.firstname is
    -- rejected under ONLY_FULL_GROUP_BY; MIN() picks one deterministic value.
    MIN(lmoc.firstname) AS firstname,
    lmoc.phone,
    lmoc.email,
    COUNT(*) AS duplicate_count
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
GROUP BY
    lmoc.lastname,
    LEFT(lmoc.firstname, 2),
    lmoc.phone,
    lmoc.email
HAVING COUNT(*) > 1;
为什么不干脆做呢
-- Finds groups of customer rows of restaurant 2296 that share the last name,
-- the first two letters of the first name, the phone AND the email.
-- Limitation: phone and email are both grouping keys, so rows that agree on
-- only one of the two are not reported as duplicates.
SELECT
    lmoc.lastname,
    -- Only the two-letter prefix is a grouping key, so a bare lmoc.firstname is
    -- rejected under ONLY_FULL_GROUP_BY; MIN() picks one deterministic value.
    MIN(lmoc.firstname) AS firstname,
    lmoc.phone,
    lmoc.email,
    COUNT(*) AS duplicate_count
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
GROUP BY
    lmoc.lastname,
    LEFT(lmoc.firstname, 2),
    lmoc.phone,
    lmoc.email
HAVING COUNT(*) > 1;
为什么不干脆做呢
-- Finds groups of customer rows of restaurant 2296 that share the last name,
-- the first two letters of the first name, the phone AND the email.
-- Limitation: phone and email are both grouping keys, so rows that agree on
-- only one of the two are not reported as duplicates.
SELECT
    lmoc.lastname,
    -- Only the two-letter prefix is a grouping key, so a bare lmoc.firstname is
    -- rejected under ONLY_FULL_GROUP_BY; MIN() picks one deterministic value.
    MIN(lmoc.firstname) AS firstname,
    lmoc.phone,
    lmoc.email,
    COUNT(*) AS duplicate_count
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
GROUP BY
    lmoc.lastname,
    LEFT(lmoc.firstname, 2),
    lmoc.phone,
    lmoc.email
HAVING COUNT(*) > 1;
为什么不干脆做呢
-- Finds groups of customer rows of restaurant 2296 that share the last name,
-- the first two letters of the first name, the phone AND the email.
-- Limitation: phone and email are both grouping keys, so rows that agree on
-- only one of the two are not reported as duplicates.
SELECT
    lmoc.lastname,
    -- Only the two-letter prefix is a grouping key, so a bare lmoc.firstname is
    -- rejected under ONLY_FULL_GROUP_BY; MIN() picks one deterministic value.
    MIN(lmoc.firstname) AS firstname,
    lmoc.phone,
    lmoc.email,
    COUNT(*) AS duplicate_count
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
GROUP BY
    lmoc.lastname,
    LEFT(lmoc.firstname, 2),
    lmoc.phone,
    lmoc.email
HAVING COUNT(*) > 1;
?此SQL将帮助您找到重复项
-- Reports groups of duplicate customer rows for restaurant 2296: rows sharing
-- last name, first two letters of the first name, phone and email.
-- The row id must NOT be a grouping key: the question's queries use id to tell
-- rows apart, so grouping by it yields one row per group and
-- HAVING COUNT(*) > 1 would never match.
SELECT
    MIN(lmoc.id) AS id,                 -- lowest id in the group, as a representative
    MIN(lmoc.firstname) AS firstname,   -- deterministic pick; only the 2-letter prefix is grouped
    lmoc.lastname,
    lmoc.phone,
    lmoc.email,
    -- Every id in the group, so the rows can be merged afterwards.
    -- The list is truncated at group_concat_max_len.
    GROUP_CONCAT(lmoc.id ORDER BY lmoc.id) AS duplicate_ids
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
    -- NOTE(review): limits the search to last names from 'ha' through 'i';
    -- looks like a batching window -- widen or remove it to cover every name.
    AND lmoc.lastname BETWEEN 'ha' AND 'i'
GROUP BY
    lmoc.opentable_restaurant_id,
    LEFT(lmoc.firstname, 2),
    lmoc.lastname,
    lmoc.phone,
    lmoc.email
HAVING COUNT(*) > 1;
如果表中有主键,就可以用下面的SQL在每组重复记录中保留主键最大(最新)的一行,并删除其余较旧的行
-- Deletes all but the newest row (highest primary_id) of every duplicate group
-- for restaurant 2296. A group is: same last name, same first two letters of
-- the first name, same phone and same email.
-- WARNING: destructive. Run the matching SELECT first and execute this inside
-- a transaction. GROUP BY also puts rows whose phone/email are NULL into the
-- same group, so such rows are treated as duplicates of each other.
-- NOTE(review): primary_id stands for the table's primary-key column (the
-- question's own queries call the row id "id") -- confirm the real name.
DELETE lmoc    -- multi-table DELETE names the table alias, not a column
FROM loyalty_member_opentable_customer AS lmoc
LEFT JOIN (
    -- One survivor per duplicate group. The per-row id is deliberately not a
    -- grouping key; with it every row would be its own group and nothing
    -- would ever be deleted.
    SELECT MAX(src.primary_id) AS id
    FROM loyalty_member_opentable_customer AS src
    WHERE src.opentable_restaurant_id = 2296
        AND src.lastname NOT LIKE '%Tour%'
        AND src.lastname BETWEEN 'ha' AND 'i'
    GROUP BY
        src.opentable_restaurant_id,
        LEFT(src.firstname, 2),
        src.lastname,
        src.phone,
        src.email
) AS nodup
    ON lmoc.primary_id = nodup.id
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
    -- NOTE(review): same 'ha'..'i' last-name window as the subquery; both
    -- filters must stay identical or survivors could be deleted.
    AND lmoc.lastname BETWEEN 'ha' AND 'i'
    -- No match in nodup means this row is not the survivor of its group.
    AND nodup.id IS NULL;
此SQL将帮助您找到重复项
-- Reports groups of duplicate customer rows for restaurant 2296: rows sharing
-- last name, first two letters of the first name, phone and email.
-- The row id must NOT be a grouping key: the question's queries use id to tell
-- rows apart, so grouping by it yields one row per group and
-- HAVING COUNT(*) > 1 would never match.
SELECT
    MIN(lmoc.id) AS id,                 -- lowest id in the group, as a representative
    MIN(lmoc.firstname) AS firstname,   -- deterministic pick; only the 2-letter prefix is grouped
    lmoc.lastname,
    lmoc.phone,
    lmoc.email,
    -- Every id in the group, so the rows can be merged afterwards.
    -- The list is truncated at group_concat_max_len.
    GROUP_CONCAT(lmoc.id ORDER BY lmoc.id) AS duplicate_ids
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
    -- NOTE(review): limits the search to last names from 'ha' through 'i';
    -- looks like a batching window -- widen or remove it to cover every name.
    AND lmoc.lastname BETWEEN 'ha' AND 'i'
GROUP BY
    lmoc.opentable_restaurant_id,
    LEFT(lmoc.firstname, 2),
    lmoc.lastname,
    lmoc.phone,
    lmoc.email
HAVING COUNT(*) > 1;
如果表中有主键,就可以用下面的SQL在每组重复记录中保留主键最大(最新)的一行,并删除其余较旧的行
-- Deletes all but the newest row (highest primary_id) of every duplicate group
-- for restaurant 2296. A group is: same last name, same first two letters of
-- the first name, same phone and same email.
-- WARNING: destructive. Run the matching SELECT first and execute this inside
-- a transaction. GROUP BY also puts rows whose phone/email are NULL into the
-- same group, so such rows are treated as duplicates of each other.
-- NOTE(review): primary_id stands for the table's primary-key column (the
-- question's own queries call the row id "id") -- confirm the real name.
DELETE lmoc    -- multi-table DELETE names the table alias, not a column
FROM loyalty_member_opentable_customer AS lmoc
LEFT JOIN (
    -- One survivor per duplicate group. The per-row id is deliberately not a
    -- grouping key; with it every row would be its own group and nothing
    -- would ever be deleted.
    SELECT MAX(src.primary_id) AS id
    FROM loyalty_member_opentable_customer AS src
    WHERE src.opentable_restaurant_id = 2296
        AND src.lastname NOT LIKE '%Tour%'
        AND src.lastname BETWEEN 'ha' AND 'i'
    GROUP BY
        src.opentable_restaurant_id,
        LEFT(src.firstname, 2),
        src.lastname,
        src.phone,
        src.email
) AS nodup
    ON lmoc.primary_id = nodup.id
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
    -- NOTE(review): same 'ha'..'i' last-name window as the subquery; both
    -- filters must stay identical or survivors could be deleted.
    AND lmoc.lastname BETWEEN 'ha' AND 'i'
    -- No match in nodup means this row is not the survivor of its group.
    AND nodup.id IS NULL;
此SQL将帮助您找到重复项
-- Reports groups of duplicate customer rows for restaurant 2296: rows sharing
-- last name, first two letters of the first name, phone and email.
-- The row id must NOT be a grouping key: the question's queries use id to tell
-- rows apart, so grouping by it yields one row per group and
-- HAVING COUNT(*) > 1 would never match.
SELECT
    MIN(lmoc.id) AS id,                 -- lowest id in the group, as a representative
    MIN(lmoc.firstname) AS firstname,   -- deterministic pick; only the 2-letter prefix is grouped
    lmoc.lastname,
    lmoc.phone,
    lmoc.email,
    -- Every id in the group, so the rows can be merged afterwards.
    -- The list is truncated at group_concat_max_len.
    GROUP_CONCAT(lmoc.id ORDER BY lmoc.id) AS duplicate_ids
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
    -- NOTE(review): limits the search to last names from 'ha' through 'i';
    -- looks like a batching window -- widen or remove it to cover every name.
    AND lmoc.lastname BETWEEN 'ha' AND 'i'
GROUP BY
    lmoc.opentable_restaurant_id,
    LEFT(lmoc.firstname, 2),
    lmoc.lastname,
    lmoc.phone,
    lmoc.email
HAVING COUNT(*) > 1;
如果表中有主键,就可以用下面的SQL在每组重复记录中保留主键最大(最新)的一行,并删除其余较旧的行
-- Deletes all but the newest row (highest primary_id) of every duplicate group
-- for restaurant 2296. A group is: same last name, same first two letters of
-- the first name, same phone and same email.
-- WARNING: destructive. Run the matching SELECT first and execute this inside
-- a transaction. GROUP BY also puts rows whose phone/email are NULL into the
-- same group, so such rows are treated as duplicates of each other.
-- NOTE(review): primary_id stands for the table's primary-key column (the
-- question's own queries call the row id "id") -- confirm the real name.
DELETE lmoc    -- multi-table DELETE names the table alias, not a column
FROM loyalty_member_opentable_customer AS lmoc
LEFT JOIN (
    -- One survivor per duplicate group. The per-row id is deliberately not a
    -- grouping key; with it every row would be its own group and nothing
    -- would ever be deleted.
    SELECT MAX(src.primary_id) AS id
    FROM loyalty_member_opentable_customer AS src
    WHERE src.opentable_restaurant_id = 2296
        AND src.lastname NOT LIKE '%Tour%'
        AND src.lastname BETWEEN 'ha' AND 'i'
    GROUP BY
        src.opentable_restaurant_id,
        LEFT(src.firstname, 2),
        src.lastname,
        src.phone,
        src.email
) AS nodup
    ON lmoc.primary_id = nodup.id
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
    -- NOTE(review): same 'ha'..'i' last-name window as the subquery; both
    -- filters must stay identical or survivors could be deleted.
    AND lmoc.lastname BETWEEN 'ha' AND 'i'
    -- No match in nodup means this row is not the survivor of its group.
    AND nodup.id IS NULL;
此SQL将帮助您找到重复项
-- Reports groups of duplicate customer rows for restaurant 2296: rows sharing
-- last name, first two letters of the first name, phone and email.
-- The row id must NOT be a grouping key: the question's queries use id to tell
-- rows apart, so grouping by it yields one row per group and
-- HAVING COUNT(*) > 1 would never match.
SELECT
    MIN(lmoc.id) AS id,                 -- lowest id in the group, as a representative
    MIN(lmoc.firstname) AS firstname,   -- deterministic pick; only the 2-letter prefix is grouped
    lmoc.lastname,
    lmoc.phone,
    lmoc.email,
    -- Every id in the group, so the rows can be merged afterwards.
    -- The list is truncated at group_concat_max_len.
    GROUP_CONCAT(lmoc.id ORDER BY lmoc.id) AS duplicate_ids
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
    -- NOTE(review): limits the search to last names from 'ha' through 'i';
    -- looks like a batching window -- widen or remove it to cover every name.
    AND lmoc.lastname BETWEEN 'ha' AND 'i'
GROUP BY
    lmoc.opentable_restaurant_id,
    LEFT(lmoc.firstname, 2),
    lmoc.lastname,
    lmoc.phone,
    lmoc.email
HAVING COUNT(*) > 1;
如果表中有主键,就可以用下面的SQL在每组重复记录中保留主键最大(最新)的一行,并删除其余较旧的行
-- Deletes all but the newest row (highest primary_id) of every duplicate group
-- for restaurant 2296. A group is: same last name, same first two letters of
-- the first name, same phone and same email.
-- WARNING: destructive. Run the matching SELECT first and execute this inside
-- a transaction. GROUP BY also puts rows whose phone/email are NULL into the
-- same group, so such rows are treated as duplicates of each other.
-- NOTE(review): primary_id stands for the table's primary-key column (the
-- question's own queries call the row id "id") -- confirm the real name.
DELETE lmoc    -- multi-table DELETE names the table alias, not a column
FROM loyalty_member_opentable_customer AS lmoc
LEFT JOIN (
    -- One survivor per duplicate group. The per-row id is deliberately not a
    -- grouping key; with it every row would be its own group and nothing
    -- would ever be deleted.
    SELECT MAX(src.primary_id) AS id
    FROM loyalty_member_opentable_customer AS src
    WHERE src.opentable_restaurant_id = 2296
        AND src.lastname NOT LIKE '%Tour%'
        AND src.lastname BETWEEN 'ha' AND 'i'
    GROUP BY
        src.opentable_restaurant_id,
        LEFT(src.firstname, 2),
        src.lastname,
        src.phone,
        src.email
) AS nodup
    ON lmoc.primary_id = nodup.id
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%'
    -- NOTE(review): same 'ha'..'i' last-name window as the subquery; both
    -- filters must stay identical or survivors could be deleted.
    AND lmoc.lastname BETWEEN 'ha' AND 'i'
    -- No match in nodup means this row is not the survivor of its group.
    AND nodup.id IS NULL;
您并没有创建临时表,而是在使用子查询;在一张有1300万行的表上,这会很慢。请用所需的全部数据创建一个真正的临时表(
SELECT-INTO
)
这就是我要尝试的:
/* Creating the temporary tables.
   MySQL has no SELECT ... INTO <table>; CREATE TEMPORARY TABLE ... AS SELECT
   is the equivalent. MySQL also cannot reference one TEMPORARY table twice in
   the same query, so the self-join below needs a second copy. */
DROP TEMPORARY TABLE IF EXISTS tempRestaurant;
DROP TEMPORARY TABLE IF EXISTS tempRestaurantCopy;

CREATE TEMPORARY TABLE tempRestaurant AS
SELECT lmoc.id, lmoc.firstname, lmoc.lastname, lmoc.phone, lmoc.email
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%';

-- Second copy, indexed on the join key so each lookup by last name is cheap.
-- NOTE(review): the inline index assumes lastname is CHAR/VARCHAR; a TEXT
-- column would need a prefix length -- confirm against the table definition.
CREATE TEMPORARY TABLE tempRestaurantCopy (INDEX idx_lastname (lastname)) AS
SELECT id, firstname, lastname, phone, email
FROM tempRestaurant;

/* Select duplicates: every ordered pair of different rows with the same last
   name, the same first two letters of the first name, and the same phone or
   email. Each pair appears twice, once as (a, b) and once as (b, a). */
SELECT
    t1.id,
    t1.firstname,
    t1.lastname,
    t1.phone,
    t1.email,
    t2.id AS dup_id,
    t2.firstname AS dup_firstname,
    t2.lastname AS dup_lastname,
    t2.phone AS dup_phone,
    t2.email AS dup_email
FROM tempRestaurant AS t1
INNER JOIN tempRestaurantCopy AS t2
    -- Plain equality instead of STRCMP(...) = 0 so the index can be used.
    ON t1.lastname = t2.lastname
    AND t1.id != t2.id
WHERE LEFT(t1.firstname, 2) = LEFT(t2.firstname, 2)
    AND (t1.phone = t2.phone OR t1.email = t2.email);
您并没有创建临时表,而是在使用子查询;在一张有1300万行的表上,这会很慢。请用所需的全部数据创建一个真正的临时表(
SELECT-INTO
)
这就是我要尝试的:
/* Creating the temporary tables.
   MySQL has no SELECT ... INTO <table>; CREATE TEMPORARY TABLE ... AS SELECT
   is the equivalent. MySQL also cannot reference one TEMPORARY table twice in
   the same query, so the self-join below needs a second copy. */
DROP TEMPORARY TABLE IF EXISTS tempRestaurant;
DROP TEMPORARY TABLE IF EXISTS tempRestaurantCopy;

CREATE TEMPORARY TABLE tempRestaurant AS
SELECT lmoc.id, lmoc.firstname, lmoc.lastname, lmoc.phone, lmoc.email
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%';

-- Second copy, indexed on the join key so each lookup by last name is cheap.
-- NOTE(review): the inline index assumes lastname is CHAR/VARCHAR; a TEXT
-- column would need a prefix length -- confirm against the table definition.
CREATE TEMPORARY TABLE tempRestaurantCopy (INDEX idx_lastname (lastname)) AS
SELECT id, firstname, lastname, phone, email
FROM tempRestaurant;

/* Select duplicates: every ordered pair of different rows with the same last
   name, the same first two letters of the first name, and the same phone or
   email. Each pair appears twice, once as (a, b) and once as (b, a). */
SELECT
    t1.id,
    t1.firstname,
    t1.lastname,
    t1.phone,
    t1.email,
    t2.id AS dup_id,
    t2.firstname AS dup_firstname,
    t2.lastname AS dup_lastname,
    t2.phone AS dup_phone,
    t2.email AS dup_email
FROM tempRestaurant AS t1
INNER JOIN tempRestaurantCopy AS t2
    -- Plain equality instead of STRCMP(...) = 0 so the index can be used.
    ON t1.lastname = t2.lastname
    AND t1.id != t2.id
WHERE LEFT(t1.firstname, 2) = LEFT(t2.firstname, 2)
    AND (t1.phone = t2.phone OR t1.email = t2.email);
您并没有创建临时表,而是在使用子查询;在一张有1300万行的表上,这会很慢。请用所需的全部数据创建一个真正的临时表(
SELECT-INTO
)
这就是我要尝试的:
/* Creating the temporary tables.
   MySQL has no SELECT ... INTO <table>; CREATE TEMPORARY TABLE ... AS SELECT
   is the equivalent. MySQL also cannot reference one TEMPORARY table twice in
   the same query, so the self-join below needs a second copy. */
DROP TEMPORARY TABLE IF EXISTS tempRestaurant;
DROP TEMPORARY TABLE IF EXISTS tempRestaurantCopy;

CREATE TEMPORARY TABLE tempRestaurant AS
SELECT lmoc.id, lmoc.firstname, lmoc.lastname, lmoc.phone, lmoc.email
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%';

-- Second copy, indexed on the join key so each lookup by last name is cheap.
-- NOTE(review): the inline index assumes lastname is CHAR/VARCHAR; a TEXT
-- column would need a prefix length -- confirm against the table definition.
CREATE TEMPORARY TABLE tempRestaurantCopy (INDEX idx_lastname (lastname)) AS
SELECT id, firstname, lastname, phone, email
FROM tempRestaurant;

/* Select duplicates: every ordered pair of different rows with the same last
   name, the same first two letters of the first name, and the same phone or
   email. Each pair appears twice, once as (a, b) and once as (b, a). */
SELECT
    t1.id,
    t1.firstname,
    t1.lastname,
    t1.phone,
    t1.email,
    t2.id AS dup_id,
    t2.firstname AS dup_firstname,
    t2.lastname AS dup_lastname,
    t2.phone AS dup_phone,
    t2.email AS dup_email
FROM tempRestaurant AS t1
INNER JOIN tempRestaurantCopy AS t2
    -- Plain equality instead of STRCMP(...) = 0 so the index can be used.
    ON t1.lastname = t2.lastname
    AND t1.id != t2.id
WHERE LEFT(t1.firstname, 2) = LEFT(t2.firstname, 2)
    AND (t1.phone = t2.phone OR t1.email = t2.email);
您并没有创建临时表,而是在使用子查询;在一张有1300万行的表上,这会很慢。请用所需的全部数据创建一个真正的临时表(
SELECT-INTO
)
这就是我要尝试的:
/* Creating the temporary tables.
   MySQL has no SELECT ... INTO <table>; CREATE TEMPORARY TABLE ... AS SELECT
   is the equivalent. MySQL also cannot reference one TEMPORARY table twice in
   the same query, so the self-join below needs a second copy. */
DROP TEMPORARY TABLE IF EXISTS tempRestaurant;
DROP TEMPORARY TABLE IF EXISTS tempRestaurantCopy;

CREATE TEMPORARY TABLE tempRestaurant AS
SELECT lmoc.id, lmoc.firstname, lmoc.lastname, lmoc.phone, lmoc.email
FROM loyalty_member_opentable_customer AS lmoc
WHERE lmoc.opentable_restaurant_id = 2296
    AND lmoc.lastname NOT LIKE '%Tour%';

-- Second copy, indexed on the join key so each lookup by last name is cheap.
-- NOTE(review): the inline index assumes lastname is CHAR/VARCHAR; a TEXT
-- column would need a prefix length -- confirm against the table definition.
CREATE TEMPORARY TABLE tempRestaurantCopy (INDEX idx_lastname (lastname)) AS
SELECT id, firstname, lastname, phone, email
FROM tempRestaurant;

/* Select duplicates: every ordered pair of different rows with the same last
   name, the same first two letters of the first name, and the same phone or
   email. Each pair appears twice, once as (a, b) and once as (b, a). */
SELECT
    t1.id,
    t1.firstname,
    t1.lastname,
    t1.phone,
    t1.email,
    t2.id AS dup_id,
    t2.firstname AS dup_firstname,
    t2.lastname AS dup_lastname,
    t2.phone AS dup_phone,
    t2.email AS dup_email
FROM tempRestaurant AS t1
INNER JOIN tempRestaurantCopy AS t2
    -- Plain equality instead of STRCMP(...) = 0 so the index can be used.
    ON t1.lastname = t2.lastname
    AND t1.id != t2.id
WHERE LEFT(t1.firstname, 2) = LEFT(t2.firstname, 2)
    AND (t1.phone = t2.phone OR t1.email = t2.email);
听起来是个不错的策略。玩得开心。这个问题似乎是离题的,因为没有这样的问题。如果发布表结构和SQL查询,它将非常有用。此外,有关当前绩效的一些信息将有助于衡量哪些方面可以改进。试着把它换成一个问题。神圣的无格式疑问之母。。。。你需要看看如何提问。。。aka提供一些数据以了解问题。。提供可读取的格式化查询。。并给出一个预期的输出您不是在创建一个临时表,而是在使用子查询,这将很慢,有1300万行。创建一个真正的临时表,其中包含您需要的所有数据(
SELECT-INTO
)。这听起来是个不错的策略。玩得开心。这个问题似乎是离题的,因为没有这样的问题。如果发布表结构和SQL查询,它将非常有用。此外,有关当前绩效的一些信息将有助于衡量哪些方面可以改进。试着把它换成一个问题。神圣的无格式疑问之母。。。。你需要看看如何提问。。。aka提供一些数据以了解问题。。提供可读取的格式化查询。。并给出一个预期的输出您不是在创建一个临时表,而是在使用子查询,这将很慢,有1300万行。创建一个真正的临时表,其中包含您需要的所有数据(SELECT-INTO
)。这听起来是个不错的策略。玩得开心。这个问题似乎是离题的,因为没有这样的问题。如果发布表结构和SQL查询,它将非常有用。此外,有关当前绩效的一些信息将有助于衡量哪些方面可以改进。试着把它换成一个问题。神圣的无格式疑问之母。。。。你需要看看如何提问。。。aka提供一些数据以了解问题。。提供可读取的格式化查询。。并给出一个预期的输出您不是在创建一个临时表,而是在使用子查询,这将很慢,有1300万行。创建一个真正的临时表,其中包含您需要的所有数据(SELECT-INTO
)。这听起来是个不错的策略。玩得开心。这个问题似乎是离题的,因为没有这样的问题。如果发布表结构和SQL查询,它将非常有用。此外,有关当前绩效的一些信息将有助于衡量哪些方面可以改进。试着把它换成一个问题。神圣的无格式疑问之母。。。。你需要看看如何提问。。。aka提供一些数据以了解问题。。提供可读取的格式化查询。。并给出一个预期的输出您不是在创建一个临时表,而是在使用子查询,这将很慢,有1300万行。创建一个真正的临时表,其中包含您需要的所有数据(SELECT INTO
)。这样做丢掉了判定标准中“电话或电子邮件任一匹配”的部分。有些重复记录电话相同,有些重复记录电子邮件相同,但两者都相同的很少。我们还希望得到两条重复记录各自的ID,以便将它们合并。这样做丢掉了判定标准中“电话或电子邮件任一匹配”的部分。有些重复记录电话相同,有些重复记录电子邮件相同,但两者都相同的很少。我们还希望得到两条重复记录各自的ID,以便将它们合并。这样做丢掉了判定标准中“电话或电子邮件任一匹配”的部分。有些重复记录有匹配的