如何优化 PostgreSQL 存储过程
我有 6100 万条带有状态的非唯一电子邮件记录,需要根据状态按一定逻辑对这些电子邮件去重。我编写了一个存储过程,但它的运行时间很长。如何优化这个过程的执行时间?如何优化 PostgreSQL 存储过程(标签:postgresql, stored-procedures, plpgsql)。我有 6100 万条带有状态的非唯一电子邮件记录,需要根据状态按一定逻辑对这些电子邮件去重。我编写了一个存储过程,但它的运行时间很长。如何优化这个过程的执行时间? CREATE OR REPLACE FUNCTION public.load_oxy_emails() RETURNS boolean AS $$ DECLARE row record; rec record; new_id int; BEGIN FOR row IN SEL
-- Deduplicates oxy_email into oxy_emails_clean, one source row at a time.
--
-- Source rows are visited in ascending id order. For each row:
--   * if oxy_emails_clean already holds a row with the same email, that row's
--     status is set to 3 when the source row's status is 3;
--   * otherwise a new oxy_emails_clean row is inserted with the source email
--     and status, and a (new id, website_id) pair is inserted into
--     oxy_emails_clean_websites_relation.
--
-- Returns: always true; any error aborts the call with an exception.
CREATE OR REPLACE FUNCTION public.load_oxy_emails() RETURNS boolean AS $$
DECLARE
    src record;
    -- bigint because nextval() returns bigint; avoids truncating large ids.
    existing_id bigint;
    new_id bigint;
BEGIN
    FOR src IN
        SELECT id, email, status, website_id
        FROM oxy_email
        ORDER BY id
    LOOP
        SELECT id INTO existing_id
        FROM oxy_emails_clean
        WHERE email = src.email;

        -- FOUND reports whether the lookup returned a row. Testing the fetched
        -- record with IS NOT NULL is wrong here: for a row value it is true only
        -- when every column is non-null, so a match containing any NULL column
        -- would be treated as missing and inserted a second time.
        IF FOUND THEN
            IF src.status = 3 THEN
                UPDATE oxy_emails_clean
                SET status = 3
                WHERE id = existing_id;
            END IF;
        ELSE
            -- RETURNING hands back the generated id without a second round trip
            -- to the sequence.
            INSERT INTO oxy_emails_clean (id, email, status)
            VALUES (nextval('oxy_emails_clean_id_seq'), src.email, src.status)
            RETURNING id INTO new_id;

            INSERT INTO oxy_emails_clean_websites_relation (oxy_emails_clean_id, website_id)
            VALUES (new_id, src.website_id);
        END IF;
    END LOOP;

    RETURN true;
END;
$$
LANGUAGE plpgsql;
如何优化此过程的执行时间
-- Deduplicates oxy_email into oxy_emails_clean, one source row at a time.
--
-- Source rows are visited in ascending id order. For each row:
--   * if oxy_emails_clean already holds a row with the same email, that row's
--     status is set to 3 when the source row's status is 3;
--   * otherwise a new oxy_emails_clean row is inserted with the source email
--     and status, and a (new id, website_id) pair is inserted into
--     oxy_emails_clean_websites_relation.
--
-- Returns: always true; any error aborts the call with an exception.
CREATE OR REPLACE FUNCTION public.load_oxy_emails() RETURNS boolean AS $$
DECLARE
    src record;
    -- bigint because nextval() returns bigint; avoids truncating large ids.
    existing_id bigint;
    new_id bigint;
BEGIN
    FOR src IN
        SELECT id, email, status, website_id
        FROM oxy_email
        ORDER BY id
    LOOP
        SELECT id INTO existing_id
        FROM oxy_emails_clean
        WHERE email = src.email;

        -- FOUND reports whether the lookup returned a row. Testing the fetched
        -- record with IS NOT NULL is wrong here: for a row value it is true only
        -- when every column is non-null, so a match containing any NULL column
        -- would be treated as missing and inserted a second time.
        IF FOUND THEN
            IF src.status = 3 THEN
                UPDATE oxy_emails_clean
                SET status = 3
                WHERE id = existing_id;
            END IF;
        ELSE
            -- RETURNING hands back the generated id without a second round trip
            -- to the sequence.
            INSERT INTO oxy_emails_clean (id, email, status)
            VALUES (nextval('oxy_emails_clean_id_seq'), src.email, src.status)
            RETURNING id INTO new_id;

            INSERT INTO oxy_emails_clean_websites_relation (oxy_emails_clean_id, website_id)
            VALUES (new_id, src.website_id);
        END IF;
    END LOOP;

    RETURN true;
END;
$$
LANGUAGE plpgsql;
不要使用循环。
逐行处理(row-by-row,也被戏称为“slow-by-slow”,即一行一行地慢慢处理)几乎总是比批量更改(用一条语句一次性处理大量行)慢得多。
使用一条语句即可轻松更改状态:
-- Promote a cleaned e-mail to status 3 when any source row with the same
-- address has status 3.
-- Rows are matched on email, not id: oxy_emails_clean ids come from their own
-- sequence and do not correspond to oxy_email ids.
-- EXISTS is used instead of UPDATE ... FROM because several source rows can
-- share one address; rows already at status 3 are skipped to avoid no-op writes.
UPDATE oxy_emails_clean AS oec
SET status = 3
WHERE oec.status IS DISTINCT FROM 3
  AND EXISTS (
      SELECT 1
      FROM oxy_email AS oe
      WHERE oe.email = oec.email
        AND oe.status = 3
  );
行的复制可以使用以下链接完成:
如何优化此过程的执行时间
-- Deduplicates oxy_email into oxy_emails_clean, one source row at a time.
--
-- Source rows are visited in ascending id order. For each row:
--   * if oxy_emails_clean already holds a row with the same email, that row's
--     status is set to 3 when the source row's status is 3;
--   * otherwise a new oxy_emails_clean row is inserted with the source email
--     and status, and a (new id, website_id) pair is inserted into
--     oxy_emails_clean_websites_relation.
--
-- Returns: always true; any error aborts the call with an exception.
CREATE OR REPLACE FUNCTION public.load_oxy_emails() RETURNS boolean AS $$
DECLARE
    src record;
    -- bigint because nextval() returns bigint; avoids truncating large ids.
    existing_id bigint;
    new_id bigint;
BEGIN
    FOR src IN
        SELECT id, email, status, website_id
        FROM oxy_email
        ORDER BY id
    LOOP
        SELECT id INTO existing_id
        FROM oxy_emails_clean
        WHERE email = src.email;

        -- FOUND reports whether the lookup returned a row. Testing the fetched
        -- record with IS NOT NULL is wrong here: for a row value it is true only
        -- when every column is non-null, so a match containing any NULL column
        -- would be treated as missing and inserted a second time.
        IF FOUND THEN
            IF src.status = 3 THEN
                UPDATE oxy_emails_clean
                SET status = 3
                WHERE id = existing_id;
            END IF;
        ELSE
            -- RETURNING hands back the generated id without a second round trip
            -- to the sequence.
            INSERT INTO oxy_emails_clean (id, email, status)
            VALUES (nextval('oxy_emails_clean_id_seq'), src.email, src.status)
            RETURNING id INTO new_id;

            INSERT INTO oxy_emails_clean_websites_relation (oxy_emails_clean_id, website_id)
            VALUES (new_id, src.website_id);
        END IF;
    END LOOP;

    RETURN true;
END;
$$
LANGUAGE plpgsql;
不要使用循环。
逐行处理(row-by-row,也被戏称为“slow-by-slow”,即一行一行地慢慢处理)几乎总是比批量更改(用一条语句一次性处理大量行)慢得多。
使用一条语句即可轻松更改状态:
-- Promote a cleaned e-mail to status 3 when any source row with the same
-- address has status 3.
-- Rows are matched on email, not id: oxy_emails_clean ids come from their own
-- sequence and do not correspond to oxy_email ids.
-- EXISTS is used instead of UPDATE ... FROM because several source rows can
-- share one address; rows already at status 3 are skipped to avoid no-op writes.
UPDATE oxy_emails_clean AS oec
SET status = 3
WHERE oec.status IS DISTINCT FROM 3
  AND EXISTS (
      SELECT 1
      FROM oxy_email AS oe
      WHERE oe.email = oec.email
        AND oe.status = 3
  );
行的复制可以使用以下链接完成:
如何优化此过程的执行时间?不要使用带有游标/循环的过程。相反,您可以使用两条独立的 SQL 语句,它们可以通过链式 CTE(可修改数据的 WITH 子句)串联在一起。如何优化此过程的执行时间?不要使用带有游标/循环的过程。相反,您可以使用两条独立的 SQL 语句,它们可以通过链式 CTE 串联在一起。