SQL Server:使用临时表删除重复行及其在其他表中的依赖项
我有一个产品表,其中包含重复的行,这些行的唯一 Id 被其他 6 个表引用。我想删除产品表中的这些重复行,以及其他表中依赖于这些产品的行。我考虑使用临时表来:为与我的产品相关的每个表创建全局临时表;获取重复记录;获取它们的依赖项;保留 Id 最小的第一行;删除其他行。我有这个想法,但我不知道如何很好地实现它,也不确定选择临时表是否正确。(标签:sql-server, tsql, duplicates, temp-tables)
感谢您的帮助和建议。一种方法是将重复行的 id 存储在变量中,然后根据这些 id 删除重复记录。假设 ProductRefTable 是引用表,ProductId 是外键。
-- Demo table. Rows that share the same (colA, colB) pair are treated as
-- duplicates of one entity.
CREATE TABLE dbo.hasduplicates
(
    id              INT IDENTITY,   -- server-generated row id
    colA            VARCHAR(10),    -- part 1 of the entity/unique combination
    colB            INT,            -- part 2 of the entity/unique combination
    someOtherColumn VARCHAR(40)
);
-- Seed three groups of duplicates: (A, 1) three times, (A, 2) twice and
-- (B, 1) three times.
INSERT INTO dbo.hasduplicates (colA, colB, someOtherColumn)
VALUES
    ('A', 1, 'A1 - 1'),
    ('A', 1, 'A1 - 2'),
    ('A', 1, 'A1 - 3'),
    --
    ('A', 2, 'A2 - 1'),
    ('A', 2, 'A2 - 2'),
    --
    ('B', 1, 'B1 - 1'),
    ('B', 1, 'B1 - 2'),
    ('B', 1, 'B1 - 3');

-- Show the starting data.
SELECT
    id,
    colA,
    colB,
    someOtherColumn
FROM dbo.hasduplicates;
-- Temp table holding the ids of the duplicate rows that are to be deleted.
CREATE TABLE #ToBedeleted (IdToDelete INT);

-- Number the rows inside each (colA, colB) group by ascending id. The row
-- numbered 1 (lowest id) is the survivor; every later row is a duplicate.
WITH dup AS
(
    SELECT
        id,
        ROW_NUMBER() OVER (
            PARTITION BY colA, colB /*<--cols of your entity go here*/
            ORDER BY id
        ) AS RowNum
    FROM dbo.hasduplicates
)
INSERT INTO #ToBedeleted (IdToDelete)
SELECT id   -- spelled exactly like the column so it also resolves under a case-sensitive collation
FROM dup
WHERE RowNum >= 2;

-- Contains the ids for deletion.
SELECT IdToDelete
FROM #ToBedeleted;
--cleanup the referencing tables
-- Template only: the statements below are commented out and must be adapted to
-- the real referencing tables before use. Referencing tables are cleaned first,
-- the products table last.
-- NOTE(review): Table1Id .. Table6Id are placeholder column names; presumably
-- each should be the column in that table that stores the product id (the
-- foreign key) -- confirm against the actual schema.
/*
DELETE FROM dbo.Table1 WHERE Table1Id IN (SELECT IdToDelete FROM #ToBedeleted);
DELETE FROM dbo.Table2 WHERE Table2Id IN (SELECT IdToDelete FROM #ToBedeleted);
.............
DELETE FROM dbo.Table6 WHERE Table6Id IN (SELECT IdToDelete FROM #ToBedeleted);
--finally cleanup your products table
DELETE FROM dbo.hasduplicates WHERE Id IN (SELECT IdToDelete FROM #ToBedeleted);
*/
-- Demo teardown. Remove the leading dashes from the two marker lines to turn
-- them into a block comment and keep the objects for inspection.
--/*
DROP TABLE #ToBedeleted;
DROP TABLE dbo.hasduplicates;
--*/
-- Sample product table with a server-generated primary key.
CREATE TABLE Product
(
    ID    INT NOT NULL IDENTITY(1,1),
    Value INT,
    CONSTRAINT PK_ID PRIMARY KEY (ID)
);

-- Sample data: the value 3 appears twice and the value 5 three times.
INSERT INTO Product ([Value])
VALUES
    (1),
    (2),
    (3),
    (4),
    (5),
    (5),
    (3),
    (5);
假设重复的产品具有相同的 product.name。如果判定重复还需要更多条件,请相应地调整 EXISTS 中的条件。
-- Temp table with the ids of the duplicate products that will be removed.
CREATE TABLE #tmpProductsToDelete (product_id INT PRIMARY KEY);

--
-- Collect every product for which another row with the same name and a LOWER
-- id exists. The row with the smallest id in each group of duplicates has no
-- such counterpart, so it is the one that survives.
-- Rows whose name is NULL never match (NULL = NULL is not true) and are
-- therefore never collected.
--
INSERT INTO #tmpProductsToDelete (product_id)
SELECT dup.id
FROM dbo.Product AS dup
WHERE EXISTS
(
    SELECT 1
    FROM dbo.Product AS keeper
    WHERE keeper.name = dup.name
    -- and keeper.colA = dup.colA
    -- and keeper.colB = dup.colB
      AND keeper.id < dup.id
);
然后再次检查这些是否确实是要删除的产品:
-- List the full product rows whose id was collected for deletion, so they can
-- be reviewed before anything is removed.
SELECT prod.*
FROM dbo.Product AS prod
WHERE EXISTS
(
    SELECT 1
    FROM #tmpProductsToDelete AS doomed
    WHERE doomed.product_id = prod.id
);
也许可以先把这些重复行备份到一张副本表(cpyProduct)中。
对于通过外键(FK)引用产品表的 6 个表,也做同样的备份。
-- Create the backup table the first time the script runs. Both branches use
-- TOP 0, so SELECT ... INTO copies only the column layout of dbo.Product plus
-- an extra RemoveOn column and inserts no rows.
-- NOTE(review): combining two SELECTs this way is presumably there so that the
-- new table does not inherit the IDENTITY property and so that RemoveOn (NULL
-- in the second branch) becomes nullable -- confirm against the SELECT INTO docs.
IF OBJECT_ID('dbo.cpyProduct', 'U') IS NULL
BEGIN
    SELECT TOP 0 *, GETDATE() AS RemoveOn
    INTO dbo.cpyProduct
    FROM dbo.Product
    UNION ALL   -- ALL: no duplicate elimination, so non-comparable column types (text, xml, ...) cannot raise an error
    SELECT TOP 0 *, NULL
    FROM dbo.Product;
END;

-- Archive the product rows that are about to be deleted, stamped with the
-- current date and time.
INSERT INTO dbo.cpyProduct
SELECT *, GETDATE() AS RemoveOn
FROM dbo.Product
WHERE id IN (SELECT product_id FROM #tmpProductsToDelete);
-- First run only: build an empty backup table shaped like dbo.Table1 plus a
-- RemoveOn column (both branches select zero rows).
IF OBJECT_ID('dbo.cpyTable1', 'U') IS NULL
BEGIN
    SELECT TOP (0) src.*, GETDATE() AS RemoveOn
    INTO dbo.cpyTable1
    FROM dbo.Table1 AS src
    UNION ALL
    SELECT TOP (0) src.*, NULL
    FROM dbo.Table1 AS src;
END;

-- Copy the Table1 rows that belong to a product marked for deletion, stamped
-- with the current date and time.
INSERT INTO dbo.cpyTable1
SELECT src.*, GETDATE() AS RemoveOn
FROM dbo.Table1 AS src
WHERE EXISTS
(
    SELECT 1
    FROM #tmpProductsToDelete AS doomed
    WHERE doomed.product_id = src.product_id
);
-- First run only: build an empty backup table shaped like dbo.Table2 plus a
-- RemoveOn column (both branches select zero rows).
IF OBJECT_ID('dbo.cpyTable2', 'U') IS NULL
BEGIN
    SELECT TOP (0) src.*, GETDATE() AS RemoveOn
    INTO dbo.cpyTable2
    FROM dbo.Table2 AS src
    UNION ALL
    SELECT TOP (0) src.*, NULL
    FROM dbo.Table2 AS src;
END;

-- Copy the Table2 rows that belong to a product marked for deletion, stamped
-- with the current date and time.
INSERT INTO dbo.cpyTable2
SELECT src.*, GETDATE() AS RemoveOn
FROM dbo.Table2 AS src
WHERE EXISTS
(
    SELECT 1
    FROM #tmpProductsToDelete AS doomed
    WHERE doomed.product_id = src.product_id
);
-- Apply the same backup pattern to the remaining 4 referencing tables.

-- Review what has been archived so far.
SELECT bak.*
FROM dbo.cpyProduct AS bak;

SELECT bak.*
FROM dbo.cpyTable1 AS bak;

SELECT bak.*
FROM dbo.cpyTable2 AS bak;
然后清理
--
-- Delete the duplicates from the 6 tables with a FK to the products table and
-- then from the products table itself, as one atomic unit. Without the
-- transaction, a failure part-way through would leave some tables cleaned and
-- others not.
--
SET XACT_ABORT ON;  -- a run-time error rolls back the whole transaction (setting stays ON for the session)

BEGIN TRANSACTION;

    -- referencing rows first, so nothing still points at a product when it is deleted
    DELETE FROM dbo.Table1
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    DELETE FROM dbo.Table2
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    DELETE FROM dbo.Table3
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    DELETE FROM dbo.Table4
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    DELETE FROM dbo.Table5
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    DELETE FROM dbo.Table6
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    -- remove the dups from the base table
    DELETE FROM dbo.Product
    WHERE id IN (SELECT product_id FROM #tmpProductsToDelete);

COMMIT TRANSACTION;
(可在 rextester 上测试。)评论:输入和预期输出是什么?——输入:产品表包含重复的行,其相关表也包含许多重复的行 => 输出:删除所有重复的行。——@xxx 您可以分享一下您的数据模型吗?有示例吗?……这 6 个引用表都有吗?——SSMS 只是一个访问数据库的工具,SQL Server 的版本更重要。不过这可能不需要花哨的新功能:只需把要删除的 product.id 加载到 1 个临时表中,先从 6 个表中删除它们,然后再从 product 表中删除它们。
--
-- Delete the duplicates from the 6 tables with a FK to the products table and
-- then from the products table itself, inside a single transaction so that
-- either every table is cleaned or none is.
--
SET XACT_ABORT ON;  -- a run-time error rolls back the whole transaction (setting stays ON for the session)

BEGIN TRANSACTION;

    -- referencing rows first, so nothing still points at a product when it is deleted
    DELETE FROM dbo.Table1
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    DELETE FROM dbo.Table2
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    DELETE FROM dbo.Table3
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    DELETE FROM dbo.Table4
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    DELETE FROM dbo.Table5
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    DELETE FROM dbo.Table6
    WHERE product_id IN (SELECT product_id FROM #tmpProductsToDelete);

    -- remove the dups from the base table
    DELETE FROM dbo.Product
    WHERE id IN (SELECT product_id FROM #tmpProductsToDelete);

COMMIT TRANSACTION;