Sql server 使用临时表删除其他表中的重复行及其依赖项_Sql Server_Tsql_Duplicates_Temp Tables

Sql server 使用临时表删除其他表中的重复行及其依赖项

sql-server tsql

Sql server 使用临时表删除其他表中的重复行及其依赖项,sql-server,tsql,duplicates,temp-tables,Sql Server,Tsql,Duplicates,Temp Tables,我有一个产品表wish包含重复的行，其uniq Id在其他6个表中引用我想删除产品表中以及依赖于我的产品的其他表中的这些重复行我考虑使用临时表来：为与我的产品相关的每个表创建全局临时表获取重复记录获取它的依赖项使用最小Id保存第一行删除其他行我有这个想法，但我不知道如何很好地实施它我不知道，临时表的选择是否正确感谢您的帮助和建议。一种方法是将重复id存储在变量中，并根据id删除重复记录。假设ProductRefTable是参考表，ProductId是外键 create tab

我有一个产品表，其中包含重复的行，这些行的唯一 Id 被其他 6 个表引用。

我想删除产品表中以及依赖于我的产品的其他表中的这些重复行

我考虑使用临时表来：

为与我的产品相关的每个表创建全局临时表；获取重复记录；获取它们的依赖项；保留 Id 最小的第一行；删除其他行。我有这个想法，但我不知道如何很好地实施它。

我不知道，临时表的选择是否正确

感谢您的帮助和建议。

一种方法是将重复id存储在变量中，并根据id删除重复记录。假设ProductRefTable是参考表，ProductId是外键

-- Demo table. Rows that share the same (colA, colB) pair are duplicates of each other.
CREATE TABLE dbo.hasduplicates
(
    id              INT IDENTITY,
    -- (colA, colB) is assumed to be the entity/unique combination
    colA            VARCHAR(10),
    colB            INT,
    someOtherColumn VARCHAR(40)
);

-- Seed three duplicate groups: ('A', 1) x3, ('A', 2) x2 and ('B', 1) x3.
INSERT INTO dbo.hasduplicates (colA, colB, someOtherColumn)
VALUES
    ('A', 1, 'A1 - 1'),
    ('A', 1, 'A1 - 2'),
    ('A', 1, 'A1 - 3'),
    ('A', 2, 'A2 - 1'),
    ('A', 2, 'A2 - 2'),
    ('B', 1, 'B1 - 1'),
    ('B', 1, 'B1 - 2'),
    ('B', 1, 'B1 - 3');

-- Show the seeded rows.
SELECT
    id,
    colA,
    colB,
    someOtherColumn
FROM dbo.hasduplicates;


-- Temp table holding the ids of the duplicate rows that are to be deleted.
CREATE TABLE #ToBedeleted (IdToDelete INT);

-- Number the rows of every (colA, colB) group by ascending id. Row 1 is the
-- lowest id of its group and is kept; every row numbered 2 or higher is queued.
-- The column is referenced as "id", exactly as declared, so the statement also
-- works in databases with a case-sensitive collation.
WITH numbered AS
(
    SELECT
        id,
        ROW_NUMBER() OVER (
            PARTITION BY colA, colB /* <-- the columns of your entity go here */
            ORDER BY id
        ) AS RowNum
    FROM dbo.hasduplicates
)
INSERT INTO #ToBedeleted (IdToDelete)
SELECT id
FROM numbered
WHERE RowNum >= 2;

-- Show the ids collected for deletion.
SELECT IdToDelete
FROM #ToBedeleted;

-- Template for the actual cleanup: referencing tables first, then the table
-- that holds the duplicates. Left commented out; adapt names before running.
-- NOTE(review): Table1Id ... Table6Id presumably stand for the column in each
-- referencing table that points at the duplicated id -- confirm against the schema.
/*
DELETE FROM dbo.Table1 WHERE Table1Id IN (SELECT IdToDelete FROM #ToBedeleted);
DELETE FROM dbo.Table2 WHERE Table2Id IN (SELECT IdToDelete FROM #ToBedeleted);
.............
DELETE FROM dbo.Table6 WHERE Table6Id IN (SELECT IdToDelete FROM #ToBedeleted);
--finally cleanup your products table
DELETE FROM dbo.hasduplicates WHERE Id IN (SELECT IdToDelete FROM #ToBedeleted);
*/

-- Tear down the demo objects.
DROP TABLE #ToBedeleted;
DROP TABLE dbo.hasduplicates;

  -- Sample Product table with an identity primary key and a plain integer value.
  CREATE TABLE Product
  (
      ID    INT IDENTITY(1, 1) NOT NULL,
      Value INT,
      CONSTRAINT PK_ID PRIMARY KEY (ID)
  );

  -- Seed data: the values 3 and 5 occur more than once.
  INSERT INTO Product ([Value])
  VALUES
      (1),
      (2),
      (3),
      (4),
      (5),
      (5),
      (3),
      (5);

假设重复产品具有相同的product.name

如果判定重复还需要更多条件（更多列），则相应地调整 EXISTS 中的条件。

-- Work list of the Product ids that are duplicates and have to be removed.
CREATE TABLE #tmpProductsToDelete (product_id INT PRIMARY KEY);

--
-- Queue every product for which a row with the same name and a LOWER id exists.
-- The row with the smallest id of each name has no such counterpart, so it is
-- the one that is kept; all later copies end up in the temp table.
--
INSERT INTO #tmpProductsToDelete (product_id)
SELECT dup.id
FROM dbo.Product AS dup
WHERE EXISTS
(
    SELECT 1
    FROM dbo.Product AS earlier
    WHERE earlier.name = dup.name
      -- extend with further columns when they are part of the duplicate criteria:
      -- AND earlier.colA = dup.colA
      -- AND earlier.colB = dup.colB
      AND earlier.id < dup.id
);

然后再次检查这些是否是要删除的产品

-- Review step: list the Product rows whose ids were queued in #tmpProductsToDelete.
SELECT p.*
FROM dbo.Product AS p
WHERE EXISTS
(
    SELECT 1
    FROM #tmpProductsToDelete AS queued
    WHERE queued.product_id = p.id
);

也许可以先把这些重复行复制到一个备份表（cpyProduct）中。对于通过外键引用产品表的 6 个表也同样处理。

-- Back up the Product rows that are about to be deleted.
-- The backup table is created only when it does not exist yet.
IF OBJECT_ID('dbo.cpyProduct', 'U') IS NULL
BEGIN
    -- TOP 0 copies the column layout without copying any rows; RemoveOn is an
    -- extra column that records when a row was backed up.
    -- UNION ALL (not UNION) is used because UNION has to de-duplicate, which
    -- fails when dbo.Product contains a column of a non-comparable type.
    -- NOTE(review): the second branch presumably exists so that the identity
    -- property is not carried over and RemoveOn becomes nullable -- confirm.
    SELECT TOP 0 *, GETDATE() AS RemoveOn
    INTO dbo.cpyProduct
    FROM dbo.Product
    UNION ALL
    SELECT TOP 0 *, NULL
    FROM dbo.Product;
END;

-- Copy the queued Product rows, stamped with the current date and time.
INSERT INTO dbo.cpyProduct
SELECT *, GETDATE() AS RemoveOn
FROM dbo.Product
WHERE id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

-- Back up the dbo.Table1 rows that belong to the queued products.
-- The backup table is created only when it does not exist yet.
IF OBJECT_ID('dbo.cpyTable1', 'U') IS NULL
BEGIN
    -- TOP 0 copies the column layout only; RemoveOn is the extra backup timestamp column.
    SELECT TOP 0
        *,
        GETDATE() AS RemoveOn
    INTO dbo.cpyTable1
    FROM dbo.Table1
    UNION ALL
    SELECT TOP 0
        *,
        NULL
    FROM dbo.Table1;
END;

-- Copy the affected rows, stamped with the current date and time.
INSERT INTO dbo.cpyTable1
SELECT
    src.*,
    GETDATE() AS RemoveOn
FROM dbo.Table1 AS src
WHERE EXISTS
(
    SELECT 1
    FROM #tmpProductsToDelete AS queued
    WHERE queued.product_id = src.product_id
);

-- Back up the dbo.Table2 rows that belong to the queued products.
-- The backup table is created only when it does not exist yet.
IF OBJECT_ID('dbo.cpyTable2', 'U') IS NULL
BEGIN
    -- TOP 0 copies the column layout only; RemoveOn is the extra backup timestamp column.
    SELECT TOP 0
        *,
        GETDATE() AS RemoveOn
    INTO dbo.cpyTable2
    FROM dbo.Table2
    UNION ALL
    SELECT TOP 0
        *,
        NULL
    FROM dbo.Table2;
END;

-- Copy the affected rows, stamped with the current date and time.
INSERT INTO dbo.cpyTable2
SELECT
    src.*,
    GETDATE() AS RemoveOn
FROM dbo.Table2 AS src
WHERE EXISTS
(
    SELECT 1
    FROM #tmpProductsToDelete AS queued
    WHERE queued.product_id = src.product_id
);

-- Repeat the backup pattern above for the remaining 4 referencing tables.

-- Inspect what has been backed up so far.
SELECT *
FROM dbo.cpyProduct;

SELECT *
FROM dbo.cpyTable1;

SELECT *
FROM dbo.cpyTable2;

然后清理

--
-- Remove the queued duplicates: first from the 6 tables with a FK to the
-- products table, then from the products table itself.
-- All deletes run inside one transaction so that a failure part-way through
-- cannot leave some tables cleaned up and others not.
--
SET XACT_ABORT ON;  -- a run-time error rolls back the whole transaction

BEGIN TRANSACTION;

DELETE FROM dbo.Table1
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

DELETE FROM dbo.Table2
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

DELETE FROM dbo.Table3
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

DELETE FROM dbo.Table4
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

DELETE FROM dbo.Table5
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

DELETE FROM dbo.Table6
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

-- Remove the duplicates from the base table last, once nothing references them.
DELETE FROM dbo.Product
WHERE id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

COMMIT TRANSACTION;

在rextester上测试输入和预期输出？输入：产品表包含重复的行，其相关表包含太多重复的行=>输出：删除所有重复的行@xxx您可以共享您的数据模型吗？示例？…这6个引用表都有吗？SSMS只是一个访问数据库的工具。Sql Server的版本更重要。但这可能不需要花哨的新功能。只需在1个临时表中加载要删除的product.id，然后从6个表中删除它们，然后再从product表中删除它们。

--
-- Remove the queued duplicates: first from the 6 tables with a FK to the
-- products table, then from the products table itself.
-- The deletes share one transaction so that an error in any of them undoes
-- the ones that already ran instead of leaving a half-finished cleanup.
--
SET XACT_ABORT ON;  -- a run-time error rolls back the whole transaction

BEGIN TRANSACTION;

DELETE FROM dbo.Table1
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

DELETE FROM dbo.Table2
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

DELETE FROM dbo.Table3
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

DELETE FROM dbo.Table4
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

DELETE FROM dbo.Table5
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

DELETE FROM dbo.Table6
WHERE product_id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

-- Remove the duplicates from the base table last, once nothing references them.
DELETE FROM dbo.Product
WHERE id IN (SELECT d.product_id FROM #tmpProductsToDelete AS d);

COMMIT TRANSACTION;