Sql 6',A2',P3',NULL),('G1',6295930',NULL,'P1',NULL),('G1',3007595',A2',P3',NULL)**第二个解决方案似乎是一个性能问题,因为即使对于这个示例数据,也需要10分钟以上的时间;然后断开连接而
Sql 6',A2',P3',NULL),('G1',6295930',NULL,'P1',NULL),('G1',3007595',A2',P3',NULL)**第二个解决方案似乎是一个性能问题,因为即使对于这个示例数据,也需要10分钟以上的时间;然后断开连接而,sql,sql-server,Sql,Sql Server,6',A2',P3',NULL),('G1',6295930',NULL,'P1',NULL),('G1',3007595',A2',P3',NULL)**第二个解决方案似乎是一个性能问题,因为即使对于这个示例数据,也需要10分钟以上的时间;然后断开连接而不检索结果(我将数据保存在物理表中)。看起来它进入了无限递归。谢谢你的努力。我不确定你是否对评论中的信息感到困惑。所有匹配必须仅在同一主组(G1)内。这意味着当存在多个主要组时:G1、G2、G3,我们不必在不同的主要群体之间寻找任何匹配的记录。
6','A2','P3',NULL),('G1','6295930',NULL,'P1',NULL),('G1','3007595','A2','P3',NULL)** 第二个解决方案似乎存在性能问题,因为即使对于这个示例数据,也需要10分钟以上的时间;然后连接断开而未返回结果(我将数据保存在物理表中)。看起来它进入了无限递归。谢谢你的努力。我不确定你是否对评论中的信息感到困惑。所有匹配必须仅在同一主组(G1)内。这意味着当存在多个主组时:G1、G2、G3,我们不必在不同的主组之间寻找任何匹配的记录。也就是说,如果G1主组中的M1与G8中的M25匹配,我们就不必寻找它。必须检查M25是否与G8中的记录匹配。这有意义吗?当然,只需将组条件添加到CTE中,它就可以扩展。谢谢。对于递归CTE,性能似乎是一个问题,因为实际数据包含约1300万条记录(所有组的总计数)。如前所述,需求本身需要大量资源,为了获得性能,您需要重新设计解决方案,在第二段中,您可以找到我推荐的方案。@Daniel Brughera,我解决了性能问题。我设置了一个外部循环,以成批处理完整的记录计数,每批处理1000条记录。谢谢。
-- Sample data: ten members of main group G1. Members are considered related
-- when they share an Address, a Phone or an Email value.
DECLARE @table TABLE (
    [Group] varchar(3),
    Member  varchar(3),
    Address varchar(3),
    Phone   varchar(3),
    Email   varchar(3)
);

-- Explicit column list so the insert keeps working if the table definition
-- gains or reorders columns. The statement is terminated with ';' because a
-- following WITH (common table expression) requires the previous statement
-- to be terminated.
INSERT @table ([Group], Member, Address, Phone, Email)
VALUES
    ('G1', 'M1', 'A1', 'P1', 'E1'),
    ('G1', 'M2', 'A2', 'P2', 'E2'),
    ('G1', 'M3', 'A1', 'P3', 'E1'),
    ('G1', 'M4', 'A4', 'P3', 'E4'),
    ('G1', 'M5', 'A5', 'P5', 'E2'),
    ('G1', 'M6', 'A6', 'P6', 'E6'),
    ('G1', 'M7', 'A7', 'P6', 'E7'),
    ('G1', 'M8', 'A8', 'P8', 'E4'),
    ('G1', 'M9', 'A9', 'P9', 'E7'),
    ('G1', 'M10', 'A10', 'P10', 'E10');
/*
    Labels every member with a new group ('NG1', 'NG2', ...) by chaining each
    member to one earlier member of the same [Group] that shares an Address,
    Phone or Email with it.

    NOTE(review): [DataTable] is not defined in this script; it is assumed to
    be an existing table with columns [Group], [Member], [Address], [Phone]
    and [Email] - confirm.
    NOTE(review): each member follows only ONE earlier match (the lowest
    sorting one), so a member that bridges two otherwise separate chains does
    not merge them.
*/
WITH
-- One row per source row: the member plus the lowest-sorting earlier member
-- of the same group that shares at least one contact value (NULL when none).
-- "Earlier" is decided by comparing [Member] values with '<'.
[Matches] AS
(
    SELECT
        D1.[Group],
        D1.[Member],
        D2.[Member] AS [PreviousMatchingMember]
    FROM
        [DataTable] AS D1
        OUTER APPLY (
            SELECT TOP (1)
                P.[Member]
            FROM
                [DataTable] AS P
            WHERE
                P.[Group] = D1.[Group] AND
                P.[Member] < D1.[Member] AND
                (P.[Address] = D1.[Address] OR
                 P.[Phone] = D1.[Phone] OR
                 P.[Email] = D1.[Email])
            ORDER BY
                P.[Member]
        ) AS D2
),
-- Recursive walk: members without an earlier match start a new group and
-- every member whose previous match is already placed inherits its label.
[Groups] AS
(
    -- Anchor: chain heads, numbered in ([Group], [Member]) order.
    SELECT
        [Group],
        [Member],
        [PreviousMatchingMember],
        -- Explicit cast instead of relying on LTRIM() to convert the bigint.
        'NG' + CAST(ROW_NUMBER() OVER (ORDER BY [Group], [Member]) AS varchar(20)) AS [NewGroup]
    FROM
        [Matches]
    WHERE
        [PreviousMatchingMember] IS NULL

    UNION ALL

    -- Recursive step: attach the members that point at an already placed member.
    SELECT
        M.[Group],
        M.[Member],
        M.[PreviousMatchingMember],
        G.[NewGroup]
    FROM
        [Groups] AS G
        INNER JOIN [Matches] AS M ON
            M.[Group] = G.[Group] AND
            M.[PreviousMatchingMember] = G.[Member]
)
SELECT
    G.[NewGroup],
    G.[Member],
    D.[Address],
    D.[Phone],
    D.[Email]
FROM
    [Groups] AS G
    INNER JOIN [DataTable] AS D ON
        D.[Group] = G.[Group] AND
        D.[Member] = G.[Member]
ORDER BY
    G.[NewGroup],
    G.[Member]
-- The default recursion limit (100) would abort the query for chains longer
-- than 100 members. Lifting it is safe here: every recursive step moves to a
-- strictly greater [Member] (PreviousMatchingMember < Member), so the walk
-- always terminates.
OPTION (MAXRECURSION 0);
-- Sample data: ten members of main group G1, with an identity column that
-- records insertion order.
DECLARE @table TABLE (
    id      int NOT NULL IDENTITY,
    [Group] varchar(3),
    Member  varchar(3),
    Address varchar(3),
    Phone   varchar(3),
    Email   varchar(3)
);

INSERT @table ([Group], Member, Address, Phone, Email)
VALUES
    ('G1', 'M1', 'A1', 'P1', 'E1'),
    ('G1', 'M2', 'A2', 'P2', 'E2'),
    ('G1', 'M3', 'A1', 'P3', 'E1'),
    ('G1', 'M4', 'A4', 'P3', 'E4'),
    ('G1', 'M5', 'A5', 'P5', 'E2'),
    ('G1', 'M6', 'A6', 'P6', 'E6'),
    ('G1', 'M7', 'A7', 'P6', 'E7'),
    ('G1', 'M8', 'A8', 'P8', 'E4'),
    ('G1', 'M9', 'A9', 'P9', 'E7'),
    ('G1', 'M10', 'A10', 'P10', 'E10');

WITH
/* Find all matches: one row per (member, other member of the same group that
   shares an Address, Phone or Email). Members without any match keep a single
   row with MatchWith = NULL (OUTER APPLY).
id Member MatchWith
1 M1 M3
2 M2 M5
3 M3 M1
3 M3 M4 ...
*/
matches AS (
    SELECT
        t.id,
        t.[Group],
        t.Member,
        a.Member AS MatchWith
    FROM @table AS t
    OUTER APPLY (
        SELECT DISTINCT o.Member
        FROM @table AS o
        WHERE o.Member <> t.Member
          AND o.[Group] = t.[Group]
          AND (o.Address = t.Address OR o.Phone = t.Phone OR o.Email = t.Email)
    ) AS a
)
/* Stuffing the matches per member: the member itself plus all of its matches,
   sorted and comma separated.
id Member AllMatches
1 M1 M1,M3
2 M2 M2,M5
3 M3 M1,M3,M4 .....
*/
, matchsummary AS (
    SELECT DISTINCT
        m.id,
        m.[Group],
        m.Member,
        STUFF((
            SELECT ',' + U.Member
            FROM (
                SELECT m.Member
                UNION ALL
                SELECT DISTINCT x.MatchWith
                FROM matches AS x
                WHERE x.Member = m.Member
                  -- Stay inside the same main group: without this, a member id
                  -- reused in another group would leak that group's matches in.
                  AND x.[Group] = m.[Group]
            ) AS U
            ORDER BY U.Member
            FOR XML PATH('')
        ), 1, 1, '') AS AllMatches
    FROM matches AS m
)
/* Recursive CTE to find "cousins" records (M1, M3 matches on Address and Email; M3 in turn matches with M4 on Phone)
id Member AllMatches gr
1 M1 M1,M3 1
2 M2 M2,M5 2
3 M3 M1,M3,M4 1
4 M4 M3,M4,M8 1

   All LIKE tests below are anchored on the ',' delimiter so that a member
   only matches a whole list entry. The undelimited form ('%' + Member) also
   matched entries that merely END with the member text (e.g. '30' against
   '...,930'), which creates false links and can make the recursion cycle
   forever under MAXRECURSION 0.

   NOTE(review): a member is attached only through a row whose list ENDS with
   that member (the highest-sorting entry). A member that appears only in a
   non-final position of other lists, and is not first in its own list, is
   never reached and is missing from the result.
*/
, tree AS (
    /* The groups are created using the Members who are the first one in their matches
    id Member AllMatches gr
    1 M1 M1,M3 1
    2 M2 M2,M5 2
    6 M6 M6,M7 3
    10 M10 M10 4
    */
    SELECT
        s.id,
        s.[Group],
        s.Member,
        s.AllMatches,
        ROW_NUMBER() OVER (ORDER BY s.id) AS gr
    FROM matchsummary AS s
    WHERE s.AllMatches + ',' LIKE s.Member + ',%'

    UNION ALL

    -- Attach every non-root member to the tree row whose list ends with it.
    SELECT
        s.id,
        s.[Group],
        s.Member,
        s.AllMatches,
        t.gr
    FROM matchsummary AS s
    INNER JOIN tree AS t
        ON s.Member <> t.Member
       AND s.[Group] = t.[Group]
       AND s.AllMatches + ',' NOT LIKE s.Member + ',%'
       AND ',' + t.AllMatches LIKE '%,' + s.Member
)
SELECT
    id,
    [Group],
    Member,
    AllMatches,
    gr
FROM tree
ORDER BY id
OPTION (MAXRECURSION 0);
-- Rebuild the sample table from scratch so the script can be re-run in the
-- same session.
IF OBJECT_ID('tempdb..#table') IS NOT NULL
    DROP TABLE #table;

CREATE TABLE #table (
    [Group] varchar(3),
    Member  varchar(3),
    Address varchar(3),
    Phone   varchar(3),
    Email   varchar(3)
);

INSERT #table ([Group], Member, Address, Phone, Email)
VALUES
    ('G1', 'M1', 'A1', 'P1', 'E1'),
    ('G1', 'M2', 'A2', 'P2', 'E2'),
    ('G1', 'M3', 'A1', 'P3', 'E1'),
    ('G1', 'M4', 'A4', 'P3', 'E4'),
    ('G1', 'M5', 'A5', 'P5', 'E2'),
    ('G1', 'M6', 'A6', 'P6', 'E6'),
    ('G1', 'M7', 'A7', 'P6', 'E7'),
    ('G1', 'M8', 'A8', 'P8', 'E4'),
    ('G1', 'M9', 'A9', 'P9', 'E7'),
    ('G1', 'M10', 'A10', 'P10', 'E10');

-- Output column: the number of the linked sub-group inside each main [Group].
ALTER TABLE #table ADD newGroup INT;

/******************************************************************
START HERE

For every main [Group], repeatedly pick an unassigned member as a
seed, give it the next sub-group number, then keep pulling in every
unassigned member of the same [Group] that shares an Address, Phone
or Email with a member already in that sub-group, until a pass adds
nothing. Numbering restarts at 1 for each main [Group].
******************************************************************/
IF OBJECT_ID('tempdb..#Groups') IS NOT NULL
    DROP TABLE #Groups;

-- Work list of main groups still to process. NULL groups are skipped: a NULL
-- can never satisfy "[Group] = @Group", so it could not be removed from the
-- work list and the outer loop would never end.
SELECT DISTINCT [Group]
INTO #Groups
FROM #table
WHERE [Group] IS NOT NULL;

DECLARE @Group    VARCHAR(3);
DECLARE @member   VARCHAR(3);
DECLARE @newGroup INT;
DECLARE @rows     INT;   -- rows assigned by the most recent UPDATE

WHILE EXISTS (SELECT 1 FROM #Groups)
BEGIN
    -- Any remaining group will do; processing order does not affect the result.
    SELECT TOP (1) @Group = [Group] FROM #Groups;

    -- Start this main group from a clean slate.
    UPDATE #table
    SET newGroup = NULL
    WHERE [Group] = @Group;

    SET @newGroup = 1;

    WHILE EXISTS (SELECT 1 FROM #table WHERE [Group] = @Group AND newGroup IS NULL)
    BEGIN
        -- Seed: any unassigned member of the current group. No ORDER BY on
        -- purpose - every seed yields the same partition (only the numbering
        -- may differ) and a sort per iteration would be costly on large tables.
        SELECT TOP (1) @member = Member
        FROM #table
        WHERE [Group] = @Group
          AND newGroup IS NULL;

        -- Restricted to the current group so that the same member id in
        -- another main group is left untouched.
        UPDATE #table
        SET newGroup = @newGroup
        WHERE [Group] = @Group
          AND Member = @member;

        SET @rows = @@ROWCOUNT;

        -- Expand the sub-group until a pass assigns no further members.
        WHILE @rows > 0
        BEGIN
            UPDATE T
            SET newGroup = @newGroup
            FROM #table AS T
            WHERE T.[Group] = @Group
              AND T.newGroup IS NULL
              AND EXISTS (
                  SELECT 1
                  FROM #table AS S
                  -- Same main group only: sub-group numbers restart at 1 per
                  -- main group, so without this filter rows of an already
                  -- processed group with the same number would match too.
                  WHERE S.[Group] = @Group
                    AND S.newGroup = @newGroup
                    AND (S.Address = T.Address OR S.Phone = T.Phone OR S.Email = T.Email)
              );

            SET @rows = @@ROWCOUNT;
        END;

        SET @newGroup += 1;
    END;

    DELETE #Groups WHERE [Group] = @Group;
END;

SELECT
    [Group],
    Member,
    Address,
    Phone,
    Email,
    newGroup
FROM #table;