如何在SQL Server中将层次关系分组在一起
在表 Example 中,我有两列:Parent 和 Child,下面是表数据(标签:sql、sql-server、stored-procedures):
| Parent | Child |
|---------------------|------------------|
| 100 | 101 |
|---------------------|------------------|
| 101 | 102 |
|---------------------|------------------|
| 200 | 201 |
|---------------------|------------------|
| 103 | 102 |
|---------------------|------------------|
| 202 | 201 |
|---------------------|------------------|
如果我把输入值设为100,那么结果应该是100、101、102、103,因为100->101->102->103;如果我把输入值设为102,那么它应该给出与上面相同的结果,因为102->101->100和102->103。我只需要使用存储过程来实现这一点。
下面是我正在尝试的示例代码
-- GetAncestors (asker's attempt)
--   @thingID : the node value to start from; compared against Example.Parent
--              and Example.Child.
--   Returns  : one column, Result, holding the distinct Parent/Child values of
--              every edge reached.
-- The walk starts from the edges that touch @thingID and then only follows
-- edges downwards (an edge is added when its Parent equals a Child that was
-- already reached), so nodes reachable only by going up again are not found.
CREATE PROCEDURE GetAncestors(@thingID varchar(MAX))
AS
BEGIN
    SET NOCOUNT ON;

    WITH Lineage (Parent, Child) AS
    (
        -- Anchor: every edge that has @thingID on either end.
        SELECT
            e.Parent,
            e.Child
        FROM Example AS e
        WHERE @thingID IN (e.Parent, e.Child)

        UNION ALL

        -- Recursive step: edges whose parent is a child reached so far.
        SELECT
            e.Parent,
            e.Child
        FROM Example AS e
        INNER JOIN Lineage AS l
            ON e.Parent = l.Child
    )
    -- Flatten both ends of every reached edge into one de-duplicated column.
    SELECT DISTINCT
        v.Result
    FROM Lineage AS l
    CROSS APPLY (VALUES (l.Parent), (l.Child)) AS v (Result);
END
GO
您的尝试的问题在于一开始就进行了筛选。如果我理解正确,您希望通过它们的关系(无论是向上的、向下的,还是两者的组合)把它们全部聚集在一起。例如,ID 100有子节点101,101又有子节点102,而102还有另一个父节点103;对于该集合中的任何输入,您都希望结果是这四个:100、101、102、103。这就是为什么不能在一开始就过滤:此时您无从知道哪些关系会通过另一条关系被串联起来。解决这个问题并不像看上去那么简单,而且你不可能只用一个递归就解决它。下面是我很久以前提出的、把所有这些关系归并成组的解决方案。请记住,对于超过10万行的大型数据集,计算可能需要一段时间,因为它必须先识别出所有的组,最后才选出结果。
-- GetAncestors: returns, as a single column named Result, the distinct node
-- values that end up in the same group as @thingID.
--   @thingID : node value to look up; matched against the Parent column of the
--              both-directions copy of Example built below.
--   Returns  : empty result set when no loaded row has Parent = @thingID.
-- Approach: copy Example into a temp table in both directions, give every row
-- its own ComponentID, then repeatedly merge the ComponentIDs of linked rows
-- until no two linked rows differ (or the iteration cap is hit), and finally
-- return the members of the group that contains @thingID.
CREATE PROCEDURE GetAncestors(@thingID INT)
AS
BEGIN
SET NOCOUNT ON
-- Load the relations from Example into a working table
IF OBJECT_ID('tempdb..#TreeRelationship') IS NOT NULL
DROP TABLE #TreeRelationship
-- GroupID is left NULL here and filled in after the merge loop.
CREATE TABLE #TreeRelationship (
RelationID INT IDENTITY(1,1) PRIMARY KEY NONCLUSTERED,
Parent INT,
Child INT,
GroupID INT)
INSERT INTO #TreeRelationship (
Parent,
Child)
SELECT
Parent = D.Parent,
Child = D.Child
FROM
Example AS D
UNION -- Data has to be loaded in both ways (direct and reverse) for algorithm to work correctly; UNION also removes duplicate pairs
SELECT
Parent = D.Child,
Child = D.Parent
FROM
Example AS D
-- Start algorithm: #FirstWork holds one row per loaded pair plus its current ComponentID
IF OBJECT_ID('tempdb..#FirstWork') IS NOT NULL
DROP TABLE #FirstWork
CREATE TABLE #FirstWork (
Parent INT,
Child INT,
ComponentID INT)
CREATE CLUSTERED INDEX CI_FirstWork ON #FirstWork (Parent, Child)
-- Seed: ROW_NUMBER gives every (Parent, Child) row its own starting ComponentID.
INSERT INTO #FirstWork (
Parent,
Child,
ComponentID)
SELECT DISTINCT
Parent = T.Parent,
Child = T.Child,
ComponentID = ROW_NUMBER() OVER (ORDER BY T.Parent, T.Child)
FROM
#TreeRelationship AS T
-- #SecondWork is rebuilt on every iteration with the pairs of ComponentIDs that are linked.
IF OBJECT_ID('tempdb..#SecondWork') IS NOT NULL
DROP TABLE #SecondWork
CREATE TABLE #SecondWork (
Component1 INT,
Component2 INT)
CREATE CLUSTERED INDEX CI_SecondWork ON #SecondWork (Component1)
DECLARE @v_CurrentDepthLevel INT = 0
-- NOTE(review): if the loop ends because of this cap rather than the BREAK below,
-- linked rows may still carry different ComponentIDs -- confirm 100 is enough for the data.
WHILE @v_CurrentDepthLevel < 100 -- Relationships depth level can be controlled with this value
BEGIN
SET @v_CurrentDepthLevel = @v_CurrentDepthLevel + 1
TRUNCATE TABLE #SecondWork
-- Collect pairs of different ComponentIDs whose rows are linked: either t1's child
-- is t2's parent, or both rows have the same parent.
INSERT INTO #SecondWork (
Component1,
Component2)
SELECT DISTINCT
Component1 = t1.ComponentID,
Component2 = t2.ComponentID
FROM
#FirstWork t1
INNER JOIN #FirstWork t2 on
t1.child = t2.parent OR
t1.parent = t2.parent
WHERE
t1.ComponentID <> t2.ComponentID
-- No linked rows with different ComponentIDs remain: merging is finished.
IF (SELECT COUNT(*) FROM #SecondWork) = 0
BREAK
-- First pass: each row takes the smaller of its own ComponentID and the smallest
-- ComponentID it is linked to.
UPDATE #FirstWork SET
ComponentID = CASE WHEN items.ComponentID < target THEN items.ComponentID ELSE target END
FROM
#FirstWork items
INNER JOIN (
SELECT
Source = Component1,
Target = MIN(Component2)
FROM
#SecondWork
GROUP BY
Component1
) new_components on new_components.source = ComponentID
-- Second pass: using the same #SecondWork contents, rows whose (already updated)
-- ComponentID appears as a source are set to that source's smallest linked ComponentID,
-- this time unconditionally.
-- NOTE(review): the reason both passes are needed is not visible here -- verify before changing.
UPDATE #FirstWork SET
ComponentID = target
FROM #FirstWork items
INNER JOIN(
SELECT
source = component1,
target = MIN(component2)
FROM
#SecondWork
GROUP BY
component1
) new_components ON new_components.source = ComponentID
END
-- Turn the remaining ComponentIDs into consecutive group numbers (highest ComponentID
-- gets group 1) and write them back to #TreeRelationship by matching (parent, child).
;WITH Groupings AS
(
SELECT
parent,
child,
group_id = DENSE_RANK() OVER (ORDER BY ComponentID DESC)
FROM
#FirstWork
)
UPDATE FG SET
GroupID = IT.group_id
FROM
#TreeRelationship FG
INNER JOIN Groupings IT ON
IT.parent = FG.parent AND
IT.child = FG.child
-- Select the proper result: find the group of one row whose Parent is @thingID,
-- then list every Parent value in that group.
-- TOP 1 has no ORDER BY, so which matching row supplies the GroupID is unspecified.
;WITH IdentifiedGroup AS
(
SELECT TOP 1
T.GroupID
FROM
#TreeRelationship AS T
WHERE
T.Parent = @thingID
)
SELECT DISTINCT
Result = T.Parent
FROM
#TreeRelationship AS T
INNER JOIN IdentifiedGroup AS I ON T.GroupID = I.GroupID
END
您将看到,对于值为100、101、102和103的@thingID,结果是这四个,对于值200、201和202,结果是这三个
我很确定这不是最优的解决方案,但它能给出正确的输出,而且我从未需要对它进行调优,因为它的速度已经足以满足我的需求。您也可以直接使用 SQL Server 2017 中引入的图处理功能。这里有一个例子。
下面是针对一个更一般问题的查询的简化版本。其主要思想是把(父,子)对视为图中的边,并从给定节点出发遍历所有相连的边。由于图是无向的,我们首先在 CTE_Pairs 中建立一个双向的节点对列表。CTE_Recursive 沿着图的边进行遍历,并在检测到环时停止:它把已访问节点的路径以字符串形式记录在 IDPath 中,如果新节点已经出现在之前访问过的路径里,就停止递归。最终的 CTE_CleanResult 把所有找到的节点整理成一个简单的列表。
-- GetAncestors (recursive-CTE version)
--   @thingID : starting node, compared as text against the varchar-cast
--              Parent/Child values of Example.
--   Returns  : one column, ID (varchar), with every node found by walking the
--              edges of Example in either direction from @thingID, sorted by ID
--              (string ordering). No rows when @thingID has no edge.
CREATE PROCEDURE GetAncestors(@thingID varchar(8000))
AS
BEGIN
    SET NOCOUNT ON;

    WITH CTE_Pairs AS
    (
        -- Every non-self edge listed in both directions; UNION removes duplicates.
        SELECT
            CAST(e.Parent AS varchar(8000)) AS ID1,
            CAST(e.Child AS varchar(8000)) AS ID2
        FROM Example AS e
        WHERE e.Parent <> e.Child

        UNION

        SELECT
            CAST(e.Child AS varchar(8000)) AS ID1,
            CAST(e.Parent AS varchar(8000)) AS ID2
        FROM Example AS e
        WHERE e.Parent <> e.Child
    ),
    CTE_Recursive AS
    (
        -- Anchor: edges leaving @thingID; IDPath starts as ',<from>,<to>,'.
        SELECT
            p.ID1,
            p.ID2,
            CAST(',' + p.ID1 + ',' + p.ID2 + ',' AS varchar(8000)) AS IDPath
        FROM CTE_Pairs AS p
        WHERE p.ID1 = @thingID

        UNION ALL

        -- Step: extend the path by one edge, skipping any target node that
        -- already occurs in IDPath so the walk never loops back on itself.
        SELECT
            p.ID1,
            p.ID2,
            CAST(r.IDPath + p.ID2 + ',' AS varchar(8000)) AS IDPath
        FROM CTE_Recursive AS r
        INNER JOIN CTE_Pairs AS p
            ON p.ID1 = r.ID2
        WHERE r.IDPath NOT LIKE '%,' + p.ID2 + ',%'
    ),
    CTE_CleanResult AS
    (
        -- Both ends of every walked edge, de-duplicated into one list.
        SELECT r.ID1 AS ID
        FROM CTE_Recursive AS r

        UNION

        SELECT r.ID2 AS ID
        FROM CTE_Recursive AS r
    )
    SELECT ID
    FROM CTE_CleanResult
    ORDER BY ID
    OPTION (MAXRECURSION 0); -- 0 removes the default limit of 100 recursion levels
END;
有关 WITH 用法的说明,请参见相关文档(原链接缺失)。 能否给出更清晰的预期输出? 感谢您的帮助,我需要基于同一个示例表、使用存储过程来完成上述操作。 @PawanKumar 我已将其改写为存储过程,并使用您的示例表来获取数据。我还把您的 @thingID 改成了 INT,请确认是否可行。 @EzLo “您将无法仅用一次递归来解决它”——其实这是可以做到的,请参阅我的答案;不过很难判断显式循环是否比递归查询更高效。 @EzLo 非常感谢您的回答,我得到了所需的输出……如果有任何问题,我会检查性能后再回复您。 @EzLo 有没有办法比较两个输出相同的存储过程的性能? 有没有办法比较两个输出相同的存储过程的性能? @PawanKumar 运行它们并测量耗时。有什么问题吗?