基于逗号分隔值的Sql查询联接
我有一个表,它有一个复合键和一个逗号分隔的值。我需要为每个逗号分隔的元素将单行拆分为一行。我看到过类似的问题和类似的答案,但无法将它们转化为我自己的解决方案 我正在运行SQLServer2008R2基于逗号分隔值的Sql查询联接,sql,sql-server,tsql,sql-server-2008-r2,Sql,Sql Server,Tsql,Sql Server 2008 R2,我有一个表,它有一个复合键和一个逗号分隔的值。我需要为每个逗号分隔的元素将单行拆分为一行。我看到过类似的问题和类似的答案,但无法将它们转化为我自己的解决方案 我正在运行SQLServer2008R2 | Key Part 1 | Key Part 2 | Key Part 3 | Values | |------------------------------------------------------| | A | A | A
| Key Part 1 | Key Part 2 | Key Part 3 | Values |
|------------------------------------------------------|
| A | A | A | PDE,PPP,POR |
| A | A | B | PDE,XYZ |
| A | B | A | PDE,RRR |
|------------------------------------------------------|
我需要这个作为输出
| Key Part 1 | Key Part 2 | Key Part 3 | Values | Sequence |
|-------------------------------------------------------------------|
| A | A | A | PDE | 0 |
| A | A | A | PPP | 1 |
| A | A | A | POR | 2 |
| A | A | B | PDE | 0 |
| A | A | B | XYZ | 1 |
| A | B | A | PDE | 0 |
| A | B | A | RRR | 1 |
|-------------------------------------------------------------------|
谢谢
Geoff如果您没有或不想要拆分/解析UDF,这里有一个简单的内联方法 示例
Select A.[Key Part 1]
,A.[Key Part 2]
,A.[Key Part 3]
,B.*
From YourTable A
Cross Apply (
Select [Values] = LTrim(RTrim(X2.i.value('(./text())[1]', 'varchar(max)')))
,[Sequence] = Row_Number() over (Order By (Select null))-1
From (Select x = Cast('<x>' + replace(A.[Values],',','</x><x>')+'</x>' as xml)) X1
Cross Apply x.nodes('x') X2(i)
) B
有兴趣的UDF
CREATE FUNCTION [dbo].[udf-Str-Parse-8K] (@String varchar(max),@Delimiter varchar(25))
Returns Table
As
Return (
with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
cte2(N) As (Select Top (IsNull(DataLength(@String),0)) Row_Number() over (Order By (Select NULL)) From (Select N=1 From cte1 a,cte1 b,cte1 c,cte1 d) A ),
cte3(N) As (Select 1 Union All Select t.N+DataLength(@Delimiter) From cte2 t Where Substring(@String,t.N,DataLength(@Delimiter)) = @Delimiter),
cte4(N,L) As (Select S.N,IsNull(NullIf(CharIndex(@Delimiter,@String,s.N),0)-S.N,8000) From cte3 S)
Select RetSeq = Row_Number() over (Order By A.N)
,RetVal = LTrim(RTrim(Substring(@String, A.N, A.L)))
From cte4 A
);
--Orginal Source http://www.sqlservercentral.com/articles/Tally+Table/72993/
--Select * from [dbo].[udf-Str-Parse-8K]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse-8K]('John||Cappelletti||was||here','||')
如果您没有或想要拆分/解析UDF,这里有一个简单的内联方法 示例
Select A.[Key Part 1]
,A.[Key Part 2]
,A.[Key Part 3]
,B.*
From YourTable A
Cross Apply (
Select [Values] = LTrim(RTrim(X2.i.value('(./text())[1]', 'varchar(max)')))
,[Sequence] = Row_Number() over (Order By (Select null))-1
From (Select x = Cast('<x>' + replace(A.[Values],',','</x><x>')+'</x>' as xml)) X1
Cross Apply x.nodes('x') X2(i)
) B
有兴趣的UDF
CREATE FUNCTION [dbo].[udf-Str-Parse-8K] (@String varchar(max),@Delimiter varchar(25))
Returns Table
As
Return (
with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
cte2(N) As (Select Top (IsNull(DataLength(@String),0)) Row_Number() over (Order By (Select NULL)) From (Select N=1 From cte1 a,cte1 b,cte1 c,cte1 d) A ),
cte3(N) As (Select 1 Union All Select t.N+DataLength(@Delimiter) From cte2 t Where Substring(@String,t.N,DataLength(@Delimiter)) = @Delimiter),
cte4(N,L) As (Select S.N,IsNull(NullIf(CharIndex(@Delimiter,@String,s.N),0)-S.N,8000) From cte3 S)
Select RetSeq = Row_Number() over (Order By A.N)
,RetVal = LTrim(RTrim(Substring(@String, A.N, A.L)))
From cte4 A
);
--Orginal Source http://www.sqlservercentral.com/articles/Tally+Table/72993/
--Select * from [dbo].[udf-Str-Parse-8K]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse-8K]('John||Cappelletti||was||here','||')
如果所有CSV值正好是3个字符(正如您在测试数据中所做的那样),您可以通过预先创建所需的精确行数(而不是为每个字符创建一行以查找分隔符),以极其高效的方式使用计数表。。。因为您已经知道分隔符的位置 在这种情况下,我将使用理货功能,但您也可以使用固定的理货表 tfn_计数函数的代码
SET QUOTED_IDENTIFIER ON
SET ANSI_NULLS ON
GO
CREATE FUNCTION dbo.tfn_Tally
/* ============================================================================
07/20/2017 JL, Created. Capable of creating a sequense of rows
ranging from -10,000,000,000,000,000 to 10,000,000,000,000,000
============================================================================ */
(
@NumOfRows BIGINT,
@StartWith BIGINT
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH
cte_n1 (n) AS (SELECT 1 FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (n)), -- 10 rows
cte_n2 (n) AS (SELECT 1 FROM cte_n1 a CROSS JOIN cte_n1 b), -- 100 rows
cte_n3 (n) AS (SELECT 1 FROM cte_n2 a CROSS JOIN cte_n2 b), -- 10,000 rows
cte_n4 (n) AS (SELECT 1 FROM cte_n3 a CROSS JOIN cte_n3 b), -- 100,000,000 rows
cte_Tally (n) AS (
SELECT TOP (@NumOfRows)
(ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1) + @StartWith
FROM
cte_n4 a CROSS JOIN cte_n4 b -- 10,000,000,000,000,000 rows
)
SELECT
t.n
FROM
cte_Tally t;
GO
如何在解决方案中使用它
-- create some test data...
IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL
DROP TABLE #TestData;
CREATE TABLE #TestData (
KeyPart1 CHAR(1),
KeyPart2 CHAR(1),
KeyPart3 CHAR(1),
[Values] varchar(50)
);
INSERT #TestData (KeyPart1, KeyPart2, KeyPart3, [Values]) VALUES
('A', 'A', 'A', 'PDE,PPP,POR'),
('A', 'A', 'B', 'PDE,XYZ'),
('A', 'B', 'A', 'PDE,RRR,XXX,YYY,ZZZ,AAA,BBB,CCC');
--==========================================================
-- solution query...
SELECT
td.KeyPart1,
td.KeyPart2,
td.KeyPart3,
x.SplitValue,
[Sequence] = t.n
FROM
#TestData td
CROSS APPLY dbo.tfn_Tally(LEN(td.[Values]) - LEN(REPLACE(td.[Values], ',', '')) + 1, 0) t
CROSS APPLY ( VALUES (SUBSTRING(td.[Values], t.n * 4 + 1, 3)) ) x (SplitValue);
结果
KeyPart1 KeyPart2 KeyPart3 SplitValue Sequence
-------- -------- -------- ---------- --------------------
A A A PDE 0
A A A PPP 1
A A A POR 2
A A B PDE 0
A A B XYZ 1
A B A PDE 0
A B A RRR 1
A B A XXX 2
A B A YYY 3
A B A ZZZ 4
A B A AAA 5
A B A BBB 6
A B A CCC 7
如果所有csv元素都是字符数的假设是错误的,那么最好使用传统的基于计数的拆分器。在这种情况下,我的建议是
在这种情况下,解决方案查询将如下所示
SELECT
td.KeyPart1,
td.KeyPart2,
td.KeyPart3,
SplitValue = dsk.Item,
[Sequence] = dsk.ItemNumber - 1
FROM
#TestData td
CROSS APPLY dbo.DelimitedSplit8K(td.[Values], ',') dsk;
结果是
KeyPart1 KeyPart2 KeyPart3 SplitValue Sequence
-------- -------- -------- ---------- --------------------
A A A PDE 0
A A A PPP 1
A A A POR 2
A A B PDE 0
A A B XYZ 1
A B A PDE 0
A B A RRR 1
A B A XXX 2
A B A YYY 3
A B A ZZZ 4
A B A AAA 5
A B A BBB 6
A B A CCC 7
HTH,Jason如果所有CSV值正好是3个字符(正如您在测试数据中所做的那样),您可以通过预先创建所需的精确行数(而不是为每个字符创建一行来查找分隔符),以极其高效的方式使用计数表。。。因为您已经知道分隔符的位置 在这种情况下,我将使用理货功能,但您也可以使用固定的理货表 tfn_计数函数的代码
SET QUOTED_IDENTIFIER ON
SET ANSI_NULLS ON
GO
CREATE FUNCTION dbo.tfn_Tally
/* ============================================================================
07/20/2017 JL, Created. Capable of creating a sequense of rows
ranging from -10,000,000,000,000,000 to 10,000,000,000,000,000
============================================================================ */
(
@NumOfRows BIGINT,
@StartWith BIGINT
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH
cte_n1 (n) AS (SELECT 1 FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (n)), -- 10 rows
cte_n2 (n) AS (SELECT 1 FROM cte_n1 a CROSS JOIN cte_n1 b), -- 100 rows
cte_n3 (n) AS (SELECT 1 FROM cte_n2 a CROSS JOIN cte_n2 b), -- 10,000 rows
cte_n4 (n) AS (SELECT 1 FROM cte_n3 a CROSS JOIN cte_n3 b), -- 100,000,000 rows
cte_Tally (n) AS (
SELECT TOP (@NumOfRows)
(ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1) + @StartWith
FROM
cte_n4 a CROSS JOIN cte_n4 b -- 10,000,000,000,000,000 rows
)
SELECT
t.n
FROM
cte_Tally t;
GO
如何在解决方案中使用它
-- create some test data...
IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL
DROP TABLE #TestData;
CREATE TABLE #TestData (
KeyPart1 CHAR(1),
KeyPart2 CHAR(1),
KeyPart3 CHAR(1),
[Values] varchar(50)
);
INSERT #TestData (KeyPart1, KeyPart2, KeyPart3, [Values]) VALUES
('A', 'A', 'A', 'PDE,PPP,POR'),
('A', 'A', 'B', 'PDE,XYZ'),
('A', 'B', 'A', 'PDE,RRR,XXX,YYY,ZZZ,AAA,BBB,CCC');
--==========================================================
-- solution query...
SELECT
td.KeyPart1,
td.KeyPart2,
td.KeyPart3,
x.SplitValue,
[Sequence] = t.n
FROM
#TestData td
CROSS APPLY dbo.tfn_Tally(LEN(td.[Values]) - LEN(REPLACE(td.[Values], ',', '')) + 1, 0) t
CROSS APPLY ( VALUES (SUBSTRING(td.[Values], t.n * 4 + 1, 3)) ) x (SplitValue);
结果
KeyPart1 KeyPart2 KeyPart3 SplitValue Sequence
-------- -------- -------- ---------- --------------------
A A A PDE 0
A A A PPP 1
A A A POR 2
A A B PDE 0
A A B XYZ 1
A B A PDE 0
A B A RRR 1
A B A XXX 2
A B A YYY 3
A B A ZZZ 4
A B A AAA 5
A B A BBB 6
A B A CCC 7
如果所有csv元素都是字符数的假设是错误的,那么最好使用传统的基于计数的拆分器。在这种情况下,我的建议是
在这种情况下,解决方案查询将如下所示
SELECT
td.KeyPart1,
td.KeyPart2,
td.KeyPart3,
SplitValue = dsk.Item,
[Sequence] = dsk.ItemNumber - 1
FROM
#TestData td
CROSS APPLY dbo.DelimitedSplit8K(td.[Values], ',') dsk;
结果是
KeyPart1 KeyPart2 KeyPart3 SplitValue Sequence
-------- -------- -------- ---------- --------------------
A A A PDE 0
A A A PPP 1
A A A POR 2
A A B PDE 0
A A B XYZ 1
A B A PDE 0
A B A RRR 1
A B A XXX 2
A B A YYY 3
A B A ZZZ 4
A B A AAA 5
A B A BBB 6
A B A CCC 7
HTH,Jason--创建表
Create table YourTable
(
p1 varchar(50),
p2 varchar(50),
p3 varchar(50),
pval varchar(50)
)
go
--插入数据
insert into YourTable values ('A','A','A','PDE,PPP,POR'),
('A','A','B','PDE,XYZ'),('A','B','A','PDE,RRR')
go
--查看示例数据
SELECT p1, p2, p3 , pval FROM YourTable
go
--要求的结果
SELECT p1,p2,p3, LTRIM(RTRIM(Split.a.value('.', 'VARCHAR(100)'))) as Value1 , ROW_NUMBER() OVER(PARTITION BY id ORDER BY id ASC)-1 AS SequenceNo
FROM
(SELECT ROW_NUMBER() over (order by (SELECT NULL)) AS ID, p1,p2,p3, pval, CAST ('<M>' + REPLACE(pval, ',', '</M><M>') + '</M>' AS XML) AS Data from YourTable
) AS A
CROSS APPLY Data.nodes ('/M') AS Split(a)
go
--创建表
Create table YourTable
(
p1 varchar(50),
p2 varchar(50),
p3 varchar(50),
pval varchar(50)
)
go
--插入数据
insert into YourTable values ('A','A','A','PDE,PPP,POR'),
('A','A','B','PDE,XYZ'),('A','B','A','PDE,RRR')
go
--查看示例数据
SELECT p1, p2, p3 , pval FROM YourTable
go
--要求的结果
SELECT p1,p2,p3, LTRIM(RTRIM(Split.a.value('.', 'VARCHAR(100)'))) as Value1 , ROW_NUMBER() OVER(PARTITION BY id ORDER BY id ASC)-1 AS SequenceNo
FROM
(SELECT ROW_NUMBER() over (order by (SELECT NULL)) AS ID, p1,p2,p3, pval, CAST ('<M>' + REPLACE(pval, ',', '</M><M>') + '</M>' AS XML) AS Data from YourTable
) AS A
CROSS APPLY Data.nodes ('/M') AS Split(a)
go
所有元素是否与示例中的每个元素正好有3个字符?不应在单个单元格中以逗号分隔的形式存储多个值。首先,正如您所看到的,以后使用这些数据是一个混乱和困难的过程,而且它也违反了关系数据库设计的第一种正常形式。所有元素是否都与示例中的每个元素正好3个字符?您不应该在单个单元格中以逗号分隔的形式存储多个值。首先,正如您所看到的,在以后使用这些数据是一个混乱和困难的过程,而且它也违反了关系数据库设计的第一个正常形式。