Sql server 使用联接查询查找重复记录的SQL Server查询
我使用的是 SQL Server 数据库,其中包含表 x、y 和映射表 xy。表 x 有 x_id、date、text 三列,映射表 xy 有 x_id 和 y_id 两列。我需要一个查询来找出 x 中哪些记录是重复的。当 x 的两条记录同时满足以下所有条件时,可以将其视为重复记录:
1. 两者的 text 相同;
2. 两者的 date 相差不超过 5 分钟;
3. 在映射表 xy 中,两者具有相同的 y_id。
标签:sql-server
我已经能够写出满足前两个条件的查询,但结果中包含重复的行。我写不出同时满足第三个条件的查询,也无法在执行自联接时只显示不重复的数据。
我能想到的满足第三个条件的最简单方法,是把每条记录的 y_id 聚合成一行(逗号分隔的字符串)再进行比较。使用公用表表达式(CTE)可以让查询更易读,但不用它也能写出来。测试设置(建表和示例数据)见下文,查询如下:
-- Returns every row of x that has at least one duplicate, i.e. another row
-- with the same text, the same aggregated y_id list in xy, and a date that is
-- at most 5 minutes (elapsed time) away.
;with x_with_y_ids as (
    -- One row per x row plus a comma-separated list of its y_id values in
    -- ascending order, so two rows with the same y_id set produce the same
    -- string. y_ids is NULL for rows that have no entries in xy; such rows
    -- never match each other because NULL = NULL is not true.
    select
        x.x_id
      , x.date
      , x.text
      , y_ids = stuff((
            -- varchar(11): an int can need 11 characters (sign + 10 digits)
            select ',' + convert(varchar(11), xy.y_id)
            from xy
            where xy.x_id = x.x_id
            order by xy.y_id
            for xml path (''), type).value('.', 'varchar(max)')
          , 1, 1, '')   -- strip the leading comma
    from x
)
select
    d.x_id
  , d.date
  , d.text
  , d.y_ids
from x_with_y_ids as d
where exists (
    select 1
    from x_with_y_ids as i
    where i.x_id <> d.x_id
      and i.text = d.text
      and i.y_ids = d.y_ids
      -- Compare against an exact +/- 5 minute window. datediff(minute, ...)
      -- counts minute boundaries crossed, so it would also accept rows that
      -- are almost 6 minutes apart (e.g. 20:40:00.000 and 20:45:59.997).
      and i.date >= dateadd(minute, -5, d.date)
      and i.date <= dateadd(minute, 5, d.date)
)
不聚合y_ID的方法:
下面是另一个示例。问题:如果 x 的记录在 xy 中没有任何关联数据,是否需要忽略条件 3?本示例在这种情况下会忽略条件 3。
-- Self-contained example using table variables. A pair of rows counts as
-- duplicates when the text matches, the dates are at most 5 minutes apart,
-- and either both rows have exactly the same y_id set in @xy or neither row
-- has any @xy entries (condition 3 is ignored in that case).
DECLARE @x TABLE (x_id int, [date] datetime, text varchar(10));
INSERT INTO @x (x_id, [date], text) VALUES
 ( 1,'2017-02-22 20:40:30.617','txt1')
,( 2,'2017-02-22 20:40:06.103','txt1')
,( 3,'2017-02-22 20:28:21.393','txt2')
,( 4,'2017-02-22 20:28:21.393','txt3')
,( 5,'2017-02-22 20:28:21.394','txt3');

DECLARE @xy TABLE (x_id int, y_id int);
INSERT INTO @xy (x_id, y_id) VALUES
 ( 1,3 )
,( 1,10)
,( 2,3 )
,( 2,10)
,( 3,5 );

-- One output row per (x, ox) pair, so a row with several duplicates is
-- listed once per matching partner.
SELECT
    x.x_id
  , x.[date]
  , x.text
  , xy.totaly
  , xy.NULLROW
FROM @x AS x
INNER JOIN @x AS ox
    ON x.x_id <> ox.x_id
   AND x.text = ox.text
   -- Exact +/- 5 minute window. DATEDIFF(MINUTE, ...) counts minute
   -- boundaries crossed and would also accept rows almost 6 minutes apart.
   AND ox.[date] >= DATEADD(MINUTE, -5, x.[date])
   AND ox.[date] <= DATEADD(MINUTE, 5, x.[date])
OUTER APPLY (
    -- Full-join the y_id sets of both rows:
    --   totaly  = size of the union of the two sets (0 when both are empty)
    --   NULLROW = number of y_ids present on only one side
    --             (NULL when totaly = 0, because SUM over no rows is NULL)
    SELECT
        COUNT(0) AS totaly
        -- Explicit IS NULL checks instead of xy1.y_id + xy2.y_id IS NULL,
        -- which could overflow int for large ids.
      , SUM(CASE WHEN xy1.y_id IS NULL OR xy2.y_id IS NULL THEN 1 ELSE 0 END) AS NULLROW
    FROM (SELECT m.y_id FROM @xy AS m WHERE m.x_id = x.x_id) AS xy1
    FULL JOIN (SELECT m.y_id FROM @xy AS m WHERE m.x_id = ox.x_id) AS xy2
        ON xy1.y_id = xy2.y_id
) AS xy
WHERE (xy.totaly > 0 AND xy.NULLROW = 0)   -- identical, non-empty y_id sets
   OR (xy.totaly = 0)                      -- neither row is mapped in @xy
评论:
- 嘿 @Sqlzim,我试过你的解决方案。即使 y_id 不同,它似乎也会显示这些记录。
- @Raghavendra,那是因为我忘了在 where 子句中加上 i.y_ids = cte.y_ids。现在已经修正了。
- @Raghavendra 很乐意帮忙!
-- Test fixture for the duplicate-detection queries.
-- Rows 1 and 2 share the same text and the same y_id set {3, 10} and are
-- about 24 seconds apart; row 3 has a different text and y_id set.
CREATE TABLE x (
    x_id   int,
    [date] datetime,
    text   varchar(32)
);

INSERT INTO x (x_id, [date], text)
VALUES
    (1, '2017-02-22 20:40:30.617', 'txt1'),
    (2, '2017-02-22 20:40:06.103', 'txt1'),
    (3, '2017-02-22 20:28:21.393', 'txt2');

-- Mapping table: one row per (x_id, y_id) association.
CREATE TABLE xy (
    x_id int,
    y_id int
);

INSERT INTO xy (x_id, y_id)
VALUES
    (1, 3),
    (1, 10),
    (2, 3),
    (2, 10),
    (3, 5);
-- Returns every row of x that has at least one duplicate, i.e. another row
-- with the same text, the same aggregated y_id list in xy, and a date that is
-- at most 5 minutes (elapsed time) away.
;with x_with_y_ids as (
    -- One row per x row plus a comma-separated list of its y_id values in
    -- ascending order, so two rows with the same y_id set produce the same
    -- string. y_ids is NULL for rows that have no entries in xy; such rows
    -- never match each other because NULL = NULL is not true.
    select
        x.x_id
      , x.date
      , x.text
      , y_ids = stuff((
            -- varchar(11): an int can need 11 characters (sign + 10 digits)
            select ',' + convert(varchar(11), xy.y_id)
            from xy
            where xy.x_id = x.x_id
            order by xy.y_id
            for xml path (''), type).value('.', 'varchar(max)')
          , 1, 1, '')   -- strip the leading comma
    from x
)
select
    d.x_id
  , d.date
  , d.text
  , d.y_ids
from x_with_y_ids as d
where exists (
    select 1
    from x_with_y_ids as i
    where i.x_id <> d.x_id
      and i.text = d.text
      and i.y_ids = d.y_ids
      -- Compare against an exact +/- 5 minute window. datediff(minute, ...)
      -- counts minute boundaries crossed, so it would also accept rows that
      -- are almost 6 minutes apart (e.g. 20:40:00.000 and 20:45:59.997).
      and i.date >= dateadd(minute, -5, d.date)
      and i.date <= dateadd(minute, 5, d.date)
)
+------+---------------------+------+-------+
| x_id | date | text | y_ids |
+------+---------------------+------+-------+
| 1 | 2017-02-22 20:40:30 | txt1 | 3,10 |
| 2 | 2017-02-22 20:40:06 | txt1 | 3,10 |
+------+---------------------+------+-------+
-- Returns every row of x that has at least one duplicate (same text, date at
-- most 5 minutes away, identical y_id set in xy) without building a
-- concatenated y_id string. Rows of x with no xy entries are never returned
-- because of the inner join in the CTE.
-- Assumes (x_id, y_id) pairs are unique in xy -- TODO confirm; duplicate
-- mapping rows would inflate the counts compared below.
;with x_mapped as (
    -- One row per (x row, y_id) with cnt = number of y_ids mapped to that row.
    select
        x.x_id
      , x.date
      , x.text
      , xy.y_id
      , cnt = count(*) over (partition by x.x_id)
    from x
      inner join xy
        on x.x_id = xy.x_id
)
-- distinct: a row with several duplicates yields one group per partner.
select distinct
    x.x_id
  , x.date
  , x.text
from x_mapped as x
  inner join x_mapped as x2
    on x.x_id <> x2.x_id
   and x.y_id = x2.y_id
   and x.text = x2.text
   and x.cnt = x2.cnt
   -- Exact +/- 5 minute window. datediff(minute, ...) counts minute
   -- boundaries crossed and would also accept rows almost 6 minutes apart.
   and x2.date >= dateadd(minute, -5, x.date)
   and x2.date <= dateadd(minute, 5, x.date)
-- Group per partner (x2.x_id): count(*) is then the number of y_ids shared
-- with that one partner. Grouping by x alone summed the matches over all
-- partners, which dropped rows having two or more true duplicates and could
-- accept rows whose partial matches merely added up to cnt.
group by x.x_id, x.date, x.text, x.cnt, x2.x_id
having count(*) = x.cnt
+------+---------------------+------+
| x_id | date | text |
+------+---------------------+------+
| 1 | 2017-02-22 20:40:30 | txt1 |
| 2 | 2017-02-22 20:40:06 | txt1 |
+------+---------------------+------+
-- Self-contained example using table variables. A pair of rows counts as
-- duplicates when the text matches, the dates are at most 5 minutes apart,
-- and either both rows have exactly the same y_id set in @xy or neither row
-- has any @xy entries (condition 3 is ignored in that case).
DECLARE @x TABLE (x_id int, [date] datetime, text varchar(10));
INSERT INTO @x (x_id, [date], text) VALUES
 ( 1,'2017-02-22 20:40:30.617','txt1')
,( 2,'2017-02-22 20:40:06.103','txt1')
,( 3,'2017-02-22 20:28:21.393','txt2')
,( 4,'2017-02-22 20:28:21.393','txt3')
,( 5,'2017-02-22 20:28:21.394','txt3');

DECLARE @xy TABLE (x_id int, y_id int);
INSERT INTO @xy (x_id, y_id) VALUES
 ( 1,3 )
,( 1,10)
,( 2,3 )
,( 2,10)
,( 3,5 );

-- One output row per (x, ox) pair, so a row with several duplicates is
-- listed once per matching partner.
SELECT
    x.x_id
  , x.[date]
  , x.text
  , xy.totaly
  , xy.NULLROW
FROM @x AS x
INNER JOIN @x AS ox
    ON x.x_id <> ox.x_id
   AND x.text = ox.text
   -- Exact +/- 5 minute window. DATEDIFF(MINUTE, ...) counts minute
   -- boundaries crossed and would also accept rows almost 6 minutes apart.
   AND ox.[date] >= DATEADD(MINUTE, -5, x.[date])
   AND ox.[date] <= DATEADD(MINUTE, 5, x.[date])
OUTER APPLY (
    -- Full-join the y_id sets of both rows:
    --   totaly  = size of the union of the two sets (0 when both are empty)
    --   NULLROW = number of y_ids present on only one side
    --             (NULL when totaly = 0, because SUM over no rows is NULL)
    SELECT
        COUNT(0) AS totaly
        -- Explicit IS NULL checks instead of xy1.y_id + xy2.y_id IS NULL,
        -- which could overflow int for large ids.
      , SUM(CASE WHEN xy1.y_id IS NULL OR xy2.y_id IS NULL THEN 1 ELSE 0 END) AS NULLROW
    FROM (SELECT m.y_id FROM @xy AS m WHERE m.x_id = x.x_id) AS xy1
    FULL JOIN (SELECT m.y_id FROM @xy AS m WHERE m.x_id = ox.x_id) AS xy2
        ON xy1.y_id = xy2.y_id
) AS xy
WHERE (xy.totaly > 0 AND xy.NULLROW = 0)   -- identical, non-empty y_id sets
   OR (xy.totaly = 0)                      -- neither row is mapped in @xy
x_id date text totaly NULLROW
----------- ----------------------- ---------- ----------- -----------
1 2017-02-22 20:40:30.617 txt1 2 0
2 2017-02-22 20:40:06.103 txt1 2 0
4 2017-02-22 20:28:21.393 txt3 0 NULL
5 2017-02-22 20:28:21.393 txt3 0 NULL