Sql 清除重复的按时间顺序排列的值

Sql 清除重复的按时间顺序排列的值,sql,sql-server,tsql,sql-server-2008-r2,Sql,Sql Server,Tsql,Sql Server 2008 R2,我正在尝试清理一些按时间顺序排列的数据,以删除重复的按时间顺序排列的数据 示例表: 如您所见,同一个人在同一部门可能有同一部门但有多个生效日期。我想通过一个查询来澄清这一点,以便只为每个部门的更改指定第一个日期。然而,我不想删除有人从部门50变为null然后又回到50的实例,因为这些都是位置的实际变化 示例输出: 我如何才能做到这一点?这比预期的更困难: declare @temp as table (emp_id int, department int,effective_date date)

我正在尝试清理一些按时间顺序排列的数据,删除其中连续重复的记录

示例表:

如您所见,同一个人可能在同一部门下有多条生效日期不同的记录。我想通过一个查询清理这些数据,使每次部门变更只保留最早的那个生效日期。但是,我不想删除某人从部门 50 变为 null、之后又回到 50 的情况,因为这些都是实际发生的部门变动

示例输出:


我如何才能做到这一点?

这比预期的更困难:

-- Sample data: one row per (employee, effective date). department is NULL
-- when the employee has no department assigned at that date.
declare @temp as table (emp_id int, department int, effective_date date)


-- Explicit column list so the insert does not depend on column order.
insert into @temp (emp_id, department, effective_date)
values
    (1,50,'2015-04-01')
,   (1,50,'2015-05-22')
,   (1, null       ,'2015-07-04')
,   (1, null       ,'2015-07-24')
,   (1, null       ,'2015-07-30')
,   (1,50,'2015-09-07')
,   (1,50,'2016-01-16')
,   (1, null       ,'2016-04-23')
,   (2,60,'2015-01-20')
,   (2,60,'2015-11-22')
,   (2,60,'2016-07-20')
,   (3,50,'2015-04-02')
,   (3,50,'2015-07-15')
,   (3,60,'2016-01-25')


;with cte as
(
-- Number each employee's rows chronologically. department is left untouched
-- (NULL stays NULL), so no sentinel value can collide with a real department
-- or leak into the output.
select emp_id, department, effective_date
  ,row_number() over (partition by emp_id order by effective_date) as rn
from @temp
)
,cte2 as
(
-- Compare each record to the previous record of the same employee.
-- The left join keeps every record, including each employee's first one
-- (which has no previous record), so an employee with a single row is kept.
select cte.emp_id, cte.department, cte.effective_date, cte.rn
    ,cteprev.department as prev_dept
    ,cteprev.effective_date as prev_ed
    ,case
        when cteprev.rn is null then 1                                      -- first record of the employee
        when cte.department = cteprev.department then 0                     -- same non-NULL department
        when cte.department is null and cteprev.department is null then 0   -- NULL followed by NULL
        else 1                                                              -- department actually changed
     end as is_change
from cte
    left join cte cteprev
        on cte.emp_id = cteprev.emp_id and cteprev.rn = cte.rn - 1
)

/*
Result of cte2 for the sample data:
emp_id  department  effective_date  rn  prev_dept   prev_ed     is_change
1   50      2015-04-01  1   NULL    NULL        1
1   50      2015-05-22  2   50      2015-04-01  0
1   NULL    2015-07-04  3   50      2015-05-22  1
1   NULL    2015-07-24  4   NULL    2015-07-04  0
1   NULL    2015-07-30  5   NULL    2015-07-24  0
1   50      2015-09-07  6   NULL    2015-07-30  1
1   50      2016-01-16  7   50      2015-09-07  0
1   NULL    2016-04-23  8   50      2016-01-16  1
2   60      2015-01-20  1   NULL    NULL        1
2   60      2015-11-22  2   60      2015-01-20  0
2   60      2016-07-20  3   60      2015-11-22  0
3   50      2015-04-02  1   NULL    NULL        1
3   50      2015-07-15  2   50      2015-04-02  0
3   60      2016-01-25  3   50      2015-07-15  1
*/

--Keep the first record of each employee and every record where the department changed


select emp_id,department,effective_date
from cte2
where is_change=1
order by emp_id,effective_date

/*
result:
emp_id  department  effective_date
1   50      2015-04-01
1   NULL    2015-07-04
1   50      2015-09-07
1   NULL    2016-04-23
2   60      2015-01-20
3   50      2015-04-02
3   60      2016-01-25
*/
我的解决办法是

-- Sample data: one row per (employee, effective date). department is NULL
-- when the employee has no department assigned at that date.
DECLARE @myTable  TABLE (emp_id INT, department INT, effective_date DATE);

-- Explicit column list so the insert does not depend on column order.
INSERT INTO @myTable (emp_id, department, effective_date) VALUES
(1, 50  , '2015-04-01'),
(1, 50  , '2015-05-22'),
(1, null, '2015-07-04'),
(1, null, '2015-07-24'),
(1, null, '2015-07-30'),
(1, 50  , '2015-09-07'),
(1, 50  , '2016-01-16'),
(1, null, '2016-04-23'),
(2, 60  , '2015-01-20'),
(2, 60  , '2015-11-22'),
(2, 60  , '2016-07-20'),
(3, 50  , '2015-04-02'),
(3, 50  , '2015-07-15'),
(3, 60  , '2016-01-25')


-- Returns, per employee, the first record plus every record whose department
-- differs from the chronologically previous record.
;WITH T AS (
    SELECT emp_id, department, effective_date,
        RN = ROW_NUMBER() OVER(PARTITION BY emp_id ORDER BY effective_date)
    FROM @myTable 
)
SELECT T1.emp_id, T1.department, T1.effective_date 
FROM 
    T T1 
    LEFT JOIN T T2 ON T1.emp_id = T2.emp_id AND T1.RN -1 = T2.RN  -- T2 = previous record
WHERE T2.RN IS NULL                                               -- no previous record: always keep
   OR T1.department <> T2.department                              -- changed between two non-NULL departments
   OR (T1.department IS NULL AND T2.department IS NOT NULL)       -- changed to NULL
   OR (T1.department IS NOT NULL AND T2.department IS NULL)       -- changed from NULL
-- The NULL cases are spelled out instead of using ISNULL(department, ''):
-- '' converts to 0 for an INT column, so NULL would compare equal to a real
-- department 0, and a first record with a NULL department would be dropped.
ORDER BY T1.emp_id, T1.RN
结果:

emp_id      department  effective_date
----------- ----------- --------------
1           50          2015-04-01
1           NULL        2015-07-04
1           50          2015-09-07
1           NULL        2016-04-23
2           60          2015-01-20
3           50          2015-04-02
3           60          2016-01-25

(7 row(s) affected)
要删除重复值,请执行以下操作:

-- Deletes every record whose department is the same as the chronologically
-- previous record of the same employee, leaving only the first record of each
-- run of identical departments.
;WITH T AS (
    SELECT emp_id, department, effective_date,
        RN = ROW_NUMBER() OVER(PARTITION BY emp_id ORDER BY effective_date)
    FROM @myTable 
)
DELETE T1
FROM 
    T T1 
    -- Inner join: only a record that has a previous record can be a duplicate,
    -- so each employee's first record is never a deletion candidate.
    INNER JOIN T T2 ON T1.emp_id = T2.emp_id AND T1.RN -1 = T2.RN 
WHERE T1.department = T2.department
   -- NULL-safe equality written out; ISNULL(department, '') would treat NULL
   -- and department 0 as the same value.
   OR (T1.department IS NULL AND T2.department IS NULL)
WHERE 子句的替代写法:

-- Alternative predicate for the DELETE: a record is a duplicate only when a
-- previous record exists and its department is the same (NULL matches NULL).
WHERE T2.RN IS NOT NULL   -- a record with no previous record is never a duplicate
  AND ( T1.department = T2.department
        OR (T1.department IS NULL AND T2.department IS NULL) )

请始终使用标签注明数据库引擎版本,以便获得最佳答案。@FLICKER 已完成,谢谢。@DouglasGaskell 这是您想要的输出吗?如果不是,请贴出期望的输出。@Sami 是的,这就是期望的输出,不过我在示例表和示例输出中添加了其他数据,以说明其他情况。@downvoter,能否解释一下我可以怎样改进这篇帖子?很好,不过您需要根据上一个查询更新源表,可以再使用一个 CTE。逻辑方面做得很好。我也添加了删除脚本。
-- Deletes every record whose department is the same as the chronologically
-- previous record of the same employee. A statement may carry only one WHERE
-- clause, so the two predicate variants are merged into a single NULL-safe one.
;WITH T AS (
    SELECT emp_id, department, effective_date,
        RN = ROW_NUMBER() OVER(PARTITION BY emp_id ORDER BY effective_date)
    FROM @myTable 
)
DELETE T1
FROM 
    T T1 
    -- Inner join: only a record that has a previous record can be a duplicate,
    -- so each employee's first record is never a deletion candidate.
    INNER JOIN T T2 ON T1.emp_id = T2.emp_id AND T1.RN -1 = T2.RN 
WHERE T1.department = T2.department
   -- NULL-safe equality written out; ISNULL(department, '') would treat NULL
   -- and department 0 as the same value.
   OR (T1.department IS NULL AND T2.department IS NULL)