在Microsoft SQL Server中,有没有比我目前所做的更好的方法来在两个方向上填充空值?
我正在尝试找出如何在Microsoft SQL Server中向上填充空值。这在甲骨文中非常简单,但老板并不同意。经过六个小时的谷歌搜索,我终于决定发帖 在结果集中 值是我们的时间戳数据,包含所有可能的日期时间 VALUE2是我们的填充空向下方法 VALUE3是我们的向上填充空值方法 考虑到这张假烂桌子在Microsoft SQL Server中,有没有比我目前所做的更好的方法来在两个方向上填充空值?,sql,sql-server,sql-server-2014,Sql,Sql Server,Sql Server 2014,我正在尝试找出如何在Microsoft SQL Server中向上填充空值。这在甲骨文中非常简单,但老板并不同意。经过六个小时的谷歌搜索,我终于决定发帖 在结果集中 值是我们的时间戳数据,包含所有可能的日期时间 VALUE2是我们的填充空向下方法 VALUE3是我们的向上填充空值方法 考虑到这张假烂桌子 -- Test Data & Table DECLARE @TEST001 TABLE (TIME_STAMP datetime, TagA integer, TagB
-- Test Data & Table
DECLARE @TEST001 TABLE
(TIME_STAMP datetime,
TagA integer,
TagB integer,
TagC integer)
-- Insert test Values
INSERT INTO @TEST001
VALUES
('2017-01-21 00:01:00.042', NULL, NULL, 87),
('2017-01-21 00:04:10.155', NULL, 1239, NULL),
('2017-01-21 00:04:10.959', NULL, NULL, 86),
('2017-01-21 00:06:49.401', NULL, 1240, NULL),
('2017-01-21 00:06:59.301', NULL, 1239, NULL),
('2017-01-21 00:07:10.124', 108, NULL, NULL),
('2017-01-21 00:12:11.789', 109, NULL, NULL),
('2017-01-21 00:16:12.190', 108, NULL, NULL),
('2017-01-21 00:16:13.987', 107, NULL, NULL),
('2017-01-21 00:17:31.410', NULL, 1260, NULL),
('2017-01-21 00:17:32.511', NULL, 1261, 87),
('2017-01-21 00:17:32.966', NULL, 1262, NULL)
给定这个查询,它在伪垃圾表中查询TagA
-- Start of Query used in VBS ADODB CONN.
declare @s datetime
declare @e datetime
set @s = '2017-01-21 00:00:00'
set @e = '2017-01-21 23:59:59'
;
-- We need to get all intervals between our two dates.
WITH ALL_INTERVALS AS (
SELECT TOP (datediff(mi,@s,@e))
TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s)
FROM sys.all_objects AS s1
CROSS JOIN
sys.all_objects AS s2
),
-- We need to include both our real data and all possible intervals.
ALL_TIMES AS (
SELECT
Time_Stamp as TIMES,
TagA AS VALUE
FROM @TEST001 H
WHERE Time_Stamp BETWEEN @s and @e
UNION ALL
SELECT
TIMES AS TIMES,
NULL AS VALUE
FROM ALL_INTERVALS
),
-- We need to find the real first value and fill all nulls with this value until we hit a new value.
FILL_DOWN AS ( SELECT
TIMES,
VALUE,
ISNULL(VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES < AT.TIMES AND VALUE IS NOT NULL ORDER BY TIMES DESC)) AS VALUE2
FROM ALL_TIMES AT
),
-- Our fill up method does not work if our first set of values is null. UGH, this does not work either....crap. It fills our first set
-- With our last real value....
FILL_UP AS (
SELECT
TIMES,
VALUE,
VALUE2,
ISNULL(VALUE2, (SELECT TOP 1 VALUE2 FROM FILL_DOWN WHERE TIMES > FD.TIMES AND VALUE2 IS NOT NULL ORDER BY TIMES DESC)) AS VALUE3
FROM FILL_DOWN FD
)
SELECT *
FROM FILL_UP
ORDER BY TIMES ASC
结果
TIMES VALUE VALUE2 VALUE3
2017-01-2100:01:00.000 NULL NULL 107 <---------- This should be 108 from our fake crappy table.
2017-01-2100:01:00.043 NULL NULL 107
2017-01-2100:02:00.000 NULL NULL 107
2017-01-2100:03:00.000 NULL NULL 107
2017-01-2100:04:00.000 NULL NULL 107
2017-01-2100:04:10.157 NULL NULL 107
2017-01-2100:04:10.960 NULL NULL 107
2017-01-2100:05:00.000 NULL NULL 107
2017-01-2100:06:00.000 NULL NULL 107
2017-01-2100:06:49.400 NULL NULL 107
2017-01-2100:06:59.300 NULL NULL 107
2017-01-2100:07:00.000 NULL NULL 107
2017-01-2100:07:10.123 108 108 108
2017-01-2100:08:00.000 NULL 108 108
2017-01-2100:09:00.000 NULL 108 108
2017-01-2100:10:00.000 NULL 108 108
2017-01-2100:11:00.000 NULL 108 108
2017-01-2100:12:00.000 NULL 108 108
2017-01-2100:12:11.790 109 109 109
2017-01-2100:13:00.000 NULL 109 109
2017-01-2100:14:00.000 NULL 109 109
2017-01-2100:15:00.000 NULL 109 109
2017-01-2100:16:00.000 NULL 109 109
2017-01-2100:16:12.190 108 108 108
2017-01-2100:16:13.987 107 107 107
2017-01-2100:17:00.000 NULL 107 107
2017-01-2100:17:31.410 NULL 107 107
2017-01-2100:17:32.510 NULL 107 107
2017-01-2100:17:32.967 NULL 107 107
2017-01-2100:18:00.000 NULL 107 107
2017-01-2100:19:00.000 NULL 107 107
2017-01-2100:20:00.000 NULL 107 107
2017-01-2100:21:00.000 NULL 107 107
2017-01-2100:22:00.000 NULL 107 107
2017-01-2100:23:00.000 NULL 107 107
2017-01-2100:24:00.000 NULL 107 107
2017-01-2100:25:00.000 NULL 107 107
2017-01-2100:26:00.000 NULL 107 107
编辑:
我今天在玩非常大的数据集,发现这是有史以来最快的事情。它在一秒钟内完全符合我的要求。你看不到这个小表格中的速度差异,但是对于大数据集来说,1秒与24分钟是非常大的!谢谢你的帮助,祝你一切顺利
-- Test Data & Table
DECLARE @TEST001 TABLE
(TIME_STAMP datetime,
TagA integer,
TagB integer,
TagC integer)
-- Insert test Values
INSERT INTO @TEST001
VALUES
('2017-01-21 00:01:00.042', NULL, NULL, 87),
('2017-01-21 00:04:10.155', NULL, 1239, NULL),
('2017-01-21 00:04:10.959', NULL, NULL, 86),
('2017-01-21 00:06:49.401', NULL, 1240, NULL),
('2017-01-21 00:06:59.301', NULL, 1239, NULL),
('2017-01-21 00:07:10.124', 108, NULL, NULL),
('2017-01-21 00:12:11.789', 109, NULL, NULL),
('2017-01-21 00:16:12.190', 108, NULL, NULL),
('2017-01-21 00:16:13.987', 107, NULL, NULL),
('2017-01-21 00:17:31.410', NULL, 1260, NULL),
('2017-01-21 00:17:32.511', NULL, 1261, 87),
('2017-01-21 00:17:32.966', NULL, 1262, NULL)
-- Start of Query used in VBS ADODB CONNN
declare @s datetime
declare @e datetime
set @s = '2017-01-01 00:00:00'
set @e = '2017-01-31 23:59:59'
;
WITH ALL_INTERVALS
AS ( SELECT TOP (datediff(mi,@s,@e))
TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s),
NULL AS VALUE
FROM sys.all_objects AS s1
CROSS JOIN
sys.all_objects AS s2
),
ALL_TIMES
AS ( SELECT
H.TIME_STAMP as TIMES,
TagA AS VALUE
FROM @TEST001 H
WHERE TIME_STAMP BETWEEN @s and @e
UNION ALL
SELECT
AI.TIMES AS TIMES,
AI.VALUE AS VALUE
FROM ALL_INTERVALS AI
),
-- JUST INCASE OUR REAL TIME == INTERVAL TIME exactly.
FILL_ACCROSS AS( SELECT TIMES AS TIMES,
ISNULL(AT.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES = AT.TIMES AND VALUE IS NOT NULL ORDER BY TIMES ASC)) AS VALUE
FROM ALL_TIMES AT
),
-- FILL UP AND FILL DOWN.
FILL_UP_DOWN AS ( SELECT
TIMES,
VALUE,
ISNULL(FA.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES > FA.TIMES AND VALUE IS NOT NULL ORDER BY TIMES ASC)) AS VALUE2,
ISNULL(FA.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES < FA.TIMES AND VALUE IS NOT NULL ORDER BY TIMES DESC)) AS VALUE3
FROM FILL_ACCROSS FA
)
--Just a nice display of what is going on.
select FD.TIMES,
FD.VALUE,
FD.VALUE2,
FD.VALUE3,
CASE
WHEN FD.VALUE3 IS NULL THEN (FD.VALUE2)
ELSE (FD.VALUE3)
END AS VALUE4
FROM FILL_UP_DOWN FD order by TIMES ASC
如果您正在寻找替代方案,请尝试此分组
-- Start of Query used in VBS ADODB CONN.
declare @s datetime
declare @e datetime
set @s = '2017-01-21 00:00:00.001'
set @e = '2017-01-21 23:59:59'
;
--select datepart(millisecond,@s);
-- We need to get all intervals between our two dates.
WITH ALL_INTERVALS AS (
SELECT TOP (datediff(mi,@s,@e))
TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s)
FROM sys.all_objects AS s1
CROSS JOIN
sys.all_objects AS s2
),
-- We need to include both our real data and all possible intervals.
ALL_TIMES AS (
SELECT
Time_Stamp as TIMES,
TagA AS VALUE
FROM @TEST001 H
WHERE Time_Stamp BETWEEN @s and @e
UNION ALL
SELECT
TIMES AS TIMES,
NULL AS VALUE
FROM ALL_INTERVALS
),
g1 AS (
SELECT
TIMES,
VALUE,
diff = case
when value=lag(value,1) over(order by TIMES)
or coalesce (value, lag(value,1) over(order by TIMES)) is null
then 0 else 1 end
FROM ALL_TIMES AT
),
g2 as (
select TIMES, VALUE
, grp = sum(diff) over(order by TIMES)
, direction = case sum(diff) over(order by TIMES) when 0 then 1 else -1 end
from g1
)
select
TIMES,
VALUE,
grp,
ISNULL(VALUE,
(SELECT TOP 1 VALUE FROM g2 repl WHERE grp = g2.grp + g2.direction AND VALUE IS NOT NULL ORDER BY TIMES )
) AS VALUE3
from g2;
我真的不明白你想做什么。您可以缩短示例数据并添加所需结果吗?如果您将相关子查询中的顺序更改为从DESC获取VALUE3到ASC,您应该会得到所需的结果。目前,它正在寻找给定日期后的最后一个非空值,而从外观上看,你想要的是第一个非空值。在填充顺序中删除DESC byI冷静下来,吃了一顿冷笑,谢谢@GarethD和Serg,执行仍然需要5秒,但至少它能工作!我发誓我试过删除DESC,但是太晚了,我看不清楚。太棒了,我喜欢不同的方法,谢谢你的发帖!我发现在使用大型数据集时,速度与我最初的查询相同。但是在玩了一整天之后,我想到了上面的编辑。在我的大数据集上完成几乎需要一秒钟。干杯再次感谢您指出需要通过以下方式取消或更改订单。那天我做得不太好,弄不明白。