基于日期的数据SQL聚合
我确信这是一个非常愚蠢的问题,我有一个愚蠢的时刻。 考虑下面的基本情景,这是一个非常小的场景,与现实有很多不同的维度和措施: 我需要得到的是预期的输出。 因此,参数中定义的输入日期和输出日期之间的所有成本都包括在内。但是,仅包括最新的PID-定义为: 1-其中PID按顺序运行,或根据日期重叠最新的PID,只要两者在@output date不活动 2-如果在@output date有两个PID处于活动状态,则同时显示这两个PID 我一辈子都不知道如何在SQL中做到这一点,请注意,is必须是非动态的,并且不使用任何CTE。不幸的是,只使用带有子查询的基本SQL 显然,返回必要的ID和PID列表很容易:基于日期的数据SQL聚合,sql,sql-server,Sql,Sql Server,我确信这是一个非常愚蠢的问题,我有一个愚蠢的时刻。 考虑下面的基本情景,这是一个非常小的场景,与现实有很多不同的维度和措施: 我需要得到的是预期的输出。 因此,参数中定义的输入日期和输出日期之间的所有成本都包括在内。但是,仅包括最新的PID-定义为: 1-其中PID按顺序运行,或根据日期重叠最新的PID,只要两者在@output date不活动 2-如果在@output date有两个PID处于活动状态,则同时显示这两个PID 我一辈子都不知道如何在SQL中做到这一点,请注意,is必须是非动态的
declare @input_date date ='2006-01-01'
declare @output_date date ='2006-12-31'
select a.PID, a.ID
from #tmp a
where date_from <=@output_date and date_to >=@input_date
我在使用CTE方面取得了一些进展,因此您可以看到,如果我能够:
drop table #tmp
CREATE TABLE #tmp (
[date_from] [datetime] NOT NULL,
[date_to] [datetime] NOT NULL,
[ID] [nvarchar](25) NOT NULL,
[PID] [nvarchar](25) NOT NULL,
[cost] [float] NULL
) ON [PRIMARY]
INSERT #tmp VALUES('2005-1-1','2005-1-31','10001','X123',1254.32)
INSERT #tmp VALUES('2000-10-10','2006-8-21','10005','TEST01',21350.9636378758)
INSERT #tmp VALUES('2006-8-22','2099-12-31','10005','TEST02',22593.4926163943)
INSERT #tmp VALUES('2006-1-1','2099-12-31','10006','X01',22458.3342354444)
INSERT #tmp VALUES('2006-2-8','2099-12-31','10006','X02',22480.3772331959)
INSERT #tmp VALUES('2006-1-1','2006-2-7','10007','AB01',565.416874152212)
INSERT #tmp VALUES('2006-2-8','2006-7-31','10007','AA05',19108.3206482165)
declare @input_date date ='2006-01-01'
declare @output_date date ='2006-12-31'
;with cte as (
select t.id,t.PID,t.cost,t.date_from,t.date_to ,
iif(date_To >= @output_date OR max_date_To is not null,PID,NULL) as PID2,
b.total_id_cost
from #tmp t
left join (select ID,max(date_to) as max_date_to
from #tmp
where date_from <=@output_date and date_to >=@input_date
group by ID) a
on t.ID = a.ID and t.date_to = a.max_date_to
left join (Select ID, sum(cost) as total_id_cost
from #tmp
where date_from <=@output_date and date_to >=@input_date
group by ID) b
on t.ID = b.ID
where date_from <=@output_date and date_to >=@input_date )
select distinct ID,PID2,
iif(ID in (
select ID
from cte
where PID2 IS NULL)
and ID not in (select ID
from cte
where PID IS NOT NULL
group by ID
having count (distinct PID2) >1 ), cte.total_id_cost, cost) as cost
from cte
where PID2 is not null;
您好,您可以尝试以下查询: 选择a.resource\u id、maxa.post\u id PID、SUMa.cost 来自tmp a 其中date\u from=@input\u date 按a.resource\u id分组
按a.resource\u id订购 我认为这可能有效:
SELECT
t1.ID,
q1.PID,
SUM(t1.cost)
FROM
Table AS t1
JOIN
(
SELECT
q2.ID,
t2.PID
FROM
(
SELECT
ID,
MAX(date_to) AS maxdate
FROM
Table
GROUP BY
ID
) AS q2
JOIN
table AS t2
ON
q2.ID = t2.ID
AND
q2.maxdate = t2.date_to
) AS q1
ON
t1.ID = q1.ID
AND
t1.PID = q1.PID
GROUP BY
t1.ID,
q1.PID
这是一个没有CTE的查询。质疑的理念: 1查找连续日期,并在每个id中创建不同的组 2查找每个组的最小和最大日期、成本总和 3输入参数限制
declare @date_from date = '20060101'
declare @date_to date = '20061231'
declare @myTable table(
date_from date
, date_to date
, id int
, pid varchar(30)
, cost decimal(10,2)
)
insert into @myTable values
('20050101', '20050201', 10001, 'x123', 1254.32)
, ('20001010', '20060821', 10005, 'test01', 21350.96)
, ('20060822', '20991231', 10005, 'test02', 22593.49)
, ('20060101', '20991231', 10006, 'x01', 22548.33)
, ('20060208', '20991231', 10006, 'x02', 22480.38)
, ('20060101', '20060207', 10007, 'abo1', 565.42)
, ('20060208', '20060731', 10007, 'abo2', 19108.32)
select
date_from = min(date_from), date_to = max(date_to)
, id, pid = max(case when date_to = max_date_to then pid end)
, cost = sum(cost)
from (
select
a.date_from, a.date_to, a.id, a.pid, a.cost, a.rn, grp = sum(b.ss)
, max_date_to = max(a.date_to) over (partition by a.id, sum(b.ss))
from
(
select
a.*, ss = case when datediff(dd, b.date_to, a.date_from) = 1 then 0 else 1 end
from
(
select
*, rn = row_number() over (partition by id order by date_from)
from
@myTable
) a
left join (
select
*, rn = row_number() over (partition by id order by date_from)
from
@myTable
) b on a.id = b.id and a.rn - 1 = b.rn
) a
left join (
select
a.*, ss = case when datediff(dd, b.date_to, a.date_from) = 1 then 0 else 1 end
from
(
select
*, rn = row_number() over (partition by id order by date_from)
from
@myTable
) a
left join (
select
*, rn = row_number() over (partition by id order by date_from)
from
@myTable
) b on a.id = b.id and a.rn - 1 = b.rn
) b on a.id = b.id and a.rn >= b.rn
group by a.date_from, a.date_to, a.id, a.pid, a.cost, a.rn
) t
group by id, grp, max_date_to
having min(date_from) <= @date_from and max(date_to) >= @date_to
order by id
结果与您提供的输出略有不同。但是:
当输入为2006年1月1日时,id=10006且pid=X02 date\u from=08/02/2006为1
id=10007日期至=31/07/2006时为2,输入为31/12/2006
所以,我认为查询是正确的
使用cte以可读性更强的格式,因此在一次查询中似乎有几个问题需要解决 我们需要与最新日期匹配的PID。这并不太困难,可以通过将数据与查找最新日期的自身聚合连接起来来解决 如果两个PID都处于活动状态,即从日期和到日期重叠,则两个PID都必须显示。我发现这更棘手。最后,我做了一个查询,找到那些确实重叠并且符合日期的,并对其进行了计数。然后将此计数用作1上联接的条件。因此,它可以有条件地选择与最新日期匹配的PID 最后,使用上面的结果,你可以求和得到成本。结果查询有点像怪物,但这里就是。 如果它没有涵盖其他未详细说明的场景,请务必让我知道
DECLARE @Data TABLE (date_from DATETIME, date_to DATETIME, ID INT, PID NVARCHAR(50), COST MONEY)
INSERT @Data VALUES('2005-1-1','2005-1-31','10001','X123',1254.32)
INSERT @Data VALUES('2000-10-10','2006-8-21','10005','TEST01',21350.9636378758)
INSERT @Data VALUES('2006-8-22','2099-12-31','10005','TEST02',22593.4926163943)
INSERT @Data VALUES('2006-1-1','2099-12-31','10006','X01',22458.3342354444)
INSERT @Data VALUES('2006-2-8','2099-12-31','10006','X02',22480.3772331959)
INSERT @Data VALUES('2006-1-1','2006-2-7','10007','AB01',565.416874152212)
INSERT @Data VALUES('2006-2-8','2006-7-31','10007','AA05',19108.3206482165)
declare @input_date date ='2006-01-01'
declare @output_date date ='2006-12-31'
select
a.ID,
PIDForMaxDateThatMatches.PID,
SUM(a.cost) as cost
from
@Data a
inner join (
-- number of PIDs for dates that overlap grouped by ID
select
a.ID,
-- where there's no overlap then we want the count to be 1 so that later we can use it as condition
COUNT(DISTINCT ISNULL(b.PID,'')) as NumberOfPID
from
@Data a
-- may or may not find overlaps
LEFT JOIN @data b ON
b.date_from <=@output_date and
b.date_to >=@input_date and
a.date_from <= b.date_to and
a.date_to >= b.date_from and
a.ID = b.ID and
a.PID <> b.PID
where
a.date_from <=@output_date and
a.date_to >=@input_date
group by
a.ID) as PIDCountForOverlappingMatches ON
a.ID = PIDCountForOverlappingMatches.ID
left join (
-- get the PID that matches the max date_to
select
DataForMaxDate.ID,
DataForMaxDate.date_from,
DataForMaxDate.date_to,
DataForMaxDate.PID
from
@Data as DataForMaxDate
inner join (
-- get the max date_to that matches the criteria
select
ID,
MAX(date_to) as maxDateTo
from
@Data a
where
date_from <=@output_date and
date_to >=@input_date
group by
ID) as MaxToDatePerID on
DataForMaxDate.ID = MaxToDatePerID.ID and
DataForMaxDate.date_to = MaxToDatePerID.maxDateTo) as PIDForMaxDateThatMatches on
a.ID = PIDForMaxDateThatMatches.ID AND
-- if there's no overlapping dates the PID count would be 1, which we'll take the PID that matches the max(date_to)
-- but if there is overlap, then we want both dates to show, thus the from date must also match before we take the PID
(PIDCountForOverlappingMatches.NumberOfPID = 1 OR a.date_from = PIDForMaxDateThatMatches.date_from)
where
a.date_from <= @output_date and
a.date_to >= @input_date
GROUP BY
a.ID,
PIDForMaxDateThatMatches.PID
ORDER BY
a.ID
编辑:DB Fiddle
结果:
ID PID成本
10005测试02 43944.4562
10006 X01 22458.3342
10006x0222480.3772
10007 AA05 19673.7375决定显示哪些行的逻辑是什么?是什么使PID成为最新的?日期从或日期到?为什么不使用cte的限制?这是一个相当随意的限制。您好,显示的逻辑是:如果日期大于@input\u date并且是ID的最新PID记录,即如果PID是顺序的,则只显示最新的,如果它们同时运行,并且日期显示CTE限制是由于接口造成的,那么您的SQL版本是什么?在这种情况下,它可以工作,但是如果最新的PID不是最大PID,那么它会失败,不是吗?我已经编辑了数据作为一个例子好吧,你说的最新PID是什么意思?我以为它是最大的。很遗憾,我恐怕不是。我已经将方案和数据添加到OP中,以便您可以进行测试!嗨,乌兹,谢谢你!但是,您提到的这两项除外责任都应该包括在内。我提供的输出是queryHi所需要的。我以为你在寻找完全覆盖你输入参数的数据。但看起来您正在寻找重叠的间隔。然后将查询中的having子句更改为having mindate_from=@date_from。你会得到你期望的结果。只是为了性能问题谢谢你,这似乎很有效。。。现在尝试放大它!我发现了一个例子,当第二行返回为null时,它不起作用。嗨,亚当,你对新场景的预期结果是什么?嗨,亨利,我只是想自己解决这个问题!问题出在ID 10007上,我认为在这种情况下,全部金额应该显示在PID AB01上,因为其他两个日期都在结束日期之前。直到LiveData抛出它,我才真正考虑过这个场景。好吧,在这种情况下,我将打破原始查询,而不是有条件地匹配,在同一集中做两个左连接,1个覆盖日期重叠,另一个覆盖没有重叠的最大日期。这是DB小提琴
date_from date_to id pid cost
------------------------------------------------
2000-10-10 2099-12-31 10005 test02 43944.45
2006-01-01 2099-12-31 10006 x01 22548.33
DECLARE @Data TABLE (date_from DATETIME, date_to DATETIME, ID INT, PID NVARCHAR(50), COST MONEY)
INSERT @Data VALUES('2005-1-1','2005-1-31','10001','X123',1254.32)
INSERT @Data VALUES('2000-10-10','2006-8-21','10005','TEST01',21350.9636378758)
INSERT @Data VALUES('2006-8-22','2099-12-31','10005','TEST02',22593.4926163943)
INSERT @Data VALUES('2006-1-1','2099-12-31','10006','X01',22458.3342354444)
INSERT @Data VALUES('2006-2-8','2099-12-31','10006','X02',22480.3772331959)
INSERT @Data VALUES('2006-1-1','2006-2-7','10007','AB01',565.416874152212)
INSERT @Data VALUES('2006-2-8','2006-7-31','10007','AA05',19108.3206482165)
declare @input_date date ='2006-01-01'
declare @output_date date ='2006-12-31'
select
a.ID,
PIDForMaxDateThatMatches.PID,
SUM(a.cost) as cost
from
@Data a
inner join (
-- number of PIDs for dates that overlap grouped by ID
select
a.ID,
-- where there's no overlap then we want the count to be 1 so that later we can use it as condition
COUNT(DISTINCT ISNULL(b.PID,'')) as NumberOfPID
from
@Data a
-- may or may not find overlaps
LEFT JOIN @data b ON
b.date_from <=@output_date and
b.date_to >=@input_date and
a.date_from <= b.date_to and
a.date_to >= b.date_from and
a.ID = b.ID and
a.PID <> b.PID
where
a.date_from <=@output_date and
a.date_to >=@input_date
group by
a.ID) as PIDCountForOverlappingMatches ON
a.ID = PIDCountForOverlappingMatches.ID
left join (
-- get the PID that matches the max date_to
select
DataForMaxDate.ID,
DataForMaxDate.date_from,
DataForMaxDate.date_to,
DataForMaxDate.PID
from
@Data as DataForMaxDate
inner join (
-- get the max date_to that matches the criteria
select
ID,
MAX(date_to) as maxDateTo
from
@Data a
where
date_from <=@output_date and
date_to >=@input_date
group by
ID) as MaxToDatePerID on
DataForMaxDate.ID = MaxToDatePerID.ID and
DataForMaxDate.date_to = MaxToDatePerID.maxDateTo) as PIDForMaxDateThatMatches on
a.ID = PIDForMaxDateThatMatches.ID AND
-- if there's no overlapping dates the PID count would be 1, which we'll take the PID that matches the max(date_to)
-- but if there is overlap, then we want both dates to show, thus the from date must also match before we take the PID
(PIDCountForOverlappingMatches.NumberOfPID = 1 OR a.date_from = PIDForMaxDateThatMatches.date_from)
where
a.date_from <= @output_date and
a.date_to >= @input_date
GROUP BY
a.ID,
PIDForMaxDateThatMatches.PID
ORDER BY
a.ID