SQL Server:按日期和值对表进行 GROUP BY,并按最新时间戳取每组的最新记录
标签:sql-server, date, datetime

请看下面的表 1(spent)和表 2(forecast),以及 desired_outcome 中给出的预期结果。
我的目标是先按最新的 p_date(本例为 2020-05-31)筛选所有条目,然后按 id 和 mon_year 分组,但每组只保留 timestamp 最新的那条记录。
最后,将两个表连接起来;对于只在其中一个表中出现的组合,另一个表对应的值取 NULL(参见第 4 行)。
先谢谢了!
环境是 SQL Server 2012。
-- Table 1: spent amounts, one row per (id, mon_year, p_date, timestamp) entry.
-- Rows wanted in the final result are tagged with a trailing --n comment
-- in the insert statement that follows.
CREATE TABLE spent
(
    id          BIGINT,
    mon_year    VARCHAR(100),   -- month label such as 'Oct-20'
    p_date      DATE,
    [timestamp] DATETIME,       -- used to pick the latest entry per group
    spent       DECIMAL(9, 2)
);
-- Seed data for spent. A trailing --n marks the row expected in the final
-- result (n is the row number used in desired_outcome).
-- The column list is explicit so the insert does not depend on column order.
-- Datetime literals use the ISO 8601 'T' form, which SQL Server interprets
-- identically under every SET LANGUAGE / SET DATEFORMAT setting.
insert into spent (id, mon_year, p_date, [timestamp], spent)
values
    (12345, 'Oct-20', '2020-05-31', '2020-06-18T12:16:56', 100.0), --1
    (12345, 'Oct-20', '2020-05-31', '2020-06-18T12:11:56', 150.0),
    (12345, 'Nov-20', '2020-05-31', '2020-06-18T12:10:56', 250.0), --2
    (12345, 'Mar-20', '2020-05-31', '2020-06-18T12:15:56', 10.0), --3
    (12345, 'Jan-20', '2020-05-31', '2020-06-18T12:13:56', 5.0), --4
    (12345, 'Mar-20', '2020-05-31', '2020-06-18T12:12:56', 0.0),
    (12345, 'Oct-20', '2020-04-30', '2020-05-15T08:16:56', 1000.0),
    (12300, 'Mar-20', '2020-04-30', '2020-06-19T12:11:56', 150.0),
    (12300, 'Nov-20', '2020-05-31', '2020-06-19T12:10:56', 250.0), --5
    (12300, 'Mar-20', '2020-05-31', '2020-06-18T12:15:56', 10.0), --6
    (12300, 'Mar-20', '2020-05-31', '2020-06-18T12:12:56', 0.0);
-- Table 2: forecast amounts, same layout as spent but with a forecast column.
-- Rows wanted in the final result are tagged with a trailing --n comment
-- in the insert statement that follows.
CREATE TABLE forecast
(
    id          BIGINT,
    mon_year    VARCHAR(100),   -- month label such as 'Oct-20'
    p_date      DATE,
    [timestamp] DATETIME,       -- used to pick the latest entry per group
    forecast    DECIMAL(9, 2)
);
-- Seed data for forecast. A trailing --n marks the row expected in the final
-- result (n is the row number used in desired_outcome). There is no --4 row:
-- (12345, 'Jan-20') has no forecast entry in this data set.
-- The column list is explicit so the insert does not depend on column order.
-- Datetime literals use the ISO 8601 'T' form, which SQL Server interprets
-- identically under every SET LANGUAGE / SET DATEFORMAT setting.
insert into forecast (id, mon_year, p_date, [timestamp], forecast)
values
    (12345, 'Oct-20', '2020-05-31', '2020-06-18T12:16:56', 50.0), --1
    (12345, 'Oct-20', '2020-05-31', '2020-06-18T12:11:56', 100.0),
    (12345, 'Nov-20', '2020-05-31', '2020-06-18T12:10:56', 30.0), --2
    (12345, 'Mar-20', '2020-05-31', '2020-06-18T12:15:56', 40.0), --3
    (12345, 'Mar-20', '2020-05-31', '2020-06-18T12:12:56', 0.0),
    (12345, 'Oct-20', '2020-04-30', '2020-05-15T08:16:56', 0.0),
    (12300, 'Mar-20', '2020-04-30', '2020-06-19T12:11:56', 100.0),
    (12300, 'Nov-20', '2020-05-31', '2020-06-19T12:10:56', 200.0), --5
    (12300, 'Mar-20', '2020-05-31', '2020-06-18T12:15:56', 100.0), --6
    (12300, 'Mar-20', '2020-05-31', '2020-06-18T12:12:56', 0.0);
-- Expected shape of the final joined result: one row per (id, mon_year)
-- for a given p_date, carrying both the forecast and the spent value.
CREATE TABLE desired_outcome
(
    id       BIGINT,
    mon_year VARCHAR(100),
    p_date   DATE,
    forecast DECIMAL(9, 2),
    spent    DECIMAL(9, 2)
);
-- Expected result rows; the trailing --n matches the markers in the spent and
-- forecast seed data. Row 4 has a NULL forecast because that (id, mon_year)
-- appears only in spent.
-- The column list is explicit so the insert does not depend on column order
-- (forecast is declared before spent in this table).
insert into desired_outcome (id, mon_year, p_date, forecast, spent)
values
    (12345, 'Oct-20', '2020-05-31', 50.0, 100.0), --1
    (12345, 'Nov-20', '2020-05-31', 30.0, 250.0), --2
    (12345, 'Mar-20', '2020-05-31', 40.0, 10.0), --3
    (12345, 'Jan-20', '2020-05-31', NULL, 5.0), --4
    (12300, 'Nov-20', '2020-05-31', 200.0, 250.0), --5
    (12300, 'Mar-20', '2020-05-31', 100.0, 10.0); --6
下面的代码使用公共表表达式(CTE),借助 row_number() 窗口函数取得每个 mon_year / id 组合中时间戳最新的记录,然后用 full join 将两者合并。
full join 允许某个组合只有支出(spent)或只有预测(forecast)。
为了取得两个表中最新的日期,先用一个变量求出最大的 p_date。带注释的代码如下:
-- Latest p_date present in either spent or forecast.
-- Each branch reduces its table to a single max value; the outer max picks
-- the later of the two (NULL when both tables are empty).
declare @max_date date;

select
    @max_date = max(per_table.p_date)
from (
    select max(p_date) as p_date
    from spent
    union all
    select max(p_date) as p_date
    from forecast
) as per_table;
-- Latest spent and forecast value per (id, mon_year) for p_date = @max_date,
-- combined so that a pair present in only one table still yields a row
-- (the other table's value is NULL).
--
-- The row_no = 1 filter is applied inside the CTEs, before the join.
-- Filtering ls.row_no in the outer WHERE would discard every forecast-only
-- row (ls.row_no is NULL there) and turn the full join into a left join.
with ranked_spend as (
    select
        id
        ,mon_year
        ,p_date
        ,spent
        -- One partition per (mon_year, id); newest timestamp gets row_no = 1.
        -- NOTE(review): rows sharing the same timestamp are ranked arbitrarily.
        ,row_number() over (partition by mon_year, id order by [timestamp] desc) as row_no
    from spent
    where p_date = @max_date
)
,
latest_spend as (
    select
        id
        ,mon_year
        ,p_date
        ,spent
    from ranked_spend
    where row_no = 1
)
,
ranked_forecast as (
    select
        id
        ,mon_year
        ,p_date
        ,forecast
        -- Same ranking as ranked_spend, applied to the forecast table.
        ,row_number() over (partition by mon_year, id order by [timestamp] desc) as row_no
    from forecast
    where p_date = @max_date
)
,
latest_forecast as (
    select
        id
        ,mon_year
        ,p_date
        ,forecast
    from ranked_forecast
    where row_no = 1
)
select
    -- Key columns come from whichever side is present.
    coalesce(ls.id, lf.id) as id
    ,coalesce(ls.mon_year, lf.mon_year) as mon_year
    ,coalesce(ls.p_date, lf.p_date) as p_date
    ,lf.forecast
    ,ls.spent
from latest_spend as ls
full join latest_forecast as lf
    on ls.mon_year = lf.mon_year
    and ls.id = lf.id
order by id desc, mon_year asc;
评论:
- 到目前为止你尝试了什么?为什么不起作用?
- 我尝试按 id 和 p_date 分组,并加上 p_date 等于 max(p_date) 的 where 子句;但在 datetime 的处理上,甚至在逻辑的先后顺序上,都遇到了问题。有什么想法吗?谢谢!
- 怎样让脚本中的 p_date 始终默认取现有的最大值,而不是把它(例如“2020-05-31”)硬编码在脚本里?
- 我已修改答案以满足这一需求:用一个变量求出两个表中的最大日期。