日期绑定SQL分组依据
我的数据集如下所示:日期绑定SQL分组依据,sql,sql-server,tsql,group-by,Sql,Sql Server,Tsql,Group By,我的数据集如下所示: StartDate EndDate InstrumentID Dimension DimensionValue 2018-01-01 2018-01-01 123 Currency GBP 2018-01-02 2018-01-02 123 Currency GBP 2018-01-03 2018-01-03 123 Currency USD 2018-01-04 2018-
StartDate EndDate InstrumentID Dimension DimensionValue
2018-01-01 2018-01-01 123 Currency GBP
2018-01-02 2018-01-02 123 Currency GBP
2018-01-03 2018-01-03 123 Currency USD
2018-01-04 2018-01-04 123 Currency USD
2018-01-05 2018-01-05 123 Currency GBP
2018-01-06 2018-01-06 123 Currency GBP
-- Asker's first attempt: one output row per (InstrumentID, Dimension, DimensionValue),
-- spanning the earliest StartDate to the latest EndDate found for that combination.
-- Rows with the same DimensionValue are collapsed together even when rows with a
-- different value sit between them in time.
SELECT
    MIN(w.StartDate) AS StartDate,
    MAX(w.EndDate) AS EndDate,
    w.InstrumentID,
    w.Dimension,
    w.DimensionValue
FROM #Worktable AS w
GROUP BY
    w.InstrumentID,
    w.Dimension,
    w.DimensionValue
我希望将此数据集转换为按连续日期区间合并的数据集,如下所示:
StartDate EndDate InstrumentID Dimension DimensionValue
2018-01-01 2018-01-02 123 Currency GBP
2018-01-03 2018-01-04 123 Currency USD
2018-01-05 2018-01-06 123 Currency GBP
我考虑过这样编写SQL:
StartDate EndDate InstrumentID Dimension DimensionValue
2018-01-01 2018-01-01 123 Currency GBP
2018-01-02 2018-01-02 123 Currency GBP
2018-01-03 2018-01-03 123 Currency USD
2018-01-04 2018-01-04 123 Currency USD
2018-01-05 2018-01-05 123 Currency GBP
2018-01-06 2018-01-06 123 Currency GBP
-- Asker's first attempt: one output row per (InstrumentID, Dimension, DimensionValue),
-- spanning the earliest StartDate to the latest EndDate found for that combination.
-- Rows with the same DimensionValue are collapsed together even when rows with a
-- different value sit between them in time.
SELECT
    MIN(w.StartDate) AS StartDate,
    MAX(w.EndDate) AS EndDate,
    w.InstrumentID,
    w.Dimension,
    w.DimensionValue
FROM #Worktable AS w
GROUP BY
    w.InstrumentID,
    w.Dimension,
    w.DimensionValue
然而,这显然行不通:它会忽略 GBP 记录中间被 USD 隔开这一变化,把所有 GBP 行合并成一条开始日期为 2018-01-01、结束日期为 2018-01-06 的记录。
有没有办法做到这一点,并得到我想要的日期区间?
谢谢。更新:我刚想到下面这个写法,还没来得及测试,但我认为它会按照你想要的方式工作:
-- Returns the five reporting columns from #Worktable, one row per distinct
-- (InstrumentID, StartDate, EndDate, Dimension, DimensionValue) combination.
-- NOTE(review): StartDate and EndDate are grouping keys, so only exact duplicate
-- rows collapse; consecutive rows are NOT merged into a single date range.
SELECT StartDate, EndDate, InstrumentID, Dimension, DimensionValue
FROM (
    SELECT
        StartDate AS StartDate
        , EndDate AS EndDate
        , [InstrumentID]
        , Dimension
        , DimensionValue
        -- Every column of a derived table must have a name in SQL Server;
        -- an unaliased COUNT(*) makes the whole statement fail to compile.
        , COUNT(*) AS RowCnt
    FROM #Worktable
    GROUP BY InstrumentID, StartDate, EndDate, Dimension, DimensionValue
) x
希望这有帮助。更新:我刚想到下面这个写法,还没来得及测试,但我认为它会按照你想要的方式工作:
-- Returns the five reporting columns from #Worktable, one row per distinct
-- (InstrumentID, StartDate, EndDate, Dimension, DimensionValue) combination.
-- NOTE(review): StartDate and EndDate are grouping keys, so only exact duplicate
-- rows collapse; consecutive rows are NOT merged into a single date range.
SELECT StartDate, EndDate, InstrumentID, Dimension, DimensionValue
FROM (
    SELECT
        StartDate AS StartDate
        , EndDate AS EndDate
        , [InstrumentID]
        , Dimension
        , DimensionValue
        -- Every column of a derived table must have a name in SQL Server;
        -- an unaliased COUNT(*) makes the whole statement fail to compile.
        , COUNT(*) AS RowCnt
    FROM #Worktable
    GROUP BY InstrumentID, StartDate, EndDate, Dimension, DimensionValue
) x
希望这有帮助。这是一个常见的“间隙与孤岛”(gaps and islands)问题。关于如何解决它有很多示例,例如:
-- Gaps-and-islands: collapse consecutive rows that share InstrumentID, Dimension
-- and DimensionValue into a single StartDate..EndDate range.
WITH SampleRows AS (
    -- Inline copy of the sample data, with the yyyymmdd strings cast to date.
    SELECT
        CAST(V.StartDate AS date) AS StartDate,
        CAST(V.EndDate AS date) AS EndDate,
        V.InstrumentID,
        V.Dimension,
        V.DimensionValue
    FROM (
        VALUES
            ('20180101', '20180101', 123, 'Currency', 'GBP'),
            ('20180102', '20180102', 123, 'Currency', 'GBP'),
            ('20180103', '20180103', 123, 'Currency', 'USD'),
            ('20180104', '20180104', 123, 'Currency', 'USD'),
            ('20180105', '20180105', 123, 'Currency', 'GBP'),
            ('20180106', '20180106', 123, 'Currency', 'GBP')
    ) AS V (StartDate, EndDate, InstrumentID, Dimension, DimensionValue)
),
Islands AS (
    -- Row position within (InstrumentID, Dimension) minus row position within
    -- (InstrumentID, Dimension, DimensionValue): the difference stays the same
    -- across an unbroken run of one DimensionValue and changes after a switch.
    SELECT
        S.StartDate,
        S.EndDate,
        S.InstrumentID,
        S.Dimension,
        S.DimensionValue,
        ROW_NUMBER() OVER (
            PARTITION BY S.InstrumentID, S.Dimension
            ORDER BY S.StartDate
        )
        - ROW_NUMBER() OVER (
            PARTITION BY S.InstrumentID, S.Dimension, S.DimensionValue
            ORDER BY S.StartDate
        ) AS Grp
    FROM SampleRows AS S
)
-- One output row per island.
SELECT
    MIN(I.StartDate) AS StartDate,
    MAX(I.EndDate) AS EndDate,
    I.InstrumentID,
    I.Dimension,
    I.DimensionValue
FROM Islands AS I
GROUP BY
    I.InstrumentID,
    I.Dimension,
    I.DimensionValue,
    I.Grp
ORDER BY StartDate;
这是一个常见的“间隙与孤岛”(gaps and islands)问题。关于如何解决它有很多示例,例如:
-- Gaps-and-islands: collapse consecutive rows that share InstrumentID, Dimension
-- and DimensionValue into a single StartDate..EndDate range.
WITH SampleRows AS (
    -- Inline copy of the sample data, with the yyyymmdd strings cast to date.
    SELECT
        CAST(V.StartDate AS date) AS StartDate,
        CAST(V.EndDate AS date) AS EndDate,
        V.InstrumentID,
        V.Dimension,
        V.DimensionValue
    FROM (
        VALUES
            ('20180101', '20180101', 123, 'Currency', 'GBP'),
            ('20180102', '20180102', 123, 'Currency', 'GBP'),
            ('20180103', '20180103', 123, 'Currency', 'USD'),
            ('20180104', '20180104', 123, 'Currency', 'USD'),
            ('20180105', '20180105', 123, 'Currency', 'GBP'),
            ('20180106', '20180106', 123, 'Currency', 'GBP')
    ) AS V (StartDate, EndDate, InstrumentID, Dimension, DimensionValue)
),
Islands AS (
    -- Row position within (InstrumentID, Dimension) minus row position within
    -- (InstrumentID, Dimension, DimensionValue): the difference stays the same
    -- across an unbroken run of one DimensionValue and changes after a switch.
    SELECT
        S.StartDate,
        S.EndDate,
        S.InstrumentID,
        S.Dimension,
        S.DimensionValue,
        ROW_NUMBER() OVER (
            PARTITION BY S.InstrumentID, S.Dimension
            ORDER BY S.StartDate
        )
        - ROW_NUMBER() OVER (
            PARTITION BY S.InstrumentID, S.Dimension, S.DimensionValue
            ORDER BY S.StartDate
        ) AS Grp
    FROM SampleRows AS S
)
-- One output row per island.
SELECT
    MIN(I.StartDate) AS StartDate,
    MAX(I.EndDate) AS EndDate,
    I.InstrumentID,
    I.Dimension,
    I.DimensionValue
FROM Islands AS I
GROUP BY
    I.InstrumentID,
    I.Dimension,
    I.DimensionValue,
    I.Grp
ORDER BY StartDate;
您需要使用 DENSE_RANK(密集排名),例如:
-- Ranks every row of the session temp table #Worktable within its DimensionValue
-- by StartDate, then returns one row per
-- (InstrumentID, StartDate, EndDate, DimensionValue, Rank) combination.
-- NOTE(review): StartDate and EndDate are grouping keys, so consecutive rows are
-- NOT merged into date ranges by this query.
WITH x AS (
    SELECT
        -- T-SQL ranking functions require an ORDER BY inside OVER; without it
        -- the statement does not compile.
        DENSE_RANK() OVER (PARTITION BY DimensionValue ORDER BY StartDate) AS [Rank]
        , *
    FROM #Worktable
)
SELECT StartDate AS StartDate
    , EndDate AS EndDate
    , [InstrumentID]
    -- Dimension is not a grouping key, so it has to be aggregated
    , MAX(Dimension) AS Dimension
    , DimensionValue
    , [Rank]
FROM x
GROUP BY InstrumentID, StartDate, EndDate, DimensionValue, [Rank]
您需要使用 DENSE_RANK(密集排名),例如:
-- Ranks every row of the session temp table #Worktable within its DimensionValue
-- by StartDate, then returns one row per
-- (InstrumentID, StartDate, EndDate, DimensionValue, Rank) combination.
-- NOTE(review): StartDate and EndDate are grouping keys, so consecutive rows are
-- NOT merged into date ranges by this query.
WITH x AS (
    SELECT
        -- T-SQL ranking functions require an ORDER BY inside OVER; without it
        -- the statement does not compile.
        DENSE_RANK() OVER (PARTITION BY DimensionValue ORDER BY StartDate) AS [Rank]
        , *
    FROM #Worktable
)
SELECT StartDate AS StartDate
    , EndDate AS EndDate
    , [InstrumentID]
    -- Dimension is not a grouping key, so it has to be aggregated
    , MAX(Dimension) AS Dimension
    , DimensionValue
    , [Rank]
FROM x
GROUP BY InstrumentID, StartDate, EndDate, DimensionValue, [Rank]
这是“间隙与孤岛”(gaps and islands)问题的一种形式。但由于同时存在开始日期和结束日期,需要格外小心。我建议使用 LAG 和累计求和:
-- Merge rows that share InstrumentID / Dimension / DimensionValue into continuous
-- date ranges (gaps-and-islands via LAG + running sum).
--   innermost query : prev_enddate = enddate of the previous row for the same key
--   middle query    : grp = running count of "island starts" for the same key
--   outer query     : one row per (key, grp) with its min start / max end
select InstrumentID, Dimension, DimensionValue,
       min(startdate) as startdate, max(enddate) as enddate
from (select w.*,
             -- A row continues the current island when it starts no later than the
             -- day after the previous row ended (adjacent or overlapping ranges).
             -- The first row of a key has a NULL prev_enddate and falls through to
             -- ELSE, so it always opens a new island.
             sum(case when startdate <= dateadd(day, 1, prev_enddate) then 0 else 1 end)
                 over (partition by InstrumentID, Dimension, DimensionValue
                       order by startdate) as grp
      from (select w.*,
                   lag(enddate) over (partition by InstrumentID, Dimension, DimensionValue
                                      order by startdate) as prev_enddate
            from #worktable w
           ) w
     ) w
group by InstrumentID, Dimension, DimensionValue, grp
order by InstrumentID, Dimension, DimensionValue, min(startdate);
这是“间隙与孤岛”(gaps and islands)问题的一种形式。但由于同时存在开始日期和结束日期,需要格外小心。我建议使用 LAG 和累计求和:
-- Merge rows that share InstrumentID / Dimension / DimensionValue into continuous
-- date ranges (gaps-and-islands via LAG + running sum).
--   innermost query : prev_enddate = enddate of the previous row for the same key
--   middle query    : grp = running count of "island starts" for the same key
--   outer query     : one row per (key, grp) with its min start / max end
select InstrumentID, Dimension, DimensionValue,
       min(startdate) as startdate, max(enddate) as enddate
from (select w.*,
             -- A row continues the current island when it starts no later than the
             -- day after the previous row ended (adjacent or overlapping ranges).
             -- The first row of a key has a NULL prev_enddate and falls through to
             -- ELSE, so it always opens a new island.
             sum(case when startdate <= dateadd(day, 1, prev_enddate) then 0 else 1 end)
                 over (partition by InstrumentID, Dimension, DimensionValue
                       order by startdate) as grp
      from (select w.*,
                   lag(enddate) over (partition by InstrumentID, Dimension, DimensionValue
                                      order by startdate) as prev_enddate
            from #worktable w
           ) w
     ) w
group by InstrumentID, Dimension, DimensionValue, grp
order by InstrumentID, Dimension, DimensionValue, min(startdate);
请尝试以下操作:
-- PostgreSQL dialect (relies on date - date yielding a whole number of days).
-- Merges consecutive rows that share instrumentid / dimension / dimensionvalue
-- into one startdate..enddate range per unbroken run.
WITH cte AS (
    -- Inline copy of the sample data, with the yyyymmdd strings cast to date.
    SELECT CAST(StartDate AS DATE) AS StartDate,
           CAST(EndDate AS DATE) AS EndDate,
           InstrumentID,
           Dimension,
           DimensionValue
    FROM (VALUES ('20180101', '20180101', 123, 'Currency', 'GBP'),
                 ('20180102', '20180102', 123, 'Currency', 'GBP'),
                 ('20180103', '20180103', 123, 'Currency', 'USD'),
                 ('20180104', '20180104', 123, 'Currency', 'USD'),
                 ('20180105', '20180105', 123, 'Currency', 'GBP'),
                 ('20180106', '20180106', 123, 'Currency', 'GBP')
         ) V(StartDate, EndDate, InstrumentID, Dimension, DimensionValue)
),
flagged AS (
    -- is_island_start = 0 when the row begins at most one day after the previous
    -- row of the same key ended; otherwise 1. The first row of a key has no
    -- previous row, the comparison is NULL, and the CASE falls through to 1.
    SELECT startdate
         , enddate
         , instrumentid
         , dimension
         , dimensionvalue
         , CASE WHEN startdate - LAG(enddate, 1) OVER (
                         PARTITION BY instrumentid, dimension, dimensionvalue
                         ORDER BY startdate) <= 1
                THEN 0
                ELSE 1 END AS is_island_start
    FROM cte
),
islands AS (
    -- Running total of island starts gives every row the number of its island.
    SELECT startdate
         , enddate
         , instrumentid
         , dimension
         , dimensionvalue
         , SUM(is_island_start) OVER (
               PARTITION BY instrumentid, dimension, dimensionvalue
               ORDER BY startdate
               ROWS UNBOUNDED PRECEDING) AS island_id
    FROM flagged
)
SELECT MIN(startdate) AS startdate
     , MAX(enddate) AS enddate
     , instrumentid
     , dimension
     , dimensionvalue
FROM islands
GROUP BY instrumentid, dimension, dimensionvalue, island_id
ORDER BY startdate;
创建临时表要归功于@Lamu。请尝试以下操作:
-- PostgreSQL dialect (relies on date - date yielding a whole number of days).
-- Merges consecutive rows that share instrumentid / dimension / dimensionvalue
-- into one startdate..enddate range per unbroken run.
WITH cte AS (
    -- Inline copy of the sample data, with the yyyymmdd strings cast to date.
    SELECT CAST(StartDate AS DATE) AS StartDate,
           CAST(EndDate AS DATE) AS EndDate,
           InstrumentID,
           Dimension,
           DimensionValue
    FROM (VALUES ('20180101', '20180101', 123, 'Currency', 'GBP'),
                 ('20180102', '20180102', 123, 'Currency', 'GBP'),
                 ('20180103', '20180103', 123, 'Currency', 'USD'),
                 ('20180104', '20180104', 123, 'Currency', 'USD'),
                 ('20180105', '20180105', 123, 'Currency', 'GBP'),
                 ('20180106', '20180106', 123, 'Currency', 'GBP')
         ) V(StartDate, EndDate, InstrumentID, Dimension, DimensionValue)
),
flagged AS (
    -- is_island_start = 0 when the row begins at most one day after the previous
    -- row of the same key ended; otherwise 1. The first row of a key has no
    -- previous row, the comparison is NULL, and the CASE falls through to 1.
    SELECT startdate
         , enddate
         , instrumentid
         , dimension
         , dimensionvalue
         , CASE WHEN startdate - LAG(enddate, 1) OVER (
                         PARTITION BY instrumentid, dimension, dimensionvalue
                         ORDER BY startdate) <= 1
                THEN 0
                ELSE 1 END AS is_island_start
    FROM cte
),
islands AS (
    -- Running total of island starts gives every row the number of its island.
    SELECT startdate
         , enddate
         , instrumentid
         , dimension
         , dimensionvalue
         , SUM(is_island_start) OVER (
               PARTITION BY instrumentid, dimension, dimensionvalue
               ORDER BY startdate
               ROWS UNBOUNDED PRECEDING) AS island_id
    FROM flagged
)
SELECT MIN(startdate) AS startdate
     , MAX(enddate) AS enddate
     , instrumentid
     , dimension
     , dimensionvalue
FROM islands
GROUP BY instrumentid, dimension, dimensionvalue, island_id
ORDER BY startdate;
创建临时表的写法要归功于@Lamu。这是一个非常规整的源数据集。当数据不那么理想时会发生什么?例如日期范围重叠、维度值重叠、开始日期与结束日期不一致等情况。这是一个非常规整的源数据集。当数据不那么理想时会发生什么?例如日期范围重叠、维度值重叠、开始日期与结束日期不一致等情况。这不会给出 OP 想要的结果。这不会给出 OP 想要的结果。