Sql : 不错。我只会使用(t1.begin,t1.end+INTERVAL'1'天)。我想如果没有这个改变,一些边缘案例可能会显示错误。@Adam:我不知道。因此,也许一个+间隔'2'天可能适合捕捉这种情况2000-01-01-2001-12-31和2002-01-01-2002-06-31。
:Sql : 不错。我只会使用(t1.begin,t1.end+INTERVAL'1'天)。我想如果没有这个改变,一些边缘案例可能会显示错误。@Adam:我不知道。因此,也许一个+间隔'2'天可能适合捕捉这种情况2000-01-01-2001-12-31和2002-,sql,postgresql,date,Sql,Postgresql,Date,: 不错。我只会使用(t1.begin,t1.end+INTERVAL'1'天)。我想如果没有这个改变,一些边缘案例可能会显示错误。@Adam:我不知道。因此,也许一个+间隔'2'天可能适合捕捉这种情况2000-01-01-2001-12-31和2002-01-01-2002-06-31。这不是每个ID只返回一个范围吗?还有一件事,我起初没有注意到。这两个条件绝对不能同时应用:t1.id=r.id和t1.id>r.id。感谢您的评论和观察。这个额外的条件只适用于我测试这个查询的表。我现在已经将其
不错。我只会使用
(t1.begin,t1.end+INTERVAL'1'天)
。我想如果没有这个改变,一些边缘案例可能会显示错误。@Adam:我不知道。因此,也许一个+间隔'2'天
可能适合捕捉这种情况2000-01-01-2001-12-31
和2002-01-01-2002-06-31
。这不是每个ID只返回一个范围吗?还有一件事,我起初没有注意到。这两个条件绝对不能同时应用:t1.id=r.id和t1.id>r.id
。感谢您的评论和观察。这个额外的条件只适用于我测试这个查询的表。我现在已经将其更改为适合Marcel的表格。嗯,我认为这个解决方案存在一个问题:使用min(开始)时,您得到第一个“开始”条目,使用max(结束)时,您得到最后一个“结束”条目。但是,您可能会丢失第一个“开始”和最后一个“结束”之间所有“开始”和“结束”的信息。您可以计算最小值和最大值之间的差异,然后对每一行计算其各自的最小值和最大值,如果该数字低于第一个,则表示计划中有空闲天数。这就是我最初的想法:)它在我认为的每种情况下都能工作,具体取决于您如何实现:)“有没有一种方法可以不用窗口函数和查询?”--可能没有,因为您需要在某一点或另一点递归合并行。我明白了-谢谢。因此,我的尝试是没有出路的?我的意思是它给了我所有需要的行,我唯一的困难是排除不需要的行。然而,我已经要求我的管理员更新postgresql服务器。希望他会这么好。如果没有递归查询,您可以在curr上找到使用大型(且非常缓慢)连接的边界。开始查看此相关线程:另请参见:
ID BEGIN END
-- Collapse every row of an "ID" into a single range: earliest start to
-- latest end. NOTE: this flattens gaps between episodes away entirely.
SELECT
    "ID",
    MIN("BEGIN"),
    MAX("END")
FROM ...
GROUP BY "ID"
-- Pair each episode with every episode of the same "ID" that overlaps it
-- (with a 2-day tolerance on the end date) and emit the combined range.
-- A single pass only merges pairs, not whole chains of overlaps.
-- FIX: "ID" must be qualified — unqualified it is ambiguous between the
-- two sides of the self-join and PostgreSQL rejects the query.
SELECT tab1."ID", LEAST(tab1."BEGIN", tab2."BEGIN"), GREATEST(tab1."END", tab2."END")
FROM <mytable> AS tab1
INNER JOIN <mytable> AS tab2
    ON tab1."ID" = tab2."ID"
    AND (tab1."BEGIN", tab1."END" + INTERVAL '2 day') OVERLAPS (tab2."BEGIN", tab2."END")
ORDER BY tab1."ID"
ID BEGIN END
1;"2000-01-01";"2000-03-31"
1;"2000-04-01";"2000-05-31"
1;"2000-04-15";"2000-07-31"
1;"2000-09-01";"2000-10-31"
2;"2000-02-01";"2000-03-15"
2;"2000-01-15";"2000-03-31"
2;"2000-04-01";"2000-04-15"
3;"2000-06-01";"2000-06-15"
3;"2000-07-01";"2000-07-15"
ID BEGIN END
1;"2000-01-01";"2000-07-31"
1;"2000-09-01";"2000-10-31"
2;"2000-01-15";"2000-04-15"
3;"2000-06-01";"2000-06-15"
3;"2000-07-01";"2000-07-15"
-- Merge overlapping or adjacent (gap <= 1 day) date ranges per id.
-- FIX: the column names begin/end are double-quoted throughout — END is a
-- fully reserved word in PostgreSQL and the original failed to parse.
WITH
t1 AS (
    -- Every boundary point (each range's start and end) per id, one column.
    SELECT id, "begin" AS time
    FROM "nace-8510-test".checkfkt
    UNION ALL
    SELECT id, "end"
    FROM "nace-8510-test".checkfkt
),
t2 AS (
    -- Chronological numbering of the boundary points within each id.
    SELECT Row_Number() OVER(PARTITION BY id ORDER BY time) AS num, id, time
    FROM t1 AS t1_1
),
t3 AS (
    -- Pair each boundary with its successor; keep only pairs covered by some
    -- source range, or that touch / are exactly 1 day apart. The difference
    -- num - row_number() (num1) is constant within one merged island.
    SELECT t2_1.num - Row_Number() OVER(PARTITION BY t2_1.id ORDER BY t2_1.time, t2_2.time) num1,
        t2_1.id, t2_1.time AS "begin", t2_2.time AS "end"
    FROM t2 AS t2_1
    INNER JOIN t2 AS t2_2
        ON t2_1.id = t2_2.id
       AND t2_1.num = t2_2.num - 1
    WHERE
        EXISTS (
            SELECT *
            FROM "nace-8510-test".checkfkt AS s
            WHERE s.id = t2_1.id
              AND (s."begin" < t2_2.time AND s."end" > t2_1.time)
        )
        OR t2_1.time = t2_2.time
        OR t2_1.time + INTERVAL '1 day' = t2_2.time
)
-- One row per island: its earliest start (von) and latest end (bis).
SELECT id, MIN("begin") AS von, MAX("end") AS bis
FROM t3
GROUP BY id, num1
ORDER BY id
-- For each row, expose the previous and next episode of the same id so that
-- gaps/overlaps between consecutive ranges can be inspected side by side.
-- FIX: "start" and "end" are quoted — END is a reserved word in PostgreSQL
-- and cannot appear unquoted as a column reference.
select id,
    lag("start") over w as prev_start,
    lag("end") over w as prev_end,
    "start",
    "end",
    lead("start") over w as next_start,
    lead("end") over w as next_end
from yourtable
window w as (
    partition by id
    order by "start", "end"
)
-- Merge overlapping date ranges per id (2-day tolerance) with a recursive CTE.
WITH RECURSIVE t1_rec ( id, "begin", "end", n ) AS (
-- Seed: keep only rows whose next range (ordered by "begin") starts within
-- 2 days of this row's "end" — i.e. rows that open a chain of overlaps.
-- n is the row's position within its id, carried along as the chain key.
SELECT id, "begin", "end", n
FROM (
SELECT
id, "begin", "end",
CASE
WHEN LEAD("begin") OVER (
PARTITION BY id
ORDER BY "begin") <= ("end" + interval '2' day)
THEN 1 ELSE 0 END AS cl,
ROW_NUMBER() OVER (
PARTITION BY id
ORDER BY "begin") AS n
FROM mytable
) s
WHERE s.cl = 1
UNION ALL
-- Recursive step: pull in every later range that overlaps the chain
-- (again with the 2-day tolerance), preserving the seed's chain key n.
SELECT p1.id, p1."begin", p1."end", a.n
FROM t1_rec a
JOIN mytable p1 ON p1.id = a.id
AND p1."begin" > a."begin"
AND (a."begin", a."end" + interval '2' day) OVERLAPS
(p1."begin", p1."end")
)
-- Collapse each chain to one row. The anti-join (LEFT JOIN ... IS NULL)
-- drops a chain whose "end" already appears in an earlier (lower-n) chain,
-- so each merged interval is reported exactly once.
SELECT t1.id, min(t1."begin"), max(t1."end")
FROM t1_rec t1
LEFT JOIN t1_rec t2 ON t1.id = t2.id
AND t2."end" = t1."end"
AND t2.n < t1.n
WHERE t2.n IS NULL
GROUP BY t1.id, t1.n
ORDER BY t1.id, t1.n;
-- Merge date ranges per id by recursively appending each range that starts
-- after, but overlaps (1-day tolerance), the accumulated range.
-- FIX 1: begin/end are double-quoted — END is reserved in PostgreSQL and
--        the original CTE column list did not parse.
-- FIX 2: the final SELECT now returns id as well; it grouped by id but did
--        not select it, so the merged ranges could not be told apart.
WITH RECURSIVE rec_stmt ( id, "begin", "end" ) AS (
/* seed statement:
start with only first start and end dates for each id
*/
SELECT id, MIN("begin"), MIN("end")
FROM mytable seed_stmt
GROUP BY id
UNION ALL
/* iterative (not really recursive) statement:
append qualifying rows to resultset
*/
SELECT t1.id, t1."begin", t1."end"
FROM rec_stmt r
JOIN mytable t1 ON t1.id = r.id
AND t1."begin" > r."end"
AND (r."begin", r."end" + INTERVAL '1' DAY) OVERLAPS
(t1."begin" - INTERVAL '1' DAY, t1."end")
)
SELECT id, MIN("begin"), MAX("end")
FROM rec_stmt
GROUP BY id;
COPY (
SELECT "ID", "BEGIN", "END"
-- (The remainder of this COPY statement was cut off in the original post;
--  the author could not paste it due to a StackOverflow formatting issue.)
# Read the episode CSV (semicolon-separated, read.csv2) and parse the date
# columns. FIX: the "<" of every "<-" assignment was lost in the paste.
data <- read.csv2("</path/to.csv>")
data$BEGIN <- as.Date(data$BEGIN)
data$END <- as.Date(data$END)
# Merge ("smooth") overlapping or day-adjacent episodes per ID.
# theData: data.frame with columns ID, BEGIN, END (BEGIN/END of class Date),
#          assumed already sorted by ID, BEGIN — TODO confirm with caller.
# Returns a data.frame with one row per merged episode.
# FIX: restored the "<" characters the paste dropped — every "<-" assignment,
#      the "theLength < 2L" guard, and the "(curEND + 1) < nextBEGIN" test.
smoothingEpisodes <- function (theData) {
    theLength <- nrow(theData)
    # Fewer than two rows: nothing can be merged.
    if (theLength < 2L) return(theData)
    ID <- as.integer(theData[["ID"]])
    BEGIN <- as.numeric(theData[["BEGIN"]])  # Dates as days since 1970-01-01
    END <- as.numeric(theData[["END"]])
    # Episode currently being accumulated.
    curId <- ID[[1L]]
    curBEGIN <- BEGIN[[1L]]
    curEND <- END[[1L]]
    # Preallocated output buffers (at most theLength merged rows); j is the
    # index of the next free output slot.
    out.1 <- integer(length = theLength)
    out.2 <- out.3 <- numeric(length = theLength)
    j <- 1L
    for (i in 2:nrow(theData)) {
        nextId <- ID[[i]]
        nextBEGIN <- BEGIN[[i]]
        nextEND <- END[[i]]
        if (curId != nextId | (curEND + 1) < nextBEGIN) {
            # New ID or a gap of more than one day: flush the current episode.
            out.1[[j]] <- curId
            out.2[[j]] <- curBEGIN
            out.3[[j]] <- curEND
            j <- j + 1L
            curId <- nextId
            curBEGIN <- nextBEGIN
            curEND <- nextEND
        } else {
            # Overlapping or adjacent: extend the current episode.
            curEND <- max(curEND, nextEND, na.rm = TRUE)
        }
    }
    # Flush the final accumulated episode.
    out.1[[j]] <- curId
    out.2[[j]] <- curBEGIN
    out.3[[j]] <- curEND
    theOutput <- data.frame(ID = out.1[1:j], BEGIN = as.Date(out.2[1:j], origin = "1970-01-01"), END = as.Date(out.3[1:j], origin = "1970-01-01"))
    theOutput
}
# Merge the episodes, add the per-episode duration in days (TAGE, inclusive
# of both endpoints, hence the +1), and export as semicolon-separated CSV.
# FIX: restored the "<-" assignments lost in the paste.
data1 <- smoothingEpisodes(data)
data2 <- transform(data1, TAGE = (as.numeric(data1$END - data1$BEGIN) + 1))
write.csv2(data2, file = "</path/to/output.csv>")