Google BigQuery:在 BigQuery 中使用 LAG() 函数替代 CONDITIONAL_TRUE_EVENT?
Vertica 有一种非常好的操作类型:基于事件的窗口操作,它基本上可以让您识别事件发生的时刻。例如,CONDITIONAL_TRUE_EVENT 会在给定布尔表达式每次求值为 TRUE 时将计数器加一。是否有办法在 BigQuery 中模拟此函数?请注意,CONDITIONAL_TRUE_EVENT 内部有一个 LAG() 函数。例如:Google BigQuery:在 BigQuery 中使用 LAG() 函数替代 CONDITIONAL_TRUE_EVENT?,google-bigquery,window-functions,vertica,Google Bigquery,Window Functions,Vertica,Vertica 有一种非常好的操作类型:基于事件的窗口操作,它基本上可以让您识别事件发生的时刻。例如,CONDITIONAL_TRUE_EVENT 会在给定布尔表达式每次求值为 TRUE 时将计数器加一。是否有办法在 BigQuery 中模拟此函数?请注意,CONDITIONAL_TRUE_EVENT 内部有一个 LAG() 函数。例如: CONDITIONAL_TRUE_EVENT(timestamp - LAG(timestamp) > '7 days') OVER(PARTITION BY zuid, sub_type
-- Vertica expression from the question (a fragment, not a complete statement).
-- Within each (zuid, sub_type) partition, ordered by timestamp, the counter
-- increases by one on every row where the boolean expression is TRUE, i.e.
-- where the gap to the previous row's timestamp exceeds 7 days.
CONDITIONAL_TRUE_EVENT(timestamp - LAG(timestamp) > '7 days')
OVER(PARTITION BY zuid, sub_type ORDER BY timestamp)
谢谢。这个问题我已经考虑过好几次了。实际的做法是嵌套两个查询来达到目的:第一个查询(使用公共表表达式)引入一个计数器列,当您关注的条件为 true 时该列为 1,否则为 0;第二个查询读取第一个查询的输出,对该计数器列计算累计和(running sum)。这种写法比 Vertica 版本笨拙得多;Vertica 版本我放在 BigQuery 版本之后展示。
-- Vertica: assign a trip number to every sensor reading in a single pass.
-- CONDITIONAL_TRUE_EVENT increases its counter on each row of the window
-- for which the boolean expression is TRUE.
WITH oilpressure (vid, ts, psi) AS (
    -- sample readings for vehicle 42: two bursts separated by a gap of
    -- more than 30 minutes
              SELECT 42, TIMESTAMP '2020-10-01 17:00:00', 25.356
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:10', 35.124
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:20', 47.056
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:30', 45.225
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:00', 25.356
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:10', 35.124
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:20', 47.056
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:30', 45.225
)
SELECT
    vid,
    -- The LAG default '0000-01-01' stands in for the missing predecessor of
    -- the first row, so that row also counts as an event and numbering
    -- starts at 1 (see the listed output).
    CONDITIONAL_TRUE_EVENT(
        ts - LAG(ts, 1, '0000-01-01') > '30 MINUTES'
    ) OVER (PARTITION BY vid ORDER BY ts) AS tripid,
    ts,
    psi
FROM oilpressure
;
-- out vid | tripid | ts | psi
-- out -----+--------+---------------------+--------
-- out 42 | 1 | 2020-10-01 17:00:00 | 25.356
-- out 42 | 1 | 2020-10-01 17:00:10 | 35.124
-- out 42 | 1 | 2020-10-01 17:00:20 | 47.056
-- out 42 | 1 | 2020-10-01 17:00:30 | 45.225
-- out 42 | 2 | 2020-10-01 17:45:00 | 25.356
-- out 42 | 2 | 2020-10-01 17:45:10 | 35.124
-- out 42 | 2 | 2020-10-01 17:45:20 | 47.056
-- out 42 | 2 | 2020-10-01 17:45:30 | 45.225
我用一个自己试验过的示例来说明:带有时间戳和机油压力读数的传感器数据。我们想区分各次“行程”,而识别行程的唯一依据是:两次“行程”之间的间隔超过 30 分钟。
BigQuery 版本——同样的思路适用于所有支持 LAG() OLAP 函数的 DBMS
-- BigQuery: emulate Vertica's CONDITIONAL_TRUE_EVENT in two steps:
--   1. flag every reading that starts a new trip (chg = 1, otherwise 0);
--   2. take the running sum of that flag per vehicle.
-- The gap test uses BigQuery's TIMESTAMP_ADD; on another engine that
-- supports LAG(), substitute that engine's timestamp/interval arithmetic.
WITH
-- input: sample readings for vehicle 42. BigQuery CTEs take no column list,
-- so the column names come from the aliases on the first SELECT.
oilpressure AS (
            SELECT 42 AS vid, TIMESTAMP '2020-10-01 17:00:00' AS ts, 25.356 AS psi
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:10', 35.124
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:20', 47.056
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:30', 45.225
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:00', 25.356
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:10', 35.124
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:20', 47.056
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:30', 45.225
),
-- previous reading of the same vehicle; NULL on the first row of a partition
with_prev_ts AS (
  SELECT
    vid,
    ts,
    psi,
    LAG(ts) OVER (PARTITION BY vid ORDER BY ts) AS prev_ts
  FROM oilpressure
),
with_chg_counter AS (
  SELECT
    vid,
    ts,
    psi,
    CASE
      -- No sentinel default such as '0000-01-01' is used: that value is
      -- below BigQuery's minimum TIMESTAMP. The first reading of a vehicle
      -- is flagged explicitly instead, so trip numbering starts at 1.
      WHEN prev_ts IS NULL THEN 1
      -- more than 30 minutes since the previous reading: a new trip begins
      WHEN ts > TIMESTAMP_ADD(prev_ts, INTERVAL 30 MINUTE) THEN 1
      ELSE 0
    END AS chg
  FROM with_prev_ts
)
SELECT
  vid,
  SUM(chg) OVER (PARTITION BY vid ORDER BY ts) AS tripid,
  ts,
  psi
FROM with_chg_counter
ORDER BY vid, ts
;
-- out vid|tripid|ts |psi
-- out 42| 1|2020-10-01 17:00:00|25.356
-- out 42| 1|2020-10-01 17:00:10|35.124
-- out 42| 1|2020-10-01 17:00:20|47.056
-- out 42| 1|2020-10-01 17:00:30|45.225
-- out 42| 2|2020-10-01 17:45:00|25.356
-- out 42| 2|2020-10-01 17:45:10|35.124
-- out 42| 2|2020-10-01 17:45:20|47.056
-- out 42| 2|2020-10-01 17:45:30|45.225
还有Vertica版本
-- Vertica: assign a trip number to every sensor reading in a single pass.
-- CONDITIONAL_TRUE_EVENT increases its counter on each row of the window
-- for which the boolean expression is TRUE.
WITH oilpressure (vid, ts, psi) AS (
    -- sample readings for vehicle 42: two bursts separated by a gap of
    -- more than 30 minutes
              SELECT 42, TIMESTAMP '2020-10-01 17:00:00', 25.356
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:10', 35.124
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:20', 47.056
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:30', 45.225
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:00', 25.356
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:10', 35.124
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:20', 47.056
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:30', 45.225
)
SELECT
    vid,
    -- The LAG default '0000-01-01' stands in for the missing predecessor of
    -- the first row, so that row also counts as an event and numbering
    -- starts at 1 (see the listed output).
    CONDITIONAL_TRUE_EVENT(
        ts - LAG(ts, 1, '0000-01-01') > '30 MINUTES'
    ) OVER (PARTITION BY vid ORDER BY ts) AS tripid,
    ts,
    psi
FROM oilpressure
;
-- out vid | tripid | ts | psi
-- out -----+--------+---------------------+--------
-- out 42 | 1 | 2020-10-01 17:00:00 | 25.356
-- out 42 | 1 | 2020-10-01 17:00:10 | 35.124
-- out 42 | 1 | 2020-10-01 17:00:20 | 47.056
-- out 42 | 1 | 2020-10-01 17:00:30 | 45.225
-- out 42 | 2 | 2020-10-01 17:45:00 | 25.356
-- out 42 | 2 | 2020-10-01 17:45:10 | 35.124
-- out 42 | 2 | 2020-10-01 17:45:20 | 47.056
-- out 42 | 2 | 2020-10-01 17:45:30 | 45.225
这个问题我已经考虑过好几次了。实际的做法是嵌套两个查询来达到目的:第一个查询(使用公共表表达式)引入一个计数器列,当您关注的条件为 true 时该列为 1,否则为 0;第二个查询读取第一个查询的输出,对该计数器列计算累计和(running sum)。这种写法比 Vertica 版本笨拙得多;Vertica 版本我放在 BigQuery 版本之后展示。
-- Vertica: assign a trip number to every sensor reading in a single pass.
-- CONDITIONAL_TRUE_EVENT increases its counter on each row of the window
-- for which the boolean expression is TRUE.
WITH oilpressure (vid, ts, psi) AS (
    -- sample readings for vehicle 42: two bursts separated by a gap of
    -- more than 30 minutes
              SELECT 42, TIMESTAMP '2020-10-01 17:00:00', 25.356
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:10', 35.124
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:20', 47.056
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:30', 45.225
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:00', 25.356
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:10', 35.124
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:20', 47.056
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:30', 45.225
)
SELECT
    vid,
    -- The LAG default '0000-01-01' stands in for the missing predecessor of
    -- the first row, so that row also counts as an event and numbering
    -- starts at 1 (see the listed output).
    CONDITIONAL_TRUE_EVENT(
        ts - LAG(ts, 1, '0000-01-01') > '30 MINUTES'
    ) OVER (PARTITION BY vid ORDER BY ts) AS tripid,
    ts,
    psi
FROM oilpressure
;
-- out vid | tripid | ts | psi
-- out -----+--------+---------------------+--------
-- out 42 | 1 | 2020-10-01 17:00:00 | 25.356
-- out 42 | 1 | 2020-10-01 17:00:10 | 35.124
-- out 42 | 1 | 2020-10-01 17:00:20 | 47.056
-- out 42 | 1 | 2020-10-01 17:00:30 | 45.225
-- out 42 | 2 | 2020-10-01 17:45:00 | 25.356
-- out 42 | 2 | 2020-10-01 17:45:10 | 35.124
-- out 42 | 2 | 2020-10-01 17:45:20 | 47.056
-- out 42 | 2 | 2020-10-01 17:45:30 | 45.225
我用一个自己试验过的示例来说明:带有时间戳和机油压力读数的传感器数据。我们想区分各次“行程”,而识别行程的唯一依据是:两次“行程”之间的间隔超过 30 分钟。
BigQuery 版本——同样的思路适用于所有支持 LAG() OLAP 函数的 DBMS
-- BigQuery: emulate Vertica's CONDITIONAL_TRUE_EVENT in two steps:
--   1. flag every reading that starts a new trip (chg = 1, otherwise 0);
--   2. take the running sum of that flag per vehicle.
-- The gap test uses BigQuery's TIMESTAMP_ADD; on another engine that
-- supports LAG(), substitute that engine's timestamp/interval arithmetic.
WITH
-- input: sample readings for vehicle 42. BigQuery CTEs take no column list,
-- so the column names come from the aliases on the first SELECT.
oilpressure AS (
            SELECT 42 AS vid, TIMESTAMP '2020-10-01 17:00:00' AS ts, 25.356 AS psi
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:10', 35.124
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:20', 47.056
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:30', 45.225
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:00', 25.356
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:10', 35.124
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:20', 47.056
  UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:30', 45.225
),
-- previous reading of the same vehicle; NULL on the first row of a partition
with_prev_ts AS (
  SELECT
    vid,
    ts,
    psi,
    LAG(ts) OVER (PARTITION BY vid ORDER BY ts) AS prev_ts
  FROM oilpressure
),
with_chg_counter AS (
  SELECT
    vid,
    ts,
    psi,
    CASE
      -- No sentinel default such as '0000-01-01' is used: that value is
      -- below BigQuery's minimum TIMESTAMP. The first reading of a vehicle
      -- is flagged explicitly instead, so trip numbering starts at 1.
      WHEN prev_ts IS NULL THEN 1
      -- more than 30 minutes since the previous reading: a new trip begins
      WHEN ts > TIMESTAMP_ADD(prev_ts, INTERVAL 30 MINUTE) THEN 1
      ELSE 0
    END AS chg
  FROM with_prev_ts
)
SELECT
  vid,
  SUM(chg) OVER (PARTITION BY vid ORDER BY ts) AS tripid,
  ts,
  psi
FROM with_chg_counter
ORDER BY vid, ts
;
-- out vid|tripid|ts |psi
-- out 42| 1|2020-10-01 17:00:00|25.356
-- out 42| 1|2020-10-01 17:00:10|35.124
-- out 42| 1|2020-10-01 17:00:20|47.056
-- out 42| 1|2020-10-01 17:00:30|45.225
-- out 42| 2|2020-10-01 17:45:00|25.356
-- out 42| 2|2020-10-01 17:45:10|35.124
-- out 42| 2|2020-10-01 17:45:20|47.056
-- out 42| 2|2020-10-01 17:45:30|45.225
还有Vertica版本
-- Vertica: assign a trip number to every sensor reading in a single pass.
-- CONDITIONAL_TRUE_EVENT increases its counter on each row of the window
-- for which the boolean expression is TRUE.
WITH oilpressure (vid, ts, psi) AS (
    -- sample readings for vehicle 42: two bursts separated by a gap of
    -- more than 30 minutes
              SELECT 42, TIMESTAMP '2020-10-01 17:00:00', 25.356
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:10', 35.124
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:20', 47.056
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:00:30', 45.225
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:00', 25.356
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:10', 35.124
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:20', 47.056
    UNION ALL SELECT 42, TIMESTAMP '2020-10-01 17:45:30', 45.225
)
SELECT
    vid,
    -- The LAG default '0000-01-01' stands in for the missing predecessor of
    -- the first row, so that row also counts as an event and numbering
    -- starts at 1 (see the listed output).
    CONDITIONAL_TRUE_EVENT(
        ts - LAG(ts, 1, '0000-01-01') > '30 MINUTES'
    ) OVER (PARTITION BY vid ORDER BY ts) AS tripid,
    ts,
    psi
FROM oilpressure
;
-- out vid | tripid | ts | psi
-- out -----+--------+---------------------+--------
-- out 42 | 1 | 2020-10-01 17:00:00 | 25.356
-- out 42 | 1 | 2020-10-01 17:00:10 | 35.124
-- out 42 | 1 | 2020-10-01 17:00:20 | 47.056
-- out 42 | 1 | 2020-10-01 17:00:30 | 45.225
-- out 42 | 2 | 2020-10-01 17:45:00 | 25.356
-- out 42 | 2 | 2020-10-01 17:45:10 | 35.124
-- out 42 | 2 | 2020-10-01 17:45:20 | 47.056
-- out 42 | 2 | 2020-10-01 17:45:30 | 45.225
下面是BigQuery的示例
-- BigQuery emulation of the Vertica expression
--   CONDITIONAL_TRUE_EVENT(timestamp - LAG(timestamp) > '7 days')
--     OVER (PARTITION BY zuid, sub_type ORDER BY timestamp)
-- Step 1 flags each row whose gap to the previous row exceeds 7 days;
-- step 2 counts the TRUE flags cumulatively within each partition.
with flagged as (
  select
    zuid,
    sub_type,
    timestamp,
    -- Compare full timestamps, not calendar dates: a gap counts as an event
    -- exactly when it is longer than 7 * 24 hours. The flag is NULL on the
    -- first row of a partition (no previous row), which countif ignores.
    -- NOTE(review): assumes `timestamp` is a TIMESTAMP column; for a
    -- DATETIME column use datetime_add instead -- confirm against the schema.
    timestamp > timestamp_add(
      lag(timestamp) over (partition by zuid, sub_type order by timestamp),
      interval 7 day
    ) as flag
  from `project.dataset.table`
)
select
  zuid,
  sub_type,
  timestamp,
  countif(flag) over (partition by zuid, sub_type order by timestamp) as conditional_true_event
from flagged
-- append "order by zuid, sub_type, timestamp" when a deterministic row order is needed
下面是BigQuery的示例
-- BigQuery emulation of the Vertica expression
--   CONDITIONAL_TRUE_EVENT(timestamp - LAG(timestamp) > '7 days')
--     OVER (PARTITION BY zuid, sub_type ORDER BY timestamp)
-- Step 1 flags each row whose gap to the previous row exceeds 7 days;
-- step 2 counts the TRUE flags cumulatively within each partition.
with flagged as (
  select
    zuid,
    sub_type,
    timestamp,
    -- Compare full timestamps, not calendar dates: a gap counts as an event
    -- exactly when it is longer than 7 * 24 hours. The flag is NULL on the
    -- first row of a partition (no previous row), which countif ignores.
    -- NOTE(review): assumes `timestamp` is a TIMESTAMP column; for a
    -- DATETIME column use datetime_add instead -- confirm against the schema.
    timestamp > timestamp_add(
      lag(timestamp) over (partition by zuid, sub_type order by timestamp),
      interval 7 day
    ) as flag
  from `project.dataset.table`
)
select
  zuid,
  sub_type,
  timestamp,
  countif(flag) over (partition by zuid, sub_type order by timestamp) as conditional_true_event
from flagged
-- append "order by zuid, sub_type, timestamp" when a deterministic row order is needed
看起来相对简单——但您能否提供一个包含输入数据和预期结果的简单示例,以免误解您的问题 :o)