Google Cloud Platform BigQuery:获取某个时间点处于离线状态的用户,以及他们已离线的时长
标签:google-cloud-platform,google-bigquery
假设 BigQuery 中有一个表,其中包含以下数据:
time | user | status
0:00 | 1 | start <-- user 1 is online
3:00 | 1 | stop <-- user 1 is disconnected
3:10 | 1 | stop <-- a user can have multiple stops
3:20 | 1 | stop <-- a user can have multiple stops
3:15 | 2 | start <-- user 2 connected
4:00 | 2 | stop <-- user 2 disconnected
5:00 | 1 | start <-- user 1 is online again
5:10 | 2 | stop <-- user 2 still offline
9:00 | 1 | start <-- user 1 connected
10:00 | 1 | stop <-- user 1 disconnected
我尝试过聚合、LAST_VALUE() OVER、LAG() OVER,但我只能得到上一个事件。
这是一个很酷的挑战!下面是我提出的查询,用于找出在指定时间点处于离线状态(最近一次状态变化为 stop)的用户:
# Lists the users whose most recent state transition strictly before the cutoff
# timestamp is a "stop". Output columns: time, user, state, previous_state.
WITH
  # Pair every event with the state of the same user's preceding event (by time).
  events_with_prior AS (
    SELECT
      time,
      user,
      state,
      LAG(state) OVER (PARTITION BY user ORDER BY time) AS previous_state
    FROM
      `gbl-imt-homerider-basguillaueb.bqml_test.user_up_sof`
  ),
  # Collapse runs of repeated states: keep a user's first event (no predecessor)
  # and every event whose state differs from the preceding one.
  transitions AS (
    SELECT
      time,
      user,
      state,
      previous_state
    FROM
      events_with_prior
    WHERE
      previous_state IS NULL
      OR previous_state != state
  ),
  # Per user, the time of the latest transition strictly before the cutoff.
  last_transition AS (
    SELECT
      user,
      MAX(time) AS time
    FROM
      transitions
    WHERE
      time < TIMESTAMP("2019-08-01 00:08:00")
    GROUP BY
      user
  )
# Fetch the full transition row for each user's latest transition and keep it
# only when that transition is a "stop".
SELECT
  t.time,
  t.user,
  t.state,
  t.previous_state
FROM
  transitions AS t
INNER JOIN
  last_transition AS lt
ON
  t.user = lt.user
  AND t.time = lt.time
WHERE
  t.state = "stop"
不确定是否理解您提供的示例。
如果需要,我可以调整这些查询
# Lists the users whose most recent state transition strictly before the cutoff
# timestamp is a "stop". Output columns: time, user, state, previous_state.
WITH
  # Pair every event with the state of the same user's preceding event (by time).
  events_with_prior AS (
    SELECT
      time,
      user,
      state,
      LAG(state) OVER (PARTITION BY user ORDER BY time) AS previous_state
    FROM
      `gbl-imt-homerider-basguillaueb.bqml_test.user_up_sof`
  ),
  # Collapse runs of repeated states: keep a user's first event (no predecessor)
  # and every event whose state differs from the preceding one.
  transitions AS (
    SELECT
      time,
      user,
      state,
      previous_state
    FROM
      events_with_prior
    WHERE
      previous_state IS NULL
      OR previous_state != state
  ),
  # Per user, the time of the latest transition strictly before the cutoff.
  last_transition AS (
    SELECT
      user,
      MAX(time) AS time
    FROM
      transitions
    WHERE
      time < TIMESTAMP("2019-08-01 00:08:00")
    GROUP BY
      user
  )
# Fetch the full transition row for each user's latest transition and keep it
# only when that transition is a "stop".
SELECT
  t.time,
  t.user,
  t.state,
  t.previous_state
FROM
  transitions AS t
INNER JOIN
  last_transition AS lt
ON
  t.user = lt.user
  AND t.time = lt.time
WHERE
  t.state = "stop"
# Per user, the total number of seconds between each "stop" transition and the
# "start" transition that follows it. Output columns: user, elapsed_in_second.
WITH
  # Pair every event with the state of the same user's preceding event (by time).
  events_with_prior AS (
    SELECT
      time,
      user,
      state,
      LAG(state) OVER (PARTITION BY user ORDER BY time) AS previous_state
    FROM
      `gbl-imt-homerider-basguillaueb.bqml_test.user_up_sof`
  ),
  # Collapse runs of repeated states: keep a user's first event (no predecessor)
  # and every event whose state differs from the preceding one.
  transitions AS (
    SELECT
      time,
      user,
      state,
      previous_state
    FROM
      events_with_prior
    WHERE
      previous_state IS NULL
      OR previous_state != state
  ),
  # Attach to each transition the time of the same user's preceding transition.
  # Because repeated states were dropped above, for a "start" row this is the
  # time of the first "stop" in the run of stops that precedes it.
  transitions_with_prior_time AS (
    SELECT
      time,
      user,
      state,
      previous_state,
      LAG(time) OVER (PARTITION BY user ORDER BY time) AS previous_time
    FROM
      transitions
  )
# Sum the stop -> start gaps only. A user's first transition has a NULL
# previous_state and is excluded by the filter below.
SELECT
  user,
  SUM(TIMESTAMP_DIFF(time, previous_time, SECOND)) AS elapsed_in_second
FROM
  transitions_with_prior_time
WHERE
  previous_state = "stop"
  AND state = "start"
GROUP BY
  user