SQL条件滚动和
SQL条件滚动和,sql,google-bigquery,Sql,Google Bigquery,我使用的是BigQuery标准SQL,需要从我的交易表中按客户(cust_id)获取submit_amount的滚动总和。但是,我只能在滚动总和中包含某些交易。我的条件是:(1)可能需要包含在滚动总和中的行的支付日期(paid_date)为空或 >= 正在计算的行的提交日期(submit_date);(2)可能需要包含在滚动总和中的行的提交日期在正在计算的行的提交日期之前的0到30天之间;(3)cust_id必须与当前行的cust_id匹配。这是我的数据示例,其中包含我要计算的应计金额(accrued_amount)字段。我还在这里添加了一个字段(qual_txn_id),只是为了更清楚地说明哪些交易应该符合每行滚动总和的条件。
cust_id
)获取submit_amount
的滚动总和
但是,我只能在滚动总和中包含某些交易。我的条件是:
支付日期
为空或>=
正在计算的行的提交日期
cust_id
必须与当前行的cust_id匹配
应计金额
字段。我还在这里添加了一个字段,只是为了更清楚地说明哪些交易应该符合每行滚动总和的条件
txn_id | cust_id | submit_date | paid_date | submit_amount | accrued_amount | qual_txn_id
-------------------------------------------------------------------------------------------
1 | 1 | 2020-01-01 | 2020-01-15 | 10 | 10 | 1
2 | 1 | 2020-01-12 | 2020-02-01 | 5 | 15 | 1, 2
3 | 1 | 2020-01-25 | NULL | 2 | 7 | 2, 3
4 | 1 | 2020-02-05 | NULL | 4 | 6 | 3, 4
5 | 1 | 2020-02-06 | NULL | 1 | 7 | 3, 4, 5
6 | 1 | 2020-03-01 | 2020-03-15 | 3 | 8 | 4, 5, 6
7 | 2 | 2020-03-05 | 2020-03-20 | 6 | 6 | 7
8 | 2 | 2020-03-25 | NULL | 2 | 2 | 8
我尝试使用CASE WHEN
代替submit_amount
(在SUM()
中)来应用paid_date
标准,但无法使其相互比较正确的行
正确的方法是什么?下面是针对BigQuery标准SQL的
#standardSQL
-- Conditional rolling sum of submit_amount per customer.
-- For each transaction (the "current row") a candidate transaction counts when:
--   * it belongs to the same cust_id,
--   * its submit_date is 0 to 30 days (inclusive) before the current row's
--     submit_date, and
--   * its paid_date is NULL or on/after the current row's submit_date.
WITH windowed AS (
  -- Collect, per row, the same customer's transactions inside the 30-day
  -- look-back window. UNIX_DATE turns the DATE into a day number so that a
  -- numeric RANGE frame can be used; 30 PRECEDING makes the window cover
  -- day differences 0..30, as the requirement states.
  SELECT
    *,
    ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
      PARTITION BY cust_id
      ORDER BY UNIX_DATE(submit_date)
      RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
    ) AS arr
  FROM `project.dataset.table`
)
SELECT
  * EXCEPT (arr),
  (
    -- t.* fields come from the windowed candidates; w.submit_date is the
    -- submit date of the row being calculated.
    SELECT SUM(IF(t.paid_date IS NULL OR t.paid_date >= w.submit_date, t.submit_amount, 0))
    FROM UNNEST(w.arr) AS t
  ) AS accrued_amount,
  (
    -- ORDER BY keeps the list of qualifying ids deterministic.
    SELECT
      STRING_AGG(
        IF(t.paid_date IS NULL OR t.paid_date >= w.submit_date, CAST(t.txn_id AS STRING), NULL)
        ORDER BY t.txn_id
      )
    FROM UNNEST(w.arr) AS t
  ) AS qual_txn_id
FROM windowed AS w
如果将其应用于您问题中的样本数据,如下面的示例所示
#standardSQL
-- Self-contained demo: the conditional rolling sum applied to the sample data
-- from the question. The first CTE shadows `project.dataset.table` so the
-- query can be run as-is without a real table.
WITH `project.dataset.table` AS (
  SELECT
    1 AS txn_id,
    1 AS cust_id,
    DATE '2020-01-01' AS submit_date,
    DATE '2020-01-15' AS paid_date,
    10 AS submit_amount
  UNION ALL
  SELECT 2, 1, DATE '2020-01-12', DATE '2020-02-01', 5 UNION ALL
  SELECT 3, 1, DATE '2020-01-25', NULL, 2 UNION ALL
  SELECT 4, 1, DATE '2020-02-05', NULL, 4 UNION ALL
  SELECT 5, 1, DATE '2020-02-06', NULL, 1 UNION ALL
  SELECT 6, 1, DATE '2020-03-01', DATE '2020-03-15', 3 UNION ALL
  SELECT 7, 2, DATE '2020-03-05', DATE '2020-03-20', 6 UNION ALL
  SELECT 8, 2, DATE '2020-03-25', NULL, 2
),
windowed AS (
  -- Per row: the same customer's transactions submitted 0 to 30 days
  -- (inclusive) before this row's submit_date, current row included.
  SELECT
    *,
    ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
      PARTITION BY cust_id
      ORDER BY UNIX_DATE(submit_date)
      RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
    ) AS arr
  FROM `project.dataset.table`
)
SELECT
  * EXCEPT (arr),
  (
    -- A candidate counts only if it is unpaid or was paid on/after the
    -- submit date of the row being calculated (w.submit_date).
    SELECT SUM(IF(t.paid_date IS NULL OR t.paid_date >= w.submit_date, t.submit_amount, 0))
    FROM UNNEST(w.arr) AS t
  ) AS accrued_amount,
  (
    -- ORDER BY keeps the list of qualifying ids deterministic.
    SELECT
      STRING_AGG(
        IF(t.paid_date IS NULL OR t.paid_date >= w.submit_date, CAST(t.txn_id AS STRING), NULL)
        ORDER BY t.txn_id
      )
    FROM UNNEST(w.arr) AS t
  ) AS qual_txn_id
FROM windowed AS w
结果是
Row txn_id cust_id submit_date paid_date submit_amount accrued_amount qual_txn_id
1 1 1 2020-01-01 2020-01-15 10 10 1
2 2 1 2020-01-12 2020-02-01 5 15 1,2
3 3 1 2020-01-25 null 2 7 2,3
4 4 1 2020-02-05 null 4 6 3,4
5 5 1 2020-02-06 null 1 7 3,4,5
6 6 1 2020-03-01 2020-03-15 3 8 4,5,6
7 7 2 2020-03-05 2020-03-20 6 6 7
8 8 2 2020-03-25 null 2 2 8
此外,下面是重构版本——比上面的版本略为简洁,具有完全相同的输出
#standardSQL
-- Compact variant of the conditional rolling sum: the qualification test is
-- evaluated once per candidate (as `qual`) and both outputs are produced by a
-- single SELECT AS STRUCT that is expanded into columns with `.*`.
SELECT
  * EXCEPT (arr),
  (
    SELECT AS STRUCT
      SUM(IF(qual, t.submit_amount, 0)) AS accrued_amount,
      -- ORDER BY keeps the list of qualifying ids deterministic.
      STRING_AGG(IF(qual, CAST(t.txn_id AS STRING), NULL) ORDER BY t.txn_id) AS qual_txn_id
    FROM UNNEST(arr) AS t
    -- One-element array: exposes the per-candidate boolean under the name
    -- `qual`. submit_date is not a field of t, so it refers to the outer row
    -- being calculated.
    CROSS JOIN UNNEST([t.paid_date IS NULL OR t.paid_date >= submit_date]) AS qual
  ).*
FROM (
  SELECT
    *,
    -- Same customer's transactions submitted 0 to 30 days (inclusive) before
    -- this row; UNIX_DATE gives a day number usable in a RANGE frame.
    ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
      PARTITION BY cust_id
      ORDER BY UNIX_DATE(submit_date)
      RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
    ) AS arr
  FROM `project.dataset.table`
)
下面是BigQuery标准SQL
#standardSQL
-- Conditional rolling sum of submit_amount per customer.
-- A candidate transaction is included for the row being calculated when it has
-- the same cust_id, was submitted 0 to 30 days (inclusive) before that row,
-- and is either unpaid (paid_date IS NULL) or paid on/after that row's
-- submit_date.
WITH candidates AS (
  -- UNIX_DATE converts submit_date to a day number so the look-back window
  -- can be expressed as a numeric RANGE frame of 30 days.
  SELECT
    *,
    ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
      PARTITION BY cust_id
      ORDER BY UNIX_DATE(submit_date)
      RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
    ) AS arr
  FROM `project.dataset.table`
)
SELECT
  * EXCEPT (arr),
  (
    -- c.submit_date is the current row's date; cand.* are windowed rows.
    SELECT SUM(IF(cand.paid_date IS NULL OR cand.paid_date >= c.submit_date, cand.submit_amount, 0))
    FROM UNNEST(c.arr) AS cand
  ) AS accrued_amount,
  (
    -- Ordered aggregation so the id list is stable between runs.
    SELECT
      STRING_AGG(
        IF(cand.paid_date IS NULL OR cand.paid_date >= c.submit_date, CAST(cand.txn_id AS STRING), NULL)
        ORDER BY cand.txn_id
      )
    FROM UNNEST(c.arr) AS cand
  ) AS qual_txn_id
FROM candidates AS c
如果将其应用于您问题中的样本数据,如下面的示例所示
#standardSQL
-- Runnable demo of the conditional rolling sum on the question's sample rows.
-- The first CTE stands in for the real `project.dataset.table`.
WITH `project.dataset.table` AS (
  SELECT
    1 AS txn_id,
    1 AS cust_id,
    DATE '2020-01-01' AS submit_date,
    DATE '2020-01-15' AS paid_date,
    10 AS submit_amount
  UNION ALL
  SELECT 2, 1, DATE '2020-01-12', DATE '2020-02-01', 5 UNION ALL
  SELECT 3, 1, DATE '2020-01-25', NULL, 2 UNION ALL
  SELECT 4, 1, DATE '2020-02-05', NULL, 4 UNION ALL
  SELECT 5, 1, DATE '2020-02-06', NULL, 1 UNION ALL
  SELECT 6, 1, DATE '2020-03-01', DATE '2020-03-15', 3 UNION ALL
  SELECT 7, 2, DATE '2020-03-05', DATE '2020-03-20', 6 UNION ALL
  SELECT 8, 2, DATE '2020-03-25', NULL, 2
),
candidates AS (
  -- Per row: same-customer transactions submitted within the preceding
  -- 0 to 30 days (inclusive), current row included.
  SELECT
    *,
    ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
      PARTITION BY cust_id
      ORDER BY UNIX_DATE(submit_date)
      RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
    ) AS arr
  FROM `project.dataset.table`
)
SELECT
  * EXCEPT (arr),
  (
    -- Include a candidate only if unpaid, or paid on/after the current
    -- row's submit date (c.submit_date).
    SELECT SUM(IF(cand.paid_date IS NULL OR cand.paid_date >= c.submit_date, cand.submit_amount, 0))
    FROM UNNEST(c.arr) AS cand
  ) AS accrued_amount,
  (
    -- Ordered aggregation so the id list is stable between runs.
    SELECT
      STRING_AGG(
        IF(cand.paid_date IS NULL OR cand.paid_date >= c.submit_date, CAST(cand.txn_id AS STRING), NULL)
        ORDER BY cand.txn_id
      )
    FROM UNNEST(c.arr) AS cand
  ) AS qual_txn_id
FROM candidates AS c
结果是
Row txn_id cust_id submit_date paid_date submit_amount accrued_amount qual_txn_id
1 1 1 2020-01-01 2020-01-15 10 10 1
2 2 1 2020-01-12 2020-02-01 5 15 1,2
3 3 1 2020-01-25 null 2 7 2,3
4 4 1 2020-02-05 null 4 6 3,4
5 5 1 2020-02-06 null 1 7 3,4,5
6 6 1 2020-03-01 2020-03-15 3 8 4,5,6
7 7 2 2020-03-05 2020-03-20 6 6 7
8 8 2 2020-03-25 null 2 2 8
此外,下面是重构版本——比上面的版本略为简洁,具有完全相同的输出
#standardSQL
-- Shorter form of the conditional rolling sum: the per-candidate
-- qualification flag is computed once and reused for both the amount total
-- and the list of qualifying transaction ids.
SELECT
  * EXCEPT (arr),
  (
    SELECT AS STRUCT
      SUM(IF(qual, cand.submit_amount, 0)) AS accrued_amount,
      -- Ordered aggregation so the id list is stable between runs.
      STRING_AGG(IF(qual, CAST(cand.txn_id AS STRING), NULL) ORDER BY cand.txn_id) AS qual_txn_id
    FROM UNNEST(arr) AS cand
    -- Single-element array used to name the boolean `qual`; the unqualified
    -- submit_date is the outer (current) row's column because the struct
    -- elements carry no submit_date field.
    CROSS JOIN UNNEST([cand.paid_date IS NULL OR cand.paid_date >= submit_date]) AS qual
  ).*
FROM (
  SELECT
    *,
    -- Window of same-customer transactions submitted 0 to 30 days
    -- (inclusive) before this row, expressed on the UNIX_DATE day number.
    ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
      PARTITION BY cust_id
      ORDER BY UNIX_DATE(submit_date)
      RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
    ) AS arr
  FROM `project.dataset.table`
)
提交日期在当前行提交日期之前0到30天之间
我的意思是:可能需要包含在滚动总和中的行,其提交日期必须在正在计算的那一行的提交日期之前0到30天之间。