SQL条件滚动和

SQL条件滚动和,sql,google-bigquery,Sql,Google Bigquery,我使用的是BigQuery标准SQL,需要从我的交易表中按客户(cust_id)获取submit_amount的滚动总和。但是,我只能在滚动总和中包含某些交易。我的条件是:可能需要包含在滚动总和中的行的支付日期为空或>=正在计算的行的提交日期;可能需要包含在滚动总和中的行的提交日期在正在计算的行的提交日期之前的0到30天之间;该行的cust_id必须与当前行的cust_id匹配。这是我的数据示例,其中包含我要计算的应计金额(accrued_amount)字段。我还在这里添加了一个字段,只是为了更清楚地说明哪些交易应该符合条件

我使用的是BigQuery标准SQL,需要从我的交易表中按客户(cust_id)获取 submit_amount 的滚动总和

但是,我只能在滚动总和中包含某些交易。我的条件是:
  • 可能需要包含在滚动总和中的行的支付日期(paid_date)为空,或 >= 正在计算的行的提交日期(submit_date)
  • 可能需要包含在滚动总和中的行的提交日期在正在计算的行的提交日期之前的0到30天之间
  • 候选行的 cust_id 必须与当前行的 cust_id 匹配
  • 这是我的数据示例,其中包含我要计算的应计金额(accrued_amount)字段。我还在这里添加了一个字段(qual_txn_id),只是为了更清楚地说明哪些交易应该符合每行滚动总和的条件

    txn_id | cust_id | submit_date | paid_date  | submit_amount | accrued_amount | qual_txn_id
    -------------------------------------------------------------------------------------------
    1      | 1       | 2020-01-01  | 2020-01-15 | 10            | 10             | 1
    2      | 1       | 2020-01-12  | 2020-02-01 | 5             | 15             | 1, 2
    3      | 1       | 2020-01-25  | NULL       | 2             | 7              | 2, 3
    4      | 1       | 2020-02-05  | NULL       | 4             | 6              | 3, 4
    5      | 1       | 2020-02-06  | NULL       | 1             | 7              | 3, 4, 5
    6      | 1       | 2020-03-01  | 2020-03-15 | 3             | 8              | 4, 5, 6
    7      | 2       | 2020-03-05  | 2020-03-20 | 6             | 6              | 7
    8      | 2       | 2020-03-25  | NULL       | 2             | 2              | 8

    我尝试在 SUM() 中用 CASE WHEN 代替 submit_amount 来应用 paid_date 条件,但无法让它在正确的行之间进行比较


    正确的方法是什么?

    下面是针对 BigQuery 标准 SQL 的查询:

    #standardSQL
    -- Conditional rolling sum of submit_amount per customer.
    -- For the row being calculated, a candidate row is counted when:
    --   * it belongs to the same cust_id,
    --   * it was submitted 0 to 30 days before (or on) that row's submit_date,
    --   * it is unpaid (paid_date IS NULL) or was paid on/after that submit_date.
    WITH windowed AS (
      SELECT
        *,
        -- Collect the candidate rows for each transaction. UNIX_DATE turns the
        -- DATE into an integer day number so a numeric RANGE frame can express
        -- "N days back". 30 PRECEDING (not 29) keeps a row submitted exactly
        -- 30 days earlier inside the required 0-30 day window.
        ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
          PARTITION BY cust_id
          ORDER BY UNIX_DATE(submit_date)
          RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
        ) AS arr
      FROM `project.dataset.table`
    )
    SELECT
      w.* EXCEPT(arr),
      (
        -- c is a candidate row from the window; w is the row being calculated.
        -- IF(..., 0) keeps the result at 0 (not NULL) when nothing qualifies.
        SELECT SUM(IF(c.paid_date IS NULL OR c.paid_date >= w.submit_date, c.submit_amount, 0))
        FROM UNNEST(w.arr) AS c
      ) AS accrued_amount,
      (
        -- Ids of the rows that were counted; ORDER BY makes the list order
        -- deterministic, which STRING_AGG alone does not guarantee.
        SELECT STRING_AGG(
          IF(c.paid_date IS NULL OR c.paid_date >= w.submit_date, CAST(c.txn_id AS STRING), NULL)
          ORDER BY c.txn_id
        )
        FROM UNNEST(w.arr) AS c
      ) AS qual_txn_id
    FROM windowed AS w
    
    如果将其应用于您问题中的样本数据,如下面的示例所示:

    #standardSQL
    -- Self-contained demo of the conditional rolling sum, run against the
    -- sample rows from the question instead of a real table.
    WITH `project.dataset.table` AS (
      -- Inline stand-in for the transactions table. Dates are typed DATE
      -- literals so no column relies on implicit STRING-to-DATE coercion.
      SELECT 1 AS txn_id, 1 AS cust_id, DATE '2020-01-01' AS submit_date, DATE '2020-01-15' AS paid_date, 10 AS submit_amount UNION ALL
      SELECT 2, 1, DATE '2020-01-12', DATE '2020-02-01', 5 UNION ALL
      SELECT 3, 1, DATE '2020-01-25', NULL, 2 UNION ALL
      SELECT 4, 1, DATE '2020-02-05', NULL, 4 UNION ALL
      SELECT 5, 1, DATE '2020-02-06', NULL, 1 UNION ALL
      SELECT 6, 1, DATE '2020-03-01', DATE '2020-03-15', 3 UNION ALL
      SELECT 7, 2, DATE '2020-03-05', DATE '2020-03-20', 6 UNION ALL
      SELECT 8, 2, DATE '2020-03-25', NULL, 2
    ),
    windowed AS (
      SELECT
        *,
        -- Candidate rows per transaction: same customer, submitted 0 to 30 days
        -- before (or on) this row's submit_date. UNIX_DATE gives an integer day
        -- number so the RANGE frame can count days; 30 PRECEDING (not 29)
        -- keeps a row submitted exactly 30 days earlier in the window.
        ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
          PARTITION BY cust_id
          ORDER BY UNIX_DATE(submit_date)
          RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
        ) AS arr
      FROM `project.dataset.table`
    )
    SELECT
      w.* EXCEPT(arr),
      (
        -- c is a candidate row from the window; w is the row being calculated.
        -- A candidate counts if it is unpaid or was paid on/after w.submit_date.
        SELECT SUM(IF(c.paid_date IS NULL OR c.paid_date >= w.submit_date, c.submit_amount, 0))
        FROM UNNEST(w.arr) AS c
      ) AS accrued_amount,
      (
        -- Ids of the rows that were counted, in a deterministic order.
        SELECT STRING_AGG(
          IF(c.paid_date IS NULL OR c.paid_date >= w.submit_date, CAST(c.txn_id AS STRING), NULL)
          ORDER BY c.txn_id
        )
        FROM UNNEST(w.arr) AS c
      ) AS qual_txn_id
    FROM windowed AS w
    -- Fix the row order so the output can be compared line by line.
    ORDER BY w.txn_id
    
    结果是

    Row txn_id  cust_id submit_date paid_date   submit_amount   accrued_amount  qual_txn_id  
    1   1       1       2020-01-01  2020-01-15  10              10              1    
    2   2       1       2020-01-12  2020-02-01  5               15              1,2  
    3   3       1       2020-01-25  null        2               7               2,3  
    4   4       1       2020-02-05  null        4               6               3,4  
    5   5       1       2020-02-06  null        1               7               3,4,5    
    6   6       1       2020-03-01  2020-03-15  3               8               4,5,6    
    7   7       2       2020-03-05  2020-03-20  6               6               7    
    8   8       2       2020-03-25  null        2               2               8   
    
    此外,下面是重构后的版本——比上面的版本略微简洁,输出完全相同:

    #standardSQL
    -- Conditional rolling sum of submit_amount per customer, computing both
    -- output columns in a single pass over the window array.
    WITH windowed AS (
      SELECT
        *,
        -- Candidate rows per transaction: same customer, submitted 0 to 30 days
        -- before (or on) this row's submit_date. UNIX_DATE gives an integer day
        -- number so the RANGE frame can count days; 30 PRECEDING (not 29)
        -- keeps a row submitted exactly 30 days earlier in the window.
        ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
          PARTITION BY cust_id
          ORDER BY UNIX_DATE(submit_date)
          RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
        ) AS arr
      FROM `project.dataset.table`
    )
    SELECT
      w.* EXCEPT(arr),
      (
        -- SELECT AS STRUCT returns both aggregates as one value; the trailing
        -- .* expands it into the accrued_amount and qual_txn_id columns.
        SELECT AS STRUCT
          SUM(IF(qual, c.submit_amount, 0)) AS accrued_amount,
          -- ORDER BY makes the id list order deterministic.
          STRING_AGG(IF(qual, CAST(c.txn_id AS STRING), NULL) ORDER BY c.txn_id) AS qual_txn_id
        FROM UNNEST(w.arr) AS c
        -- One-element array used to name the qualification test once:
        -- the candidate is unpaid, or was paid on/after the current row's
        -- submit_date.
        CROSS JOIN UNNEST([c.paid_date IS NULL OR c.paid_date >= w.submit_date]) AS qual
      ).*
    FROM windowed AS w
    

    下面是针对 BigQuery 标准 SQL 的查询:

    #standardSQL
    -- Conditional rolling sum of submit_amount per customer.
    -- For the row being calculated, a candidate row is counted when:
    --   * it belongs to the same cust_id,
    --   * it was submitted 0 to 30 days before (or on) that row's submit_date,
    --   * it is unpaid (paid_date IS NULL) or was paid on/after that submit_date.
    WITH windowed AS (
      SELECT
        *,
        -- Collect the candidate rows for each transaction. UNIX_DATE turns the
        -- DATE into an integer day number so a numeric RANGE frame can express
        -- "N days back". 30 PRECEDING (not 29) keeps a row submitted exactly
        -- 30 days earlier inside the required 0-30 day window.
        ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
          PARTITION BY cust_id
          ORDER BY UNIX_DATE(submit_date)
          RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
        ) AS arr
      FROM `project.dataset.table`
    )
    SELECT
      w.* EXCEPT(arr),
      (
        -- c is a candidate row from the window; w is the row being calculated.
        -- IF(..., 0) keeps the result at 0 (not NULL) when nothing qualifies.
        SELECT SUM(IF(c.paid_date IS NULL OR c.paid_date >= w.submit_date, c.submit_amount, 0))
        FROM UNNEST(w.arr) AS c
      ) AS accrued_amount,
      (
        -- Ids of the rows that were counted; ORDER BY makes the list order
        -- deterministic, which STRING_AGG alone does not guarantee.
        SELECT STRING_AGG(
          IF(c.paid_date IS NULL OR c.paid_date >= w.submit_date, CAST(c.txn_id AS STRING), NULL)
          ORDER BY c.txn_id
        )
        FROM UNNEST(w.arr) AS c
      ) AS qual_txn_id
    FROM windowed AS w
    
    如果将其应用于您问题中的样本数据,如下面的示例所示:

    #standardSQL
    -- Self-contained demo of the conditional rolling sum, run against the
    -- sample rows from the question instead of a real table.
    WITH `project.dataset.table` AS (
      -- Inline stand-in for the transactions table. Dates are typed DATE
      -- literals so no column relies on implicit STRING-to-DATE coercion.
      SELECT 1 AS txn_id, 1 AS cust_id, DATE '2020-01-01' AS submit_date, DATE '2020-01-15' AS paid_date, 10 AS submit_amount UNION ALL
      SELECT 2, 1, DATE '2020-01-12', DATE '2020-02-01', 5 UNION ALL
      SELECT 3, 1, DATE '2020-01-25', NULL, 2 UNION ALL
      SELECT 4, 1, DATE '2020-02-05', NULL, 4 UNION ALL
      SELECT 5, 1, DATE '2020-02-06', NULL, 1 UNION ALL
      SELECT 6, 1, DATE '2020-03-01', DATE '2020-03-15', 3 UNION ALL
      SELECT 7, 2, DATE '2020-03-05', DATE '2020-03-20', 6 UNION ALL
      SELECT 8, 2, DATE '2020-03-25', NULL, 2
    ),
    windowed AS (
      SELECT
        *,
        -- Candidate rows per transaction: same customer, submitted 0 to 30 days
        -- before (or on) this row's submit_date. UNIX_DATE gives an integer day
        -- number so the RANGE frame can count days; 30 PRECEDING (not 29)
        -- keeps a row submitted exactly 30 days earlier in the window.
        ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
          PARTITION BY cust_id
          ORDER BY UNIX_DATE(submit_date)
          RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
        ) AS arr
      FROM `project.dataset.table`
    )
    SELECT
      w.* EXCEPT(arr),
      (
        -- c is a candidate row from the window; w is the row being calculated.
        -- A candidate counts if it is unpaid or was paid on/after w.submit_date.
        SELECT SUM(IF(c.paid_date IS NULL OR c.paid_date >= w.submit_date, c.submit_amount, 0))
        FROM UNNEST(w.arr) AS c
      ) AS accrued_amount,
      (
        -- Ids of the rows that were counted, in a deterministic order.
        SELECT STRING_AGG(
          IF(c.paid_date IS NULL OR c.paid_date >= w.submit_date, CAST(c.txn_id AS STRING), NULL)
          ORDER BY c.txn_id
        )
        FROM UNNEST(w.arr) AS c
      ) AS qual_txn_id
    FROM windowed AS w
    -- Fix the row order so the output can be compared line by line.
    ORDER BY w.txn_id
    
    结果是

    Row txn_id  cust_id submit_date paid_date   submit_amount   accrued_amount  qual_txn_id  
    1   1       1       2020-01-01  2020-01-15  10              10              1    
    2   2       1       2020-01-12  2020-02-01  5               15              1,2  
    3   3       1       2020-01-25  null        2               7               2,3  
    4   4       1       2020-02-05  null        4               6               3,4  
    5   5       1       2020-02-06  null        1               7               3,4,5    
    6   6       1       2020-03-01  2020-03-15  3               8               4,5,6    
    7   7       2       2020-03-05  2020-03-20  6               6               7    
    8   8       2       2020-03-25  null        2               2               8   
    
    此外,下面是重构后的版本——比上面的版本略微简洁,输出完全相同:

    #standardSQL
    -- Conditional rolling sum of submit_amount per customer, computing both
    -- output columns in a single pass over the window array.
    WITH windowed AS (
      SELECT
        *,
        -- Candidate rows per transaction: same customer, submitted 0 to 30 days
        -- before (or on) this row's submit_date. UNIX_DATE gives an integer day
        -- number so the RANGE frame can count days; 30 PRECEDING (not 29)
        -- keeps a row submitted exactly 30 days earlier in the window.
        ARRAY_AGG(STRUCT(txn_id, paid_date, submit_amount)) OVER (
          PARTITION BY cust_id
          ORDER BY UNIX_DATE(submit_date)
          RANGE BETWEEN 30 PRECEDING AND CURRENT ROW
        ) AS arr
      FROM `project.dataset.table`
    )
    SELECT
      w.* EXCEPT(arr),
      (
        -- SELECT AS STRUCT returns both aggregates as one value; the trailing
        -- .* expands it into the accrued_amount and qual_txn_id columns.
        SELECT AS STRUCT
          SUM(IF(qual, c.submit_amount, 0)) AS accrued_amount,
          -- ORDER BY makes the id list order deterministic.
          STRING_AGG(IF(qual, CAST(c.txn_id AS STRING), NULL) ORDER BY c.txn_id) AS qual_txn_id
        FROM UNNEST(w.arr) AS c
        -- One-element array used to name the qualification test once:
        -- the candidate is unpaid, or was paid on/after the current row's
        -- submit_date.
        CROSS JOIN UNNEST([c.paid_date IS NULL OR c.paid_date >= w.submit_date]) AS qual
      ).*
    FROM windowed AS w
    

    提交日期在当前行提交日期之前0到30天之间
    我的意思是:可能被计入滚动总和的行,其提交日期必须在当前正在计算的那一行的提交日期之前 0 到 30 天之内。
    提交日期在当前行提交日期之前0到30天之间
    我的意思是:可能被计入滚动总和的行,必须是在当前正在计算的那一行之前 0 到 30 天内提交的。