Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/database/8.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Sql 在Vertica中采取行动之前获取最后的交互_Sql_Database_Vertica - Fatal编程技术网

Sql 在Vertica中采取行动之前获取最后的交互

Sql 在Vertica中采取行动之前获取最后的交互,sql,database,vertica,Sql,Database,Vertica,假设我有两个表:sales和page\u views。 我想看看用户在购买产品之前查看的最后n页是什么。在Vertica中,查询将如何执行此操作 销售表: |sale_id| date | user_id | promotion_id | ----------------------------------------------- | 1 | 2018-05-01 | A | 1 | | 2 | 2018-05-01 | B

假设我有两个表:`sales` 和 `page_views`。我想看看用户在购买产品之前查看的最后 n 个页面是什么。在 Vertica 中,应该如何编写查询来实现这一点?

销售表:

|sale_id|    date    | user_id | promotion_id |
-----------------------------------------------
|     1 | 2018-05-01 |    A    |            1 |
|     2 | 2018-05-01 |    B    |            2 |
|     3 | 2018-05-01 |    C    |            1 |
|     4 | 2018-05-01 |    D    |            2 |
页面视图表:

| page_id |    date    | user_id |
----------------------------------
|       1 | 2018-04-30 |    A    |
|       3 | 2018-04-29 |    A    |
|       1 | 2018-04-28 |    A    |
|       1 | 2018-04-30 |    B    |
|       2 | 2018-04-29 |    B    |
|       1 | 2018-04-30 |    C    |
|       1 | 2018-04-30 |    D    |
|       2 | 2018-04-29 |    D    |
输出表:

| sale_id | promotion_id | page_id-1 | page_id-2 | page_id-3 |
--------------------------------------------------------------
|       1 |            1 |         1 |         3 |         1 |
|       2 |            2 |         1 |         1 |         0 |
|       3 |            1 |         1 |         0 |         0 | 
|       4 |            2 |         1 |         2 |         0 |

在这种情况下,如果交互少于n次,则用虚拟值(可以是0或-1)替换id

您可以使用 `union all` 组合这两个表。然后根据每行之后出现的销售 id 为该行分配一个组。最后对每个组内的行进行编号,并将这些值透视(pivot)成列:

-- For every sale, pivot the page views that precede it into columns
-- page_1 .. page_3 (most recent first). Slots without a page view get the
-- dummy value 0.
--
-- tp  : sales and page views stacked into one event stream per user.
-- tp2 : every page-view row inherits sale_id / promotion_id from a sale row
--       of the same user.
-- tp3 : events numbered newest-first within each (user_id, sale_id) group.
with tp as (
      select user_id, sale_id, promotion_id, date, null as page_id
      from sales
      union all
      select user_id, null, null, date, page_id
      from page_views
     ),
     tp2 as (
      -- NOTE(review): ordered by date desc, first_value(... ignore nulls)
      -- returns the user's most recent sale for every row. That is enough
      -- when each user has a single sale (as in the sample data); for users
      -- with several sales, last_value(... ignore nulls) over the same
      -- window would pick the nearest following sale instead -- confirm
      -- which behaviour is wanted.
      select user_id,
             coalesce(sale_id,
                      first_value(sale_id ignore nulls) over (partition by user_id order by date desc)
                     ) as sale_id,
             coalesce(promotion_id,
                      first_value(promotion_id ignore nulls) over (partition by user_id order by date desc)
                     ) as promotion_id,
             date, page_id
      from tp
     ),
     tp3 as (
      -- Assumes the sale is dated after all of its page views, so the sale
      -- row itself receives seqnum = 1 and the page views start at 2.
      select row_number() over (partition by user_id, sale_id order by date desc) as seqnum,
             tp2.*
      from tp2
     )
select user_id, sale_id, promotion_id,
       coalesce(max(case when seqnum = 2 then page_id end), 0) as page_1,
       coalesce(max(case when seqnum = 3 then page_id end), 0) as page_2,
       coalesce(max(case when seqnum = 4 then page_id end), 0) as page_3
from tp3
group by user_id, sale_id, promotion_id;

我无法抗拒——“如果你有一把锤子,你的整个世界就是一颗钉子……”

您的查询引用了一个由一系列事件组成的模式:一个销售事件前面有一个或多个页面视图事件

因此,我:

a) 对 `sales` 和 `page_views` 两张表执行 UNION ALL 查询,把它们合并成一个事件序列

b) 对该联合查询应用 Vertica 的 MATCH() 子句,并取得 `match_id` 和 `pattern_id`,以找出我要查找的模式

c) 最后,正如Gordon Linoff在上面所做的那样,按用户id分组

-- create the two input tables as temporary input, so you can play if you like ...

-- Sample fixture: four sales on 2018-05-01, one per user A-D.
-- Local temporary table; ON COMMIT PRESERVE ROWS keeps the rows after commit.
CREATE LOCAL TEMPORARY TABLE sales (
    sale_id,
    date,
    user_id,
    promotion_id
)
ON COMMIT PRESERVE ROWS
AS (
    SELECT 1, DATE '2018-05-01', 'A', 1
    UNION ALL
    SELECT 2, DATE '2018-05-01', 'B', 2
    UNION ALL
    SELECT 3, DATE '2018-05-01', 'C', 1
    UNION ALL
    SELECT 4, DATE '2018-05-01', 'D', 2
);
-- Sample fixture: eight page views for users A-D, dated 2018-04-28 .. 2018-04-30.
-- Local temporary table; ON COMMIT PRESERVE ROWS keeps the rows after commit.
CREATE LOCAL TEMPORARY TABLE page_views (
    page_id,
    date,
    user_id
)
ON COMMIT PRESERVE ROWS
AS (
    SELECT 1, DATE '2018-04-30', 'A'
    UNION ALL
    SELECT 3, DATE '2018-04-29', 'A'
    UNION ALL
    SELECT 1, DATE '2018-04-28', 'A'
    UNION ALL
    SELECT 1, DATE '2018-04-30', 'B'
    UNION ALL
    SELECT 2, DATE '2018-04-29', 'B'
    UNION ALL
    SELECT 1, DATE '2018-04-30', 'C'
    UNION ALL
    SELECT 1, DATE '2018-04-30', 'D'
    UNION ALL
    SELECT 2, DATE '2018-04-29', 'D'
);

-- here's your query ...

-- For each user, find the event pattern "one sale followed (going back in
-- time) by one or more page views" with Vertica's MATCH clause, then pivot
-- the matched events into one row per sale.
-- Page-view slots with no matching event come back as NULL.
WITH tser AS (
  -- Sales and page views as one event stream. Exactly one of
  -- sale_id / page_id is non-NULL on each row, which is what the
  -- event definitions below rely on.
  SELECT
    sale_id
  , NULL::INT AS page_id
  , user_id
  , promotion_id
  , date
  FROM sales
  UNION ALL SELECT
    NULL::INT AS sale_id
  , page_id
  , user_id
  , NULL::INT AS promotion_id
  , date
  FROM page_views
  -- No ORDER BY here: the MATCH clause does its own partitioning/ordering.
)
,
w_pattern AS (
  SELECT
    COALESCE(sale_id, page_id) AS ev_id
  , user_id
  , promotion_id
  , date
  , event_name() AS event_name
  , pattern_id() AS pattern_id
  , match_id()   AS match_id
  FROM tser
  MATCH(
    PARTITION BY user_id
    -- Newest first, so the sale comes before the page views leading up to it.
    ORDER BY date DESC
    DEFINE
      sale AS (sale_id IS NOT NULL)
    , pgview AS (page_id IS NOT NULL)
    PATTERN p AS (sale pgview+)
    ROWS MATCH FIRST EVENT
  )
)
SELECT
  -- match_id numbers the events inside one pattern instance: the pattern
  -- starts with the sale (1), the page views follow as 2, 3, 4.
  MAX(CASE match_id WHEN 1 THEN ev_id        END) AS sale_id
, MAX(CASE match_id WHEN 1 THEN promotion_id END) AS promotion_id
, MAX(CASE match_id WHEN 2 THEN ev_id        END) AS page_id_1
, MAX(CASE match_id WHEN 3 THEN ev_id        END) AS page_id_2
, MAX(CASE match_id WHEN 4 THEN ev_id        END) AS page_id_3
FROM w_pattern
GROUP BY
  user_id
, pattern_id
ORDER BY sale_id
;

sale_id|promotion_id|page_id_1|page_id_2|page_id_3                                                                                                          
      1|           1|        1|        3|        1
      2|           2|        1|        2|-
      3|           1|        1|-        |-
      4|           2|        1|        2|-
玩得开心。。。。 马可