Postgresql 结果与预期一致。我在我的Postgres版本的数据中尝试过这个,它看起来非常准确。但是我还没有检查所有的值。谢谢!但是,我们有没有想过如何避免横向连接(LATERAL JOIN)?在 Redshift 中我找不到任何能实现同样效果的方法。这是另一个没有横向连接的版本。它使用另一个CTE来避免它。你觉得怎么样?

Postgresql 结果与预期一致。我在我的Postgres版本的数据中尝试过这个,它看起来非常准确。但是我还没有检查所有的值。谢谢!但是,我们有没有想过如何避免横向连接(LATERAL JOIN)?在 Redshift 中我找不到任何能实现同样效果的方法。这是另一个没有横向连接的版本。它使用另一个CTE来避免它。你觉得怎么样?,postgresql,amazon-redshift,Postgresql,Amazon Redshift,结果与预期一致。我在我的Postgres版本的数据中尝试过这个,它看起来非常准确。但是我还没有检查所有的值。谢谢!但是,我们有没有想过如何避免横向连接(LATERAL JOIN)?在 Redshift 中我找不到任何能实现同样效果的方法。这是另一个没有横向连接的版本。它使用另一个CTE来避免它。你觉得怎么样?这似乎很有效。非常感谢你的帮助!我将根据我的实际数据来验证这一点,看看是否有任何差异。重要的是要知道这是针对 Redshift 还是 Postgres 的,因为它们在高级功能上有很大的不同!是哪一个?请删除其中一个标签? +----------------------+----


结果与预期一致。我在我的Postgres版本的数据中尝试过这个,它看起来非常准确。但是我还没有检查所有的值。谢谢!但是,我们有没有想过如何避免横向连接(LATERAL JOIN)?在 Redshift 中我找不到任何能实现同样效果的方法。这是另一个没有横向连接的版本。它使用另一个CTE来避免它。你觉得怎么样?这似乎很有效。非常感谢你的帮助!我将根据我的实际数据来验证这一点,看看是否有任何差异。重要的是要知道这是针对 Redshift 还是 Postgres 的,因为它们在高级功能上有很大的不同!是哪一个?请删除其中一个标签?
+----------------------+---------+---------------+
| timestamp            | user_id | action        |
+----------------------+---------+---------------+
| 2017-01-01T12:10:31Z |       1 | subscribed    |
| 2017-01-01T13:11:51Z |       2 | subscribed    |
| 2017-01-01T13:15:53Z |       3 | subscribed    |
| ...                  | ...     | ...           |
| 2017-02-17T09:42:33Z |       4 | subscribed    |
| ...                  | ...     | ...           |
| 2017-03-15T16:59:13Z |       1 | unsubscribed  |
| 2017-03-17T02:19:56Z |       2 | unsubscribed  |
| 2017-03-17T05:33:05Z |       2 | subscribed    |
| ...                  | ...     | ...           |
+------------+-------------+
| month      | subscribers |
+------------+-------------+
| 2017-01-01 |           3 |
| 2017-02-01 |           4 |
| 2017-03-01 |           3 |
| ...        |         ... |
-- Monthly subscriber counts (PostgreSQL: uses WITH RECURSIVE and LEFT JOIN LATERAL).
-- A user is counted for a month when they have a 'subscribed' event before the
-- month's end and their first 'unsubscribed' event at or after that subscription
-- is either missing or falls at/after the month's end.
WITH RECURSIVE months(start_date, end_date) AS (
    -- Anchor row: first month of the report.
    SELECT
      timestamp '2017-01-01', /*change this date to adjust range*/
      (date_trunc('MONTH', timestamp '2017-01-01') + INTERVAL '1 MONTH')::DATE /*change this date to adjust range*/
  UNION ALL
    -- Step: advance one month at a time; end_date is always the first day of
    -- the following month, i.e. an exclusive upper bound for the month.
    SELECT
      start_date + INTERVAL '1 month',
      (date_trunc('MONTH', start_date + INTERVAL '1 month') + INTERVAL '1 MONTH')::DATE
    FROM
      months
    WHERE
      start_date < timestamp '2017-12-01' /*change this date to adjust range*/
),

-- One row per (month, subscription event) for which the subscription is still
-- active at the end of that month.
subscription_months(start_date, end_date, user_id) AS (
  SELECT
    months.start_date::DATE,
    months.end_date,
    initial_subscription.user_id
  FROM
    subscription initial_subscription
    -- First cancellation at or after this subscription event, if any.
    LEFT JOIN LATERAL (
      SELECT
        cancellation.timestamp
      FROM
        subscription cancellation
      WHERE
        cancellation.user_id = initial_subscription.user_id
        AND cancellation.timestamp >= initial_subscription.timestamp
        AND cancellation.action = 'unsubscribed'
      ORDER BY
        cancellation.timestamp ASC
      LIMIT 1
    ) AS cancellation ON true
    INNER JOIN months ON
      -- Strictly before the exclusive month end: a subscription made exactly
      -- at midnight on the 1st belongs to the new month, not the previous one.
      initial_subscription.timestamp < months.end_date
      AND (
        cancellation.timestamp IS NULL
        OR cancellation.timestamp >= months.end_date
      )
  WHERE
    initial_subscription.action = 'subscribed'
)

-- DISTINCT because a user with several qualifying subscription events in the
-- same month would otherwise be counted once per event.
SELECT
  start_date,
  end_date,
  count(DISTINCT user_id)
FROM
  subscription_months
GROUP BY
  start_date,
  end_date
ORDER BY
  start_date;
-- Monthly subscriber counts without a lateral join.
-- Only months that contain at least one row in test produce an output row;
-- months with no events at all are absent from the result.
WITH
-- one row per calendar month that has at least one event
month_buckets AS (
    SELECT DISTINCT CAST(DATE_TRUNC('month', ts) AS DATE) AS month_bucket
    FROM test
),
-- for each month bucket, every event whose month is that bucket or earlier
master AS (
    SELECT mb.month_bucket, test.user_id, test.actions, test.ts
    FROM month_buckets mb
    -- every bucket is derived from a test row, so a match always exists
    INNER JOIN test
        ON CAST(DATE_TRUNC('month', test.ts) AS DATE) <= mb.month_bucket
),
-- for each user and month bucket, the timestamp of the latest event seen so far
latest_event AS (
    SELECT month_bucket, user_id, MAX(ts) AS ts
    FROM master
    GROUP BY month_bucket, user_id
)
-- keep users whose latest event in the bucket is 'subscribed', then count them;
-- DISTINCT guards against duplicate rows sharing the latest timestamp
SELECT m1.month_bucket AS month,
    COUNT(DISTINCT m1.user_id) AS subscribers
FROM master m1
INNER JOIN latest_event m2
    ON m1.month_bucket = m2.month_bucket
    AND m1.user_id = m2.user_id
    AND m1.ts = m2.ts
WHERE m1.actions = 'subscribed'
GROUP BY m1.month_bucket
ORDER BY m1.month_bucket;
-- Count the ids that have an odd number of rows in subscribers.
-- NOTE(review): presumably every row toggles the subscription state, so an odd
-- row count means "currently subscribed" -- confirm against the table's contents.
select count(*)
from
(
  select id
  from subscribers
  group by id
  having count(*) % 2 = 1 -- odd number of rows for this id
) a;
-- Count the ids that have an odd number of rows in subscribers overall and at
-- least one row whose timestamp lies in [@date1, @date2].
select count(distinct a.id)
from
(
  select id
  from subscribers
  group by id
  having count(*) % 2 = 1 -- odd number of rows for this id
) a join
 subscribers s on a.id = s.id
-- BETWEEN is inclusive on both ends, so a row at exactly @date2 is included
where s.timestamp between @date1 and @date2;
-- Monthly subscriber counts built from per-user subscription date ranges and a
-- calendar table with one row per date.
with
-- pair every subscribe/unsubscribe event with the user's next event
next_events as (
    select
     user_id
    ,"timestamp"::date as date_from
    ,action
    ,lead("timestamp") over (partition by user_id order by "timestamp")::date as date_to
    ,lead(action) over (partition by user_id order by "timestamp") as next_action
    from your_table
    where action in ('subscribed','unsubscribed')
)
-- keep subscriptions that are closed by an unsubscribe, or are still open;
-- open ranges are extended to the current date
,ranges as (
    select
     user_id
    ,date_from
    ,coalesce(date_to,current_date) as date_to
    from next_events
    where action='subscribed'
    and (next_action='unsubscribed' or next_action is null)
)
-- one row per user per calendar day inside a subscription range (both ends inclusive)
,subscriber_days as (
    select
     t1.user_id
    ,t2.date
    from ranges t1
    join calendar t2
    on t2.date between t1.date_from and t1.date_to
)
-- use whatever method needed to identify monthly N from daily N (first day, last day, average, etc.)
-- below is the unique count
select
 date_trunc('month',date) as date
,count(distinct user_id) as subscribers
from subscriber_days
-- positional on purpose: the output alias "date" has the same name as the input
-- column, so grouping by name could pick the un-truncated daily column instead
group by 1
order by 1;