Postgresql 结果与预期一致。我在 Postgres 版本的数据上试过这个查询,看起来非常准确,不过我还没有核对所有的值。谢谢!但是,有没有办法避免使用横向连接(LATERAL JOIN)?在 Redshift 中我找不到任何等效的写法。这是另一个不使用横向连接的版本,它改用一个额外的 CTE 来代替。你觉得怎么样?
Postgresql 结果与预期一致。我在 Postgres 版本的数据上试过这个查询,看起来非常准确,不过我还没有核对所有的值。谢谢!但是,有没有办法避免使用横向连接(LATERAL JOIN)?在 Redshift 中我找不到任何等效的写法。这是另一个不使用横向连接的版本,它改用一个额外的 CTE 来代替。你觉得怎么样?,postgresql,amazon-redshift,Postgresql,Amazon Redshift,结果与预期一致。我在 Postgres 版本的数据上试过这个查询,看起来非常准确,不过我还没有核对所有的值。谢谢!但是,有没有办法避免使用横向连接(LATERAL JOIN)?在 Redshift 中我找不到任何等效的写法。这是另一个不使用横向连接的版本,它改用一个额外的 CTE 来代替。你觉得怎么样?这似乎很有效。非常感谢你的帮助!我会用我的实际数据来验证,看看是否有差异。需要明确这个问题是针对 Redshift 还是 Postgres,因为两者在高级功能上差别很大!到底是哪一个?请删除其中一个标签。
结果与预期一致。我在 Postgres 版本的数据上试过这个查询,看起来非常准确,不过我还没有核对所有的值。谢谢!但是,有没有办法避免使用横向连接(LATERAL JOIN)?在 Redshift 中我找不到任何等效的写法。这是另一个不使用横向连接的版本,它改用一个额外的 CTE 来代替。你觉得怎么样?这似乎很有效。非常感谢你的帮助!我会用我的实际数据来验证,看看是否有差异。需要明确这个问题是针对 Redshift 还是 Postgres,因为两者在高级功能上差别很大!到底是哪一个?请删除其中一个标签。
+----------------------+---------+---------------+
| timestamp | user_id | action |
+----------------------+---------+---------------+
| 2017-01-01T12:10:31Z | 1 | subscribed |
| 2017-01-01T13:11:51Z | 2 | subscribed |
| 2017-01-01T13:15:53Z | 3 | subscribed |
| ... | ... | ... |
| 2017-02-17T09:42:33Z | 4 | subscribed |
| ... | ... | ... |
| 2017-03-15T16:59:13Z | 1 | unsubscribed |
| 2017-03-17T02:19:56Z | 2 | unsubscribed |
| 2017-03-17T05:33:05Z | 2 | subscribed |
| ... | ... | ... |
+------------+-------------+
| month | subscribers |
+------------+-------------+
| 2017-01-01 | 3 |
| 2017-02-01 | 4 |
| 2017-03-01 | 3 |
| ... | ... |
-- Monthly subscriber counts (PostgreSQL: uses WITH RECURSIVE and LEFT JOIN LATERAL).
--
-- For every generated month, counts the distinct users who have a 'subscribed'
-- event strictly before the month's end_date and whose first 'unsubscribed'
-- event at or after that subscription is either missing or falls on/after end_date.
-- end_date is the first day of the following month, i.e. an exclusive upper bound.
with recursive months(start_date, end_date) as (
    -- Anchor row: first month of the reporting range.
    select
        timestamp '2017-01-01', /*change this date to adjust range*/
        (date_trunc('MONTH', timestamp '2017-01-01') + interval '1 MONTH')::date /*change this date to adjust range*/
    union all
    -- Recursive step: advance one month until the last start_date is reached.
    select
        start_date + interval '1 month',
        (date_trunc('MONTH', start_date + interval '1 month') + interval '1 MONTH')::date
    from
        months
    where
        start_date < timestamp '2017-12-01' /*change this date to adjust range*/
),
-- One row per (month, subscription event) for which the subscription is still
-- active when the month ends.
subscription_months(start_date, end_date, user_id) as (
    select
        months.start_date::date,
        months.end_date,
        initial_subscription.user_id
    from
        subscription initial_subscription
        -- Earliest 'unsubscribed' event of the same user at or after this
        -- subscription; the row is kept with NULLs when there is none.
        left join lateral (
            select
                cancellation.timestamp
            from
                subscription cancellation
            where
                cancellation.user_id = initial_subscription.user_id
                and cancellation.timestamp >= initial_subscription.timestamp
                and cancellation.action = 'unsubscribed'
            order by
                cancellation.timestamp asc
            limit 1
        ) as cancellation on true
        inner join months on
            -- Strict '<': end_date is midnight of the NEXT month, so an event at
            -- exactly end_date belongs to the next month. This matches the
            -- '>= end_date' test applied to the cancellation below.
            initial_subscription.timestamp < months.end_date
            and (
                cancellation.timestamp is null
                or cancellation.timestamp >= months.end_date
            )
    where
        initial_subscription.action = 'subscribed'
)
select
    start_date,
    end_date,
    -- distinct: a user can contribute several subscription rows to one month
    count(distinct user_id)
from
    subscription_months
group by
    start_date,
    end_date
order by
    start_date;
-- Monthly subscriber counts without a LATERAL join.
--
-- For each month that contains at least one event, counts the users whose most
-- recent event up to and including that month is 'subscribed'.
-- Note: months are derived from the events themselves, so a month with no rows
-- in test never appears in the output, and neither does a month in which no
-- user's latest event is 'subscribed'.

-- bucket each date into months (one row per distinct month present in test)
WITH month_buckets AS (
    SELECT CAST(DATE_TRUNC('month', ts) AS DATE) AS month_bucket
    FROM test
    GROUP BY CAST(DATE_TRUNC('month', ts) AS DATE)
),
-- for each month bucket, find all actions taken by each user up to that month
master AS (
    SELECT
        mb.month_bucket,
        test.user_id,
        test.actions,
        test.ts
    FROM month_buckets AS mb
    LEFT JOIN test
        ON CAST(DATE_TRUNC('month', test.ts) AS DATE) <= mb.month_bucket
),
-- for each month bucket and user, the timestamp of the latest event
latest_event AS (
    SELECT
        month_bucket,
        user_id,
        MAX(ts) AS ts
    FROM master
    GROUP BY
        month_bucket,
        user_id
)
-- keep only the latest event per user and month, and count it when it is a
-- subscription
SELECT
    m1.month_bucket AS month,
    COUNT(m1.user_id) AS subscribers
FROM master AS m1
INNER JOIN latest_event AS m2
    ON m1.month_bucket = m2.month_bucket
    AND m1.user_id = m2.user_id
    AND m1.ts = m2.ts
WHERE m1.actions = 'subscribed'
GROUP BY m1.month_bucket
ORDER BY m1.month_bucket;
-- Count the ids that have an odd number of rows in subscribers.
-- NOTE(review): presumably each id alternates subscribed / unsubscribed events
-- starting with 'subscribed', so an odd row count means "currently subscribed"
-- - confirm against the data.
select count(*)
from
(
    select id
    from subscribers
    group by id
    -- odd number of events; replaces the pseudo-code list "in (1, 3, 5...)"
    having count(*) % 2 = 1
) a;
-- Count the distinct ids that have an odd number of rows in subscribers overall
-- and at least one row whose timestamp lies within [@date1, @date2].
-- NOTE(review): presumably an odd row count means "currently subscribed" because
-- events alternate subscribed / unsubscribed - confirm against the data.
select count(distinct a.id)
from
(
    select id
    from subscribers
    group by id
    -- odd number of events; replaces the pseudo-code list "in (1, 3, 5...)"
    having count(*) % 2 = 1
) a
inner join subscribers s
    on a.id = s.id
-- BETWEEN is inclusive on both ends, so a row at exactly @date2 is included
where s.timestamp between @date1 and @date2;
-- Monthly distinct subscribers, built by expanding each subscription period
-- into one row per calendar day and then rolling the days up to months.
WITH
-- Pair every subscribe/unsubscribe event with the same user's next event.
next_events AS (
    SELECT
        user_id,
        CAST("timestamp" AS date) AS date_from,
        action,
        CAST(LEAD("timestamp") OVER (PARTITION BY user_id ORDER BY "timestamp") AS date) AS date_to,
        LEAD(action) OVER (PARTITION BY user_id ORDER BY "timestamp") AS next_action
    FROM your_table
    WHERE action IN ('subscribed', 'unsubscribed')
),
-- Keep subscriptions that are ended by an unsubscribe or have no later event;
-- a subscription with no later event runs until today.
ranges AS (
    SELECT
        user_id,
        date_from,
        COALESCE(date_to, CURRENT_DATE) AS date_to
    FROM next_events
    WHERE action = 'subscribed'
        AND (next_action = 'unsubscribed' OR next_action IS NULL)
),
-- One row per user and calendar day inside a subscription range (both ends
-- inclusive).
subscriber_days AS (
    SELECT
        sub_range.user_id,
        cal.date
    FROM ranges AS sub_range
    INNER JOIN calendar AS cal
        ON cal.date BETWEEN sub_range.date_from AND sub_range.date_to
)
-- Any rule can be used to derive a monthly figure from the daily rows (first
-- day, last day, average, ...); this one counts the distinct users seen on any
-- day of the month.
SELECT
    DATE_TRUNC('month', date) AS date,
    COUNT(DISTINCT user_id) AS subscribers
FROM subscriber_days
-- Positional references are kept on purpose: the output alias "date" has the
-- same name as the input column, so a by-name reference would be ambiguous.
GROUP BY 1
ORDER BY 1;