Python 逻辑,如果你想的话。 #load libraries library(tidyverse) library(lubridate) pymts <- read.table(text = "cust_id agreement_id da
逻辑,如果你想的话。Python 逻辑,如果你想的话。 #load libraries library(tidyverse) library(lubridate) pymts <- read.table(text = "cust_id agreement_id da,python,r,pandas-groupby,pandas-resample,Python,R,Pandas Groupby,Pandas Resample,逻辑,如果你想的话。 #load libraries library(tidyverse) library(lubridate) pymts <- read.table(text = "cust_id agreement_id date payment 1 A 12/1/20 200 1 A 2/2/21 200 1 A 2/3/21 100 1 A 5/1/21 200 1 B 1/2/21 50 1 B 1
#load libraries
library(tidyverse)
library(lubridate)
pymts <- read.table(text = "cust_id agreement_id date payment
1 A 12/1/20 200
1 A 2/2/21 200
1 A 2/3/21 100
1 A 5/1/21 200
1 B 1/2/21 50
1 B 1/9/21 20
1 B 3/1/21 80
1 B 4/23/21 90
2 C 1/21/21 600
3 D 3/4/21 150
3 D 5/3/21 150", header = T)
agmt <- read.table(text = "agreement_id activation term_months total_fee
A 12/1/20 24 4800
B 1/21/21 6 600
C 1/21/21 6 600
D 3/4/21 6 300", header = T)
#final code
final<- pymts %>% mutate(date = as.Date(date, "%m/%d/%y")) %>%
left_join(agmt %>% mutate(activation = as.Date(activation, "%m/%d/%y")), by = "agreement_id") %>%
group_by(cust_id, agreement_id) %>%
mutate(d = n(),
date = floor_date(date, "month")) %>%
complete(date = seq.Date(from = min(date), by = "month", length.out = ifelse(sum(payment) == first(total_fee),
first(term_months),
(year(max(date)) -
year(min(date)))*12 +
month(max(date)) -
month(min(date)) +1))) %>%
mutate(payment = sum(payment, na.rm = T)) %>%
filter(!duplicated(date)) %>%
mutate(payment = payment/n()) %>%
select(1:4) %>% ungroup()
final
# A tibble: 22 x 4
cust_id agreement_id date payment
<int> <chr> <date> <dbl>
1 1 A 2020-12-01 117.
2 1 A 2021-01-01 117.
3 1 A 2021-02-01 117.
4 1 A 2021-03-01 117.
5 1 A 2021-04-01 117.
6 1 A 2021-05-01 117.
7 1 B 2021-01-01 60
8 1 B 2021-02-01 60
9 1 B 2021-03-01 60
10 1 B 2021-04-01 60
# ... with 12 more rows
from dateutil.relativedelta import relativedelta
# Transofrm column to date
payments['date']= pd.to_datetime(payments['date'])
agreement['activation']= pd.to_datetime(agreement['activation'])
final =pd.merge(payments,agreement,on='agreement_id',how='left')
# set date to beginning of month
final['date'] = pd.to_datetime(final.date).dt.to_period('M').dt.to_timestamp()
def set_date_range(df):
if df['payment'].sum() == df['total_fee'].iloc[0]:
return pd.date_range(min(g['date']), periods=df['term_months'].iloc[0], freq='M')
else:
return pd.date_range(min(g['date']),
max(g['date'])+relativedelta(months=+1), freq='M' )
# Create dataframe with dates
seq_df = pd.DataFrame()
for i,g in final.groupby(['cust_id', 'agreement_id']):
seq_df = pd.concat([seq_df,
pd.DataFrame({'cust_id': i[0], 'agreement_id': i[1], 'date': set_date_range(g)})])
# Set date to beginnig of month
seq_df['date'] = pd.to_datetime(seq_df.date).dt.to_period('M').dt.to_timestamp()
final = (pd.concat([final, seq_df], sort=True)
.sort_values(['cust_id', 'agreement_id', 'date'])
.reset_index(drop=True)
.reindex(columns=final.columns))
final['payment'] = final.groupby(by=['cust_id', 'agreement_id'])["payment"].transform("sum")
final = final.drop_duplicates(['cust_id', 'agreement_id', 'date'])
final['n'] = final.groupby(by=['cust_id', 'agreement_id'])["cust_id"].transform("count")
final['payment_due'] = final['payment']/final['n']
final[['cust_id','agreement_id','date', 'payment_due']]