R 计算复杂表_1中的持续时间
我有一个数据框R 计算复杂表_1中的持续时间,r,date,duration,R,Date,Duration,我有一个数据框 df <- data.frame("name" = c("jack", "william", "david", "john"), "01-Jan-19" = c(NA,"A", NA,"A"), "01-Feb-19" = c(&
# Example data: one row per person, one column per month start, holding a
# status code ("A", "S", "M") or NA.
# The date labels are not syntactic R names, so data.frame() rewrites them
# (e.g. "01-Jan-19" becomes X01.Jan.19); later code strips the leading "X".
df <- data.frame(
  name        = c("jack", "william", "david", "john"),
  "01-Jan-19" = c(NA,  "A", NA,  "A"),
  "01-Feb-19" = c("A", "A", NA,  "A"),
  "01-Mar-19" = c("S", "A", "A", "A"),
  "01-Apr-19" = c("A", "A", "A", "S"),
  "01-May-19" = c(NA,  "A", "A", "A"),
  "01-Jun-19" = c("A", "S", "A", "S"),
  "01-Jul-19" = c("A", "S", "A", "S"),
  "01-Aug-19" = c(NA,  "S", "A", "A"),
  "01-Sep-19" = c(NA,  "S", "A", "S"),
  "01-Oct-19" = c("S", "S", "A", "S"),
  "01-Nov-19" = c("S", "S", NA,  "S"),
  "01-Dec-19" = c("S", "S", "S", NA),
  "01-Jan-20" = c("S", "M", "A", "M"),
  "01-Feb-20" = c("M", "M", "M", "M")
)
# The asker's original attempt: per person, the span between the first and
# the last date whose value is 'A'.
# NOTE(review): `person` and `value` are not columns of `df` as built by
# data.frame(); this presumably ran on a long-format table, and the reshaping
# step is not shown here.
df %>%
  dplyr::mutate(person = lubridate::dmy(sub('X', '', person))) %>%
  dplyr::group_by(name) %>%
  dplyr::summarise(avg_duration = person[max(which(value == 'A'))] -
                     person[min(which(value == 'A'))])
我可以问一下如何修改代码以达到以下目的吗?
在两个A之间的周期中,如何减去具有其他值的周期(任何不是A的值,例如S,NA)?
非常感谢。在这种情况下,我很难理解“周期”的含义。从您的示例来看,一个周期需要两个值。周期本身也要被减去吗?例如,如果一个人有以下序列:A、S、A,那么他有 0、1 还是 2 个带 A 的周期?那么 A、S、A、A、S、A 呢?相反,如果每个日期代表一个期间,且该期间一直持续到下一个日期为止,则每个“人-值”组合的总持续时间可计算如下:
# Total time each person spent in each status, treating every date as the
# start of a span that lasts until the next date in the table.
duration <- df %>%
  # one row per (name, date) with the status in `value`
  tidyr::pivot_longer(cols = -name, names_to = 'date') %>%
  # data.frame() prefixed the date column names with "X"; strip it and parse
  dplyr::mutate(date = lubridate::dmy(sub('X', '', date))) %>%
  # namespace-qualified like the other dplyr verbs, so the snippet does not
  # depend on dplyr being attached
  dplyr::group_by(name) %>%
  dplyr::arrange(name, date) %>%
  # span length = gap to the next date; the last date of each person gets 0
  dplyr::mutate(duration = c(diff(date), 0)) %>%
  dplyr::group_by(name, value) %>%
  dplyr::summarise(summed_duration = sum(duration))
# Total time each person spent in each status, treating every date as the
# start of a span that lasts until the next date in the table.
duration <- df %>%
  tidyr::pivot_longer(cols = -name, names_to = 'date') %>%
  # data.frame() prefixed the date column names with "X"; strip it and parse
  dplyr::mutate(date = lubridate::dmy(sub('X', '', date))) %>%
  dplyr::group_by(name) %>%
  dplyr::arrange(name, date) %>%
  # span length = gap to the next date; the last date of each person gets 0
  dplyr::mutate(duration = c(diff(date), 0)) %>%
  dplyr::group_by(name, value) %>%
  dplyr::summarise(summed_duration = sum(duration))
duration
# A tibble: 15 x 3
# Groups:   name [4]
#    name    value summed_duration
#    <chr>   <chr> <drtn>
#  1 david   A     276 days
#  2 david   M       0 days
#  3 david   S      31 days
#  4 david   NA     89 days
#  5 jack    A     119 days
#  6 jack    M       0 days
#  7 jack    S     154 days
#  8 jack    NA    123 days
#  9 john    A     152 days
# 10 john    M      31 days
# 11 john    S     182 days
# 12 john    NA     31 days
# 13 william A     151 days
# 14 william M      31 days
# 15 william S     214 days
根据评论进行编辑
# Time each person spent in status "A", leaving out the span that starts at
# that person's final "A" observation.
df %>%
  tidyr::pivot_longer(cols = -name, names_to = "date") %>%
  # strip the "X" that data.frame() put in front of the date column names
  dplyr::mutate(date = lubridate::dmy(sub("X", "", date))) %>%
  dplyr::group_by(name) %>%
  dplyr::arrange(name, date) %>%
  # span length = gap to the next date; the last date of each person gets 0
  dplyr::mutate(duration = c(diff(date), 0)) %>%
  dplyr::group_by(name, value) %>%
  # within each (name, value) group keep every row except the last "A" row
  dplyr::filter(dplyr::row_number() < dplyr::n() | value != "A") %>%
  dplyr::summarise(summed_duration = sum(duration)) %>%
  dplyr::filter(value == "A")
# A tibble: 4 x 3
# Groups: name [4]
name value summed_duration
<chr> <chr> <drtn>
1 david A 245 days
2 jack A 88 days
3 john A 121 days
4 william A 120 days
# Time each person spent in status 'A', leaving out the span that starts at
# that person's final 'A' observation.
df %>%
  tidyr::pivot_longer(cols = -name, names_to = 'date') %>%
  # strip the "X" that data.frame() put in front of the date column names
  dplyr::mutate(date = lubridate::dmy(sub('X', '', date))) %>%
  dplyr::group_by(name) %>%
  dplyr::arrange(name, date) %>%
  # span length = gap to the next date; the last date of each person gets 0
  dplyr::mutate(duration = c(diff(date), 0)) %>%
  dplyr::group_by(name, value) %>%
  # within each (name, value) group keep every row except the last 'A' row
  dplyr::filter(dplyr::row_number() < dplyr::last(dplyr::row_number()) | value != 'A') %>%
  dplyr::summarise(summed_duration = sum(duration)) %>%
  dplyr::filter(value == 'A')
# A tibble: 4 x 3
# Groups:   name [4]
#   name    value summed_duration
#   <chr>   <chr> <drtn>
# 1 david   A     245 days
# 2 jack    A      88 days
# 3 john    A     121 days
# 4 william A     120 days
在这种情况下,我很难理解“周期”的含义。从您的示例来看,一个周期需要两个值。周期本身也要被减去吗?例如,如果一个人有以下序列:A、S、A,那么他有 0、1 还是 2 个带 A 的周期?那么 A、S、A、A、S、A 呢?
相反,如果每个日期代表一个期间,该期间在下一个日期之前有效,则每个人-值组合的总持续时间可计算如下:
# Total time each person spent in each status, treating every date as the
# start of a span that lasts until the next date in the table.
duration <- df %>%
  # one row per (name, date) with the status in `value`
  tidyr::pivot_longer(cols = -name, names_to = 'date') %>%
  # data.frame() prefixed the date column names with "X"; strip it and parse
  dplyr::mutate(date = lubridate::dmy(sub('X', '', date))) %>%
  # namespace-qualified like the other dplyr verbs, so the snippet does not
  # depend on dplyr being attached
  dplyr::group_by(name) %>%
  dplyr::arrange(name, date) %>%
  # span length = gap to the next date; the last date of each person gets 0
  dplyr::mutate(duration = c(diff(date), 0)) %>%
  dplyr::group_by(name, value) %>%
  dplyr::summarise(summed_duration = sum(duration))
# Total time each person spent in each status, treating every date as the
# start of a span that lasts until the next date in the table.
duration <- df %>%
  tidyr::pivot_longer(cols = -name, names_to = 'date') %>%
  # data.frame() prefixed the date column names with "X"; strip it and parse
  dplyr::mutate(date = lubridate::dmy(sub('X', '', date))) %>%
  dplyr::group_by(name) %>%
  dplyr::arrange(name, date) %>%
  # span length = gap to the next date; the last date of each person gets 0
  dplyr::mutate(duration = c(diff(date), 0)) %>%
  dplyr::group_by(name, value) %>%
  dplyr::summarise(summed_duration = sum(duration))
duration
# A tibble: 15 x 3
# Groups:   name [4]
#    name    value summed_duration
#    <chr>   <chr> <drtn>
#  1 david   A     276 days
#  2 david   M       0 days
#  3 david   S      31 days
#  4 david   NA     89 days
#  5 jack    A     119 days
#  6 jack    M       0 days
#  7 jack    S     154 days
#  8 jack    NA    123 days
#  9 john    A     152 days
# 10 john    M      31 days
# 11 john    S     182 days
# 12 john    NA     31 days
# 13 william A     151 days
# 14 william M      31 days
# 15 william S     214 days
根据评论进行编辑
# Time each person spent in status "A", leaving out the span that starts at
# that person's final "A" observation.
df %>%
  tidyr::pivot_longer(cols = -name, names_to = "date") %>%
  # strip the "X" that data.frame() put in front of the date column names
  dplyr::mutate(date = lubridate::dmy(sub("X", "", date))) %>%
  dplyr::group_by(name) %>%
  dplyr::arrange(name, date) %>%
  # span length = gap to the next date; the last date of each person gets 0
  dplyr::mutate(duration = c(diff(date), 0)) %>%
  dplyr::group_by(name, value) %>%
  # within each (name, value) group keep every row except the last "A" row
  dplyr::filter(dplyr::row_number() < dplyr::n() | value != "A") %>%
  dplyr::summarise(summed_duration = sum(duration)) %>%
  dplyr::filter(value == "A")
# A tibble: 4 x 3
# Groups: name [4]
name value summed_duration
<chr> <chr> <drtn>
1 david A 245 days
2 jack A 88 days
3 john A 121 days
4 william A 120 days
# Time each person spent in status 'A', leaving out the span that starts at
# that person's final 'A' observation.
df %>%
  tidyr::pivot_longer(cols = -name, names_to = 'date') %>%
  # strip the "X" that data.frame() put in front of the date column names
  dplyr::mutate(date = lubridate::dmy(sub('X', '', date))) %>%
  dplyr::group_by(name) %>%
  dplyr::arrange(name, date) %>%
  # span length = gap to the next date; the last date of each person gets 0
  dplyr::mutate(duration = c(diff(date), 0)) %>%
  dplyr::group_by(name, value) %>%
  # within each (name, value) group keep every row except the last 'A' row
  dplyr::filter(dplyr::row_number() < dplyr::last(dplyr::row_number()) | value != 'A') %>%
  dplyr::summarise(summed_duration = sum(duration)) %>%
  dplyr::filter(value == 'A')
# A tibble: 4 x 3
# Groups:   name [4]
#   name    value summed_duration
#   <chr>   <chr> <drtn>
# 1 david   A     245 days
# 2 jack    A      88 days
# 3 john    A     121 days
# 4 william A     120 days
您可以对每个 name 计算 value == 'A' 的最小索引和最大索引,然后用这两个日期之差减去它们之间 value 不是 'A' 的那些月份的天数。
# Per person: time between the first and the last 'A' observation, minus the
# length of every month in between whose value is not 'A'.
# Rows with NA values are dropped up front (values_drop_na = TRUE), so months
# with a missing value are not subtracted.
df %>%
  tidyr::pivot_longer(cols = -name, names_to = 'person', values_drop_na = TRUE) %>%
  # strip the "X" that data.frame() put in front of the date column names
  dplyr::mutate(person = lubridate::dmy(sub('X', '', person))) %>%
  dplyr::group_by(name) %>%
  dplyr::summarise(min_ind = min(which(value == 'A')),
                   max_ind = max(which(value == 'A')),
                   duration = person[max_ind] - person[min_ind] -
                     # Slice `person` to the same min_ind:max_ind window as the
                     # mask. Indexing the full-length vector with the shorter
                     # mask recycles it and also subtracts months after max_ind.
                     sum(lubridate::days_in_month(
                       person[min_ind:max_ind][value[min_ind:max_ind] != 'A'])))
#   name    min_ind max_ind duration
#   <chr>     <int>   <int> <drtn>
# 1 david         1      10 275 days
# 2 jack          1       5 119 days
# 3 john          1       8 121 days
# 4 william       1       5 120 days
# Per person: time between the first and the last 'A' observation, minus the
# length of every month in between whose value is not 'A'.
# Rows with NA values are dropped up front (values_drop_na = TRUE), so months
# with a missing value are not subtracted.
df %>%
  tidyr::pivot_longer(cols = -name, names_to = 'person', values_drop_na = TRUE) %>%
  # strip the "X" that data.frame() put in front of the date column names
  dplyr::mutate(person = lubridate::dmy(sub('X', '', person))) %>%
  dplyr::group_by(name) %>%
  dplyr::summarise(min_ind = min(which(value == 'A')),
                   max_ind = max(which(value == 'A')),
                   duration = person[max_ind] - person[min_ind] -
                     # Slice `person` to the same min_ind:max_ind window as the
                     # mask. Indexing the full-length vector with the shorter
                     # mask recycles it and also subtracts months after max_ind.
                     sum(lubridate::days_in_month(
                       person[min_ind:max_ind][value[min_ind:max_ind] != 'A'])))
#   name    min_ind max_ind duration
#   <chr>     <int>   <int> <drtn>
# 1 david         1      10 275 days
# 2 jack          1       5 119 days
# 3 john          1       8 121 days
# 4 william       1       5 120 days
您可以对每个 name 计算 value == 'A' 的最小索引和最大索引,然后用这两个日期之差减去它们之间 value 不是 'A' 的那些月份的天数。
# Per person: time between the first and the last 'A' observation, minus the
# length of every month in between whose value is not 'A'.
# Rows with NA values are dropped up front (values_drop_na = TRUE), so months
# with a missing value are not subtracted.
df %>%
  tidyr::pivot_longer(cols = -name, names_to = 'person', values_drop_na = TRUE) %>%
  # strip the "X" that data.frame() put in front of the date column names
  dplyr::mutate(person = lubridate::dmy(sub('X', '', person))) %>%
  dplyr::group_by(name) %>%
  dplyr::summarise(min_ind = min(which(value == 'A')),
                   max_ind = max(which(value == 'A')),
                   duration = person[max_ind] - person[min_ind] -
                     # Slice `person` to the same min_ind:max_ind window as the
                     # mask. Indexing the full-length vector with the shorter
                     # mask recycles it and also subtracts months after max_ind.
                     sum(lubridate::days_in_month(
                       person[min_ind:max_ind][value[min_ind:max_ind] != 'A'])))
#   name    min_ind max_ind duration
#   <chr>     <int>   <int> <drtn>
# 1 david         1      10 275 days
# 2 jack          1       5 119 days
# 3 john          1       8 121 days
# 4 william       1       5 120 days
df%>%
tidyr::pivot_longer(cols=-name,names_to='person',values_drop_na=TRUE)%>%
dplyr::mutate(person=lubridate::dmy(sub('X','',person))