R 根据后续条目修改分组数据的第一个条目
使用以下数据集:
# Example data: three tests (1st/2nd/3rd) for each of three subjects.
# status_1 is the recorded status; status_2 holds the expected result after
# the first status of a subject has been relabelled.
df <- data.frame(
  test = rep(c("1st", "2nd", "3rd"), times = 3),
  id = rep(c("PID1", "PID2", "PID3"), each = 3),
  date = c(
    "2020-01-01", "2020-01-13", "2020-01-17",
    "2020-01-01", "2020-01-13", "2020-01-20",
    "2020-01-01", "2020-01-14", "2020-01-18"
  ),
  status_1 = c(
    "Symp", "Symp", "uninfected",
    "Asymp", "Symp", "uninfected",
    "Asymp", "Asymp", "uninfected"
  ),
  status_2 = c(
    "Symp", "Symp", "uninfected",
    "pre-Symp", "Symp", "uninfected",
    "Asymp", "Asymp", "uninfected"
  ),
  stringsAsFactors = FALSE
)
您可以创建自己的自定义函数,用于检查该时间间隔内的状态
library(dplyr)
#' Relabel a leading asymptomatic status as pre-symptomatic
#'
#' Looks at one subject's statuses in test order. When the first status is
#' "Asymp" and the first "Symp" status occurs fewer than `window` days after
#' the first date, the first status is replaced by "pre-Symp".
#'
#' @param x Character vector of statuses, one per test, in test order.
#' @param date Dates matching `x` element for element: `Date` objects, or
#'   strings that `as.Date()` can parse.
#' @param window Number of days after the first test within which a "Symp"
#'   result triggers the relabelling. Defaults to 14.
#' @return `x`, unchanged except that `x[1]` may become "pre-Symp".
check_status <- function(x, date, window = 14) {
  # isTRUE() keeps an empty vector or a missing first status from breaking
  # the scalar `if` condition.
  if (!isTRUE(x[1] == "Asymp")) {
    return(x)
  }

  # which() drops NA comparisons, so missing statuses are simply skipped.
  onset <- which(x == "Symp")[1]
  if (is.na(onset)) {
    return(x)
  }

  # Raw data frames often hold dates as strings, which cannot be subtracted.
  if (is.character(date)) {
    date <- as.Date(date)
  }
  elapsed <- as.numeric(difftime(date[onset], date[1], units = "days"))

  # A missing date leaves the status untouched instead of raising an error.
  if (isTRUE(elapsed < window)) {
    x[1] <- "pre-Symp"
  }
  x
}
library(dplyr)
df %>%
mutate(date = as.Date(date)) %>%
group_by(id) %>%
mutate(status_2 = check_status(status_1, date))
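# test id date status_1 status_2
# <chr> <chr> <date> <chr> <chr>
#1 1st PID1 2020-01-01 Symp Symp
#2 2nd PID1 2020-01-13 Symp Symp
#3 3rd PID1 2020-01-17 uninfected uninfected
#4 1st PID2 2020-01-01 Asymp pre-Symp
#5 2nd PID2 2020-01-13 Symp Symp
#6 3rd PID2 2020-01-20 uninfected uninfected
#7 1st PID3 2020-01-01 Asymp Asymp
#8 2nd PID3 2020-01-14 Asymp Asymp
#9 3rd PID3 2020-01-18 uninfected uninfected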
library(dplyr)
#' Relabel a leading asymptomatic status as pre-symptomatic
#'
#' Looks at one subject's statuses in test order. When the first status is
#' "Asymp" and the first "Symp" status occurs fewer than `window` days after
#' the first date, the first status is replaced by "pre-Symp".
#'
#' @param x Character vector of statuses, one per test, in test order.
#' @param date Dates matching `x` element for element: `Date` objects, or
#'   strings that `as.Date()` can parse.
#' @param window Number of days after the first test within which a "Symp"
#'   result triggers the relabelling. Defaults to 14.
#' @return `x`, unchanged except that `x[1]` may become "pre-Symp".
check_status <- function(x, date, window = 14) {
  # isTRUE() keeps an empty vector or a missing first status from breaking
  # the scalar `if` condition.
  if (!isTRUE(x[1] == "Asymp")) {
    return(x)
  }

  # which() drops NA comparisons, so missing statuses are simply skipped.
  onset <- which(x == "Symp")[1]
  if (is.na(onset)) {
    return(x)
  }

  # Raw data frames often hold dates as strings, which cannot be subtracted.
  if (is.character(date)) {
    date <- as.Date(date)
  }
  elapsed <- as.numeric(difftime(date[onset], date[1], units = "days"))

  # A missing date leaves the status untouched instead of raising an error.
  if (isTRUE(elapsed < window)) {
    x[1] <- "pre-Symp"
  }
  x
}
# Compute status_2 per subject from status_1 and the test dates.
# check_status() treats the first row of each group as the baseline test, so
# rows are assumed to already be in chronological order within each id.
df %>%
  mutate(date = as.Date(date)) %>%
  group_by(id) %>%
  mutate(status_2 = check_status(status_1, date)) %>%
  # Drop the grouping so later verbs do not silently operate per id.
  ungroup()
# test id date status_1 status_2
# <chr> <chr> <date> <chr> <chr>
#1 1st PID1 2020-01-01 Symp Symp
#2 2nd PID1 2020-01-13 Symp Symp
#3 3rd PID1 2020-01-17 uninfected uninfected
#4 1st PID2 2020-01-01 Asymp pre-Symp
#5 2nd PID2 2020-01-13 Symp Symp
#6 3rd PID2 2020-01-20 uninfected uninfected
#7 1st PID3 2020-01-01 Asymp Asymp
#8 2nd PID3 2020-01-14 Asymp Asymp
#9 3rd PID3 2020-01-18 uninfected uninfected