R 向列列表中添加几个延迟/移位
我想延迟几个列(例如,value_1+value_2+x-见下文),定义它们的延迟数(例如,3)及其命名。这是一些工作繁琐/手动代码:R 向列列表中添加几个延迟/移位,r,dplyr,R,Dplyr,我想延迟几个列(例如,value_1+value_2+x-见下文),定义它们的延迟数(例如,3)及其命名。这是一些工作繁琐/手动代码: library(dplyr) library(lubridate) library(data.table) haves <- data.frame( id = c("a", "a", "a", "a", "a", "a", "b", "b", "b", "b", "b", "b") , date = as.Date(c("2015-0
library(dplyr)
library(lubridate)
library(data.table)
haves <- data.frame(
id = c("a", "a", "a", "a", "a", "a", "b", "b", "b", "b", "b", "b")
, date = as.Date(c("2015-01-01", "2015-02-01", "2015-03-01", "2015-04-01", "2015-05-01", "2015-06-01", "2015-01-01", "2015-02-01", "2015-03-01", "2015-04-01", "2015-05-01", "2015-06-01"))
, value_1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
, value_2 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
, x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
)
haves$value_2 <- haves$value_2 + 1
haves$x <- haves$x + 2
haves
wants <- haves %>%
group_by(id) %>%
mutate(
value_1_lag_1 = lag(value_1, n = 1, order_by = date)
, value_1_lag_2 = lag(value_1, n = 2, order_by = date)
, value_1_lag_3 = lag(value_1, n = 3, order_by = date)
, value_2_lag_1 = lag(value_2, n = 1, order_by = date)
, value_2_lag_2 = lag(value_2, n = 2, order_by = date)
, value_2_lag_3 = lag(value_2, n = 3, order_by = date)
, x_lag_1 = lag(x, n = 1, order_by = date)
, x_lag_2 = lag(x, n = 2, order_by = date)
, x_lag_3 = lag(x, n = 3, order_by = date)
)
wants
它不能满足我的需求。这更接近:
colnames <- colnames(haves)
setDT(haves)
haves[, sapply(1:3, function(x){paste0(colnames[x + 2], x, '_lag_', 1:3)}) := shift(.SD, 1:3), by = id, .SDcols = value_1:x][]
colnames%
变异(
!!粘贴0(列,“_lag”,lag):=lag(!!rlang::sym(列),n=lag,order_by=date)
)
}
temp以下是链接答案中的适用dplyr解决方案
haves%>%
分组依据(id)%>%
嵌套%>%
突变(数据=映射(数据,~排列(,日期))%>%
突变(滞后=映射(数据、函数(dat)){
imap_dfc(dat[-1],~set_名称(map(1:3,滞后,x=.x)),
粘贴0(.y,“(1:3)))
})) %>%
unnest(c(数据、滞后))
这就是您要寻找的吗?这里有一种通过数据的替代方法。表
库(data.table)
图书馆(lubridate)
#>
#>附加包装:“lubridate”
#>以下对象已从“package:data.table”屏蔽:
#>
#>小时、等周、日、分、月、季、秒、日、周、,
#>元宵节
#>以下对象已从“package:base”屏蔽:
#>
#>日期
图书馆(stringr)
haves id日期值\u 1值\u 2值\u 3值\u 1滞后\u 1值\u 1滞后\u 2
#>1:a 2015-01-01不适用
#>2:b 2015-01-01 7 1 NA
#>3:a 2015-02-01 2 7 1
#>4:b 2015-02-01 8 2 7
#>5:a 2015-03-01 3 8 2
#>6:b 2015-03-01 9 3 8
#>7:a 2015-04-01 4 9 3
#>8:b 2015-04-01 10 4 9
#>9:a 2015-05-01 5 10 4
#>10:b 2015-05-01 11 5 10
#>11:a 2015-06-01 6 11 5
#>12:b 2015-06-01 12 6 11
#>值\ 1 \滞后\ 3值\ 2 \滞后\ 1值\ 2 \滞后\ 2值\ 2 \滞后\ 3值\ 3 \滞后\ 1
#>1:NA-NA-NA-NA
#>2:NA 1 NA 1
#>3:NA 7 1 NA 7
#> 4: 1 2 7 1 2
#> 5: 7 8 2 7 8
#> 6: 2 3 8 2 3
#> 7: 8 9 3 8 9
#> 8: 3 4 9 3 4
#> 9: 9 10 4 9 10
#> 10: 4 5 10 4 5
#> 11: 10 11 5 10 11
#> 12: 5 6 11 5 6
#>值\u 3\u滞后\u 2值\u 3\u滞后\u 3
#>1:NA-NA
#>2:NA-NA
#>3:1 NA
#> 4: 7 1
#> 5: 2 7
#> 6: 8 2
#> 7: 3 8
#> 8: 9 3
#> 9: 4 9
#> 10: 10 4
#> 11: 5 10
#> 12: 11 5
由(v0.3.0)于2020-05-04创建谢谢。请注意,我引入了另一个变量x,以使解决方案更加通用。
colnames <- colnames(haves)
setDT(haves)
haves[, sapply(1:3, function(x){paste0(colnames[x + 2], x, '_lag_', 1:3)}) := shift(.SD, 1:3), by = id, .SDcols = value_1:x][]
appender <- function(df, column, lag){
df %>%
group_by(
id
) %>%
mutate(
!!paste0(column, "_lag_", lag) := lag(!!rlang::sym(column), n = lag, order_by = date)
)
}
temp <- appender(haves, "value_2", 3)