在For循环中添加Group By
标签:r, for-loop, dplyr

我的数据集如下:
# Adstock rate used by the transformation further down
adstock_rate <- 0.50

# Advertising values: 52 observations for Region 500 followed by 52 for
# Region 501. Runs of zero values are written with rep() to keep this short.
advertising <- c(
  # Region 500
  117.913, 120.112, 125.828, 115.354, 177.090, 141.647, 137.892,
  rep(0, 10),
  158.511, 109.385, 91.084, 79.253, 102.706, 78.494, 135.114,
  114.549, 87.337, 107.829, 125.020, 82.956, 60.813, 83.149,
  rep(0, 6),
  129.515, 105.486, 111.494, 107.099,
  rep(0, 11),
  # Region 501
  134.913, 123.112, 178.828, 112.354, 100.090, 167.647, 177.892,
  rep(0, 10),
  112.511, 155.385, 123.084, 89.253, 67.706, 23.494, 122.114,
  112.549, 65.337, 134.829, 123.020, 81.956, 23.813, 65.149,
  rep(0, 6),
  145.515, 154.486, 121.494, 117.099,
  rep(0, 11)
)

# Region code for every observation, in the same order as `advertising`
Region <- rep(c(500, 501), each = 52)

# Two numeric columns: Region and advertising
advertising_dataset <- data.frame(Region = Region, advertising = advertising)
从这里开始,我将应用一个lag函数,其中我取第一个值,然后应用for循环来转换我的数据集
# Loop-based adstock (approach proposed by Linh Tran), applied to the whole
# dataset without regard to Region: the first value is copied unchanged and
# each later value adds adstock_rate times the previous adstocked value.
n_obs <- length(advertising_dataset$advertising)
advertising_dataset$adstocked_advertising <- numeric(n_obs)
advertising_dataset$adstocked_advertising[1] <- advertising_dataset$advertising[1]
for (i in 2:n_obs) {
  previous <- advertising_dataset$adstocked_advertising[i - 1]
  advertising_dataset$adstocked_advertising[i] <-
    advertising_dataset$advertising[i] + adstock_rate * previous
}
我遇到的问题是,我的数据集是按区域分隔的。我需要应用上述函数,包括按区域获取第一个值
有没有一种方法可以通过dplyr包实现这一点
我知道这是错误的,但可能是这样的:
library(dplyr)
# NOTE: non-working sketch from the question, kept exactly as posted.
# It is not valid R: an argument to summarise() cannot be a `$`/`[` assignment,
# the statements are not separated by commas, and a for loop is placed
# directly inside the call. It also indexes the whole advertising_dataset
# rather than the rows of the current Region group.
separated_by_region<- advertising_dataset %>%
group_by(Region) %>%
summarise(
advertising_dataset$adstocked_advertising =
numeric(length(advertising_dataset$advertising))
advertising_dataset$adstocked_advertising[1] =
advertising_dataset$advertising[1]
for(i in 2:length(advertising_dataset$advertising)){
advertising_dataset$adstocked_advertising[i] =
advertising_dataset$advertising[i] + adstock_rate *
advertising_dataset$adstocked_advertising[i-1]})
我不确定这是否正是你所说的“使用dplyr”,也不确定有没有比do(...)更好的方法,但你可以像下面这样把循环定义成一个函数,再按Region分组后用do()调用它:
# Add an adstocked version of the `advertising` column to a data frame.
#
# The first row keeps its `advertising` value; every later row i gets
#   advertising[i] + rate * adstocked_advertising[i - 1].
#
# Arguments:
#   df_   data frame with a numeric `advertising` column (one group of rows).
#   rate  multiplier applied to the previous adstocked value. Defaults to the
#         `adstock_rate` variable visible from where foo() is defined, which
#         is what the earlier version of this function read directly.
#
# Returns `df_` with an `adstocked_advertising` column added (or overwritten).
foo <- function(df_, rate = adstock_rate) {
  stopifnot(is.data.frame(df_), is.numeric(df_$advertising))

  spend <- df_$advertising
  # Work on a plain vector: assigning into a data frame column inside the
  # loop would go through the data frame replacement method on every step.
  adstocked <- spend

  # seq_along(x)[-1] is empty for zero or one rows, whereas 2:nrow(df_)
  # would count down (2, 1) and fail on a single-row group.
  for (i in seq_along(spend)[-1]) {
    adstocked[i] <- spend[i] + rate * adstocked[i - 1]
  }

  df_$adstocked_advertising <- adstocked
  df_
}
当然,数值还需要再核对一下,但至少对Region 500这一组,结果与您给出的输出一致
编辑:
根据评论中的要求,下面是滞后值可调的版本:
# Add an adstocked version of the `advertising` column to a data frame,
# carrying over the value from `lag_val` rows earlier.
#
# The first `lag_val` rows keep their `advertising` value; every later row i
# gets advertising[i] + rate * adstocked_advertising[i - lag_val].
#
# Arguments:
#   df_      data frame with a numeric `advertising` column (one group).
#   lag_val  how many rows back the carried-over value is taken from; a
#            single whole number >= 1.
#   rate     multiplier applied to the lagged adstocked value. Defaults to
#            the `adstock_rate` variable visible from where foo() is defined,
#            which is what the earlier version of this function read directly.
#
# Returns `df_` with an `adstocked_advertising` column added (or overwritten).
foo <- function(df_, lag_val = 1, rate = adstock_rate) {
  stopifnot(
    is.data.frame(df_),
    is.numeric(df_$advertising),
    is.numeric(lag_val),
    length(lag_val) == 1L,
    !is.na(lag_val),
    lag_val >= 1,
    lag_val == round(lag_val)
  )

  spend <- df_$advertising
  # Work on a plain vector: assigning into a data frame column inside the
  # loop would go through the data frame replacement method on every step.
  adstocked <- spend
  n_rows <- length(spend)

  # With lag_val rows or fewer there is nothing to carry over; the guard
  # also stops (1 + lag_val):n_rows from counting down past the last row.
  if (n_rows > lag_val) {
    for (i in (lag_val + 1):n_rows) {
      adstocked[i] <- spend[i] + rate * adstocked[i - lag_val]
    }
  }

  df_$adstocked_advertising <- adstocked
  df_
}
我认为这正是你想要的,但同样值得确认。希望它能帮助你回答另一个相关的问题,但我猜它需要一些修改才能更灵活
干杯
-Luke

评论:
- 您能给出一个示例或模型,说明您希望输出的样子吗?
- 刚刚提供。谢谢你,卡米尔。您的输出应该与显示的输出相同,但当区域501出现时,数字应该与我的输出不同。
- 如果感兴趣,请在此跟进问题:(原链接缺失)
- 如果感兴趣,我还问了一个跟进问题:(原链接缺失)
- @Luke C 是否有方法使其更稳健,以便滞后可以是除1之外的其他值?当我设置为2或3时,函数失败。如果你愿意,我也可以另外发一个问题,怎样方便就怎样来。
- @NickKnauer - 是的,这仍然是可行的,但是您需要考虑到这样一个事实,即对于数组中的前几项,滞后会被“跳过”。我会看看我是否能很快做出修改。
- 好的,谢谢你,如果你愿意的话,我还可以发布另一个问题
# Add an adstocked version of the `advertising` column to a data frame.
#
# The first row keeps its `advertising` value; every later row i gets
#   advertising[i] + rate * adstocked_advertising[i - 1].
#
# Arguments:
#   df_   data frame with a numeric `advertising` column (one group of rows).
#   rate  multiplier applied to the previous adstocked value. Defaults to the
#         `adstock_rate` variable visible from where foo() is defined, which
#         is what the earlier version of this function read directly.
#
# Returns `df_` with an `adstocked_advertising` column added (or overwritten).
foo <- function(df_, rate = adstock_rate) {
  stopifnot(is.data.frame(df_), is.numeric(df_$advertising))

  spend <- df_$advertising
  # Work on a plain vector: assigning into a data frame column inside the
  # loop would go through the data frame replacement method on every step.
  adstocked <- spend

  # seq_along(x)[-1] is empty for zero or one rows, whereas 2:nrow(df_)
  # would count down (2, 1) and fail on a single-row group.
  for (i in seq_along(spend)[-1]) {
    adstocked[i] <- spend[i] + rate * adstocked[i - 1]
  }

  df_$adstocked_advertising <- adstocked
  df_
}
library(dplyr)
# Apply foo() separately to each Region: do() evaluates foo() once per group
# with `.` standing for that group's rows, and the per-group results are
# stacked. The trailing data.frame() turns the result into a plain data frame.
adv_2 <- advertising_dataset %>%
  group_by(Region) %>%
  do(foo(data.frame(.))) %>%
  data.frame()
> adv_2[1:10,]
Region advertising adstocked_advertising
1 500 117.913 117.91300
2 500 120.112 179.06850
3 500 125.828 215.36225
4 500 115.354 223.03512
5 500 177.090 288.60756
6 500 141.647 285.95078
7 500 137.892 280.86739
8 500 0.000 140.43370
9 500 0.000 70.21685
10 500 0.000 35.10842
> adv_2[50:60,]
Region advertising adstocked_advertising
50 500 0.000 0.401496
51 500 0.000 0.200748
52 500 0.000 0.100374
53 501 134.913 134.913000
54 501 123.112 190.568500
55 501 178.828 274.112250
56 501 112.354 249.410125
57 501 100.090 224.795063
58 501 167.647 280.044531
59 501 177.892 317.914266
60 501 0.000 158.957133
# Add an adstocked version of the `advertising` column to a data frame,
# carrying over the value from `lag_val` rows earlier.
#
# The first `lag_val` rows keep their `advertising` value; every later row i
# gets advertising[i] + rate * adstocked_advertising[i - lag_val].
#
# Arguments:
#   df_      data frame with a numeric `advertising` column (one group).
#   lag_val  how many rows back the carried-over value is taken from; a
#            single whole number >= 1.
#   rate     multiplier applied to the lagged adstocked value. Defaults to
#            the `adstock_rate` variable visible from where foo() is defined,
#            which is what the earlier version of this function read directly.
#
# Returns `df_` with an `adstocked_advertising` column added (or overwritten).
foo <- function(df_, lag_val = 1, rate = adstock_rate) {
  stopifnot(
    is.data.frame(df_),
    is.numeric(df_$advertising),
    is.numeric(lag_val),
    length(lag_val) == 1L,
    !is.na(lag_val),
    lag_val >= 1,
    lag_val == round(lag_val)
  )

  spend <- df_$advertising
  # Work on a plain vector: assigning into a data frame column inside the
  # loop would go through the data frame replacement method on every step.
  adstocked <- spend
  n_rows <- length(spend)

  # With lag_val rows or fewer there is nothing to carry over; the guard
  # also stops (1 + lag_val):n_rows from counting down past the last row.
  if (n_rows > lag_val) {
    for (i in (lag_val + 1):n_rows) {
      adstocked[i] <- spend[i] + rate * adstocked[i - lag_val]
    }
  }

  df_$adstocked_advertising <- adstocked
  df_
}
# Same per-Region application of foo() as before, but with the carried-over
# value taken from three rows back (lag_val = 3). The trailing data.frame()
# turns the result into a plain data frame.
adv_2 <- advertising_dataset %>%
  group_by(Region) %>%
  do(foo(data.frame(.), lag_val = 3)) %>%
  data.frame()
> adv_2
Region advertising adstocked_advertising
1 500 117.913 117.913000
2 500 120.112 120.112000
3 500 125.828 125.828000
4 500 115.354 174.310500
5 500 177.090 237.146000
6 500 141.647 204.561000
7 500 137.892 225.047250
8 500 0.000 118.573000
9 500 0.000 102.280500
10 500 0.000 112.523625