在For循环中添加Group By_R_For Loop_Dplyr

在For循环中添加Group By

r for-loop

在For循环中添加Group By,r,for-loop,dplyr,R,For Loop,Dplyr,我的数据集如下： # Define Adstock Rate adstock_rate = 0.50 # Create Data advertising = c(117.913, 120.112, 125.828, 115.354, 177.090, 141.647, 137.892, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 158.

我的数据集如下：

# Adstock rate: multiplier applied to the previous period's adstocked value.
adstock_rate <- 0.50

# Advertising values: 52 observations for region 500, then 52 for region 501.
advertising <- c(
  # Region 500
  117.913, 120.112, 125.828, 115.354, 177.090, 141.647, 137.892,   0.000,
    0.000,   0.000,   0.000,   0.000,   0.000,   0.000,   0.000,   0.000,
    0.000, 158.511, 109.385,  91.084,  79.253, 102.706,  78.494, 135.114,
  114.549,  87.337, 107.829, 125.020,  82.956,  60.813,  83.149,   0.000,
    0.000,   0.000,   0.000,   0.000,   0.000, 129.515, 105.486, 111.494,
  107.099,   0.000,   0.000,   0.000,   0.000,   0.000,   0.000,   0.000,
    0.000,   0.000,   0.000,   0.000,
  # Region 501
  134.913, 123.112, 178.828, 112.354, 100.090, 167.647, 177.892,   0.000,
    0.000,   0.000,   0.000,   0.000,   0.000,   0.000,   0.000,   0.000,
    0.000, 112.511, 155.385, 123.084,  89.253,  67.706,  23.494, 122.114,
  112.549,  65.337, 134.829, 123.020,  81.956,  23.813,  65.149,   0.000,
    0.000,   0.000,   0.000,   0.000,   0.000, 145.515, 154.486, 121.494,
  117.099,   0.000,   0.000,   0.000,   0.000,   0.000,   0.000,   0.000,
    0.000,   0.000,   0.000,   0.000
)

# Region label for every observation: 52 x 500 followed by 52 x 501.
Region <- rep(c(500, 501), each = 52)

# One row per observation, columns Region and advertising (both numeric).
advertising_dataset <- data.frame(Region = Region, advertising = advertising)

从这里开始，我将应用一个lag函数，其中我取第一个值，然后应用for循环来转换我的数据集

# Alternative Method Using Loops Proposed by Linh Tran
# Recursive adstock over the whole column, ignoring Region:
#   adstocked[1] = advertising[1]
#   adstocked[i] = advertising[i] + adstock_rate * adstocked[i - 1]
# Start from a copy of the raw values so row 1 already holds its final value.
advertising_dataset$adstocked_advertising <- advertising_dataset$advertising

# seq_len(n)[-1] is 2..n and is empty when n < 2, unlike 2:length(x), which
# counts down (2, 1) when there are fewer than two rows.
for (i in seq_len(nrow(advertising_dataset))[-1]) {
  advertising_dataset$adstocked_advertising[i] <-
    advertising_dataset$advertising[i] +
    adstock_rate * advertising_dataset$adstocked_advertising[i - 1]
}

我遇到的问题是，我的数据集是按区域分隔的。我需要应用上述函数，包括按区域获取第一个值

有没有一种方法可以通过dplyr包实现这一点

我知道这是错误的，但可能是这样的：

library(dplyr)
# NOTE: non-working sketch from the question (the author states it is wrong).
# As written it does not parse: summarise() is handed several statements and a
# for loop with no separating commas, and every statement indexes the full
# advertising_dataset rather than the rows of the current Region group.
separated_by_region<- advertising_dataset %>%
group_by(Region) %>%
summarise(
advertising_dataset$adstocked_advertising = 
numeric(length(advertising_dataset$advertising))
advertising_dataset$adstocked_advertising[1] = 
advertising_dataset$advertising[1]

for(i in 2:length(advertising_dataset$advertising)){
  advertising_dataset$adstocked_advertising[i] = 
advertising_dataset$advertising[i] + adstock_rate * 
advertising_dataset$adstocked_advertising[i-1]})

我不确定这是否就是你所说的“使用dplyr”，也不确定有没有比do()更好的办法，但你可以把上面的循环定义成一个函数，然后在group_by(Region)之后用do()逐组调用：

#' Add a recursively adstocked advertising column to a data frame.
#'
#' The first row keeps its own advertising value; every later row is
#' `advertising[i] + rate * adstocked_advertising[i - 1]`.
#'
#' @param df_ Data frame with a numeric `advertising` column; rows are
#'   processed in their existing order.
#' @param rate Multiplier applied to the previous adstocked value. Defaults
#'   to the `adstock_rate` variable visible from where `foo` is defined, which
#'   is what the one-argument version of this function read directly.
#' @return `df_` with an added (or overwritten) `adstocked_advertising` column.
foo <- function(df_, rate = adstock_rate) {
  stopifnot("advertising" %in% names(df_))

  # Run the recursion on plain vectors and write the column back once,
  # rather than assigning into the data frame on every iteration.
  spend <- df_$advertising
  adstocked <- spend

  # seq_along(x)[-1] is 2..n and empty when n < 2, so one-row (or empty)
  # inputs are returned unchanged; 2:nrow(df_) would count down (2, 1) and fail.
  for (i in seq_along(spend)[-1]) {
    adstocked[i] <- spend[i] + rate * adstocked[i - 1]
  }

  df_$adstocked_advertising <- adstocked
  df_
}

当然，数值还需要再核对一下，但至少对区域500这一组，结果与你给出的输出一致

编辑：

根据评论，下面是滞后值可调的版本

#' Add a recursively adstocked advertising column with an adjustable lag.
#'
#' The first `lag_val` rows keep their own advertising value; every later row
#' is `advertising[i] + rate * adstocked_advertising[i - lag_val]`.
#'
#' @param df_ Data frame with a numeric `advertising` column; rows are
#'   processed in their existing order.
#' @param lag_val How many rows back the carried-over value is taken from.
#'   A single non-negative number; defaults to 1.
#' @param rate Multiplier applied to the lagged adstocked value. Defaults to
#'   the `adstock_rate` variable visible from where `foo` is defined, which is
#'   what the earlier version of this function read directly.
#' @return `df_` with an added (or overwritten) `adstocked_advertising` column.
foo <- function(df_, lag_val = 1, rate = adstock_rate) {
  stopifnot(
    "advertising" %in% names(df_),
    is.numeric(lag_val),
    length(lag_val) == 1L,
    !is.na(lag_val),
    lag_val >= 0
  )

  # Run the recursion on plain vectors and write the column back once,
  # rather than assigning into the data frame on every iteration.
  spend <- df_$advertising
  adstocked <- spend
  n <- length(spend)

  # Only iterate when there are rows past the lag: (1 + lag_val):n would count
  # down and index out of range for inputs with n <= lag_val rows.
  if (n > lag_val) {
    for (i in (1 + lag_val):n) {
      adstocked[i] <- spend[i] + rate * adstocked[i - lag_val]
    }
  }

  df_$adstocked_advertising <- adstocked
  df_
}

我认为这正是你想要的，但同样值得确认。希望它能帮助你回答另一个相关的问题，但我猜它需要一些修改才能更灵活

干杯

-Luke

您能给出一个示例或模型，说明您希望输出的样子吗？——刚刚提供了，谢谢你，卡米尔。您的输出应该与上面显示的输出相同，但从区域501开始，数字应该与我的输出不同。——如果感兴趣，请看这里的后续问题。——如果感兴趣，我还提了一个后续问题。——@Luke C 是否有办法让它更稳健，使滞后可以取1以外的值？当我把滞后设为2或3时，函数会失败。如果你愿意，我也可以另外发一个问题，怎样方便都行。——@NickKnauer 是的，这仍然是可行的，但需要考虑到这样一个事实：对于数组开头的元素，滞后被“跳过”了。我看看能否尽快做出修改。——好的，谢谢你；如果你愿意的话，我还可以另外发一个问题

#' Add a recursively adstocked advertising column to a data frame.
#'
#' The first row keeps its own advertising value; every later row is
#' `advertising[i] + rate * adstocked_advertising[i - 1]`.
#'
#' @param df_ Data frame with a numeric `advertising` column; rows are
#'   processed in their existing order.
#' @param rate Multiplier applied to the previous adstocked value. Defaults
#'   to the `adstock_rate` variable visible from where `foo` is defined, which
#'   is what the one-argument version of this function read directly.
#' @return `df_` with an added (or overwritten) `adstocked_advertising` column.
foo <- function(df_, rate = adstock_rate) {
  stopifnot("advertising" %in% names(df_))

  # Run the recursion on plain vectors and write the column back once,
  # rather than assigning into the data frame on every iteration.
  spend <- df_$advertising
  adstocked <- spend

  # seq_along(x)[-1] is 2..n and empty when n < 2, so one-row (or empty)
  # inputs are returned unchanged; 2:nrow(df_) would count down (2, 1) and fail.
  for (i in seq_along(spend)[-1]) {
    adstocked[i] <- spend[i] + rate * adstocked[i - 1]
  }

  df_$adstocked_advertising <- adstocked
  df_
}

library(dplyr)

# Run foo() on each Region's rows separately, then convert the grouped
# result back to a plain data.frame.
adv_2 <- advertising_dataset %>%
  group_by(Region) %>%
  do(foo(data.frame(.))) %>%
  data.frame()


> adv_2[1:10,]
   Region advertising adstocked_advertising
1     500     117.913             117.91300
2     500     120.112             179.06850
3     500     125.828             215.36225
4     500     115.354             223.03512
5     500     177.090             288.60756
6     500     141.647             285.95078
7     500     137.892             280.86739
8     500       0.000             140.43370
9     500       0.000              70.21685
10    500       0.000              35.10842

> adv_2[50:60,]
   Region advertising adstocked_advertising
50    500       0.000              0.401496
51    500       0.000              0.200748
52    500       0.000              0.100374
53    501     134.913            134.913000
54    501     123.112            190.568500
55    501     178.828            274.112250
56    501     112.354            249.410125
57    501     100.090            224.795063
58    501     167.647            280.044531
59    501     177.892            317.914266
60    501       0.000            158.957133

#' Add a recursively adstocked advertising column with an adjustable lag.
#'
#' The first `lag_val` rows keep their own advertising value; every later row
#' is `advertising[i] + rate * adstocked_advertising[i - lag_val]`.
#'
#' @param df_ Data frame with a numeric `advertising` column; rows are
#'   processed in their existing order.
#' @param lag_val How many rows back the carried-over value is taken from.
#'   A single non-negative number; defaults to 1.
#' @param rate Multiplier applied to the lagged adstocked value. Defaults to
#'   the `adstock_rate` variable visible from where `foo` is defined, which is
#'   what the earlier version of this function read directly.
#' @return `df_` with an added (or overwritten) `adstocked_advertising` column.
foo <- function(df_, lag_val = 1, rate = adstock_rate) {
  stopifnot(
    "advertising" %in% names(df_),
    is.numeric(lag_val),
    length(lag_val) == 1L,
    !is.na(lag_val),
    lag_val >= 0
  )

  # Run the recursion on plain vectors and write the column back once,
  # rather than assigning into the data frame on every iteration.
  spend <- df_$advertising
  adstocked <- spend
  n <- length(spend)

  # Only iterate when there are rows past the lag: (1 + lag_val):n would count
  # down and index out of range for inputs with n <= lag_val rows.
  if (n > lag_val) {
    for (i in (1 + lag_val):n) {
      adstocked[i] <- spend[i] + rate * adstocked[i - lag_val]
    }
  }

  df_$adstocked_advertising <- adstocked
  df_
}

# Run foo() with a lag of 3 on each Region's rows separately, then convert
# the grouped result back to a plain data.frame.
adv_2 <- advertising_dataset %>%
  group_by(Region) %>%
  do(foo(data.frame(.), lag_val = 3)) %>%
  data.frame()

> adv_2
    Region advertising adstocked_advertising
1      500     117.913            117.913000
2      500     120.112            120.112000
3      500     125.828            125.828000
4      500     115.354            174.310500
5      500     177.090            237.146000
6      500     141.647            204.561000
7      500     137.892            225.047250
8      500       0.000            118.573000
9      500       0.000            102.280500
10     500       0.000            112.523625