Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/81.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何将这个双索引for循环转换为一个有效的循环?_R_Loops_For Loop_Lapply_Mapply - Fatal编程技术网

如何将这个双索引for循环转换为一个有效的循环?

如何将这个双索引for循环转换为一个有效的循环?,r,loops,for-loop,lapply,mapply,R,Loops,For Loop,Lapply,Mapply,我对R和这里都是新手。我经常使用这个网站,但这次我陷入了一个无法解决的问题 我有这样一个数据框: df <- data.frame(id = c("A","A","B","B","B","C","D","D","D","D"), elapsed = c(10,15,5,7,20,4,3,4,15,16), val = c(5,8,1,2,4,6,7,9,8,3), acum = c(0,0,0,0,0,0,0,0

我对R和这里都是新手。我经常使用这个网站,但这次我陷入了一个无法解决的问题

我有这样一个数据框:

df <- data.frame(id = c("A","A","B","B","B","C","D","D","D","D"),
             elapsed = c(10,15,5,7,20,4,3,4,15,16),
             val = c(5,8,1,2,4,6,7,9,8,3),
             acum = c(0,0,0,0,0,0,0,0,0,0))

df考虑通过
id
组创建滞后变量,然后运行
ifelse()

#滞后组变量

df$lastpeased考虑通过
id
组创建滞后变量,然后运行
ifelse()

#滞后组变量

df$lastpeassed您可以根据每个id中的
eassed>5
标准创建一个新的分组变量,然后使用您喜欢的聚合工具

df <- data.frame(id = c("A","A","B","B","B","C","D","D","D","D"),
                 elapsed = c(10,15,5,7,20,4,3,4,15,16),
                 val = c(5,8,1,2,4,6,7,9,8,3),
                 acum = c(0,0,0,0,0,0,0,0,0,0))

within(df, {
  grp <- paste(id, ave(elapsed, id, FUN = function(x)
    cumsum(c(FALSE, diff(x) > 5))))
  acum <- ave(val, grp, FUN = cumsum)
})

#    id elapsed val acum grp
# 1   A      10   5    5 A.0
# 2   A      15   8   13 A.0
# 3   B       5   1    1 B.0
# 4   B       7   2    3 B.0
# 5   B      20   4    4 B.1
# 6   C       4   6    6 C.0
# 7   D       3   7    7 D.0
# 8   D       4   9   16 D.0
# 9   D      15   8    8 D.1
# 10  D      16   3   11 D.1

df您可以根据每个id中的
appead>5
标准创建一个新的分组变量,然后使用您喜欢的聚合工具

df <- data.frame(id = c("A","A","B","B","B","C","D","D","D","D"),
                 elapsed = c(10,15,5,7,20,4,3,4,15,16),
                 val = c(5,8,1,2,4,6,7,9,8,3),
                 acum = c(0,0,0,0,0,0,0,0,0,0))

within(df, {
  grp <- paste(id, ave(elapsed, id, FUN = function(x)
    cumsum(c(FALSE, diff(x) > 5))))
  acum <- ave(val, grp, FUN = cumsum)
})

#    id elapsed val acum grp
# 1   A      10   5    5 A.0
# 2   A      15   8   13 A.0
# 3   B       5   1    1 B.0
# 4   B       7   2    3 B.0
# 5   B      20   4    4 B.1
# 6   C       4   6    6 C.0
# 7   D       3   7    7 D.0
# 8   D       4   9   16 D.0
# 9   D      15   8    8 D.1
# 10  D      16   3   11 D.1

df带有
dplyr
,可能
df%>%groupby(id)%%>%mutate(acum=val+(lag(val,default=0)*ifelse(lag(persed,default=0)>=(persed-5),1,0))
几乎!这是一个很好的方法。但是在这里,它只考虑第一个滞后,并且可以在符合标准的高级滞后中出现行,并且应该将之相加。@ Aistista-请将您的评论作为可行的答案。但是@ SffcCI,您只提到一个滞后返回一行。您没有提到这些其他滞后。第一个值应该是0还是5?对于
dplyr
,可能
df%>%groupby(id)%%>%变异(acum=val+(滞后(val,默认值=0)*如果其他(滞后(经过,默认值=0)>=(经过-5),1,0))
!这是一个很好的方法。但是在这里,它只考虑第一个滞后,并且可以在符合标准的高级滞后中出现行,并且应该将之相加。@ Aistista-请将您的评论作为可行的答案。但是@ SffcCI,您只提到一个滞后返回一行。您没有提到这些其他滞后。第一个值应该是0还是5?@Khashaa,或者您可以简单地指出您可以使用
=
vs
。或使用任何其他滞后时间。我希望你把这个“失败”的案例写在日记里。这是多么伟大的胜利啊you@Khashaa或者你可以简单地指出,你可以使用
=
vs
。或使用任何其他滞后时间。我希望你把这个“失败”的案例写在日记里。你赢了,我想这是赢家。我试图把这个插入到for循环中,以便考虑高级顺序滞后,但是它似乎计算不可行(15K行运行超过一个小时)。然后,我将使用“次优”方法,并使用您的解决方案添加“last acumtest”而不是“lastvalue”。因此,如果第一个滞后的“经过时间”不大于5,我将该滞后的acumtest计算为当前值。谢谢你的帮助!伟大的很高兴你找到了解决办法。但是,请考虑“RAWR的DF解决方案和方法之间有趣的基准。我认为这是赢家。”我试图把这个插入到for循环中,以便考虑高级顺序滞后,但是它似乎计算不可行(15K行运行超过一个小时)。然后,我将使用“次优”方法,并使用您的解决方案添加“last acumtest”而不是“lastvalue”。因此,如果第一个滞后的“经过时间”不大于5,我将该滞后的acumtest计算为当前值。谢谢你的帮助!伟大的很高兴你找到了解决办法。但是,请考虑@ RAWR的DF解决方案和方法之间有趣的基准。
df <- data.frame(id = c("A","A","B","B","B","C","D","D","D","D"),
                 elapsed = c(10,15,5,7,20,4,3,4,15,16),
                 val = c(5,8,1,2,4,6,7,9,8,3),
                 acum = c(0,0,0,0,0,0,0,0,0,0))

within(df, {
  grp <- paste(id, ave(elapsed, id, FUN = function(x)
    cumsum(c(FALSE, diff(x) > 5))))
  acum <- ave(val, grp, FUN = cumsum)
})

#    id elapsed val acum grp
# 1   A      10   5    5 A.0
# 2   A      15   8   13 A.0
# 3   B       5   1    1 B.0
# 4   B       7   2    3 B.0
# 5   B      20   4    4 B.1
# 6   C       4   6    6 C.0
# 7   D       3   7    7 D.0
# 8   D       4   9   16 D.0
# 9   D      15   8    8 D.1
# 10  D      16   3   11 D.1
library('dplyr')
library('data.table')

rawr <- function(df) {
  df <- within(df, {
    grp <- paste(id, ave(elapsed, id, FUN = function(x)
      cumsum(c(FALSE, diff(x) > 5))))
    acum <- ave(val, grp, FUN = cumsum)
    })
  df
}

## shitty data table version, I'm sure it's wrong
## rest assured someone will point it out
rawr_dt <- function(df) {
  dt <- as.data.table(df)
  dt[, grp := cumsum(c(FALSE, diff(elapsed) > 5)), by = 'id'][, acum := cumsum(val), c('id', 'grp')]
  dt[, grp := NULL]
  dt
}

sfucci <- function(df) {
  for (i in 2:nrow(df)) {
    for(l in 0:nrow(df)) {
      if(l<i) {
        if (df[i,"id"]==df[i-l,"id"]) 
        {if (df[i,"elapsed"]-df[i-l,"elapsed"]<=5)
        {df$acum[i] <- df$acum[i]+df[i-l,"val"]}
        }
      } 
    }
  }
  df
}

Parfait <- function(df) {
  df$lastelapsed <- sapply(1:nrow(df), function(i) sum((df$id[i-1] == df$id[i]) * df$elapsed[i-1]))
  df$lastvalue <- sapply(1:nrow(df), function(i) sum((df$id[i-1] == df$id[i]) * df$val[i-1]))
  df$acumtest <- ifelse((df$elapsed - df$lastelapsed) <= 5, df$val + df$lastvalue, df$val)
  df
}
alistaire <- function(df) {
  df %>%
    group_by(id) %>%
    mutate(acum = val + (lag(val, default = 0) *
                           ifelse(lag(elapsed, default = 0) >= (elapsed - 5), 1, 0)))
}

acc <- rawr(df)$acum
identical(acc, rawr_dt(df)$acum)
# [1] TRUE
# identical(acc, sfucci(df)$acum)
identical(acc, Parfait(df)$acumtest)
# [1] TRUE
identical(acc, alistaire(df)$acum)
# [1] TRUE

library('microbenchmark')
microbenchmark(sfucci(df), rawr(df), rawr_dt(df), Parfait(df), alistaire(df), unit = 'relative')

# Unit: relative
#           expr       min       lq      mean   median        uq      max neval   cld
#     sfucci(df) 11.596961 9.990698 10.082249 9.952529 10.220162 5.603044   100     e
#       rawr(df)  1.000000 1.000000  1.000000 1.000000  1.000000 1.000000   100 a    
#    rawr_dt(df)  3.771649 3.483610  3.472160 3.436365  3.531379 1.945339   100    d 
#    Parfait(df)  3.392426 2.980234  3.008432 2.902410  3.006896 2.361832   100   c  
#  alistaire(df)  2.140693 2.042809  2.080444 2.028151  2.029965 2.638486   100  b