Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/file/3.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R中带复位的两个求和条件_R_Reset_Cumsum - Fatal编程技术网

R中带复位的两个求和条件

R中带复位的两个求和条件,r,reset,cumsum,R,Reset,Cumsum,我有一个数据帧,它有两种类型的值。我想把它分成几组。 这两个小组预计将提供两个条件。每一组应该是: 条件1:w的最大累积值这里是一个hack,通过重复子集和绑定完成。因此,对于大型数据集,这将非常缓慢。这将整个数据集作为输入 library(dplyr) cumsumdf <- function(df){ cumsum_75 <- function(x) {cumsum(x) %/% 76} cumsum_15 <- function(x) {cumsum(x) %

我有一个数据帧,它有两种类型的值。我想把它分成几组。 这两个小组预计将提供两个条件。每一组应该是:


  • 条件1:w的最大累积值这里是一个hack,通过重复子集和绑定完成。因此,对于大型数据集,这将非常缓慢。这将整个数据集作为输入

    library(dplyr)
    
    cumsumdf <- function(df){
      cumsum_75 <- function(x) {cumsum(x) %/% 76}
      cumsum_15 <- function(x) {cumsum(x) %/% 16}  
      cumsum_w75 <- function(x) {cumsum(x) %% 76}
      cumsum_n15 <- function(x) {cumsum(x) %% 16}
    
      m <- nrow(df)
    
      df$grp <- 0
      df <- df %>%
        group_by(grp) %>%
        mutate(cumsum_w = numeric(m), cumsum_n = numeric(m))
    
      n = 0
      df2 <- df[0,]
      while(nrow(df) >0 ){
        df$cumsum_w = cumsum_75(df$w)
        df$cumsum_n = cumsum_15(df$n)
    
        n <- n + 1
        df1 <- df[df$cumsum_n == 0 & df$cumsum_w == 0,]
        df <- df[df$cumsum_n != 0 | df$cumsum_w != 0,]
        df1$grp <- n  
        df1 <- df1 %>% group_by(grp) %>%
          mutate(cumsum_w = cumsum_w75(w), cumsum_n = cumsum_n15(n))
        df2 <- rbind(df2,df1)
      }
      return(df2)
    }
    
    cumsumdf(df)
    
    库(dplyr)
    
    cumsumdf是否始终为1?还有为什么会有第五组?到那时为止,你的
    w
    是45,
    n
    是6。那么为什么要改变组呢?是的,n总是一。最后一个问题是我的错误。它的组别是4。我更新了它。谢谢。
    cumsum\u n\u和\u reset
    有一些复制粘贴错误。如果(…
    360B排需要30分钟,那么
    cumsum\u w
    中仍然有
    w cumsum_w n cumsum_n group
    2     2     1   1   1
    1     3     1   2   1
    32    35    1   3   1
    5     40    1   4   1
    1     41    1   5   1
    1     42    1   6   1
    12    54    1   7   1
    1     55    1   8   1
    2     57    1   9   1
    32    32    1   2   2
    32    64    1   3   2
    32    32    1   1   3
    1     33    1   2   3
    3     36    1   3   3
    2     38    1   4   3
    12    50    1   5   3
    1     51    1   6   3
    1     52    1   7   3
    1     53    1   8   3
    1     54    1   9   3
    1     55    1   10  3
    1     56    1   11  3
    5     61    1   12  3
    3     64    1   13  3
    5     69    1   14  3
    1     70    1   15  3
    1     1     1   1   4
    1     2     1   2   4
    2     4     1   3   4
    7     11    1   4   4
    2     13    1   5   4
    32    45    1   6   4
    1     46    1   7   4
    
    library(BBmisc)
    chunk(df, chunk.size = 75, n.chunks = 15)
    Error in chunk(df, chunk.size = 75, n.chunks = 15) : 
      You must provide exactly one of 'chunk.size', 'n.chunks' or 'props'
    
    cumsum_with_reset_group <- function(w, n, threshold_w, threshold_n) {
      cumsum_w <- 0
      cumsum_n <- 0
      group <- 1
      result <- numeric()
    
      for (i in 1:length(w)) {
        cumsum_w <- cumsum_w + w[i]
        cumsum_n <- cumsum_n + n[i]
    
        if (cumsum_w > threshold_w | cumsum_n > threshold_n) {
          group <- group + 1
          cumsum_w <- cumsum_w + w[i]
          cumsum_n <- cumsum_n + n[i]
        }
    
        result = c(result, group)
    
      }
    
      return (result)
    }
    
    # cumsum with reset
    cumsum_w_with_reset <- function(w, threshold_w) {
      cumsum_w <- 0
      group <- 1
      result <- numeric()
    
      for (i in 1:length(w)) {
        cumsum_w <- cumsum_w + w[i]
    
        if (cumsum_w > threshold_w) {
          group <- group + 1
          cumsum_w <- w[i]
        }
    
        result = c(result, cumsum_w)
    
      }
    
      return (result)
    }
    
    
    # cumsum with reset
    cumsum_n_with_reset <- function(n, threshold_n) {
      cumsum_n <- 0
      group <- 1
      result <- numeric()
    
      for (i in 1:length(n)) {
        cumsum_n <- cumsum_n + n[i]
    
        if (cumsum_n > threshold_n | cumsum_w > threshold_w) {
          group <- group + 1
          cumsum_n <- n[i]
        }
    
        result = c(result, cumsum_n)
    
      }
    
      return (result)
    }
    # use functions above as window functions inside mutate statement
    y<-df %>% group_by() %>%
      mutate(
        cumsum_w = cumsum_w_with_reset(w, 75),
        cumsum_n  =cumsum_n_with_reset(n, 15),
        group = cumsum_with_reset_group(w, n, 75, 15)
      ) %>% 
      ungroup()
    
        Error in mutate_impl(.data, dots) : 
          Evaluation error: object 'cumsum_w' not found
    
    library(dplyr)
    
    cumsumdf <- function(df){
      cumsum_75 <- function(x) {cumsum(x) %/% 76}
      cumsum_15 <- function(x) {cumsum(x) %/% 16}  
      cumsum_w75 <- function(x) {cumsum(x) %% 76}
      cumsum_n15 <- function(x) {cumsum(x) %% 16}
    
      m <- nrow(df)
    
      df$grp <- 0
      df <- df %>%
        group_by(grp) %>%
        mutate(cumsum_w = numeric(m), cumsum_n = numeric(m))
    
      n = 0
      df2 <- df[0,]
      while(nrow(df) >0 ){
        df$cumsum_w = cumsum_75(df$w)
        df$cumsum_n = cumsum_15(df$n)
    
        n <- n + 1
        df1 <- df[df$cumsum_n == 0 & df$cumsum_w == 0,]
        df <- df[df$cumsum_n != 0 | df$cumsum_w != 0,]
        df1$grp <- n  
        df1 <- df1 %>% group_by(grp) %>%
          mutate(cumsum_w = cumsum_w75(w), cumsum_n = cumsum_n15(n))
        df2 <- rbind(df2,df1)
      }
      return(df2)
    }
    
    cumsumdf(df)