R中带复位的两个求和条件
我有一个数据帧,它有两种类型的值。我想把它分成几组。 这两个小组预计将提供两个条件。每一组应该是:R中带复位的两个求和条件,r,reset,cumsum,R,Reset,Cumsum,我有一个数据帧,它有两种类型的值。我想把它分成几组。 这两个小组预计将提供两个条件。每一组应该是: 条件1:w的最大累积值这里是一个hack,通过重复子集和绑定完成。因此,对于大型数据集,这将非常缓慢。这将整个数据集作为输入 library(dplyr) cumsumdf <- function(df){ cumsum_75 <- function(x) {cumsum(x) %/% 76} cumsum_15 <- function(x) {cumsum(x) %
- 条件1:w的最大累积值这里是一个hack,通过重复子集和绑定完成。因此,对于大型数据集,这将非常缓慢。这将整个数据集作为输入
library(dplyr) cumsumdf <- function(df){ cumsum_75 <- function(x) {cumsum(x) %/% 76} cumsum_15 <- function(x) {cumsum(x) %/% 16} cumsum_w75 <- function(x) {cumsum(x) %% 76} cumsum_n15 <- function(x) {cumsum(x) %% 16} m <- nrow(df) df$grp <- 0 df <- df %>% group_by(grp) %>% mutate(cumsum_w = numeric(m), cumsum_n = numeric(m)) n = 0 df2 <- df[0,] while(nrow(df) >0 ){ df$cumsum_w = cumsum_75(df$w) df$cumsum_n = cumsum_15(df$n) n <- n + 1 df1 <- df[df$cumsum_n == 0 & df$cumsum_w == 0,] df <- df[df$cumsum_n != 0 | df$cumsum_w != 0,] df1$grp <- n df1 <- df1 %>% group_by(grp) %>% mutate(cumsum_w = cumsum_w75(w), cumsum_n = cumsum_n15(n)) df2 <- rbind(df2,df1) } return(df2) } cumsumdf(df)
360B排需要30分钟,那么库(dplyr)
cumsumdf是否始终为1?还有为什么会有第五组?到那时为止,你的
是45,w
是6。那么为什么要改变组呢?是的,n总是一。最后一个问题是我的错误。它的组别是4。我更新了它。谢谢。n
有一些复制粘贴错误。如果(…cumsum\u n\u和\u reset
中仍然有cumsum\u w
!
w cumsum_w n cumsum_n group 2 2 1 1 1 1 3 1 2 1 32 35 1 3 1 5 40 1 4 1 1 41 1 5 1 1 42 1 6 1 12 54 1 7 1 1 55 1 8 1 2 57 1 9 1 32 32 1 2 2 32 64 1 3 2 32 32 1 1 3 1 33 1 2 3 3 36 1 3 3 2 38 1 4 3 12 50 1 5 3 1 51 1 6 3 1 52 1 7 3 1 53 1 8 3 1 54 1 9 3 1 55 1 10 3 1 56 1 11 3 5 61 1 12 3 3 64 1 13 3 5 69 1 14 3 1 70 1 15 3 1 1 1 1 4 1 2 1 2 4 2 4 1 3 4 7 11 1 4 4 2 13 1 5 4 32 45 1 6 4 1 46 1 7 4
library(BBmisc) chunk(df, chunk.size = 75, n.chunks = 15) Error in chunk(df, chunk.size = 75, n.chunks = 15) : You must provide exactly one of 'chunk.size', 'n.chunks' or 'props'
cumsum_with_reset_group <- function(w, n, threshold_w, threshold_n) { cumsum_w <- 0 cumsum_n <- 0 group <- 1 result <- numeric() for (i in 1:length(w)) { cumsum_w <- cumsum_w + w[i] cumsum_n <- cumsum_n + n[i] if (cumsum_w > threshold_w | cumsum_n > threshold_n) { group <- group + 1 cumsum_w <- cumsum_w + w[i] cumsum_n <- cumsum_n + n[i] } result = c(result, group) } return (result) } # cumsum with reset cumsum_w_with_reset <- function(w, threshold_w) { cumsum_w <- 0 group <- 1 result <- numeric() for (i in 1:length(w)) { cumsum_w <- cumsum_w + w[i] if (cumsum_w > threshold_w) { group <- group + 1 cumsum_w <- w[i] } result = c(result, cumsum_w) } return (result) } # cumsum with reset cumsum_n_with_reset <- function(n, threshold_n) { cumsum_n <- 0 group <- 1 result <- numeric() for (i in 1:length(n)) { cumsum_n <- cumsum_n + n[i] if (cumsum_n > threshold_n | cumsum_w > threshold_w) { group <- group + 1 cumsum_n <- n[i] } result = c(result, cumsum_n) } return (result) } # use functions above as window functions inside mutate statement y<-df %>% group_by() %>% mutate( cumsum_w = cumsum_w_with_reset(w, 75), cumsum_n =cumsum_n_with_reset(n, 15), group = cumsum_with_reset_group(w, n, 75, 15) ) %>% ungroup() Error in mutate_impl(.data, dots) : Evaluation error: object 'cumsum_w' not found
library(dplyr) cumsumdf <- function(df){ cumsum_75 <- function(x) {cumsum(x) %/% 76} cumsum_15 <- function(x) {cumsum(x) %/% 16} cumsum_w75 <- function(x) {cumsum(x) %% 76} cumsum_n15 <- function(x) {cumsum(x) %% 16} m <- nrow(df) df$grp <- 0 df <- df %>% group_by(grp) %>% mutate(cumsum_w = numeric(m), cumsum_n = numeric(m)) n = 0 df2 <- df[0,] while(nrow(df) >0 ){ df$cumsum_w = cumsum_75(df$w) df$cumsum_n = cumsum_15(df$n) n <- n + 1 df1 <- df[df$cumsum_n == 0 & df$cumsum_w == 0,] df <- df[df$cumsum_n != 0 | df$cumsum_w != 0,] df1$grp <- n df1 <- df1 %>% group_by(grp) %>% mutate(cumsum_w = cumsum_w75(w), cumsum_n = cumsum_n15(n)) df2 <- rbind(df2,df1) } return(df2) } cumsumdf(df)