R 在数据帧中替换组内不同数量的行

R 在数据帧中替换组内不同数量的行,r,replace,group-by,R,Replace,Group By,我有以下数据框: df <- data.frame(id = c(rep(1, 7), rep(2, 12), rep(3, 9)), val = runif(28), num_adjustments = c(rep(5, 7), rep(4, 12), rep(2, 9)), adj = c(rep(0.5,28))) df$val_adj <- df$val 然而,这似乎将num_adjustments计算到该列的第一行是什么(5,在我的例子中),然后将id==1的前5个值

我有以下数据框:

# Example data: three id groups (7, 12 and 9 rows) with random values, a
# per-group adjustment count and a constant adjustment of 0.5.
df <- data.frame(
  id = rep(c(1, 2, 3), times = c(7, 12, 9)),
  val = runif(28),
  num_adjustments = rep(c(5, 4, 2), times = c(7, 12, 9)),
  adj = rep(0.5, 28)
)

# Start the adjusted column as an unmodified copy of val.
df$val_adj <- df$val

然而,这似乎把
num_adjustments
取成了该列第一行的值(在我的例子中为
5
),然后把
id==1
的前 5 个值重复应用到所有其他
id
上


我非常感谢你对这件事的任何建议。提前多谢

我们可以按
id
分组,当每个组内的当前
row_number()
小于等于
num_adjustments
时,加上
val + adj

library(dplyr)

# Within each id, add adj to the first num_adjustments rows and leave the
# remaining rows unchanged. row_number() restarts at 1 inside every group, so
# the comparison uses each group's own num_adjustments value.
df %>%
  group_by(id) %>%
  mutate(val_adj = ifelse(row_number() <= num_adjustments, val + adj, val)) %>%
  ungroup()  # drop the grouping so later steps do not silently run per id

#      id     val num_adjustments   adj val_adj
#    <dbl>   <dbl>           <dbl> <dbl>   <dbl>
# 1     1 0.114                 5   0.5 0.614  
# 2     1 0.622                 5   0.5 1.12   
# 3     1 0.609                 5   0.5 1.11   
# 4     1 0.623                 5   0.5 1.12   
# 5     1 0.861                 5   0.5 1.36   
# 6     1 0.640                 5   0.5 0.640  
# 7     1 0.00950               5   0.5 0.00950
# 8     2 0.233                 4   0.5 0.733  
# 9     2 0.666                 4   0.5 1.17   
#10     2 0.514                 4   0.5 1.01   
# … with 18 more rows
library(dplyr)
df %>%
  group_by(id) %>%
  mutate(val_adj = ifelse(row_number() <= num_adjustments, val + adj, val))

您可以使用
ave()
获得逻辑向量
x
,指示
id
的行号不超过
num_adjustments
x
的位置也可以在LHS上使用

# Logical mask: TRUE where a row's position within its id group is at most
# that row's num_adjustments. ave() only uses val_adj as a carrier here;
# seq_along replaces it with the within-group row number.
x <- with(df, ave(val_adj, id, FUN = seq_along) <= num_adjustments)

# Overwrite only the masked rows with val + adj.
df$val_adj[x] <- with(df, val + adj)[x]

head(df, 10)
#    id adj num_adjustments          val     val_adj
# 1   1 0.5               1  0.005771755  0.50577176
# 2   1 0.5               1 -0.350911424 -0.35091142
# 3   1 0.5               1 -1.736723720 -1.73672372
# 4   1 0.5               1  0.046552388  0.04655239
# 5   1 0.5               1 -1.121484596 -1.12148460
# 6   2 0.5               1 -0.781560056 -0.28156006
# 7   2 0.5               1  0.849904248  0.84990425
# 8   3 0.5               2  1.763790903  2.26379090
# 9   3 0.5               2  0.845641647  1.34564165
# 10  3 0.5               2 -0.544836427 -0.54483643
基准数据
set.seed(42)

n <- 1e3

我们可以使用
data.table

library(data.table)

# Convert df to a data.table by reference and start val_adj as a copy of val.
setDT(df)
df[, val_adj := val]

# .I holds row indices into df; within each id group keep those whose
# position is at most the group's num_adjustments.
i1 <- df[, .I[seq_len(.N) <= num_adjustments], by = id]$V1

# Add adj to the selected rows only, updating by reference.
df[i1, val_adj := val + adj]
library(data.table)
setDT(df)[, val_adj := val]
i1 <- df[, .I[seq_len(.N) <= num_adjustments], id]$V1
df[i1, val_adj := val + adj]
# Logical mask: TRUE where a row's position within its id group is at most
# that row's num_adjustments. ave() only uses val_adj as a carrier here;
# seq_along replaces it with the within-group row number.
x <- with(df, ave(val_adj, id, FUN = seq_along) <= num_adjustments)

# Overwrite only the masked rows with val + adj.
df$val_adj[x] <- with(df, val + adj)[x]

head(df, 10)
#    id adj num_adjustments          val     val_adj
# 1   1 0.5               1  0.005771755  0.50577176
# 2   1 0.5               1 -0.350911424 -0.35091142
# 3   1 0.5               1 -1.736723720 -1.73672372
# 4   1 0.5               1  0.046552388  0.04655239
# 5   1 0.5               1 -1.121484596 -1.12148460
# 6   2 0.5               1 -0.781560056 -0.28156006
# 7   2 0.5               1  0.849904248  0.84990425
# 8   3 0.5               2  1.763790903  2.26379090
# 9   3 0.5               2  0.845641647  1.34564165
# 10  3 0.5               2 -0.544836427 -0.54483643
# Unit: milliseconds
#  expr      min       lq     mean   median       uq      max neval cld
# dplyr 45.41649 46.09873 47.46352 46.78096 48.48704 50.19312     3   a
#   ave 31.86809 34.95828 38.30649 38.04847 41.52569 45.00292     3   a
# Benchmark data: one small data frame per id, stacked into a single frame.
set.seed(42)
n <- 1e3
df <- do.call(rbind, lapply(seq_len(n), function(i) {
  # Draw one adjustment count for this id, then repeat it for a randomly
  # drawn number of rows; id and adj are recycled to that length.
  # NOTE(review): for i = 1, 1:(i - 1) is c(1, 0), so num_adjustments can be 0.
  data.frame(
    id = i, adj = 0.5,
    num_adjustments = rep(sample(1:(i - 1), 1), sample(i:9, 1))
  )
}))
# Fixed: the row count must come from df itself; the original referenced an
# undefined object df2.
df <- transform(df, val = rnorm(nrow(df)))
df$val_adj <- df$val
library(data.table)

# Convert df to a data.table by reference and start val_adj as a copy of val.
setDT(df)
df[, val_adj := val]

# .I holds row indices into df; within each id group keep those whose
# position is at most the group's num_adjustments.
i1 <- df[, .I[seq_len(.N) <= num_adjustments], by = id]$V1

# Add adj to the selected rows only, updating by reference.
df[i1, val_adj := val + adj]
# Reproducible example data: three id groups (7, 12 and 9 rows) with seeded
# random values, a per-group adjustment count and a constant adjustment of 0.5.
set.seed(1234)
df <- data.frame(
  id = rep(c(1, 2, 3), times = c(7, 12, 9)),
  val = runif(28),
  num_adjustments = rep(c(5, 4, 2), times = c(7, 12, 9)),
  adj = rep(0.5, 28)
)

# Start the adjusted column as an unmodified copy of val.
df$val_adj <- df$val