R 在数据帧中替换组内不同数量的行
我有以下数据框:R 在数据帧中替换组内不同数量的行,r,replace,group-by,R,Replace,Group By,我有以下数据框: df <- data.frame(id = c(rep(1, 7), rep(2, 12), rep(3, 9)), val = runif(28), num_adjustments = c(rep(5, 7), rep(4, 12), rep(2, 9)), adj = c(rep(0.5,28))) df$val_adj <- df$val 然而,这似乎将num_adjustments计算到该列的第一行是什么(5,在我的例子中),然后将id==1的前5个值
# Example data: three ids with 7, 12 and 9 rows. Every row of an id carries
# the same num_adjustments (5, 4 and 2 respectively) and a constant adj of 0.5.
df <- data.frame(
  id = rep(c(1, 2, 3), times = c(7, 12, 9)),
  val = runif(28),
  num_adjustments = rep(c(5, 4, 2), times = c(7, 12, 9)),
  adj = rep(0.5, 28)
)
# val_adj starts as a plain copy of val; the adjustment is applied afterwards.
df[["val_adj"]] <- df[["val"]]
然而,这似乎只取了 num_adjustments 列第一行的值(在我的例子中是 5),然后把 id==1 的前 5 个值重复到了所有其他 id 上。
我非常感谢你对这件事的任何建议。提前多谢!
我们可以按 id 分组,当每个组内当前的 row_number() 小于等于 num_adjustments 时,将 val 加上 adj(即 val + adj)。
library(dplyr)

# Within each id, add adj to val for the rows whose position in the group
# (row_number()) is at most that row's num_adjustments; other rows keep val.
# ungroup() drops the grouping afterwards so that later verbs applied to the
# result do not silently run per id.
df %>%
  group_by(id) %>%
  mutate(val_adj = ifelse(row_number() <= num_adjustments, val + adj, val)) %>%
  ungroup()
# id val num_adjustments adj val_adj
# <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 0.114 5 0.5 0.614
# 2 1 0.622 5 0.5 1.12
# 3 1 0.609 5 0.5 1.11
# 4 1 0.623 5 0.5 1.12
# 5 1 0.861 5 0.5 1.36
# 6 1 0.640 5 0.5 0.640
# 7 1 0.00950 5 0.5 0.00950
# 8 2 0.233 4 0.5 0.733
# 9 2 0.666 4 0.5 1.17
#10 2 0.514 4 0.5 1.01
# … with 18 more rows
library(dplyr)
df %>%
group_by(id) %>%
mutate(val_adj = ifelse(row_number() <= num_adjustments, val + adj, val))
您可以使用 ave() 获得逻辑向量 x,指示每个 id 内行号不超过 num_adjustments 的位置。
x 也可以在赋值语句的左侧(LHS)使用。
# ave() applies seq_along to val_adj split by id, giving each row its position
# within its id group. x is TRUE where that position is at most the row's
# num_adjustments.
x <- with(df, ave(val_adj, id, FUN = seq_along) <= num_adjustments)
# Overwrite only the flagged rows with val + adj; all other rows keep val_adj.
df$val_adj[x] <- with(df, val + adj)[x]
head(df, 10)
# id adj num_adjustments val val_adj
# 1 1 0.5 1 0.005771755 0.50577176
# 2 1 0.5 1 -0.350911424 -0.35091142
# 3 1 0.5 1 -1.736723720 -1.73672372
# 4 1 0.5 1 0.046552388 0.04655239
# 5 1 0.5 1 -1.121484596 -1.12148460
# 6 2 0.5 1 -0.781560056 -0.28156006
# 7 2 0.5 1 0.849904248 0.84990425
# 8 3 0.5 2 1.763790903 2.26379090
# 9 3 0.5 2 0.845641647 1.34564165
# 10 3 0.5 2 -0.544836427 -0.54483643
基准数据
set.seed(42)
n <- 1e3
我们可以使用 data.table:
library(data.table)

# Convert df to a data.table by reference and start val_adj as a copy of val.
setDT(df)
df[, val_adj := val]
# For every id, keep the row indices (.I) whose position inside the group
# (seq_len(.N)) is at most num_adjustments; the grouped result stores them
# in column V1.
i1 <- df[, .I[seq_len(.N) <= num_adjustments], by = id][["V1"]]
# Apply the adjustment to exactly those rows, by reference.
df[i1, val_adj := val + adj]
library(data.table)
setDT(df)[, val_adj := val]
i1 <- df[, .I[seq_len(.N) <= num_adjustments], id]$V1
df[i1, val_adj := val + adj]
# ave() applies seq_along to val_adj split by id, giving each row its position
# within its id group. x is TRUE where that position is at most the row's
# num_adjustments.
x <- with(df, ave(val_adj, id, FUN = seq_along) <= num_adjustments)
# Overwrite only the flagged rows with val + adj; all other rows keep val_adj.
df$val_adj[x] <- with(df, val + adj)[x]
head(df, 10)
# id adj num_adjustments val val_adj
# 1 1 0.5 1 0.005771755 0.50577176
# 2 1 0.5 1 -0.350911424 -0.35091142
# 3 1 0.5 1 -1.736723720 -1.73672372
# 4 1 0.5 1 0.046552388 0.04655239
# 5 1 0.5 1 -1.121484596 -1.12148460
# 6 2 0.5 1 -0.781560056 -0.28156006
# 7 2 0.5 1 0.849904248 0.84990425
# 8 3 0.5 2 1.763790903 2.26379090
# 9 3 0.5 2 0.845641647 1.34564165
# 10 3 0.5 2 -0.544836427 -0.54483643
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# dplyr 45.41649 46.09873 47.46352 46.78096 48.48704 50.19312 3 a
# ave 31.86809 34.95828 38.30649 38.04847 41.52569 45.00292 3 a
# Benchmark data: n ids, each with a randomly drawn number of rows and a
# randomly drawn num_adjustments that is constant within the id.
set.seed(42)
n <- 1e3
df <- do.call(rbind, lapply(seq_len(n), function(i) {
  # NOTE(review): 1:(i - 1) is c(1, 0) when i is 1, and i:9 counts downwards
  # once i exceeds 9. Both sample() calls are kept exactly as written so the
  # sequence of seeded draws is unchanged.
  data.frame(
    id = i,
    adj = 0.5,
    num_adjustments = rep(sample(1:(i - 1), 1), sample(i:9, 1))
  )
}))
# Draw one value per row of df itself; the original sized this by an
# undefined object `df2`, which made the script stop with an error.
df$val <- rnorm(nrow(df))
df$val_adj <- df$val
library(data.table)

# Convert df to a data.table by reference and start val_adj as a copy of val.
setDT(df)
df[, val_adj := val]
# For every id, keep the row indices (.I) whose position inside the group
# (seq_len(.N)) is at most num_adjustments; the grouped result stores them
# in column V1.
i1 <- df[, .I[seq_len(.N) <= num_adjustments], by = id][["V1"]]
# Apply the adjustment to exactly those rows, by reference.
df[i1, val_adj := val + adj]
# Reproducible example data: three ids with 7, 12 and 9 rows. Every row of an
# id carries the same num_adjustments (5, 4 and 2 respectively) and a constant
# adj of 0.5.
set.seed(1234)
df <- data.frame(
  id = rep(c(1, 2, 3), times = c(7, 12, 9)),
  val = runif(28),
  num_adjustments = rep(c(5, 4, 2), times = c(7, 12, 9)),
  adj = rep(0.5, 28)
)
# val_adj starts as a plain copy of val; the adjustment is applied afterwards.
df[["val_adj"]] <- df[["val"]]