相对于R中特定条件的行位置
我有一个数据集,其中“运动员”(Athlete)在随机的“日期”(Date)进行“比赛”(Match == 1)。例如:
# Example data: 4 athletes, each observed on dates 1..20 (80 rows in total).
# DaysAfter and DaysBefore start out as all-NA placeholder columns.
df <- data.frame(
  Athlete = rep(c(1, 2, 3, 4), each = 20),
  Date = rep(1:20, times = 4),
  # the 10-value match pattern is repeated to cover all 80 rows
  Match = rep(c(0, 0, 0, 0, 1, 0, 0, 1, 0, 0), length.out = 80),
  DaysAfter = NA,
  DaysBefore = NA
)
我怎样才能做到这一点?
此代码应该可以工作:
# Fill DaysAfter / DaysBefore one athlete at a time, so that the counters never
# carry over from one athlete's rows into the next athlete's rows.
unique_list <- unique(df$Athlete)
for (k in seq_along(unique_list)) {
  # row positions of the current athlete, in stored order
  index <- which(df$Athlete == unique_list[k])

  # Forward pass: rows elapsed since the most recent match.
  # The counter starts as NA and NA + 1 stays NA, so every row before the
  # athlete's first match is left as NA.
  count <- NA
  for (j in index) {
    # spell out the full column name instead of relying on `$` partial matching
    count <- if (df$Match[j] == 1) 0 else count + 1
    df$DaysAfter[j] <- count
  }

  # Backward pass: rows remaining until the next match, as a negative number.
  # Rows after the athlete's last match stay NA for the same reason as above.
  count <- NA
  for (j in rev(index)) {
    count <- if (df$Match[j] == 1) 0 else count - 1
    df$DaysBefore[j] <- count
  }
}
我们可以使用 `data.table`。将 "data.frame" 转换为 "data.table"(`setDT(df)`),按 "Athlete" 以及另一个根据 "Match" 中 1 的位置创建的分组变量(`cumsum(Match == 1)`)进行分组,然后创建两列:
1) DA——在 "Match" 出现第一个 1 之前,所有元素都应为 `NA`,因此使用 `if`/`else` 创建一个逻辑条件:当组内 "Match" 的所有元素都为 0(`all(!Match)`)时,将其乘以 `NA`(`NA` 乘以任何数字都返回 `NA`)。由于我们是按 `cumsum` 分组的,每位运动员只有第一个组的元素全部为 0,因此这一部分就解决了。`else` 分支取行序列并从中减去 1(`seq_len(.N) - 1`)。
2) DB——我们将 "Match" 与行数(`.N`)相乘,然后减去逆序的行序列(`.N:1`)。完成此操作后,最后一步是把 "Match" 中最后一个 1 之后的元素设为 `NA`:按 "Athlete" 分组,取得从 "Match" 中最后一个 1 的下一个元素到行数(`.N`)的行索引(`.I`),并根据该索引将 DB 赋值(`:=`)为 `NA`。
library(data.table)
# Group by athlete and by a running count of matches. Each group is then either
# the leading stretch before an athlete's first match (all zeros) or one match
# row followed by the non-match rows up to the next match.
df1 <- setDT(df)[, c("DA", "DB") := list(
  # DA: NA before the first match, otherwise 0, 1, 2, ... from the match row
  if (all(!Match)) NA * Match else seq_len(.N) - 1,
  # DB: 0 on the match row, and a countdown ending at -1 on the other rows
  Match * .N - (.N:1)
), by = .(cumsum(Match == 1), Athlete)]
# Rows after an athlete's final match have no upcoming match, so DB is NA there.
# The reversed cumulative match count is 0 exactly on those rows. Unlike
# (max(which(Match == 1)) + 1):.N, this selects nothing when the final row is
# itself a match, and it does not fail for an athlete without any match.
df1[
  as.integer(df1[, .I[rev(cumsum(rev(Match == 1))) == 0], by = Athlete]$V1),
  DB := NA
][]
# Athlete Date Match DA DB
# 1: 1 1 0 NA -4
# 2: 1 2 0 NA -3
# 3: 1 3 0 NA -2
# 4: 1 4 0 NA -1
# 5: 1 5 1 0 0
# 6: 1 6 0 1 -2
# 7: 1 7 0 2 -1
# 8: 1 8 1 0 0
# 9: 1 9 0 1 -6
#10: 1 10 0 2 -5
#11: 1 11 0 3 -4
#12: 1 12 0 4 -3
#13: 1 13 0 5 -2
#14: 1 14 0 6 -1
#15: 1 15 1 0 0
#16: 1 16 0 1 -2
#17: 1 17 0 2 -1
#18: 1 18 1 0 0
#19: 1 19 0 1 NA
#20: 1 20 0 2 NA
#21: 2 1 0 NA -4
#22: 2 2 0 NA -3
#23: 2 3 0 NA -2
#24: 2 4 0 NA -1
#25: 2 5 1 0 0
#26: 2 6 0 1 -2
#27: 2 7 0 2 -1
#28: 2 8 1 0 0
#29: 2 9 0 1 -6
#30: 2 10 0 2 -5
#31: 2 11 0 3 -4
#32: 2 12 0 4 -3
#33: 2 13 0 5 -2
#34: 2 14 0 6 -1
#35: 2 15 1 0 0
#36: 2 16 0 1 -2
#37: 2 17 0 2 -1
#38: 2 18 1 0 0
#39: 2 19 0 1 NA
#40: 2 20 0 2 NA
#41: 3 1 0 NA -4
#42: 3 2 0 NA -3
#43: 3 3 0 NA -2
#44: 3 4 0 NA -1
#45: 3 5 1 0 0
#46: 3 6 0 1 -2
#47: 3 7 0 2 -1
#48: 3 8 1 0 0
#49: 3 9 0 1 -6
#50: 3 10 0 2 -5
#51: 3 11 0 3 -4
#52: 3 12 0 4 -3
#53: 3 13 0 5 -2
#54: 3 14 0 6 -1
#55: 3 15 1 0 0
#56: 3 16 0 1 -2
#57: 3 17 0 2 -1
#58: 3 18 1 0 0
#59: 3 19 0 1 NA
#60: 3 20 0 2 NA
#61: 4 1 0 NA -4
#62: 4 2 0 NA -3
#63: 4 3 0 NA -2
#64: 4 4 0 NA -1
#65: 4 5 1 0 0
#66: 4 6 0 1 -2
#67: 4 7 0 2 -1
#68: 4 8 1 0 0
#69: 4 9 0 1 -6
#70: 4 10 0 2 -5
#71: 4 11 0 3 -4
#72: 4 12 0 4 -3
#73: 4 13 0 5 -2
#74: 4 14 0 6 -1
#75: 4 15 1 0 0
#76: 4 16 0 1 -2
#77: 4 17 0 2 -1
#78: 4 18 1 0 0
#79: 4 19 0 1 NA
#80: 4 20 0 2 NA
库(data.table)
我曾经编写过以下函数:
#' Cumulative sum that restarts at flagged positions
#'
#' @param vals Vector of values to accumulate.
#' @param restart Logical vector of the same length as `vals`. A `TRUE` marks
#'   a position where the running sum starts over with the value of `vals` at
#'   that position. The first position is always treated as a restart.
#' @return A vector of the same length as `vals`; element `i` is the sum of
#'   `vals` from the most recent restart position up to and including `i`.
cumsum.r <- function(vals, restart) {
  if (!is.vector(vals) || !is.vector(restart)) {
    stop("expect vectors")
  }
  if (length(vals) != length(restart)) {
    stop("different length")
  }
  len <- length(vals)
  # the running sum has to begin somewhere, so position 1 always restarts;
  # TRUE is used rather than T because T can be reassigned
  restart[1] <- TRUE
  starts <- which(restart)
  # for every position, the index of the restart that opens its segment
  seg_start <- rep(starts, c(starts[-1], len + 1) - starts)
  running <- cumsum(vals)
  # total up to i, minus the total up to the segment start, plus the start value
  running - running[seg_start] + vals[seg_start]
}
谢谢。几乎是对的。我需要为每个运动员分别运行代码,以便运动员之间没有重叠。有可能吗?
这并不完全显示预期的结果。第19行和第20行的结果错误。NA是正确的:19:11901NA1-2201202NA2-1211:210NA-4NA-422:20NA-3NA-3
@Otto_K 您能确认更新的输出是否正确吗?
library(data.table)
# Group by athlete and by a running count of matches. Each group is then either
# the leading stretch before an athlete's first match (all zeros) or one match
# row followed by the non-match rows up to the next match.
df1 <- setDT(df)[, c("DA", "DB") := list(
  # DA: NA before the first match, otherwise 0, 1, 2, ... from the match row
  if (all(!Match)) NA * Match else seq_len(.N) - 1,
  # DB: 0 on the match row, and a countdown ending at -1 on the other rows
  Match * .N - (.N:1)
), by = .(cumsum(Match == 1), Athlete)]
# Rows after an athlete's final match have no upcoming match, so DB is NA there.
# The reversed cumulative match count is 0 exactly on those rows. Unlike
# (max(which(Match == 1)) + 1):.N, this selects nothing when the final row is
# itself a match, and it does not fail for an athlete without any match.
df1[
  as.integer(df1[, .I[rev(cumsum(rev(Match == 1))) == 0], by = Athlete]$V1),
  DB := NA
][]
# Athlete Date Match DA DB
# 1: 1 1 0 NA -4
# 2: 1 2 0 NA -3
# 3: 1 3 0 NA -2
# 4: 1 4 0 NA -1
# 5: 1 5 1 0 0
# 6: 1 6 0 1 -2
# 7: 1 7 0 2 -1
# 8: 1 8 1 0 0
# 9: 1 9 0 1 -6
#10: 1 10 0 2 -5
#11: 1 11 0 3 -4
#12: 1 12 0 4 -3
#13: 1 13 0 5 -2
#14: 1 14 0 6 -1
#15: 1 15 1 0 0
#16: 1 16 0 1 -2
#17: 1 17 0 2 -1
#18: 1 18 1 0 0
#19: 1 19 0 1 NA
#20: 1 20 0 2 NA
#21: 2 1 0 NA -4
#22: 2 2 0 NA -3
#23: 2 3 0 NA -2
#24: 2 4 0 NA -1
#25: 2 5 1 0 0
#26: 2 6 0 1 -2
#27: 2 7 0 2 -1
#28: 2 8 1 0 0
#29: 2 9 0 1 -6
#30: 2 10 0 2 -5
#31: 2 11 0 3 -4
#32: 2 12 0 4 -3
#33: 2 13 0 5 -2
#34: 2 14 0 6 -1
#35: 2 15 1 0 0
#36: 2 16 0 1 -2
#37: 2 17 0 2 -1
#38: 2 18 1 0 0
#39: 2 19 0 1 NA
#40: 2 20 0 2 NA
#41: 3 1 0 NA -4
#42: 3 2 0 NA -3
#43: 3 3 0 NA -2
#44: 3 4 0 NA -1
#45: 3 5 1 0 0
#46: 3 6 0 1 -2
#47: 3 7 0 2 -1
#48: 3 8 1 0 0
#49: 3 9 0 1 -6
#50: 3 10 0 2 -5
#51: 3 11 0 3 -4
#52: 3 12 0 4 -3
#53: 3 13 0 5 -2
#54: 3 14 0 6 -1
#55: 3 15 1 0 0
#56: 3 16 0 1 -2
#57: 3 17 0 2 -1
#58: 3 18 1 0 0
#59: 3 19 0 1 NA
#60: 3 20 0 2 NA
#61: 4 1 0 NA -4
#62: 4 2 0 NA -3
#63: 4 3 0 NA -2
#64: 4 4 0 NA -1
#65: 4 5 1 0 0
#66: 4 6 0 1 -2
#67: 4 7 0 2 -1
#68: 4 8 1 0 0
#69: 4 9 0 1 -6
#70: 4 10 0 2 -5
#71: 4 11 0 3 -4
#72: 4 12 0 4 -3
#73: 4 13 0 5 -2
#74: 4 14 0 6 -1
#75: 4 15 1 0 0
#76: 4 16 0 1 -2
#77: 4 17 0 2 -1
#78: 4 18 1 0 0
#79: 4 19 0 1 NA
#80: 4 20 0 2 NA
#' Cumulative sum that restarts at flagged positions
#'
#' @param vals Vector of values to accumulate.
#' @param restart Logical vector of the same length as `vals`. A `TRUE` marks
#'   a position where the running sum starts over with the value of `vals` at
#'   that position. The first position is always treated as a restart.
#' @return A vector of the same length as `vals`; element `i` is the sum of
#'   `vals` from the most recent restart position up to and including `i`.
cumsum.r <- function(vals, restart) {
  if (!is.vector(vals) || !is.vector(restart)) {
    stop("expect vectors")
  }
  if (length(vals) != length(restart)) {
    stop("different length")
  }
  len <- length(vals)
  # the running sum has to begin somewhere, so position 1 always restarts;
  # TRUE is used rather than T because T can be reassigned
  restart[1] <- TRUE
  starts <- which(restart)
  # for every position, the index of the restart that opens its segment
  seg_start <- rep(starts, c(starts[-1], len + 1) - starts)
  running <- cumsum(vals)
  # total up to i, minus the total up to the segment start, plus the start value
  running - running[seg_start] + vals[seg_start]
}
# Per-athlete day counters built on cumsum.r().
# Full column names are used: `df$Math` does not partially match "Match" and
# would yield NULL on a data.frame.
is.match <- df$Match == 1

# TRUE wherever the athlete differs from the previous row (`!=`, not `==`).
# The first row of every athlete is a forward boundary and the last row of
# every athlete is a backward boundary.
ath.change <- df$Athlete[-1] != df$Athlete[-nrow(df)]
new.ath <- c(TRUE, ath.change)
last.ath <- c(ath.change, TRUE)

# Forward pass: restart on every match and at the start of each athlete.
# 1 - restart contributes 0 on a restart row and 1 on every other row.
restart <- is.match | new.ath
days.after <- cumsum.r(1 - restart, restart)
# rows before an athlete's first match have no previous match to count from
matches.so.far <- ave(as.integer(is.match), df$Athlete, FUN = cumsum)
days.after[matches.so.far == 0] <- NA

# Backward pass: same idea on the reversed rows. Here the boundary is the LAST
# row of each athlete, since that row comes first in reversed order.
rr <- rev(is.match | last.ath)
days.before <- -rev(cumsum.r(1 - rr, rr))
# rows after an athlete's last match have no upcoming match to count down to
matches.left <- ave(
  as.integer(is.match), df$Athlete,
  FUN = function(m) rev(cumsum(rev(m)))
)
days.before[matches.left == 0] <- NA