相对于R中特定条件的行位置

相对于R中特定条件的行位置,r,R,我有一个数据集,“运动员”在随机的“日期”进行“比赛”(“比赛”==1)。例如: df <- data.frame(matrix(nrow = 80, ncol = 5)) colnames(df) <- c("Athlete", "Date", "Match", "DaysAfter", "DaysBefore") df[,"Athlete"] <- c(rep(1, 20), rep(2,20), rep(3, 20), rep(4, 20)) df[,"Date"] &l

我有一个数据集,其中每位运动员(列 "Athlete")在随机的日期(列 "Date")进行比赛(有比赛时 "Match" == 1)。例如:

# Example data: 4 athletes observed on 20 consecutive dates each (80 rows).
# Match follows a 10-day pattern with matches on days 5 and 8, repeated to
# fill all rows; DaysAfter / DaysBefore start out as empty (NA) columns.
df <- data.frame(
  Athlete = rep(c(1, 2, 3, 4), each = 20),
  Date = rep(1:20, times = 4),
  Match = rep(c(0, 0, 0, 0, 1, 0, 0, 1, 0, 0), times = 8),
  DaysAfter = NA,
  DaysBefore = NA
)
df <- df[, c("Athlete", "Date", "Match", "DaysAfter", "DaysBefore")]
我想为每位运动员的每一行计算距上一场比赛之后的天数("DaysAfter")以及距下一场比赛之前的天数("DaysBefore")。我怎样才能做到这一点?

此代码应该可以工作:

   # Fill DaysAfter / DaysBefore one athlete at a time so that counts never
   # carry over from one athlete to the next.
   unique_list <- unique(df$Athlete)
   for (k in seq_along(unique_list)) {
     # Row positions belonging to the current athlete, in table order.
     index <- which(df$Athlete == unique_list[k])

     # Forward pass: days elapsed since the most recent match.
     # The counter starts as NA, and NA + 1 stays NA, so rows before the
     # athlete's first match remain NA.
     count <- NA
     for (j in index) {
       if (df$Match[j] == 1) {
         count <- 0
       } else {
         count <- count + 1
       }
       df$DaysAfter[j] <- count
     }

     # Backward pass: (negative) days remaining until the next match.
     # Rows after the athlete's last match remain NA for the same reason.
     count <- NA
     for (j in rev(index)) {
       if (df$Match[j] == 1) {
         count <- 0
       } else {
         count <- count - 1
       }
       df$DaysBefore[j] <- count
     }
   }

我们可以使用 `data.table`。先将 data.frame 转换为 data.table(`setDT(df)`),再按 "Athlete" 以及另一个根据 "Match" 中 1 的位置创建的分组变量(`cumsum(Match == 1)`)进行分组,然后创建两列:

1) DA:在 "Match" 中第一个 1 出现之前的所有元素都应为 `NA`,因此使用 `if/else` 构造一个逻辑条件:如果组内 "Match" 的所有(`all`)元素都为 0,就把它们乘以 `NA`(`NA` 乘以任何数字都返回 `NA`)。由于我们按 `cumsum` 分组,每位运动员只有第一个组的元素全为 0,这一部分就解决了。`else` 分支取组内的行序列并减去 1(`seq_len(.N) - 1`)。

2) DB:我们将 "Match" 与组内行数(`.N`)相乘,然后减去逆序的行序列(`.N:1`)。完成此操作后,最后一步是把 "Match" 中最后一个 1 之后的那些行设为 `NA`:按 "Athlete" 分组,取得从 "Match" 中最后一个 1 的下一个元素到最后一行(`.N`)的行索引(`.I`),并根据该索引用 `:=` 将 DB 赋值为 `NA`。

library(data.table)
# Build DA (days after the previous match) and DB (days before the next match).
# Rows are grouped by athlete and by a run id that increases at every match
# (cumsum(Match == 1)), so every run except an athlete's leading one starts on
# a match row.
df1 <- setDT(df)[
  ,
  c("DA", "DB") := list(
    # A run with no match at all is the stretch before the athlete's first
    # match: NA * Match turns the whole run into NA. Otherwise count 0, 1, 2, ...
    # from the match row that opens the run.
    if (all(!Match)) NA * Match else seq_len(.N) - 1,
    # Countdown to the end of the run: 0 on a match row, then -(k), ..., -1
    # on the non-match rows leading up to the next run.
    Match * .N - (.N:1)
  ),
  by = .(cumsum(Match == 1), Athlete)
]
# Rows after an athlete's last match have no "next match", so DB must be NA.
# Selecting by position (instead of the range (last + 1):.N) keeps this correct
# when the last row is itself a match (nothing is selected) and when an athlete
# has no match at all (max(0L, integer(0)) is 0, so every row is selected).
df1[
  df1[, .I[seq_len(.N) > max(0L, which(Match == 1))], by = Athlete]$V1,
  DB := NA
][]
#    Athlete Date Match DA DB
# 1:       1    1     0 NA -4
# 2:       1    2     0 NA -3
# 3:       1    3     0 NA -2
# 4:       1    4     0 NA -1
# 5:       1    5     1  0  0
# 6:       1    6     0  1 -2
# 7:       1    7     0  2 -1
# 8:       1    8     1  0  0
# 9:       1    9     0  1 -6
#10:       1   10     0  2 -5
#11:       1   11     0  3 -4
#12:       1   12     0  4 -3
#13:       1   13     0  5 -2
#14:       1   14     0  6 -1
#15:       1   15     1  0  0
#16:       1   16     0  1 -2
#17:       1   17     0  2 -1
#18:       1   18     1  0  0
#19:       1   19     0  1 NA
#20:       1   20     0  2 NA
#21:       2    1     0 NA -4
#22:       2    2     0 NA -3
#23:       2    3     0 NA -2
#24:       2    4     0 NA -1
#25:       2    5     1  0  0
#26:       2    6     0  1 -2
#27:       2    7     0  2 -1
#28:       2    8     1  0  0
#29:       2    9     0  1 -6
#30:       2   10     0  2 -5
#31:       2   11     0  3 -4
#32:       2   12     0  4 -3
#33:       2   13     0  5 -2
#34:       2   14     0  6 -1
#35:       2   15     1  0  0
#36:       2   16     0  1 -2
#37:       2   17     0  2 -1
#38:       2   18     1  0  0
#39:       2   19     0  1 NA
#40:       2   20     0  2 NA
#41:       3    1     0 NA -4
#42:       3    2     0 NA -3
#43:       3    3     0 NA -2
#44:       3    4     0 NA -1
#45:       3    5     1  0  0
#46:       3    6     0  1 -2
#47:       3    7     0  2 -1
#48:       3    8     1  0  0
#49:       3    9     0  1 -6
#50:       3   10     0  2 -5
#51:       3   11     0  3 -4
#52:       3   12     0  4 -3
#53:       3   13     0  5 -2
#54:       3   14     0  6 -1
#55:       3   15     1  0  0
#56:       3   16     0  1 -2
#57:       3   17     0  2 -1
#58:       3   18     1  0  0
#59:       3   19     0  1 NA
#60:       3   20     0  2 NA
#61:       4    1     0 NA -4
#62:       4    2     0 NA -3
#63:       4    3     0 NA -2
#64:       4    4     0 NA -1
#65:       4    5     1  0  0
#66:       4    6     0  1 -2
#67:       4    7     0  2 -1
#68:       4    8     1  0  0
#69:       4    9     0  1 -6
#70:       4   10     0  2 -5
#71:       4   11     0  3 -4
#72:       4   12     0  4 -3
#73:       4   13     0  5 -2
#74:       4   14     0  6 -1
#75:       4   15     1  0  0
#76:       4   16     0  1 -2
#77:       4   17     0  2 -1
#78:       4   18     1  0  0
#79:       4   19     0  1 NA
#80:       4   20     0  2 NA
`library(data.table)`

我曾经编写过以下函数:

#' Cumulative sum that starts over at flagged positions
#'
#' @param vals A vector of values to accumulate.
#' @param restart A logical vector of the same length as `vals`; `TRUE` marks
#'   a position where the running sum starts over. The first position is
#'   always treated as a restart.
#' @return A vector of the same length as `vals` holding, for each position,
#'   the sum of `vals` from the most recent restart position (inclusive) up to
#'   that position.
cumsum.r <- function(vals, restart) {
  if (!is.vector(vals) || !is.vector(restart)) {
    stop("expect vectors")
  }
  if (length(vals) != length(restart)) {
    stop("different length")
  }
  len <- length(vals)
  # Spelled out as TRUE: the shorthand T is an ordinary variable that can be
  # reassigned, which would silently disable the forced first restart.
  restart[1] <- TRUE
  # For every position, the index of the restart position that governs it:
  # each restart index is repeated for the length of its segment.
  ind <- which(restart)
  ind <- rep(ind, c(ind[-1], len + 1) - ind)
  vals.c <- cumsum(vals)
  # Subtract the running total reached at the segment start, then add back the
  # segment's own first value so the restart position itself is included.
  vals.c - vals.c[ind] + vals[ind]
}

谢谢。几乎是对的。我需要为每个运动员分别运行代码,以便运动员之间没有重叠。有可能吗?

这并不完全显示预期的结果。第19行和第20行的结果错误,NA 才是正确的(评论所附的原始数据:19:11901NA1-2201202NA2-1211:210NA-4NA-422:20NA-3NA-3)。

@Otto_K 您能确认更新的输出是否正确吗?
library(data.table)
# Build DA (days after the previous match) and DB (days before the next match).
# Rows are grouped by athlete and by a run id that increases at every match
# (cumsum(Match == 1)), so every run except an athlete's leading one starts on
# a match row.
df1 <- setDT(df)[
  ,
  c("DA", "DB") := list(
    # A run with no match at all is the stretch before the athlete's first
    # match: NA * Match turns the whole run into NA. Otherwise count 0, 1, 2, ...
    # from the match row that opens the run.
    if (all(!Match)) NA * Match else seq_len(.N) - 1,
    # Countdown to the end of the run: 0 on a match row, then -(k), ..., -1
    # on the non-match rows leading up to the next run.
    Match * .N - (.N:1)
  ),
  by = .(cumsum(Match == 1), Athlete)
]
# Rows after an athlete's last match have no "next match", so DB must be NA.
# Selecting by position (instead of the range (last + 1):.N) keeps this correct
# when the last row is itself a match (nothing is selected) and when an athlete
# has no match at all (max(0L, integer(0)) is 0, so every row is selected).
df1[
  df1[, .I[seq_len(.N) > max(0L, which(Match == 1))], by = Athlete]$V1,
  DB := NA
][]
#    Athlete Date Match DA DB
# 1:       1    1     0 NA -4
# 2:       1    2     0 NA -3
# 3:       1    3     0 NA -2
# 4:       1    4     0 NA -1
# 5:       1    5     1  0  0
# 6:       1    6     0  1 -2
# 7:       1    7     0  2 -1
# 8:       1    8     1  0  0
# 9:       1    9     0  1 -6
#10:       1   10     0  2 -5
#11:       1   11     0  3 -4
#12:       1   12     0  4 -3
#13:       1   13     0  5 -2
#14:       1   14     0  6 -1
#15:       1   15     1  0  0
#16:       1   16     0  1 -2
#17:       1   17     0  2 -1
#18:       1   18     1  0  0
#19:       1   19     0  1 NA
#20:       1   20     0  2 NA
#21:       2    1     0 NA -4
#22:       2    2     0 NA -3
#23:       2    3     0 NA -2
#24:       2    4     0 NA -1
#25:       2    5     1  0  0
#26:       2    6     0  1 -2
#27:       2    7     0  2 -1
#28:       2    8     1  0  0
#29:       2    9     0  1 -6
#30:       2   10     0  2 -5
#31:       2   11     0  3 -4
#32:       2   12     0  4 -3
#33:       2   13     0  5 -2
#34:       2   14     0  6 -1
#35:       2   15     1  0  0
#36:       2   16     0  1 -2
#37:       2   17     0  2 -1
#38:       2   18     1  0  0
#39:       2   19     0  1 NA
#40:       2   20     0  2 NA
#41:       3    1     0 NA -4
#42:       3    2     0 NA -3
#43:       3    3     0 NA -2
#44:       3    4     0 NA -1
#45:       3    5     1  0  0
#46:       3    6     0  1 -2
#47:       3    7     0  2 -1
#48:       3    8     1  0  0
#49:       3    9     0  1 -6
#50:       3   10     0  2 -5
#51:       3   11     0  3 -4
#52:       3   12     0  4 -3
#53:       3   13     0  5 -2
#54:       3   14     0  6 -1
#55:       3   15     1  0  0
#56:       3   16     0  1 -2
#57:       3   17     0  2 -1
#58:       3   18     1  0  0
#59:       3   19     0  1 NA
#60:       3   20     0  2 NA
#61:       4    1     0 NA -4
#62:       4    2     0 NA -3
#63:       4    3     0 NA -2
#64:       4    4     0 NA -1
#65:       4    5     1  0  0
#66:       4    6     0  1 -2
#67:       4    7     0  2 -1
#68:       4    8     1  0  0
#69:       4    9     0  1 -6
#70:       4   10     0  2 -5
#71:       4   11     0  3 -4
#72:       4   12     0  4 -3
#73:       4   13     0  5 -2
#74:       4   14     0  6 -1
#75:       4   15     1  0  0
#76:       4   16     0  1 -2
#77:       4   17     0  2 -1
#78:       4   18     1  0  0
#79:       4   19     0  1 NA
#80:       4   20     0  2 NA
#' Cumulative sum that starts over at flagged positions
#'
#' @param vals A vector of values to accumulate.
#' @param restart A logical vector of the same length as `vals`; `TRUE` marks
#'   a position where the running sum starts over. The first position is
#'   always treated as a restart.
#' @return A vector of the same length as `vals` holding, for each position,
#'   the sum of `vals` from the most recent restart position (inclusive) up to
#'   that position.
cumsum.r <- function(vals, restart) {
  if (!is.vector(vals) || !is.vector(restart)) {
    stop("expect vectors")
  }
  if (length(vals) != length(restart)) {
    stop("different length")
  }
  len <- length(vals)
  # Spelled out as TRUE: the shorthand T is an ordinary variable that can be
  # reassigned, which would silently disable the forced first restart.
  restart[1] <- TRUE
  # For every position, the index of the restart position that governs it:
  # each restart index is repeated for the length of its segment.
  ind <- which(restart)
  ind <- rep(ind, c(ind[-1], len + 1) - ind)
  vals.c <- cumsum(vals)
  # Subtract the running total reached at the segment start, then add back the
  # segment's own first value so the restart position itself is included.
  vals.c - vals.c[ind] + vals[ind]
}
# Apply cumsum.r per athlete by forcing a restart at athlete boundaries.
# Assumes df has at least one row and that each athlete's rows are contiguous.
n.rows <- nrow(df)

# TRUE on the first row of each athlete: the athlete differs from the row
# above (the comparison must be !=, not ==).
new.ath <- c(TRUE, df$Athlete[-1] != df$Athlete[-n.rows])

# Forward direction: restart the count on every match and on every new athlete.
# Full column names are used because `$` only partial-matches prefixes
# ("Math" is not a prefix of "Match" and would silently yield NULL).
restart <- df$Match == 1 | new.ath
days.after <- cumsum.r(1 - restart, restart)

# Backward direction: once the rows are reversed, an athlete's block begins at
# that athlete's LAST row, so the boundary flag has to sit there; reusing the
# forward flag would let the countdown leak into the previous athlete's rows.
last.ath <- c(df$Athlete[-1] != df$Athlete[-n.rows], TRUE)
rr <- rev(df$Match == 1 | last.ath)
days.before <- -rev(cumsum.r(1 - rr, rr))

# Note: rows before an athlete's first match (days.after) and after the last
# match (days.before) are counted from the athlete's first / last row rather
# than being set to NA.