在 R 中查找上一个和下一个观测值

在 R 中查找上一个和下一个观测值(标签:r)。这是我在 R 中的数据框 df 的前几行,行和列都没有任何固定规律:Type SIZE V1 V2;A 1 5 7;B 1 NA NA;B 3 NA NA;B 4 NA NA;A 8 2 4

下面是我在 R 中的数据框 df 的前几行,行和列都没有任何固定规律:

         Type SIZE      V1   V2    
          A    1        5    7      
          B    1        NA   NA
          B    3        NA   NA
          B    4        NA   NA     
          A    8        2    4      
          A    6        6    50      
          A    12       2    8       
          B    8        NA   NA      
          A    9        51   63       
          A    11       93   70  
对于每一个 `df$Type == "B"` 的行,我想找到它之前最近的和之后最近的 `df$Type == "A"` 的行,然后提取这两行的 V1 和 V2。

期望输出

         Type SIZE      V1   V2   V1_lag   V2_lag   V1_lead   V2_lead
          A    1        5    7      NA       NA        NA       NA
          B    1        NA   NA     5        7         2        4
          B    3        NA   NA     5        7         2        4
          B    4        NA   NA     5        7         2        4
          A    8        2    4      NA       NA        NA       NA
          A    6        6    50     NA       NA        NA       NA
          A    12       2    8      NA       NA        NA       NA
          B    8        NA   NA     2        8         51       63
          A    9        51   63     NA       NA        NA       NA  
          A    11       93   70     NA       NA        NA       NA

如果有人能在这方面提供帮助,非常感谢。

例如,可以先把 `type` 为 "A" 的各行的位置(索引)保存下来,例如:

# Example data: `type` marks each row as "A" or "B"; `v1` and `v2` are NA on
# every "B" row.
dat <- data.frame(
    type = c("A", "B", "B", "B", "A", "A", "A", "B", "A", "A"),
    size = c(1, 1, 3, 4, 8, 6, 12, 8, 9, 11),
    v1 = c(5, NA, NA, NA, 2, 6, 2, NA, 51, 93),
    v2 = c(7, NA, NA, NA, 4, 50, 8, NA, 63, 70),
    # Keep `type` as character on every R version (strings were converted to
    # factors by default before R 4.0).
    stringsAsFactors = FALSE
)

# Row number of every observation; seq_len() is also correct for zero rows,
# where 1:nrow(dat) would yield c(1, 0).
dat$idx <- seq_len(nrow(dat))
# Row positions of the "A" rows and of the "B" rows.
a_idx <- which(dat$type == "A")
b_idx <- which(dat$type == "B")
使用下面这些数据:

# Example data: `type` marks each row as "A" or "B"; `v1` and `v2` are NA on
# every "B" row.
dat <- data.frame(
    type = c("A", "B", "B", "B", "A", "A", "A", "B", "A", "A"),
    size = c(1, 1, 3, 4, 8, 6, 12, 8, 9, 11),
    v1 = c(5, NA, NA, NA, 2, 6, 2, NA, 51, 93),
    v2 = c(7, NA, NA, NA, 4, 50, 8, NA, 63, 70),
    stringsAsFactors = FALSE
)

# Run-length encoding of `type`: `r$values` holds the label of each run of
# identical consecutive types and `r$lengths` its number of rows. The snippets
# that follow read `r`, so it has to be defined here.
r <- rle(dat$type)

先用 `r <- rle(dat$type)` 对 `type` 列做游程编码,找出连续相同类型的分段(即 1 个 A、3 个 B、3 个 A、1 个 B 和 2 个 A)。滞后值(lag)的索引为:

# Last row of every run of "A" rows (`r` is expected to be the run-length
# encoding of dat$type). An "A" run that ends on the final row has no "B" run
# after it, so that index is removed.
lag <- with(
    r,
    setdiff(cumsum(lengths)[values == "A"], nrow(dat))
)
超前值(lead)的索引可以用类似的方法得到:

# First row after every run of "B" rows (`r` is expected to be the run-length
# encoding of dat$type).
lead <- pmin(
    cumsum(r$lengths)[r$values == "B"] + 1L,
    nrow(dat)   # cap at the last row when the column ends with a "B" run
)
# v1 of the following row, repeated once for every row of its "B" run.
# `lengths` / `values` are spelled out in full: `r$length` and `r$value` only
# resolved through `$` partial matching.
value <- rep(dat$v1[lead], r$lengths[r$values == "B"])
lead
> r
Run Length Encoding
  lengths: int [1:5] 1 3 3 1 2
  values : chr [1:5] "A" "B" "A" "B" "A"
# Last row of every run of "A" rows (`r` is expected to be the run-length
# encoding of dat$type).
lag <- setdiff(
    cumsum(r$lengths)[r$values == "A"],
    nrow(dat)   # ignore "A" value at end of column
)
# v1 of the preceding "A" row, repeated once for every row of the "B" run.
# `lengths` / `values` are spelled out in full: `r$length` and `r$value` only
# resolved through `$` partial matching.
value <- rep(dat$v1[lag], r$lengths[r$values == "B"])

# First row after every run of "B" rows.
lead <- pmin(
    cumsum(r$lengths)[r$values == "B"] + 1L,
    nrow(dat)   # ignore "B" value at end of column
)
# v1 of the following row, repeated once for every row of the "B" run.
# NOTE: this reuses the name `value`, replacing the lag values computed above.
value <- rep(dat$v1[lead], r$lengths[r$values == "B"])
#' Attach the neighbouring "A" values to every "B" row
#'
#' Rows are grouped into runs of identical consecutive `type`. Every row of a
#' "B" run receives `v1`/`v2` of the last row of the run directly before it
#' (`*_lag`) and of the first row of the run directly after it (`*_lead`),
#' provided that neighbouring run is an "A" run. When a "B" run has no such
#' neighbour (it opens or closes the data, or borders a run of another type)
#' the corresponding columns are NA for that run.
#'
#' @param df A data frame with columns `type`, `v1` and `v2`. `type` may be
#'   character or factor.
#' @return `df` with columns `v1_lag`, `v2_lag`, `v1_lead` and `v2_lead`.
#'   Only "B" rows are written; other rows are NA in newly created columns
#'   and keep their values in columns that already existed.
mm <- function(df) {
    stopifnot(
        is.data.frame(df),
        all(c("type", "v1", "v2") %in% names(df))
    )

    # as.character() lets rle() accept a factor `type` column as well.
    runs <- rle(as.character(df$type))
    run_end <- cumsum(runs$lengths)   # last row of every run
    is_a <- runs$values %in% "A"
    b_runs <- which(runs$values == "B")
    len <- runs$lengths[b_runs]

    # Last row of the run just before each "B" run; NA unless that run is "A".
    # Indexing per "B" run keeps `lag` the same length as `len`, even when the
    # data start with a "B" run.
    lag <- c(NA_integer_, run_end)[b_runs]
    lag[!c(FALSE, is_a)[b_runs]] <- NA_integer_

    # First row of the run just after each "B" run; NA unless that run is "A".
    lead <- run_end[b_runs] + 1L
    lead[!c(is_a, FALSE)[b_runs + 1L]] <- NA_integer_

    b_rows <- which(df$type == "B")

    # Write `src_col` taken at `src_row` (one entry per "B" run) into the "B"
    # rows of `new_col`, creating `new_col` as all-NA first if it is missing.
    fill_b_rows <- function(out, new_col, src_col, src_row) {
        if (is.null(out[[new_col]])) {
            out[[new_col]] <- rep(out[[src_col]][NA_integer_], nrow(out))
        }
        out[[new_col]][b_rows] <- out[[src_col]][rep(src_row, len)]
        out
    }

    df <- fill_b_rows(df, "v1_lag", "v1", lag)
    df <- fill_b_rows(df, "v2_lag", "v2", lag)
    df <- fill_b_rows(df, "v1_lead", "v1", lead)
    df <- fill_b_rows(df, "v2_lead", "v2", lead)

    df
}