R 基于跨行字符序列的子集数据_R

R 基于跨行字符序列的子集数据

R 基于跨行字符序列的子集数据,r,R,如何通过连续的字符行模式来子集df？在下面的示例中，我想将历史值连续为“真”、“假”、“真”的数据子集。下面的数据有点奇怪，但你明白了 value <- c(1/1/16,1/2/16, 1/3/16, 1/4/16, 1/5/16, 1/6/16, 1/7/16, 1/8/16, 1/9/16, 1/10/16) history <- c("TRUE", "FALSE", "TRUE", "TRUE", "FALSE", "TRUE", "TRUE", "TRUE", "FALS

如何根据连续多行的字符模式对 df 取子集？在下面的示例中，我想取出 history 列的值连续为 "TRUE"、"FALSE"、"TRUE" 的那些行。下面的数据有点奇怪，但你应该能明白我的意思。

# NOTE: entries such as 1/2/16 are evaluated as arithmetic (1 / 2 / 16), not
# parsed as dates, so `value` holds the small numbers shown in the printout.
value <- 1 / seq_len(10) / 16

# Status flags stored as character strings (not logicals).
history <- c(
  "TRUE", "FALSE", "TRUE", "TRUE", "FALSE",
  "TRUE", "TRUE", "TRUE", "FALSE", "TRUE"
)

# Combine both vectors into the example data frame and show it.
df <- data.frame(value = value, history = history)
df

         value history  
1  0.062500000    TRUE  
2  0.031250000   FALSE  
3  0.020833333    TRUE  
4  0.015625000    TRUE  
5  0.012500000   FALSE  
6  0.010416667    TRUE  
7  0.008928571    TRUE  
8  0.007812500    TRUE  
9  0.006944444   FALSE  
10 0.006250000    TRUE

你可以这样做：
library(data.table)

# Sequence of `history` values to look for in consecutive rows.
s <- c("TRUE", "FALSE", "TRUE")

# embed() lays out each window newest-first (V1 is the LAST element of the
# window), so the pattern is reversed before joining on V1..Vk; without rev()
# only palindromic patterns would be matched correctly.
# Row i of the embedded table is the window starting at position i, so `w`
# holds the start position of every match. nomatch = NULL makes `w` an empty
# integer vector (instead of NA) when the pattern does not occur.
w <- as.data.table(embed(history, length(s)))[
  as.list(rev(s)),
  on = paste0("V", seq_along(s)),
  which = TRUE,
  nomatch = NULL
]

# Flag every row covered by a match. Each offset 0..k-1 has to be repeated once
# per match (length(w)), not once per pattern element, so that recycling `w`
# pairs every start position with every offset.
df$v <- FALSE
df$v[w + rep(seq_along(s) - 1L, each = length(w))] <- TRUE

         value history     v
1  0.062500000    TRUE  TRUE
2  0.031250000   FALSE  TRUE
3  0.020833333    TRUE  TRUE
4  0.015625000    TRUE  TRUE
5  0.012500000   FALSE  TRUE
6  0.010416667    TRUE  TRUE
7  0.008928571    TRUE FALSE
8  0.007812500    TRUE  TRUE
9  0.006944444   FALSE  TRUE
10 0.006250000    TRUE  TRUE

`w + rep(...)` 与 @GGrothendieck 的 `outer(...)` 作用相同，只是这里的 `w` 保存的是每个匹配的起始位置，而不是结束位置。
问题中的数据看起来有些奇怪，因此我们使用文末“注”中给出的数据。如果您手头的确实是取值为 "TRUE" 和 "FALSE" 的字符向量或因子，可以用下面的方法把它转换为逻辑向量：
# Replace the "TRUE"/"FALSE" labels in `history` with real logical values.
df[["history"]] <- df[["history"]] == "TRUE"

结果为：
         value history
1  0.062500000    TRUE
2  0.031250000   FALSE
3  0.020833333    TRUE
4  0.015625000    TRUE
5  0.012500000   FALSE
6  0.010416667    TRUE
8  0.007812500    TRUE
9  0.006944444   FALSE
10 0.006250000    TRUE

1a）magrittr：（1）中的代码也可以用 magrittr 管道来表达，如下所示。（解决方案（2）同样可以用 magrittr 表达。）
2）gregexpr：使用上面定义的 `pattern`，
我们把它转换为由 0 和 1 组成的字符串，并对 df$history 做同样的转换。然后用 gregexpr 找出每个匹配的第一个元素的索引，再把它扩展为该匹配覆盖的全部索引，并据此取子集。得到的结果与前面相同。这个方案不使用任何包。
# Collapse a logical vector into one string of 0s and 1s, one character per
# element. An NA would be pasted as the two characters "NA" and shift all later
# positions, so the input is assumed to contain no NA.
collapse <- function(x) paste0(x + 0, collapse = "")

# Start position of every occurrence of `pattern` in df$history.
# NOTE(review): `pattern` is not defined in this snippet; it is expected to be
# a logical vector defined earlier - confirm.
# The pattern is wrapped in a zero-width lookahead so that overlapping
# occurrences are all reported; a plain match resumes searching after the end
# of the previous match and would skip any occurrence that starts inside it.
ix <- gregexpr(
  paste0("(?=", collapse(pattern), ")"),
  collapse(df$history),
  perl = TRUE
)[[1]]

# gregexpr() signals "no match" with -1; drop it so that the subset below is
# empty instead of failing on mixed negative/positive subscripts.
ix <- ix[ix > 0L]

# Expand each start position to all rows covered by that occurrence.
ix <- unique(sort(c(outer(ix, seq_along(pattern) - 1L, "+"))))
df[ix, ]

使用 lag（滞后）的方案：
    df <- data.frame(value, history)

    # Positional lag: move the values k places towards the end and pad the head
    # with `fill`. stats::lag() cannot be used for this - it only changes the
    # time base of a series and leaves the values where they are (dplyr::lag()
    # shifts positions, but dplyr is not loaded here).
    lag_by <- function(x, k, fill = NA) c(rep(fill, k), x)[seq_along(x)]

    # n[i] is TRUE when rows i, i + 1, i + 2 read TRUE, FALSE, TRUE. The pasted
    # string at position j describes the window ENDING at row j, so dropping
    # the first two entries re-indexes the result by window start.
    h <- as.character(history)
    n <- (paste(lag_by(h, 2L), lag_by(h, 1L), h, sep = ", ") ==
      "TRUE, FALSE, TRUE")[-(1:2)]

    # A row belongs to a match if a match starts at that row or at one of the
    # two rows before it. No match can start in the last two rows, so they are
    # padded with FALSE; padding with FALSE rather than NA keeps NA out of the
    # row selector.
    starts <- c(n, FALSE, FALSE)[seq_along(h)]
    cond <- starts | lag_by(starts, 1L, FALSE) | lag_by(starts, 2L, FALSE)
    df[cond, ]

如果使用基础 R 的 `lag`，结果是 0 行；
加上 `library(dplyr)` 之后
才能正常工作。另外，使用 `embed` 也是个不错的选择。
# Pipe formulation: keep the rows of `df` that take part in an occurrence of
# `pattern` in the `history` column.
# NOTE(review): `pattern` is not defined in this snippet; the identical()
# comparison below presumably expects a logical vector - confirm.
library(magrittr)
library(zoo)

# magrittr's extract() is an alias for `[`: the outer call is df[<rows>, ] and
# the inner call is df[, "history"].
df %>%
  extract(
   # pull out the history column ...
   extract(.,, "history") %>%
   # ... compare each right-aligned window of length(pattern) values against
   # `pattern`; positions without a full window are filled with FALSE
   rollapplyr(length(pattern), identical, pattern, fill = FALSE) %>%
   # positions where a matching window ENDS
   which %>%
   # step back from each end position to every row of that window
   outer(seq_along(pattern) - 1L, "-") %>%
   # flatten to an ordered set of row numbers
   sort %>%
   unique, )

# Collapse a logical vector into one string of 0s and 1s, one character per
# element. An NA would be pasted as the two characters "NA" and shift all later
# positions, so the input is assumed to contain no NA.
collapse <- function(x) paste0(x + 0, collapse = "")

# Start position of every occurrence of `pattern` in df$history.
# NOTE(review): `pattern` is not defined in this snippet; it is expected to be
# a logical vector defined earlier - confirm.
# The pattern is wrapped in a zero-width lookahead so that overlapping
# occurrences are all reported; a plain match resumes searching after the end
# of the previous match and would skip any occurrence that starts inside it.
ix <- gregexpr(
  paste0("(?=", collapse(pattern), ")"),
  collapse(df$history),
  perl = TRUE
)[[1]]

# gregexpr() signals "no match" with -1; drop it so that the subset below is
# empty instead of failing on mixed negative/positive subscripts.
ix <- ix[ix > 0L]

# Expand each start position to all rows covered by that occurrence.
ix <- unique(sort(c(outer(ix, seq_along(pattern) - 1L, "+"))))
df[ix, ]

# Reproducible input: the example table as printed, kept as literal text.
Lines <- "
         value history  
1  0.062500000    TRUE  
2  0.031250000   FALSE  
3  0.020833333    TRUE  
4  0.015625000    TRUE  
5  0.012500000   FALSE  
6  0.010416667    TRUE  
7  0.008928571    TRUE  
8  0.007812500    TRUE  
9  0.006944444   FALSE  
10 0.006250000    TRUE"

# The header line has one field fewer than the data lines, so the first column
# of each data line becomes the row names; `history` is read as logical.
df <- utils::read.table(header = TRUE, text = Lines)

    df <- data.frame(value, history)

    # Positional lag: move the values k places towards the end and pad the head
    # with `fill`. stats::lag() cannot be used for this - it only changes the
    # time base of a series and leaves the values where they are (dplyr::lag()
    # shifts positions, but dplyr is not loaded here).
    lag_by <- function(x, k, fill = NA) c(rep(fill, k), x)[seq_along(x)]

    # n[i] is TRUE when rows i, i + 1, i + 2 read TRUE, FALSE, TRUE. The pasted
    # string at position j describes the window ENDING at row j, so dropping
    # the first two entries re-indexes the result by window start.
    h <- as.character(history)
    n <- (paste(lag_by(h, 2L), lag_by(h, 1L), h, sep = ", ") ==
      "TRUE, FALSE, TRUE")[-(1:2)]

    # A row belongs to a match if a match starts at that row or at one of the
    # two rows before it. No match can start in the last two rows, so they are
    # padded with FALSE; padding with FALSE rather than NA keeps NA out of the
    # row selector.
    starts <- c(n, FALSE, FALSE)[seq_along(h)]
    cond <- starts | lag_by(starts, 1L, FALSE) | lag_by(starts, 2L, FALSE)
    df[cond, ]