R 连接给定距离的二元区间

R 连接给定距离的二元区间,r,R,我有一系列由值1定义的分区,我需要通过将这些0值替换为1来连接空间小于两个单元格的分区。例如,单元格df[11,1]需要替换为1,单元格df[15:16,1]需要替换为1,而单元格df[21:23,1]应该保持为0 > df <- data.frame("Zone" = 1:25) > df[1] <- 0 > df <- data.frame("Zone" = 1:25) > df[1] <- 0 > df[4:10,1] <-

我有一系列由值 1 定义的分区,需要把相邻分区之间长度不超过两个单元格的间隙(值为 0)替换为 1,从而把这些分区连接起来。例如,单元格 df[11,1] 需要替换为 1,单元格 df[15:16,1] 也需要替换为 1,而单元格 df[21:23,1](间隙长度为 3)应保持为 0。

> df <-  data.frame("Zone" = 1:25)
> df[1] <- 0
> df[4:10,1] <- 1
> df[12:14,1] <- 1
> df[17:20,1] <- 1
> df[24:25,1] <- 1
> df
   Zone
1     0
2     0
3     0
4     1
5     1
6     1
7     1
8     1
9     1
10    1
11    0
12    1
13    1
14    1
15    0
16    0
17    1
18    1
19    1
20    1
21    0
22    0
23    0
24    1
25    1
> df <- data.frame("Zone" = 1:25)
> df[1] <- 0
> df[4:10,1] <- 1
> df[12:14,1] <- 1
> df[17:20,1] <- 1
> df[24:25,1] <- 1
> df
   Zone
1     0
2     0
3     0
4     1
5     1
6     1
7     1
8     1
9     1
10    1
11    0
12    1
13    1
14    1
15    0
16    0
17    1
18    1
19    1
20    1
21    0
22    0
23    0
24    1
25    1

使用基础 R 中的 `rle`,我们可以检查 `Zone` 列中连续的 0 值;如果某一段连续 0 的长度小于等于 2,则将其改为 1。

# Run-length encode the "is zero" indicator of the Zone column.
zero_runs <- rle(df$Zone == 0)
# Flag runs that are zeros and span at most two cells.
short_gap <- zero_runs$values & zero_runs$lengths <= 2
# Expand the per-run flag to one flag per row and set the flagged rows to 1.
fill_rows <- rep(short_gap, zero_runs$lengths)
df$Zone[fill_rows] <- 1
df

#   Zone
#1     0
#2     0
#3     0
#4     1
#5     1
#6     1
#7     1
#8     1
#9     1
#10    1
#11    1
#12    1
#13    1
#14    1
#15    1
#16    1
#17    1
#18    1
#19    1
#20    1
#21    0
#22    0
#23    0
#24    1
#25    1

我们可以使用 `data.table` 中的 `rleid`,根据列中相邻元素是否发生变化得到游程编码 id;然后按该 id 分组,取出满足条件的行索引(`.I`),再在 `i` 中指定这些索引,把对应行的 `Zone` 更新为 1。

library(data.table)
# Convert df to a data.table by reference and label each run of equal Zone values.
setDT(df)
df[, grp := rleid(Zone)]
# Per run, collect the row numbers of zero-runs that contain at most two rows.
i1 <- df[, .I[Zone == 0 & .N <= 2], by = grp]$V1

# Set those rows to 1, drop the helper column, and print the result.
df[i1, Zone := 1]
df[, grp := NULL]
df[]
#    Zone
# 1:    0
# 2:    0
# 3:    0
# 4:    1
# 5:    1
# 6:    1
# 7:    1
# 8:    1
# 9:    1
#10:    1
#11:    1
#12:    1
#13:    1
#14:    1
#15:    1
#16:    1
#17:    1
#18:    1
#19:    1
#20:    1
#21:    0
#22:    0
#23:    0
#24:    1
#25:    1

或者使用基础 R 中的 `rle`/`inverse.rle`:

# Encode Zone as runs, relabel zero-runs of at most two cells as 1, then decode.
zone_rle <- rle(df$Zone)
is_short_gap <- zone_rle$values == 0 & zone_rle$lengths <= 2
zone_rle$values[is_short_gap] <- 1
inverse.rle(zone_rle)
#[1] 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1
inverse.rle(within.list(rle(df$Zone), values[values == 0 & lengths <= 2] <- 1))
library(dplyr)
# Label each run of equal Zone values: the counter increases whenever a value
# differs from the previous row (the first row is compared with itself).
df %>%
  mutate(grp = cumsum(Zone != lag(Zone, default = first(Zone)))) %>%
  group_by(grp) %>%
  # Within a run, zeros become 1 when the run holds at most two rows.
  mutate(Zone = if_else(Zone == 0 & n() <= 2, 1, Zone)) %>%
  ungroup() %>%
  select(-grp)
# Decode a copy of Zone's run-length encoding in which zero-runs of at most
# two cells have had their value replaced by 1.
encoded <- rle(df$Zone)
encoded$values <- replace(
  encoded$values,
  encoded$values == 0 & encoded$lengths <= 2,
  1
)
inverse.rle(encoded)
#[1] 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1
# A run ends up as 1 when it is already non-zero, or when it is a zero-run of
# at most two cells; expand the per-run flag to rows and return it as 0/1 integers.
zone_runs <- rle(df$Zone)
becomes_one <- zone_runs$values | (!zone_runs$values & zone_runs$lengths <= 2)
as.integer(rep(becomes_one, zone_runs$lengths))
# Reproducible sample data: 25 rows of 0/1 zone flags, written as
# (value, run length) pairs.
df <- data.frame(
  Zone = rep(c(0, 1, 0, 1, 0, 1, 0, 1), times = c(3, 7, 1, 3, 2, 4, 3, 2))
)