R 连接给定距离的二元区间
我有一系列由值1定义的分区，需要把间隔不超过两个单元格的分区连接起来，即把它们之间的0值替换为1。例如，单元格df[11,1]需要替换为1，单元格df[15:16,1]也需要替换为1，而单元格df[21:23,1]应该保持为0。
# Build the 25-row example: Zone is 0 everywhere, then four zones
# (rows 4-10, 12-14, 17-20 and 24-25) are set to 1.
df <- data.frame(Zone = rep(0, 25))
df[4:10, 1] <- 1
df[12:14, 1] <- 1
df[17:20, 1] <- 1
df[24:25, 1] <- 1
df
Zone
1 0
2 0
3 0
4 1
5 1
6 1
7 1
8 1
9 1
10 1
11 0
12 1
13 1
14 1
15 0
16 0
17 1
18 1
19 1
20 1
21 0
22 0
23 0
24 1
25 1
> df <- data.frame("Zone" = 1:25)
> df[1] <- 0
> df[4:10,1] <- 1
> df[12:14,1] <- 1
> df[17:20,1] <- 1
> df[24:25,1] <- 1
> df
Zone
1 0
2 0
3 0
4 1
5 1
6 1
7 1
8 1
9 1
10 1
11 0
12 1
13 1
14 1
15 0
16 0
17 1
18 1
19 1
20 1
21 0
22 0
23 0
24 1
25 1
使用基础R中的rle，我们可以找出Zone中连续为0的游程；如果某个游程的长度小于等于2，则将其中的值改为1：
# Connect zones separated by short gaps: every run of zeros that is at most
# `max_gap` cells long AND lies between two zones is overwritten with 1.
max_gap <- 2

# Run-length encode the "is zero" indicator: one entry per run of equal values.
zero_runs <- rle(df$Zone == 0)
run_id <- seq_along(zero_runs$lengths)

# A zero run at the very start or end of the vector has a zone on one side
# only, so it is not a gap between zones and must be left untouched.
is_interior <- run_id > 1L & run_id < length(run_id)
fill_run <- zero_runs$values & zero_runs$lengths <= max_gap & is_interior

# Expand the per-run decision back to one logical per row and fill.
df$Zone[rep(fill_run, zero_runs$lengths)] <- 1
df
# Zone
#1 0
#2 0
#3 0
#4 1
#5 1
#6 1
#7 1
#8 1
#9 1
#10 1
#11 1
#12 1
#13 1
#14 1
#15 1
#16 1
#17 1
#18 1
#19 1
#20 1
#21 0
#22 0
#23 0
#24 1
#25 1
我们可以使用data.table中的rleid，根据列中相邻元素是否不同来生成游程编码id，然后取出满足条件的行索引（.I），再在i中指定这些索引，把“Zone”更新为1：
library(data.table)

# Longest run of zeros that still counts as a gap to be filled.
max_gap <- 2

# Convert by reference and label each run of identical adjacent values
# (rleid numbers the runs 1, 2, 3, ...).
setDT(df)[, grp := rleid(Zone)]
n_runs <- uniqueN(df$grp)

# Row indices of zero runs that are short enough and lie strictly between two
# other runs; a leading or trailing zero run is not a gap between zones.
i1 <- df[
  ,
  .I[Zone == 0 & .N <= max_gap & grp > 1L & grp < n_runs],
  by = grp
]$V1

# Fill those rows, drop the helper column and print the result.
df[i1, Zone := 1][, grp := NULL][]
# Zone
# 1: 0
# 2: 0
# 3: 0
# 4: 1
# 5: 1
# 6: 1
# 7: 1
# 8: 1
# 9: 1
#10: 1
#11: 1
#12: 1
#13: 1
#14: 1
#15: 1
#16: 1
#17: 1
#18: 1
#19: 1
#20: 1
#21: 0
#22: 0
#23: 0
#24: 1
#25: 1
或者使用基础R中的rle/inverse.rle：
# Same idea via rle()/inverse.rle(): rewrite the value of each short zero run
# and expand the runs back into a full-length vector.
max_gap <- 2
zone_runs <- rle(df$Zone)
run_id <- seq_along(zone_runs$lengths)

# Only zero runs flanked by zones on both sides are gaps; the first and last
# run are excluded so leading/trailing zeros are never filled.
is_gap <- zone_runs$values == 0 &
  zone_runs$lengths <= max_gap &
  run_id > 1L &
  run_id < length(run_id)

zone_runs$values[is_gap] <- 1
inverse.rle(zone_runs)
#[1] 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1
或者使用dplyr：按相邻值的变化对行分组，再把长度不超过2的0值组改为1：
library(dplyr)

# Longest run of zeros that still counts as a gap to be filled.
max_gap <- 2

df %>%
  mutate(
    # Run counter: increases by one whenever Zone differs from the previous
    # row, so the first run is 0 and the last run is `last_grp`.
    grp = cumsum(Zone != lag(Zone, default = first(Zone))),
    last_grp = n_distinct(grp) - 1L
  ) %>%
  group_by(grp) %>%
  mutate(
    # Fill a zero run only when it is short and has a run on both sides;
    # leading/trailing zero runs are not gaps between zones.
    Zone = case_when(
      Zone == 0 & n() <= max_gap & grp > 0 & grp < last_grp ~ 1,
      TRUE ~ Zone
    )
  ) %>%
  ungroup() %>%
  select(-grp, -last_grp)
# Two base R variants that return the filled vector without modifying df.
max_gap <- 2
zone_runs <- rle(df$Zone)
run_id <- seq_along(zone_runs$lengths)

# A gap is a zero run of at most `max_gap` cells with a zone on both sides;
# the first and last run are excluded so edge zeros are never filled.
is_gap <- zone_runs$values == 0 &
  zone_runs$lengths <= max_gap &
  run_id > 1L &
  run_id < length(run_id)

# Variant 1: overwrite the values of the gap runs, then expand the runs.
filled_runs <- zone_runs
filled_runs$values[is_gap] <- 1
inverse.rle(filled_runs)
#[1] 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1

# Variant 2: per-run logical "ends up as 1", expanded with rep(); the unary
# plus converts the logical result to 0/1 integers.
+(rep(is_gap | zone_runs$values, zone_runs$lengths))
# Reproducible example data: 25 rows of a 0/1 Zone indicator, written as
# alternating runs (value, run length) instead of a literal vector.
df <- data.frame(
  Zone = rep(
    c(0, 1, 0, 1, 0, 1, 0, 1),
    times = c(3, 7, 1, 3, 2, 4, 3, 2)
  )
)