R 有没有办法确定重叠区域而不仅仅是边界的地理位置？_R_Sf_Sp

R 有没有办法确定重叠区域而不仅仅是边界的地理位置？

R 有没有办法确定重叠区域而不仅仅是边界的地理位置？,r,sf,sp,R,Sf,Sp,我试图获取一个邮政编码数据集，并将其限制为芝加哥境内的邮政编码。然而，我尝试的任何合并方式都会捕获太多或太少的邮政编码。下面是一个可复制的示例： ## Load packages library(tigris) library(sf) library(ggplot2) ## Load shapefiles ZIPs <- tigris::zctas(cb = TRUE) ZIPs <- sf::st_as_sf(ZIPs) places <- tigris::places

我试图获取一个邮政编码数据集，并将其限制为芝加哥境内的邮政编码。然而，我尝试的任何合并方式都会捕获太多或太少的邮政编码。下面是一个可复制的示例：

## Load packages
library(tigris)
library(sf)
library(ggplot2)

## Load shapefiles
# ZCTA polygons fetched through tigris (cb = TRUE requests the generalized
# cartographic-boundary version), then coerced to sf.
ZIPs <- tigris::zctas(cb = TRUE)
ZIPs <- sf::st_as_sf(ZIPs)

# Places for state FIPS code "17"; keep only the row(s) named "Chicago".
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
places <- tigris::places(state = "17", cb = TRUE)
chicago <- places[places$NAME == "Chicago", ]
chicago <- sf::st_as_sf(chicago)

## Filter ZIPs to those within Chicago using st_intersects
# st_intersects keeps every ZCTA sharing at least one point with Chicago,
# including ones that only touch the boundary -- so it captures too many ZIPs.
overlap <- st_filter(ZIPs, chicago, .predicate = st_intersects)

## Visualize ZIPs vs Chicago
# Selected ZCTAs drawn with black outlines; Chicago as a translucent blue fill.
ggplot() +
  geom_sf(data = overlap, color = "black", size = 1) +
  geom_sf(data = chicago, color = NA, fill = "blue", alpha = .25)

## 加载包
library(tigris)
library(sf)
library(ggplot2)
## 加载形状文件
我希望有比我更了解情况的人能给出更好的答案，帮助弄清到底发生了什么。目前，我可以提供一个小改进：排除那些 st_touches 返回 TRUE 的 ZCTA。
看来结果中仍然有一些不想要的 ZCTA。您还可以计算每个 ZCTA 与芝加哥的相交面积，看看这些面积有多大（从而了解它们为什么会被返回）——在某些情况下，重叠面积非常小。
# All ZCTAs that share at least one point with Chicago.
overlap <- st_filter(ZIPs, chicago, .predicate = st_intersects)
# Subset of those that merely touch Chicago (shared boundary, no shared
# interior).
overlap_extra <- st_filter(overlap, chicago, .predicate = st_touches)
nrow(overlap_extra) # Will remove 8 ZCTAs that are touching only

# Drop the touching-only ZCTAs with a logical mask. Negative indexing with
# -which(...) would return zero rows if nothing matched, because
# -integer(0) selects nothing rather than everything.
overlap_removed <-
  overlap[!(overlap$ZCTA5CE10 %in% overlap_extra$ZCTA5CE10), ]

# Black outlines: every intersecting ZCTA; red: ZCTAs kept after removing the
# touching-only ones; blue fill: Chicago.
ggplot() +
  geom_sf(data = overlap, color = "black", size = 1) +
  geom_sf(data = overlap_removed, color = "red", fill = "red", alpha = 0.2,
          size = 1) +
  geom_sf(data = chicago, color = NA, fill = "blue", alpha = .25)


# Area of the overlap between each remaining ZCTA and Chicago, one value per
# row of overlap_removed. as.numeric() strips the units class returned by
# st_area(); the thresholds below assume the values are square metres
# (TODO confirm for the CRS in use).
# seq_len() keeps this safe when overlap_removed has zero rows, and vapply()
# guarantees a plain numeric vector.
area_intersections <- vapply(
  seq_len(nrow(overlap_removed)),
  function(i) {
    shared <- st_intersection(
      st_geometry(overlap_removed[i, ]), st_geometry(chicago)
    )
    # sum() maps an empty intersection (no area element) to 0
    sum(as.numeric(st_area(shared)))
  },
  numeric(1)
)
summary(area_intersections)
sum(area_intersections < 1) # 1 has less than 1 m^2 overlap
sum(area_intersections < 1000) # 3 have less than 1000 m^2 (0.001 km^2) overlap

# Small improvement -- if you really want to remove more ZCTAs
# Keep rows whose overlap with Chicago is at least 1000 (area units as
# computed in area_intersections). A logical mask is used instead of
# -which(...): when no element is below the cutoff, -integer(0) would drop
# every row. Rows with an NA area are kept, as which() would have ignored them.
overlap_removed2 <- overlap_removed[
  is.na(area_intersections) | area_intersections >= 1000,
]

# Black outlines: ZCTAs before this extra filter; red: ZCTAs that survive it;
# blue fill: Chicago.
ggplot() +
  geom_sf(data = overlap_removed, color = "black", size = 1) +
  geom_sf(data = overlap_removed2, color = "red", fill = "red", alpha = 0.2,
          size = 1) +
  geom_sf(data = chicago, color = NA, fill = "blue", alpha = .25)

这里我提出了一个函数，可以根据相交面积与原始面积之比是否达到阈值来过滤 ZIPs。
下面是如何使用此函数的示例。threshold = 0.3 似乎效果不错，但您可以根据需要设置任意阈值。
## Load packages
library(tigris)
library(sf)
library(ggplot2)
library(dplyr)

#' Keep features of `x` that lie sufficiently inside `y`
#'
#' For every feature of `x` that intersects `y`, computes the ratio of the
#' intersected area to the feature's own area and keeps the features whose
#' ratio is greater than or equal to `threshold`.
#'
#' @param x An sf object with an identifier column named by `id_col`.
#' @param y An sf object describing the target region.
#' @param threshold Minimum share (single number) of a feature's area that
#'   must fall inside `y` for the feature to be kept. Defaults to 0.3.
#' @param id_col Name of the column in `x` that identifies each feature.
#'   Defaults to "ZCTA5CE10".
#' @return The matching features of `x` with columns `id_col`, `Area`,
#'   `Area_Inter`, `Area_Ratio` and the geometry.
intersection_area <- function(x, y, threshold = 0.3, id_col = "ZCTA5CE10") {
  stopifnot(
    is.numeric(threshold), length(threshold) == 1L, !is.na(threshold),
    is.character(id_col), length(id_col) == 1L, id_col %in% names(x)
  )

  # Dissolve y into a single geometry so each feature of x yields at most one
  # intersection piece; otherwise a multi-feature y would duplicate rows in
  # the join below and split the intersected area across them.
  y_geom <- st_union(st_geometry(y))

  # Only features that intersect y can pass the filter, so restrict to them
  # before doing the comparatively expensive intersection.
  candidates <- x %>%
    st_filter(y, .predicate = st_intersects) %>%
    mutate(Area = as.numeric(st_area(.))) %>%
    select(all_of(id_col), Area)

  # Plain (non-spatial) lookup table: identifier -> intersected area.
  inter_area <- candidates %>%
    st_intersection(y_geom) %>%
    mutate(Area_Inter = as.numeric(st_area(.))) %>%
    st_set_geometry(NULL) %>%
    select(all_of(id_col), Area_Inter)

  # Features without an intersection piece get NA for Area_Inter after the
  # join and are dropped by filter(), which discards NA conditions.
  candidates %>%
    left_join(inter_area, by = id_col) %>%
    mutate(Area_Ratio = Area_Inter / Area) %>%
    filter(Area_Ratio >= threshold)
}

# ZCTAs that pass the default area-ratio threshold against Chicago
overlap <- intersection_area(x = ZIPs, y = chicago)

## Plot the retained ZCTAs (black outlines) over Chicago (blue fill)
ggplot() +
  geom_sf(data = overlap, size = 1, color = "black") +
  geom_sf(data = chicago, alpha = 0.25, fill = "blue", color = NA)

## 加载包
library(tigris)
library(sf)
library(ggplot2)
library(dplyr)
# 一个可以根据相交面积与原始面积之比过滤 ZIP（ZCTA）的函数
# 阈值的默认值设置为 0.3
# 如果比率大于或等于 0.3，则保留该 ZIP
交叉口面积%
st_集_几何体（空）
x2%
st_过滤器（y，.谓词=st_相交）%>%
变异（面积=数值（st_面积（%））%>%
选择（ZCTA5CE10，面积）%>%
左联合（z2，by=“ZCTA5CE10”）%>%
突变（面积比=面积比/面积）%>%
过滤器（面积比>=阈值）
返回（x2）
}
我找到了另一个选项：在 st_filter 中使用自定义谓词函数

#' Predicate: y features covering more than `threshold` of each x feature
#'
#' Intended for use as `.predicate` in `st_filter()`. For every feature of
#' `x`, returns the indices of the features of `y` whose intersection with it
#' covers a share of its area strictly greater than `threshold`.
#'
#' @param x,y sf or sfc objects.
#' @param threshold Minimum share of the x feature's area (exclusive).
#' @return A list with one integer vector of `y` indices per feature of `x`.
st_overlaps_with_threshold <- function(x, y, threshold) {
  # Work on bare geometries so both sf and sfc inputs can be indexed the
  # same way.
  x_geom <- st_geometry(x)
  y_geom <- st_geometry(y)
  candidates <- st_intersects(x_geom, y_geom)

  lapply(seq_along(candidates), function(ix) {
    hits <- candidates[[ix]]
    if (length(hits) == 0L) {
      return(integer(0))
    }
    x_area <- as.numeric(st_area(x_geom[ix]))
    # One intersection per candidate keeps the areas aligned with `hits`
    # even if an intersection comes back empty (sum() maps it to 0).
    shared <- vapply(
      hits,
      function(iy) {
        sum(as.numeric(suppressMessages(
          st_area(st_intersection(x_geom[ix], y_geom[iy]))
        )))
      },
      numeric(1)
    )
    hits[which(shared / x_area > threshold)]
  })
}

# Keep ZCTAs with more than 5% of their area inside Chicago.
overlap <- st_filter(ZIPs, chicago, .predicate = st_overlaps_with_threshold, threshold = .05)

st_与_threshold=函数（x，y，threshold）重叠{
int=st_相交（x，y）
lapply（顺时针方向），函数（ix）
if（长度（int[[ix]]））
int[[ix]][其中（以数字形式显示）（st_区域（st_交叉点（x[ix，]），y[int[[ix]]，]）/st_区域（x[ix，]））>阈值]
其他的
整数（0）
)
}
除了下面的评论之外，我发现这个讨论帖也很有帮助（它展示了如何使用自定义谓词函数解决这个问题，如果您更喜欢这种方式的话）
#' Predicate: y features covering more than `threshold` of each x feature
#'
#' Intended for use as `.predicate` in `st_filter()`. For every feature of
#' `x`, returns the indices of the features of `y` whose intersection with it
#' covers a share of its area strictly greater than `threshold`.
#'
#' @param x,y sf or sfc objects.
#' @param threshold Minimum share of the x feature's area (exclusive).
#' @return A list with one integer vector of `y` indices per feature of `x`.
st_overlaps_with_threshold <- function(x, y, threshold) {
  # Work on bare geometries so both sf and sfc inputs can be indexed the
  # same way.
  x_geom <- st_geometry(x)
  y_geom <- st_geometry(y)
  candidates <- st_intersects(x_geom, y_geom)

  lapply(seq_along(candidates), function(ix) {
    hits <- candidates[[ix]]
    if (length(hits) == 0L) {
      return(integer(0))
    }
    x_area <- as.numeric(st_area(x_geom[ix]))
    # One intersection per candidate keeps the areas aligned with `hits`
    # even if an intersection comes back empty (sum() maps it to 0).
    shared <- vapply(
      hits,
      function(iy) {
        sum(as.numeric(suppressMessages(
          st_area(st_intersection(x_geom[ix], y_geom[iy]))
        )))
      },
      numeric(1)
    )
    hits[which(shared / x_area > threshold)]
  })
}

# Keep ZCTAs with more than 5% of their area inside Chicago.
overlap <- st_filter(ZIPs, chicago, .predicate = st_overlaps_with_threshold, threshold = .05)