R 有没有办法确定重叠区域而不仅仅是边界的地理位置?

R 有没有办法确定重叠区域而不仅仅是边界的地理位置?,r,sf,sp,R,Sf,Sp,我试图获取一个邮政编码数据集,并将其限制为芝加哥境内的邮政编码。然而,我尝试的任何合并方式都会捕获太多或太少的邮政编码。下面是一个可复制的示例: ## Load packages library(tigris) library(sf) library(ggplot2) ## Load shapefiles ZIPs <- tigris::zctas(cb = TRUE) ZIPs <- sf::st_as_sf(ZIPs) places <- tigris::places

我试图获取一个邮政编码数据集,并将其限制为芝加哥境内的邮政编码。然而,我尝试的任何合并方式都会捕获太多或太少的邮政编码。下面是一个可复制的示例:

## Load packages
library(tigris)
library(sf)
library(ggplot2)

## Load shapefiles
# All ZCTAs (cartographic boundary version), converted to an sf object.
ZIPs <- tigris::zctas(cb = TRUE)
ZIPs <- sf::st_as_sf(ZIPs)

# Places in state FIPS "17"; keep only the row named "Chicago".
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
places <- tigris::places(state = "17", cb = TRUE)
chicago <- places[places$NAME == "Chicago", ]
chicago <- sf::st_as_sf(chicago)

## Keep every ZCTA that intersects Chicago.
## Using st_intersects as the predicate captures too many ZIPs.
overlap <- st_filter(x = ZIPs, y = chicago, .predicate = st_intersects)

## Draw the selected ZCTAs with the Chicago footprint shaded on top
ggplot() +
  list(
    geom_sf(data = overlap, size = 1, color = "black"),
    geom_sf(data = chicago, alpha = .25, fill = "blue", color = NA)
  )
##加载包
图书馆(底格里斯)
图书馆(sf)
图书馆(GG2)
##加载形状文件

我希望比我更了解这方面的人能给出更好的答案,帮助弄清楚到底发生了什么。目前,我可以通过排除那些
st_touches
返回 TRUE 的 ZCTA(即仅与芝加哥边界相接的 ZCTA)来稍作改进。不过看起来仍然会留下一些不想要的 ZCTA。您还可以计算每个 ZCTA 与芝加哥相交部分的面积,以了解这些 ZCTA 为什么会被返回——在某些情况下,重叠的面积其实非常小。

# ZCTAs that intersect Chicago in any way (interior overlap or shared border).
overlap <- st_filter(ZIPs, chicago, .predicate = st_intersects)
# Subset of those that only touch Chicago's boundary.
overlap_extra <- st_filter(overlap, chicago, .predicate = st_touches)
nrow(overlap_extra) # Will remove 8 ZCTAs that are touching only

# Drop the touching-only ZCTAs with a logical mask. A negative `which()`
# index would return zero rows whenever nothing matches, because
# `-integer(0)` selects nothing instead of everything.
touches_only <- overlap$ZCTA5CE10 %in% overlap_extra$ZCTA5CE10
overlap_removed <- overlap[!touches_only, ]

# Black outline: all intersecting ZCTAs; red: ZCTAs kept after the removal.
ggplot() +
  geom_sf(data = overlap, color = "black", size = 1) +
  geom_sf(data = overlap_removed, color = "red", fill = "red", alpha = 0.2,
          size = 1) +
  geom_sf(data = chicago, color = NA, fill = "blue", alpha = .25)


# Area of the part of each remaining ZCTA that lies inside Chicago, in the
# units reported by st_area() (the counts below treat them as m^2).
# vapply() over seq_len() copes with zero rows, and sum() turns an empty
# intersection into 0 instead of failing on a zero-length assignment.
chicago_geom <- st_geometry(chicago)
area_intersections <- vapply(
  seq_len(nrow(overlap_removed)),
  function(i) {
    piece <- st_intersection(st_geometry(overlap_removed[i, ]), chicago_geom)
    sum(as.numeric(st_area(piece)))
  },
  numeric(1)
)
summary(area_intersections)
sum(area_intersections < 1) # 1 has less than 1 m^2 overlap
sum(area_intersections < 1000) # 3 have less than 1000 m^2 overlap

# Small improvement -- if you really want to remove more ZCTAs
# Keep every row whose overlap is not below 1000. Building the kept row
# positions with setdiff() avoids `x[-which(cond), ]`, which would return
# zero rows when no overlap is below the cutoff.
too_small <- which(area_intersections < 1000)
keep_rows <- setdiff(seq_len(nrow(overlap_removed)), too_small)
overlap_removed2 <- overlap_removed[keep_rows, ]

# Black outline: ZCTAs before this step; red: ZCTAs kept after it.
ggplot() +
  geom_sf(data = overlap_removed, color = "black", size = 1) +
  geom_sf(data = overlap_removed2, color = "red", fill = "red", alpha = 0.2,
          size = 1) +
  geom_sf(data = chicago, color = NA, fill = "blue", alpha = .25)

这里我提出了一个函数,它根据相交面积与原始面积之比是否达到阈值来过滤
ZIPs
。下面是该函数的使用示例。
threshold = 0.3
的效果似乎不错,但您可以根据需要设置任意阈值。

## Load packages
library(tigris)
library(sf)
library(ggplot2)
library(dplyr)

# Filter ZIPs by the ratio of intersected area to original area.
#
# Arguments:
#   x         sf object of ZCTAs; must contain a ZCTA5CE10 column.
#   y         sf or sfc object describing the target area (e.g. a city).
#   threshold Minimum Area_Inter / Area ratio (default 0.3). Rows with a
#             ratio larger than or equal to the threshold are kept.
#
# Returns the rows of `x` that intersect `y`, reduced to the columns
# ZCTA5CE10, Area, Area_Inter and Area_Ratio (plus geometry), and filtered
# by the threshold.
intersection_area <- function(x, y, threshold = 0.3) {
  # Dissolve `y` into a single geometry. With several features in `y`,
  # st_intersection() would return one row per (x, y) pair and the join
  # below would duplicate ZCTAs, each carrying only a partial ratio.
  target <- st_union(st_geometry(y))

  # Only ZCTAs that intersect the target can pass, so clip just those.
  candidates <- x %>%
    st_filter(target, .predicate = st_intersects)

  inter_area <- candidates %>%
    st_intersection(target) %>%
    mutate(Area_Inter = as.numeric(st_area(.))) %>%
    select(ZCTA5CE10, Area_Inter) %>%
    st_set_geometry(NULL)

  # A ZCTA without a matching intersection row gets an NA ratio and is
  # dropped by filter().
  candidates %>%
    mutate(Area = as.numeric(st_area(.))) %>%
    select(ZCTA5CE10, Area) %>%
    left_join(inter_area, by = "ZCTA5CE10") %>%
    mutate(Area_Ratio = Area_Inter / Area) %>%
    filter(Area_Ratio >= threshold)
}

# Apply the area-ratio filter with its default threshold
overlap <- intersection_area(x = ZIPs, y = chicago)

## Draw the retained ZCTAs with the Chicago footprint shaded on top
ggplot() +
  list(
    geom_sf(data = overlap, size = 1, color = "black"),
    geom_sf(data = chicago, alpha = .25, fill = "blue", color = NA)
  )
##加载包
图书馆(底格里斯)
图书馆(sf)
图书馆(GG2)
图书馆(dplyr)
#一个可以根据相交面积与原始面积之比过滤拉链的函数
#阈值的默认值设置为0.3
#如果比率大于或等于0.3,则该拉链将保持不变
交叉口面积%
st_集_几何体(空)
x2%
st_过滤器(y,.谓词=st_相交)%>%
变异(面积=数值(st_面积(%))%>%
选择(ZCTA5CE10,面积)%>%
左联合(z2,by=“ZCTA5CE10”)%>%
突变(面积比=面积比/面积)%>%
过滤器(面积比>=阈值)
返回(x2)
}

我找到了另一个选项:在
st_filter
中使用自定义谓词函数,按重叠面积所占的比例进行筛选:

# Custom predicate for st_filter(): for each feature of `x`, return the
# indices of the features of `y` whose intersection covers more than
# `threshold` of that x feature's area.
#
# Arguments:
#   x, y       sf or sfc objects.
#   threshold  Area ratio (intersection area / x area) that must be exceeded.
#
# Returns a list with one integer vector of `y` indices per feature of `x`
# (integer(0) when nothing qualifies).
st_overlaps_with_threshold <- function(x, y, threshold) {
  # Work on bare geometries so both sf and sfc inputs are accepted.
  x_geom <- st_geometry(x)
  y_geom <- st_geometry(y)
  hits <- st_intersects(x_geom, y_geom)

  lapply(seq_along(hits), function(ix) {
    candidates <- hits[[ix]]
    if (length(candidates) == 0) {
      return(integer(0))
    }
    x_area <- as.numeric(st_area(x_geom[ix]))
    # One ratio per candidate, computed pair by pair. A single multi-feature
    # st_intersection() call can drop empty results, which would shift the
    # ratios relative to `candidates`.
    ratios <- vapply(candidates, function(iy) {
      piece <- suppressMessages(st_intersection(x_geom[ix], y_geom[iy]))
      sum(as.numeric(st_area(piece))) / x_area
    }, numeric(1))
    candidates[which(ratios > threshold)]
  })
}

# Keep ZCTAs with more than 5% of their area inside Chicago.
overlap <- st_filter(ZIPs, chicago, .predicate = st_overlaps_with_threshold, threshold = .05)
st_与_threshold=函数(x,y,threshold)重叠{
int=st_相交(x,y)
lapply(顺时针方向),函数(ix)
if(长度(int[[ix]]))
int[[ix]][其中(以数字形式显示)(st_区域(st_交叉点(x[ix,]),y[int[[ix]],])/st_区域(x[ix,]))>阈值]
其他的
整数(0)
)
}

除了下面的评论之外,我发现这个讨论帖也很有帮助(它还展示了如何使用自定义谓词函数来解决这个问题,如果您更喜欢这种方式的话):
# Custom predicate for st_filter(): for each feature of `x`, return the
# indices of the features of `y` whose intersection covers more than
# `threshold` of that x feature's area.
#
# Arguments:
#   x, y       sf or sfc objects.
#   threshold  Area ratio (intersection area / x area) that must be exceeded.
#
# Returns a list with one integer vector of `y` indices per feature of `x`
# (integer(0) when nothing qualifies).
st_overlaps_with_threshold <- function(x, y, threshold) {
  # Work on bare geometries so both sf and sfc inputs are accepted.
  x_geom <- st_geometry(x)
  y_geom <- st_geometry(y)
  hits <- st_intersects(x_geom, y_geom)

  lapply(seq_along(hits), function(ix) {
    candidates <- hits[[ix]]
    if (length(candidates) == 0) {
      return(integer(0))
    }
    x_area <- as.numeric(st_area(x_geom[ix]))
    # One ratio per candidate, computed pair by pair. A single multi-feature
    # st_intersection() call can drop empty results, which would shift the
    # ratios relative to `candidates`.
    ratios <- vapply(candidates, function(iy) {
      piece <- suppressMessages(st_intersection(x_geom[ix], y_geom[iy]))
      sum(as.numeric(st_area(piece))) / x_area
    }, numeric(1))
    candidates[which(ratios > threshold)]
  })
}

# Keep ZCTAs with more than 5% of their area inside Chicago.
overlap <- st_filter(ZIPs, chicago, .predicate = st_overlaps_with_threshold, threshold = .05)