R-在给定半径内查找最近邻点和邻数，坐标为lat long_R_Distance_Latitude Longitude

R-在给定半径内查找最近邻点和邻数，坐标为lat long

R-在给定半径内查找最近邻点和邻数，坐标为lat long,r,distance,latitude-longitude,R,Distance,Latitude Longitude,我试图弄清楚在我的数据集中某些点是多么孤立。我使用两种方法来确定隔离度，最近邻居的距离和给定半径内相邻站点的数量。我所有的坐标都是经纬度以下是我的数据： pond lat long area canopy avg.depth neighbor n.lat n.long n.distance n.area n.canopy n.depth n.avg.depth radius1500 A10

我试图弄清楚在我的数据集中某些点是多么孤立。我使用两种方法来确定隔离度，最近邻居的距离和给定半径内相邻站点的数量。我所有的坐标都是经纬度

以下是我的数据：

    pond            lat         long        area    canopy  avg.depth   neighbor    n.lat   n.long  n.distance  n.area  n.canopy    n.depth n.avg.depth radius1500
    A10             41.95928    -72.14605   1500    66      60.61538462                                 
    AA006           41.96431    -72.121     250     0       57.77777778                                 
    Blacksmith      41.95508    -72.123803  361     77      71.3125                                 
    Borrow.Pit.1    41.95601    -72.15419   0       0       41.44444444                                 
    Borrow.Pit.2    41.95571    -72.15413   0       0       37.7                                    
    Borrow.Pit.3    41.95546    -72.15375   0       0       29.22222222                                 
    Boulder         41.918223   -72.14978   1392    98      43.53333333

我想在“邻居”列中输入最近相邻池塘的名称，在n.lat和n.long中输入其lat和long，在n.distance中输入两个池塘之间的距离，在每个适当的列中输入面积、树冠和平均深度

第二，我想把目标池塘1500米范围内的池塘数量放入半径1500

有人知道可以帮助我计算所需距离/数字的函数或包吗？如果这是一个问题，输入我需要的其他数据并不困难，但最近邻居的名字和距离，加上1500米范围内的池塘数量才是我真正需要帮助的

谢谢。

最好的选择是使用库

sp

和

rgeos

，它们使您能够构建空间类并执行地理处理

library(sp)
library(rgeos)

读取数据并将其转换为空间对象：

# Read the tab-delimited pond table; the first line holds the column names.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
mydata <- read.delim("d:/temp/testfile.txt", header = TRUE)

# Work on a copy so `mydata` stays a plain data frame, then turn the copy into
# a spatial object by declaring `long` (x) and `lat` (y) as its coordinates.
sp.mydata <- mydata
coordinates(sp.mydata) <- ~ long + lat

class(sp.mydata)
[1] "SpatialPointsDataFrame"
attr(,"package")
[1] "sp"

这将提供更好的结果，如果点分散在地球上，坐标以度为单位，@Zbynek提出的解决方案非常好，但是如果你想寻找两个邻居之间的距离，或者像我一样以km为单位，我会提出这个解决方案

   # Great-circle (haversine) distance between two points given in decimal
   # degrees.
   #
   # Arguments:
   #   lat1, long1  latitude / longitude of the first point(s), in degrees
   #   lat2, long2  latitude / longitude of the second point(s), in degrees
   # Returns:
   #   the distance in the unit of the radius R used below (kilometres).
   # Every step is vectorised, so equally long coordinate vectors are accepted.
   earth.dist <- function(lat1, long1, lat2, long2) {
     rad <- pi / 180 # degrees -> radians
     a1 <- lat1 * rad
     a2 <- long1 * rad
     b1 <- lat2 * rad
     b2 <- long2 * rad
     dlat <- b1 - a1
     dlon <- b2 - a2

     # Haversine term: squared half-chord length between the two points.
     a <- sin(dlat / 2)^2 + cos(a1) * cos(b1) * sin(dlon / 2)^2
     # Floating-point rounding can push `a` marginally outside [0, 1] for
     # (near-)antipodal points, which would turn sqrt(1 - a) into NaN.
     # `a` is the first argument so that its attributes (names, dim) survive.
     a <- pmin(pmax(a, 0), 1)

     c <- 2 * atan2(sqrt(a), sqrt(1 - a)) # central angle in radians
     R <- 6378.145 # Earth radius in km
     R * c
   }


    # Pairwise great-circle distances between all ponds, computed with
    # earth.dist(). Rows and columns are both labelled with the pond names.
    n.pond <- nrow(mydata) # length(mydata) would count columns, not ponds
    stopifnot(n.pond >= 2) # a nearest neighbour needs at least two ponds
    pond.id <- as.character(mydata$pond)

    Dist <- outer(
      seq_len(n.pond), seq_len(n.pond),
      function(i, j) {
        earth.dist(mydata$lat[i], mydata$long[i], mydata$lat[j], mydata$long[j])
      }
    )
    dimnames(Dist) <- list(pond.id, pond.id)

    # Mask every pond's zero distance to itself so which.min() can never pick
    # the pond itself. Taking the 2nd sorted value instead would fail as soon
    # as two ponds share a location.
    no.self <- Dist
    diag(no.self) <- Inf
    nearest <- vapply(
      seq_len(n.pond),
      function(j) which.min(no.self[, j]),
      integer(1)
    )

    # One row per pond: its nearest neighbour, the distance to it, and the
    # neighbour's attributes. Building a data frame directly keeps numeric
    # columns numeric (filling a matrix would coerce everything to character).
    # The column name "neigboor_ID" is kept as spelled in the original.
    DDD <- data.frame(
      neigboor_ID = pond.id[nearest],
      distance = no.self[cbind(nearest, seq_len(n.pond))],
      pond = pond.id,
      n.area = mydata$area[nearest],
      n.canopy = mydata$canopy[nearest],
      ### You can add any other neighbour variable you want here
      stringsAsFactors = FALSE
    )

    # Attach the neighbour information to the original pond table.
    data <- merge(mydata, DDD, by = "pond")

我使用 spatialrisk 包在下面添加了一个解决方案。这个包中的关键函数是用 C++ 编写的，因此非常快。
首先，加载数据：
# Example pond table from the question: one row per pond, with its position
# (lat / long) followed by the area, canopy and avg.depth measurements.
df <- data.frame(
  pond = c(
    "A10", "AA006", "Blacksmith",
    "Borrow.Pit.1", "Borrow.Pit.2", "Borrow.Pit.3",
    "Boulder"
  ),
  lat = c(
    41.95928, 41.96431, 41.95508,
    41.95601, 41.95571, 41.95546,
    41.918223
  ),
  long = c(
    -72.14605, -72.121, -72.123803,
    -72.15419, -72.15413, -72.15375,
    -72.14978
  ),
  area = c(1500, 250, 361, 0, 0, 0, 1392),
  canopy = c(66, 0, 77, 0, 0, 0, 98),
  avg.depth = c(
    60.61538462, 57.77777778, 71.3125,
    41.44444444, 37.7, 29.22222222,
    43.53333333
  )
)

现在计算目标池塘1500m范围内的池塘数量。函数 spatialrisk::concentration 会对以中心点为圆心的给定半径内的观测值求和。从池塘数量中减去1，以排除池塘本身。
# Give every pond a weight of 1, so that summing `npond` inside a circle
# amounts to counting the ponds in it.
df$npond <- 1  
# For each pond in `df`, sum `npond` over the ponds of `df` that fall within
# `radius` of it, then subtract 1 so that the pond does not count itself.
# NOTE(review): radius is presumably in metres (1500 m, per the surrounding
# text), and the latitude column is presumably found through the function's
# default argument names; confirm both against the spatialrisk documentation.
radius1500 <- spatialrisk::concentration(df, df, npond, lon_sub = long, 
                                         lon_full = long, radius = 1500, 
                                         display_progress = FALSE)$concentration - 1

我在下面添加了一个使用较新的 sf 软件包的替代解决方案，供现在才来到此页面的感兴趣的读者参考（就像我一样）。
首先，加载数据并创建 sf 对象：
# Using sf
# Example pond table from the question, written out as a data frame literal:
# seven rows, integer `area` / `canopy`, automatic row names.
mydata <- structure(
  list(
    pond = c(
      "A10", "AA006", "Blacksmith",
      "Borrow.Pit.1", "Borrow.Pit.2", "Borrow.Pit.3",
      "Boulder"
    ),
    lat = c(
      41.95928, 41.96431, 41.95508,
      41.95601, 41.95571, 41.95546,
      41.918223
    ),
    long = c(
      -72.14605, -72.121, -72.123803,
      -72.15419, -72.15413, -72.15375,
      -72.14978
    ),
    area = c(1500L, 250L, 361L, 0L, 0L, 0L, 1392L),
    canopy = c(66L, 0L, 77L, 0L, 0L, 0L, 98L),
    avg.depth = c(
      60.61538462, 57.77777778, 71.3125,
      41.44444444, 37.7, 29.22222222,
      43.53333333
    )
  ),
  class = "data.frame",
  row.names = c(NA, -7L)
)


library(sf)
# Build an sf object from the pond table: `long` supplies the x coordinate and
# `lat` the y coordinate; the CRS string declares them as WGS84 longitude /
# latitude.
data_sf <- st_as_sf(mydata, coords = c("long", "lat"),
                    # Change to your CRS
                    crs = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
# Sanity check, printed at top level.
# NOTE(review): expected to print TRUE for a longitude/latitude CRS; confirm.
st_is_longlat(data_sf)

要在计算距离后获得最近的邻居，可以使用带 partial = 2 参数的 sort()。根据数据量的不同，这可能比在上一个解决方案中使用 order 要快得多。Rfast 包可能更快，但我避免在这里引入额外的包。有关各种解决方案的讨论和基准测试，请参阅此相关帖子：
在Rfast中，有一个名为“dista”的函数，只计算欧几里德距离或曼哈顿距离（目前）。它提供了计算k-最小距离的选项。或者，它可以返回距离最小的观测值的索引。余弦距离基本上与欧氏距离相同（我认为排除常数2）
 另一个答案，虽然速度可能较慢，但对 dplyr 爱好者可能有直观的吸引力。
您可以创建一个由每个可能的纬度/经度组合组成的巨型网格，然后使用 geosphere 找到距离最小的组合。
在这个例子中，要比较的是两个包含不同点的数据集，但您可以通过复制第一个数据集来轻松地把它调整为单个数据集的情形。
    library(tidyverse)
    library(geosphere)
    library(data.table)

    # Cross-join any number of data frames: the result has one row for every
    # possible combination of rows taken from the inputs.
    expand.grid.df <- function(...) {
      frames <- list(...)
      # merge() with by = NULL pairs every row of `left` with every row of `right`
      cross_join <- function(left, right) merge(left, right, by = NULL)
      Reduce(cross_join, frames)
    }

# For every ACCIDENT_NO, keep the single df1 x df2 combination with the
# smallest haversine distance.
# NOTE(review): assumes one input supplies lon / lat and the other
# lon_2 / lat_2, with ACCIDENT_NO identifying the records to match; confirm
# against the real df1 / df2.
shortest_distance <- expand.grid.df(df1, df2) %>%
  mutate(distance = distHaversine(p1 = cbind(lon_2, lat_2),
                                  p2 = cbind(lon, lat))) %>%
  group_by(ACCIDENT_NO) %>%
  slice(which.min(distance)) %>%
  # Drop the grouping so later verbs do not silently operate per ACCIDENT_NO.
  ungroup()

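library(tidyverse)
library(geosphere)
library(data.table)
# 此函数使用各种可能的组合创建一个大数据帧
expand.grid.df <- function(...) Reduce(function(...) merge(..., by=NULL), list(...))
shortest_distance <- expand.grid.df(df1,df2) %>%
  mutate(distance = distHaversine(p1 = cbind(lon_2,lat_2), p2 = cbind(lon,lat))) %>%
  group_by(ACCIDENT_NO) %>%
  slice(which.min(distance))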
非常感谢。你建议的库正是我需要的！这是一些非常有用和可读的代码，谢谢！然而，我无法将其调整到我略有不同的用例：我需要在两个不同的数据集之间找到最近的点（我有一个 tweet 数据集，我需要每个 tweet 最近的城市）。我应该更改什么？使用以下函数：sort(x[x>0], decreasing=F)[1]
@ike `sort`

将行/列中的值从最小到最大排序，反之亦然。但由于要忽略零距离，首先必须过滤数据-

x[x>0]

。然后对它们进行排序，最后只取排序数组（[1]）中的第一个值。明白了吗？@Nicocalier这取决于坐标系的单位（例如，对于WGS，它只是一个数字，不使用哈弗森距离），无需自行绘制，已经有了

geosphere

软件包-该软件包中的哪个函数可以计算以公里为单位的欧几里德距离？我认为

distm

并且您可以选择精确的公式-默认值为Haversine，但还有更多选项（请参阅手册）有没有可能举个例子？

# Example pond table from the question: one row per pond, with its position
# (lat / long) followed by the area, canopy and avg.depth measurements.
df <- data.frame(
  pond = c(
    "A10", "AA006", "Blacksmith",
    "Borrow.Pit.1", "Borrow.Pit.2", "Borrow.Pit.3",
    "Boulder"
  ),
  lat = c(
    41.95928, 41.96431, 41.95508,
    41.95601, 41.95571, 41.95546,
    41.918223
  ),
  long = c(
    -72.14605, -72.121, -72.123803,
    -72.15419, -72.15413, -72.15375,
    -72.14978
  ),
  area = c(1500, 250, 361, 0, 0, 0, 1392),
  canopy = c(66, 0, 77, 0, 0, 0, 98),
  avg.depth = c(
    60.61538462, 57.77777778, 71.3125,
    41.44444444, 37.7, 29.22222222,
    43.53333333
  )
)

# For every pond (one long / lat pair at a time), look up the ponds of `df`
# within `radius` of it and keep the second row of that result; the per-pond
# rows are bound together into one data frame.
# NOTE(review): taking row 2 presumably relies on points_in_circle() returning
# rows sorted by distance, with the pond itself first at distance 0; confirm.
# The radius (presumably metres) is set large enough to reach every other pond.
ans1 <- purrr::map2_dfr(df$long, df$lat, 
                        ~spatialrisk::points_in_circle(df, .x, .y, 
                                                       lon = long, 
                                                       radius = 100000)[2,])

# Rename the seven result columns so they describe the neighbour rather than
# the pond itself.
colnames(ans1) <- c("neighbor", "n.lat", "n.long", "n.area", 
                    "n.canopy", "n.avg.depth", "distance_m")

      neighbor    n.lat    n.long n.area n.canopy n.avg.depth distance_m
1 Borrow.Pit.1 41.95601 -72.15419      0        0    41.44444  765.87823
2   Blacksmith 41.95508 -72.12380    361       77    71.31250 1053.35200
3        AA006 41.96431 -72.12100    250        0    57.77778 1053.35200
4 Borrow.Pit.2 41.95571 -72.15413      0        0    37.70000   33.76321
5 Borrow.Pit.1 41.95601 -72.15419      0        0    41.44444   33.76321
6 Borrow.Pit.2 41.95571 -72.15413      0        0    37.70000   42.00128
7 Borrow.Pit.3 41.95546 -72.15375      0        0    29.22222 4158.21978

# Give every pond a weight of 1, so that summing `npond` inside a circle
# amounts to counting the ponds in it.
df$npond <- 1  
# For each pond, sum `npond` over the ponds within `radius` of it and subtract
# 1 so that the pond does not count itself.
# NOTE(review): radius is presumably in metres; confirm against the
# spatialrisk documentation.
radius1500 <- spatialrisk::concentration(df, df, npond, lon_sub = long, 
                                         lon_full = long, radius = 1500, 
                                         display_progress = FALSE)$concentration - 1

# Side-by-side table: pond attributes, nearest-neighbour attributes and the
# neighbour count; printed at top level.
cbind(df, ans1, radius1500)

          pond      lat      long area canopy avg.depth     neighbor    n.lat    n.long n.area n.canopy n.avg.depth distance_m radius1500
1          A10 41.95928 -72.14605 1500     66  60.61538 Borrow.Pit.1 41.95601 -72.15419      0        0    41.44444  765.87823          3
2        AA006 41.96431 -72.12100  250      0  57.77778   Blacksmith 41.95508 -72.12380    361       77    71.31250 1053.35200          1
3   Blacksmith 41.95508 -72.12380  361     77  71.31250        AA006 41.96431 -72.12100    250        0    57.77778 1053.35200          1
4 Borrow.Pit.1 41.95601 -72.15419    0      0  41.44444 Borrow.Pit.2 41.95571 -72.15413      0        0    37.70000   33.76321          3
5 Borrow.Pit.2 41.95571 -72.15413    0      0  37.70000 Borrow.Pit.1 41.95601 -72.15419      0        0    41.44444   33.76321          3
6 Borrow.Pit.3 41.95546 -72.15375    0      0  29.22222 Borrow.Pit.2 41.95571 -72.15413      0        0    37.70000   42.00128          3
7      Boulder 41.91822 -72.14978 1392     98  43.53333 Borrow.Pit.3 41.95546 -72.15375      0        0    29.22222 4158.21978          0

# Using sf
# Example pond table from the question, written out as a data frame literal:
# seven rows, integer `area` / `canopy`, automatic row names.
mydata <- structure(
  list(
    pond = c(
      "A10", "AA006", "Blacksmith",
      "Borrow.Pit.1", "Borrow.Pit.2", "Borrow.Pit.3",
      "Boulder"
    ),
    lat = c(
      41.95928, 41.96431, 41.95508,
      41.95601, 41.95571, 41.95546,
      41.918223
    ),
    long = c(
      -72.14605, -72.121, -72.123803,
      -72.15419, -72.15413, -72.15375,
      -72.14978
    ),
    area = c(1500L, 250L, 361L, 0L, 0L, 0L, 1392L),
    canopy = c(66L, 0L, 77L, 0L, 0L, 0L, 98L),
    avg.depth = c(
      60.61538462, 57.77777778, 71.3125,
      41.44444444, 37.7, 29.22222222,
      43.53333333
    )
  ),
  class = "data.frame",
  row.names = c(NA, -7L)
)


library(sf)
# Build an sf object from the pond table: `long` supplies the x coordinate and
# `lat` the y coordinate; the CRS string declares them as WGS84 longitude /
# latitude.
data_sf <- st_as_sf(mydata, coords = c("long", "lat"),
                    # Change to your CRS
                    crs = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
# Sanity check, printed at top level.
# NOTE(review): expected to print TRUE for a longitude/latitude CRS; confirm.
st_is_longlat(data_sf)

# Pairwise distance matrix between all ponds (row i / column j = distance
# between pond i and pond j).
dist.mat <- st_distance(data_sf) # Great Circle distance since in lat/lon

# Number within 1.5km: subtract 1 to exclude the point itself
num.1500 <- apply(dist.mat, 1, function(x) {
  sum(x < 1500) - 1
})

# Distance to the nearest neighbour: the 2nd smallest value in each row, the
# smallest being the pond's zero distance to itself. A partial sort is enough
# because only position 2 has to be in its final place.
nn.dist <- apply(dist.mat, 1, function(x) {
  sort(x, partial = 2)[2]
})
# Row index of that nearest neighbour
nn.index <- apply(dist.mat, 1, function(x) {
  order(x, decreasing = FALSE)[2]
})

# Copy of the pond table with neighbour-style column names: the first column
# becomes "neighbor", every other column gets an "n." prefix. Negative
# indexing stays correct even if the table had a single column (2:ncol()
# would then count backwards).
n.data <- mydata
colnames(n.data)[1] <- "neighbor"
colnames(n.data)[-1] <- paste0("n.", colnames(n.data)[-1])

# One row per pond: its own attributes, its nearest neighbour's attributes,
# the distance to that neighbour, and the number of ponds within 1500.
mydata2 <- data.frame(mydata,
                      n.data[nn.index, , drop = FALSE],
                      n.distance = nn.dist,
                      radius1500 = num.1500)
rownames(mydata2) <- seq_len(nrow(mydata2))

mydata2
          pond      lat      long area canopy avg.depth     neighbor    n.lat    n.long n.area n.canopy
1          A10 41.95928 -72.14605 1500     66  60.61538 Borrow.Pit.1 41.95601 -72.15419      0        0
2        AA006 41.96431 -72.12100  250      0  57.77778   Blacksmith 41.95508 -72.12380    361       77
3   Blacksmith 41.95508 -72.12380  361     77  71.31250        AA006 41.96431 -72.12100    250        0
4 Borrow.Pit.1 41.95601 -72.15419    0      0  41.44444 Borrow.Pit.2 41.95571 -72.15413      0        0
5 Borrow.Pit.2 41.95571 -72.15413    0      0  37.70000 Borrow.Pit.1 41.95601 -72.15419      0        0
6 Borrow.Pit.3 41.95546 -72.15375    0      0  29.22222 Borrow.Pit.2 41.95571 -72.15413      0        0
7      Boulder 41.91822 -72.14978 1392     98  43.53333 Borrow.Pit.3 41.95546 -72.15375      0        0
  n.avg.depth n.distance radius1500
1    41.44444  766.38426          3
2    71.31250 1051.20527          1
3    57.77778 1051.20527          1
4    37.70000   33.69099          3
5    41.44444   33.69099          3
6    37.70000   41.99576          3
7    29.22222 4149.07406          0

    library(tidyverse)
    library(geosphere)
    library(data.table)

    # Cross-join any number of data frames: the result has one row for every
    # possible combination of rows taken from the inputs.
    expand.grid.df <- function(...) {
      frames <- list(...)
      # merge() with by = NULL pairs every row of `left` with every row of `right`
      cross_join <- function(left, right) merge(left, right, by = NULL)
      Reduce(cross_join, frames)
    }

# For every ACCIDENT_NO, keep the single df1 x df2 combination with the
# smallest haversine distance.
# NOTE(review): assumes one input supplies lon / lat and the other
# lon_2 / lat_2, with ACCIDENT_NO identifying the records to match; confirm
# against the real df1 / df2.
shortest_distance <- expand.grid.df(df1, df2) %>%
  mutate(distance = distHaversine(p1 = cbind(lon_2, lat_2),
                                  p2 = cbind(lon, lat))) %>%
  group_by(ACCIDENT_NO) %>%
  slice(which.min(distance)) %>%
  # Drop the grouping so later verbs do not silently operate per ACCIDENT_NO.
  ungroup()