Sql 使用R根据日期和几何图形相交两个sf data.frames

Sql 使用R根据日期和几何图形相交两个sf data.frames,sql,r,dataframe,spatial,sf,Sql,R,Dataframe,Spatial,Sf,因此,我有两个R sf data.frames,一个包含数百万个线串几何体vsr_段:见下文,另一个包含5个多边形vsr_区域:见下文。每个线条字符串都有一个日期时间,每个多边形都有一个唯一的日期范围 我试图根据linestring日期时间是否在特定多边形的日期范围内,将linestring数据框与polygon data.frame相交。 基本上,如果此linestring datetime在任何多边形的日期范围内,请执行与相应多边形的相交,并返回相交的linestring的sf data.f

因此,我有两个R sf data.frames,一个包含数百万个线串几何体vsr_段:见下文,另一个包含5个多边形vsr_区域:见下文。每个线条字符串都有一个日期时间,每个多边形都有一个唯一的日期范围

我试图根据linestring日期时间是否在特定多边形的日期范围内,将linestring数据框与polygon data.frame相交。 基本上,如果此linestring datetime在任何多边形的日期范围内,请执行与相应多边形的相交,并返回相交的linestring的sf data.frame

我有一个sql查询,基本上就是这样做的,但这只适用于我的postgres数据库

资料来源:

我很好奇是否有更好的方法。这很简单,但当新数据进来时,我必须删除表,创建一个新表,然后创建新索引

我希望有一种方法,只在日期范围内运行这个datetime,与新数据sf data.frame进行空间交集,使用约5000行字符串,并将结果data.frame附加到现有的数据库表中。 有没有办法做到这一点?我已经尝试过sqldf使用我的r data.frames执行下面的查询,而不是在我的数据库上执行查询。如蒙协助,将不胜感激

query = ("CREATE TABLE vsr_segments AS
            SELECT
            s.name, s.mmsi, s.speed,
            s.seg_mins, s.seg_km,
            s.seg_kmhr, s.seg_knots, s.speed_diff,
            s.year, s.beg_dt, s.end_dt,
            s.beg_lon, s.beg_lat,
            s.end_lon, s.end_lat, z.gid,
            CASE
            WHEN
            ST_CoveredBy(s.geometry, z.geom)
            THEN s.geometry
            ELSE
            ST_Multi(
            ST_Intersection(s.geometry, z.geom)
            ) END AS geometry
            FROM ais_segments AS s
            INNER JOIN vsr_zones AS z
            ON ST_Intersects(s.geometry, z.geom)
            WHERE
            s.datetime::date <= z.date_end AND
            s.datetime >= z.date_beg;")

    dbExecute(con, query)

样本数据

vsr_segments <- structure(list(
datetime = structure(c(1573348510.52, 1573348830.935, 
1573349296.305, 1573349746.216, 1573349840.846, 1573350013.303, 
1573350371.104, 1573350793.237, 1573350929.837, 1573351206.262, 
1573351530.493, 1573351598.156, 1573351686.598, 1573353232.418, 
1573353368.013, 1573353476.023, 1573354582.045, 1573355374.706, 
1573355522.445, 1573355611.793), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), 
name = c("ALAN T", "ALAN T", "ALAN T", "ALAN T", 
"ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", 
"ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", 
"ALAN T", "ALAN T"), 
geometry = structure(list(structure(c(-119.498252, 
-119.49837, 34.375007, 34.37505), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.49837, -119.498255, 34.37505, 
34.374992), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg"
)), structure(c(-119.498255, -119.498193, 34.374992, 34.374958
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.498193, 
-119.498303, 34.374958, 34.375055), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498303, -119.498337, 
34.375055, 34.375078), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", 
"sfg")), structure(c(-119.498337, -119.49841, 34.375078, 34.375062
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.49841, 
-119.498435, 34.375062, 34.375055), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498435, -119.498368, 
34.375055, 34.375092), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", 
"sfg")), structure(c(-119.498368, -119.498357, 34.375092, 34.375058
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.498357, 
-119.498292, 34.375058, 34.375048), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498325, -119.498342, 
34.375035, 34.375053), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", 
"sfg")), structure(c(-119.498342, -119.498427, 34.375053, 34.375072
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.498427, 
-119.49849, 34.375072, 34.375062), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498505, -119.498617, 
34.375062, 34.375048), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", 
"sfg")), structure(c(-119.498617, -119.498602, 34.375037, 34.375027
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.498602, 
-119.498607, 34.375027, 34.375028), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498607, -119.498267, 
34.375028, 34.374993), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", 
"sfg")), structure(c(-119.498267, -119.4989, 34.374993, 34.374715
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.4989, 
-119.498898, 34.374715, 34.374748), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498898, -119.4989, 34.374748, 
34.374723), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg"
))), class = c("sfc_LINESTRING", "sfc"), precision = 0, bbox = structure(c(xmin = -119.4989, 
ymin = 34.374715, xmax = -119.498193, ymax = 34.375092), class = "bbox"), crs = structure(list(
    epsg = 4326L, proj4string = "+proj=longlat +datum=WGS84 +no_defs"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
20L), class = c("sf", "data.table", "data.frame"), sf_column = "geometry", agr = structure(c(datetime = NA_integer_, 
name = NA_integer_), .Label = c("constant", "aggregate", "identity"
), class = "factor"))


vsr_zones <- structure(list(
gid = c(5L, 1L, 2L, 3L, 4L), 
vsr_category = c("2019", 
"2017v1", "2017v2", "2017v3", "2018"), 
date_beg = structure(c(18031, 
17318, 17348, 17508, 17686), class = "Date"), 
date_end = structure(c(18215, 
17347, 17507, 17590, 17896), class = "Date"), 
date_range = structure(c("(\"2019-05-15 00:00:00+00\",\"2019-11-15 23:59:59.99+00\")", 
"(\"2017-07-01 00:00:00+00\",\"2017-12-07 23:59:59.99+00\")", 
"(\"2017-07-01 00:00:00+00\",\"2017-12-07 23:59:59.99+00\")", 
"(\"2017-12-08 00:00:00+00\",\"2018-02-28 23:59:59.99+00\")", 
"(\"2018-06-04 00:00:00+00\",\"2018-12-31 23:59:59.99+00\")"), class = "pq_tstzrange"), 
    rec_speed_knots = c(10L, 10L, 10L, 10L, 10L), 
geom = structure(list(
        structure(list(list(structure(c(-120.632908463866, -120.604354931796, 
        -120.58151210614, -120.55676571168, -120.530115748414, 
        -120.509176491563, -120.492044372321, -120.476815821884, 
        -120.465394409056, -120.429226601767, -120.383540950455, 
        -120.343566005557, -120.294073216636, -120.252194702933, 
        -120.223641170863, -120.179859088356, -120.126559161826, 
        -120.084680648123, -120.058030684857, -120.037091428006, 
        -120.021862877569, -120.006634327132, -119.95714153821, 
        -119.922877299726, -119.877191648414, -119.844830978735, 
        -119.825795290689, -119.791531052205, -119.709677593604, 
        -119.686834767948, -119.639245547831, -119.58975275891, 
        -119.56119922684, -119.53645283238, -119.483152905849, 
        -119.458406511388, -119.439370823342, -119.38987803442, 
        -119.372745915178, -119.30802457582, -119.290892456578, 
        -119.266146062117, -119.22236397961, -119.207135429173, 
        -119.121474832963, -119.091017732088, -119.037717805557, 
        -119.005357135878, -118.933021521301, -118.847360925091, 
        -118.799771704974, -118.784543154537, -118.735050365616, 
        -118.681750439085, -118.664618319843, -118.613221962117, 
        -118.544693485149, -118.491393558619, -118.451418613721, 
        -118.390504411972, -118.388600843167, -118.400022255995, 
        -118.424768650456, -118.405732962409, -118.36956515512, 
        -118.280097421301, -118.247736751622, -118.186822549873, 
        -118.186822549873, -118.165883293021, -118.110679797686, 
        -118.064994146374, -118.019308495062, -117.964104999727, 
        -117.906997935587, -117.85940871547, -117.808012357744, 
        -117.781362394479, -117.710930348707, -117.680473247832, 
        -117.638594734129, -117.587198376403, -117.571969825966, 
        -117.505344917803, -117.480260689924, -121.029936561572, 
        -121.029936561572, -120.646233445499, -120.632908463866, 
        34.5567050238998, 34.5528978862905, 34.5548014550951, 
        34.5395729046579, 34.5319586294392, 34.5205372166113, 
        34.4900801157366, 34.4786587029085, 34.44249089562, 34.448201602034, 
        34.4596230148619, 34.4577194460573, 34.4691408588853, 
        34.4615265836666, 34.4748515652994, 34.4672372900806, 
        34.4710444276899, 34.4577194460573, 34.4596230148619, 
        34.4596230148619, 34.4577194460573, 34.4596230148619, 
        34.4329730515967, 34.431069482792, 34.4082266571361, 
        34.4006123819174, 34.4120337947453, 34.4158409323548, 
        34.391094537894, 34.4082266571361, 34.41393736355, 34.4158409323548, 
        34.4101302259407, 34.3968052443081, 34.3777695562615, 
        34.3720588498474, 34.35683029941, 34.3168553545121, 34.3187589233168, 
        34.273073272005, 34.273073272005, 34.244519739935, 34.1436305932877, 
        34.1455341620923, 34.0979449419757, 34.0960413731712, 
        34.078909253929, 34.0655842722966, 34.0408378778358, 
        34.0313200338124, 33.9951522265239, 34.01799505218, 34.0313200338124, 
        34.0294164650079, 34.0370307402266, 34.0332236026171, 
        34.0370307402266, 34.0046700705472, 33.9589844192354, 
        33.8352524469322, 33.8143131900808, 33.8047953460574, 
        33.7781453827922, 33.7343633002849, 33.7362668690895, 
        33.7039061994104, 33.7457847131129, 33.7381704378942, 
        33.7591096947457, 33.7610132635502, 33.7438811443082, 
        33.711520474629, 33.6677383921216, 33.6258598784191, 
        33.6011134839584, 33.5839813647164, 33.5478135574277, 
        33.5382957134045, 33.4583458236086, 33.4583458236086, 
        33.4355029979527, 33.3783959338127, 33.3783959338127, 
        33.3365174201101, 33.299879212642, 33.299879212642, 34.5736940817683, 
        34.5738371431418, 34.5567050238998), .Dim = c(89L, 2L
        )))), class = c("XY", "MULTIPOLYGON", "sfg")), structure(list(
            list(structure(c(-120.873273035994, -120.873274461718, 
            -120.487219702881, -120.506822270857, -120.873273035994, 
            34.3781494612153, 34.4709485281576, 34.3814708197392, 
            34.2930682831847, 34.3781494612153), .Dim = c(5L, 
            2L)))), class = c("XY", "MULTIPOLYGON", "sfg")), 
        structure(list(list(structure(c(-120.876185794554, -120.873274695607, 
        -119.449753967941, -119.469363746559, -120.876185794554, 
        34.3960669578761, 34.4861721639365, 34.1563446852463, 
        34.0713786726887, 34.3960669578761), .Dim = c(5L, 2L)))), class = c("XY", 
        "MULTIPOLYGON", "sfg")), structure(list(list(structure(c(-120.050247166472, 
        -120.027227238885, -119.26260029791, -119.285620225496, 
        -120.050247166472, 34.1816435818833, 34.2854817798431, 
        34.1159713573041, 34.0121331593446, 34.1816435818833), .Dim = c(5L, 
        2L)))), class = c("XY", "MULTIPOLYGON", "sfg")), structure(list(
            list(structure(c(-118.986389683899, -118.994289494438, 
            -119.276683926611, -119.874404531275, -120.177071971217, 
            -120.709119452121, -120.873778153724, -120.873274108521, 
            -120.672951644832, -120.507341158826, -120.343634241625, 
            -120.195155874861, -119.90105449454, -119.744010068156, 
            -119.586013857368, -119.427065862179, -119.249082178943, 
            -118.974016486669, -118.986389683899, 33.9223965975751, 
            33.9007910916422, 34.0337553726479, 34.1698605421817, 
            34.2383890191496, 34.3592656382457, 34.397337014339, 
            34.4479592752089, 34.403047720753, 34.3649763446597, 
            34.3278567529687, 34.2935925144848, 34.2279193907238, 
            34.1917515834352, 34.1555837761465, 34.1203677532603, 
            34.0794410239599, 33.9471429920358, 33.9223965975751
            ), .Dim = c(19L, 2L)))), class = c("XY", "MULTIPOLYGON", 
        "sfg"))), n_empty = 0L, class = c("sfc_MULTIPOLYGON", 
    "sfc"), precision = 0, bbox = structure(c(xmin = -121.029936561572, 
    ymin = 33.299879212642, xmax = -117.480260689924, ymax = 34.5738371431418
    ), class = "bbox"), crs = structure(list(epsg = 4326L, proj4string = "+proj=longlat +datum=WGS84 +no_defs"), class = "crs"))), row.names = c(NA, 
-5L), class = c("sf", "data.frame"), sf_column = "geom", agr = structure(c(gid = NA_integer_, 
vsr_category = NA_integer_, date_beg = NA_integer_, date_end = NA_integer_, 
date_range = NA_integer_, rec_speed_knots = NA_integer_), class = "factor", .Label = c("constant", 
"aggregate", "identity")))


两步方法可以是进行范围连接或非等连接,以查找哪些数据行按日期范围重叠,然后进行成对几何体相交,以确定它们是否在空间上相交

我喜欢使用librarydata.table进行所有连接操作,因为它具有优越的内存管理和速度

图书馆 图书馆数据表 setDT vsr_段 setDT vsr_区域 在段上添加行索引 我假设gid是vsr_区域上的唯一id vsr_段[,行id:=.I] 制作一个POSIXct列,这样我们就可以对相同的数据进行范围联接 vsr_区[ , `:=` posix_beg=as.POSIXct date_beg设置适当的时区 ,posix_end=as.POSIXct date_end ] 使用范围联接或非等联接可以及时提供重叠的几何图形
请原谅这些糟糕的示例数据-如果您需要特定的帮助,您必须提供高质量的示例数据,其他人可以将这些数据复制粘贴到他们的R会话中并直接使用。否则,您会要求人们为您创建一个示例数据集,而大多数人都不愿意这样做。但是,当新数据出现时,我必须删除该表,创建一个新表,并创建新索引。-您可以将新数据插入临时表,进行查询,然后将结果附加到主表中?@symbolxau对此表示抱歉。我添加了简单的复制/粘贴dput样本数据,所以如果有人想解决这个问题,这将是非常可行的。我只保留了vsr_段df的datetime、name和geometry,这样,如果有一个很酷的sqldf解决方案,SQL就不会有那么多的select属性。@SYMBOLXAU很好!我只是做了临时表/追加方法,但我真的很想看到一个R解决方案。特别是因为这一步是20个步骤中的5个,所以我想要一种在R中执行日期范围空间相交的方法,然后在R中继续分析,而不必查询数据库并将相交数据拉回到R中。。如果说得通的话,谢谢你!我运行了你的解决方案,它似乎有效。我需要再次检查其他样本数据,看看它是否真的工作正常。。。我认为您最初的想法是只将新数据写入临时表->在临时表上执行sql intersect->附加到主表,这一想法非常好。即使使用data.tables速度,处理更大的sf data.frame 100000行字符串仍然需要很长时间。而我在问题中写的sql intersect需要270秒才能完成1700万行字符串的相交,这取决于它是否在某个日期范围内。是的,听起来临时表是正确的选择!
library(DBI)
library(RSQLite)

#rename
ais_segments_temp <- vsr_segments
#my db connection...
con

update_vsr_segments <- function(con = NULL){

# SQL Date and Geometry intersect query
  # Creates "vsr_segments_temp" table which will be inserted into "vsr_segments" table.
  query = ("CREATE TABLE vsr_segments_temp AS
            SELECT
            s.name, s.mmsi, s.speed,
            s.seg_mins, s.seg_km,
            s.seg_kmhr, s.seg_knots, s.speed_diff,
            s.beg_dt, s.end_dt,
            s.beg_lon, s.beg_lat,
            s.end_lon, s.end_lat, z.gid,
            CASE
            WHEN
            ST_CoveredBy(s.geometry, z.geom)
            THEN s.geometry
            ELSE
            ST_Multi(
            ST_Intersection(s.geometry, z.geom)
            ) END AS geometry
            FROM ais_segments_temp AS s
            INNER JOIN vsr_zones AS z
            ON ST_Intersects(s.geometry, z.geom)
            WHERE
            s.datetime::date <= z.date_end AND
            s.datetime >= z.date_beg;")

  # get list of tables in database
  database_tables_list = db_list_tables(con)
  # made a 'not within' function
  if ('vsr_segments_temp' %!in% database_tables_list){

    dbExecute(con, query)

  }

  else if ('vsr_segments_temp' %in% database_tables_list){

    dbRemoveTable(con, "vsr_segments_temp")

    dbExecute(con, query)
  }

else {

  print(NULL)

  }
}

update_vsr_segments(con = con)

dbExecute(con, "INSERT INTO vsr_segments
SELECT * FROM vsr_segments_temp;")

dbDisconnect(con)