Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/80.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何用data.table替换plyr::ddply_R_Data.table_Geospatial_Plyr_Sf - Fatal编程技术网

如何用data.table替换plyr::ddply

如何用data.table替换plyr::ddply,r,data.table,geospatial,plyr,sf,R,Data.table,Geospatial,Plyr,Sf,我有一个聚合操作,它计算R中多边形中的点,当前使用plyr::ddply()作为主函数,其中我需要按两个变量分组:dayofweek和hour。它非常慢,所以我想用一个更快的函数替换它,比如data.table包中的一些函数 雷普雷克斯 创建数据帧 该操作的主要目标是获取点的数据帧df,并使用sf包中的st_intersects()方法来计算有多少点与grid.sf中的多边形相交 创建DF对象 library(sf) library(tidyverse) library(plyr) df &l

我有一个聚合操作,它计算R中多边形中的点,当前使用
plyr::ddply()
作为主函数,其中我需要按两个变量分组:
dayofweek
和
hour
。它非常慢,所以我想用一个更快的函数替换它,比如
data.table
包中的一些函数

可复现示例(reprex)

创建数据帧

该操作的主要目标是获取点的数据帧
df
,并使用
sf
包中的
st_intersects()
方法来计算有多少点与
grid.sf
中的多边形相交

创建DF对象

library(sf)
library(tidyverse)
library(plyr)

# Example point data: 100 observations, each with a day-of-week code, an hour
# of day and a randomly sampled latitude/longitude.
# The day codes must be spelled exactly like the `days` vector inside
# buildBaseGrid() ("SU", "MO", ...): myf() matches points to grid rows with
# `dayofweek %in% x$Var2`, so differently spelled labels (e.g. "Sun") would
# match nothing and every count would come out as 0.
df <- data.frame(
  X = seq(1, 100, 1),
  dayofweek = rep(c("SU", "MO", "TU", "WE", "TH"), 20),
  hour = sample(seq(0, 23, 1), 100, replace = TRUE),
  lat = sample(seq(37.1234, 37.2345, 0.001), 100, replace = TRUE),
  lon = sample(seq(-122.5432, -122.4111, 0.001), 100, replace = TRUE)
)


# Coordinate reference system for the points, given as a PROJ string
# (longitude/latitude on the WGS84 datum)
projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

# Turn the plain data frame into an sf object; the lon/lat columns supply the
# coordinates
df <- st_as_sf(df, coords = c("lon", "lat"), crs = projcrs)
因此,如果有一种方法可以将这个
ddply
转换为
data.table
操作,我会非常高兴,因为
data.table
要快得多


谢谢

在创建grid.sf对象的代码中,
g
是什么?您可以执行类似于
grid.sf[df, on = c("Var2" = "dayofweek", "Var3" = "hour"), count := lengths(st_intersects(Var1, df))]
的操作。如果您必须多次运行它,最好在grid.sf中添加一个迭代id,这样您就可以一次性遍历整个数据集。

@chinsoon12 “g”只是网格对象,我在下一行将其转换为简单要素(simple features)对象。

抱歉,我没注意到右侧的滚动条。假设星期几的标签是一致的,下面的代码应该可以帮您入手:
library(data.table); setDT(grid.sf); setDT(df); grid.sf[, Var2 := as.character(Var2)]; grid.sf[, Var3 := as.numeric(Var3)]; grid.sf[df, on = c("Var2" = "dayofweek", "Var3" = "hour"), count := lengths(st_intersects(Var1, i.geometry))]
@chinsoon12 我在运行上述代码段时出错了。

错误消息是什么?
#' Build the day-of-week x hour grid used for point counting
#'
#' Lays a regular grid over `x` and repeats every cell once per combination of
#' day-of-week code and hour of day.
#'
#' @param x Object passed to `sf::st_make_grid()`; the grid covers it.
#' @param cellsize Cell size in the units of the CRS of `x`. The default
#'   `0.005` is the value this function always used (roughly 500 m when the
#'   units are degrees).
#' @return The result of `sf::st_as_sf()` on a data frame with one row per
#'   cell/day/hour combination and the columns `Var1` (cell polygon), `Var2`
#'   (day code), `Var3` (hour), `id` (row number), `center` (cell centroid),
#'   `lonn` / `latt` (centroid coordinates) and `pkey`
#'   (`"lonn;latt;Var2;Var3"`).
buildBaseGrid <- function(x, cellsize = 0.005) {
  # Regular tessellation over the input
  g <- sf::st_make_grid(x, cellsize = cellsize)

  # plot to make sure
  # ggplot() +
  #   geom_sf(data = df.sf, size = 3) +
  #   geom_sf(data = g, alpha = 0)
  #
  # ggplot() +
  #   geom_sf(data = g, alpha = 0)

  # Every cell is repeated for each day code and each hour
  days <- c("SU", "MO", "TU", "WE", "TH", "FR", "SA")
  hours <- as.character(0:23)
  grid.sf <- expand.grid(g, days, hours)
  grid.sf$id <- seq_len(nrow(grid.sf))

  # Centroid of every row's cell; its coordinates are extracted once and
  # reused for both the lon and lat columns
  cell_centers <- sf::st_centroid(grid.sf$Var1)
  center_xy <- sf::st_coordinates(cell_centers)

  grid.sf <- dplyr::mutate(
    grid.sf,
    center = cell_centers,
    lonn = center_xy[, 1],
    latt = center_xy[, 2],
    # Primary key: centroid position plus day and hour
    pkey = paste0(lonn, ";", latt, ";", Var2, ";", Var3)
  )

  sf::st_as_sf(grid.sf)
}

# Build the day/hour grid that covers the example points
grid.sf <- buildBaseGrid(x = df)

#' Count the points that intersect each grid cell of one day/hour group
#'
#' @param x One group of grid rows. The columns `Var1` (cell geometry),
#'   `Var2` (day code) and `Var3` (hour) are read.
#' @param points Point layer with `dayofweek` and `hour` columns. Defaults to
#'   the global `df`, which is what this function always used.
#' @return `x` as a plain data frame with one added column, `count`: the
#'   number of matching points intersecting each row's cell. The input
#'   columns are returned once each; piping `x` into `data.frame(x)` would
#'   evaluate `data.frame(x, x)` and duplicate every column.
myf <- function(x, points = df) {
  x <- as.data.frame(x)

  # Keep only the points recorded on this group's day(s) and hour(s)
  matching <- dplyr::filter(
    points,
    dayofweek %in% x$Var2,
    hour %in% x$Var3
  )

  # st_intersects() yields one index vector per cell; its lengths are the
  # per-cell point counts
  x$count <- lengths(sf::st_intersects(x$Var1, matching))
  x
}

# Do the operation: run myf() on every (Var2, Var3) group of the grid and
# combine the per-group results into one data frame.
# The grouping columns are given as a character vector so the call does not
# depend on plyr's `.()` helper being attached, and FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
output <- plyr::ddply(
  grid.sf,
  .variables = c("Var2", "Var3"),
  .fun = myf,
  .parallel = FALSE
) %>%
  as.data.frame()

# Presumably an attempted data.table translation of the ddply() call.
# NOTE(review): myf() is called here without its `x` argument, which myf()
# reads on its first line, so this call cannot succeed as written.
# NOTE(review): grid.sf comes from buildBaseGrid(), which returns the result
# of st_as_sf(). The `[, j, by = key(...)]` form looks like data.table usage,
# but no conversion to a data.table and no key definition is visible in this
# code; confirm before relying on this line.
test4 <- grid.sf[, .(test = myf()), by = key(grid.sf)]