R 计算每个网格框内的唯一点

R 计算每个网格框内的唯一点,r,filter,counting,expss,R,Filter,Counting,Expss,我有以下数据: 编辑:使用较小的样本数据 dat <- structure(list(SN = c(198305L, 198305L, 198305L, 198305L, 198305L,198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305

我有以下数据:

编辑:使用较小的样本数据

dat <- structure(list(SN = c(198305L, 198305L, 198305L, 198305L, 
198305L,198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 
198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 
198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 
198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 
198305L, 198305L, 198305L, 198306L, 198306L, 198306L, 198306L, 
198308L, 198308L, 198308L, 198308L, 198308L, 198308L, 198308L, 
198308L, 198308L, 198308L, 198308L, 198308L, 198308L, 198308L, 
198308L, 198308L, 198308L, 198308L, 198308L, 198308L, 198308L, 
198308L, 198308L, 198308L, 198310L, 198310L, 198310L, 198310L, 
198310L, 198310L, 198310L, 198310L, 198310L, 198310L, 198310L, 
198310L, 198310L, 198310L, 198310L, 198310L, 198310L, 198310L, 
198310L, 198310L, 198310L, 198310L, 198310L, 198310L, 198310L, 
198310L), CY = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L), Year = c(1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 
1983L, 1983L), Month = c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L
), Day = c(9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 
11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 
15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 14L, 14L, 
14L, 14L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 21L, 21L, 22L, 
22L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L, 
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 
5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L), Hour = c(0L, 6L, 12L, 
18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 
6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 
18L, 0L, 6L, 12L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 
12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 
0L, 6L, 12L, 18L, 0L, 6L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 
0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 
12L, 18L), Lat = c(17.8, 18.1, 18.7, 18.9, 19.2, 19.5, 19.9, 
20.1, 20.6, 21.2, 21.6, 22, 22.5, 22.9, 23.4, 23.9, 24.6, 24.9, 
25.4, 26.1, 26.6, 27.2, 27.6, 28.1, 28.5, 29.1, 29.5, 30, 31.1, 
31.8, 32.7, 33.8, 34.6, 35.1, 35.6, 19.8, 19.9, 19.9, 20.2, 15.9, 
16.1, 16.3, 16.5, 16.9, 17.4, 18, 18.7, 19.3, 20, 23.8, 24.2, 
24.9, 25.4, 25.8, 25.5, 25.1, 25.3, 25.8, 26.2, 26.5, 27.1, 27.9, 
29.1, 10.3, 10.2, 9, 9.2, 9.2, 9.5, 10, 10.5, 10.9, 11.3, 12.3, 
13, 13.7, 14.4, 15, 15.9, 16.8, 17.2, 17.8, 18.3, 18.7, 19, 19.3, 
19.5, 19.7, 20), Lon = c(130.8, 130.7, 130.3, 130.4, 130.4, 130.4, 
130.5, 130.5, 130.7, 130.8, 130.7, 130.6, 130.7, 130.8, 131.2, 
131.5, 131.8, 132.2, 132.6, 133, 133.3, 133.5, 133.5, 133.5, 
133.6, 134.1, 134.3, 134.8, 135.1, 135.8, 136.5, 137, 137.3, 
138.1, 139.4, 121.5, 122.7, 124.4, 126.2, 133.8, 133.2, 132.8, 
132.4, 132.2, 132.5, 133, 133.7, 134.7, 135.6, 140.1, 141.6, 
142.6, 143.1, 143.5, 144, 144.3, 144.7, 144.7, 144.1, 143.4, 
142.8, 141.8, 141.3, 151.2, 149.2, 143.4, 141.8, 140.2, 138.9, 
137.5, 136, 134.4, 133, 131.7, 130.7, 129.6, 128.8, 128, 126.9, 
125.8, 125.1, 124.1, 123.2, 122.2, 121.2, 120.2, 119.2, 118.3, 
117.5), VMax = c(145L, 135L, 125L, 120L, 120L, 120L, 115L, 110L, 
110L, 115L, 120L, 120L, 120L, 120L, 125L, 120L, 115L, 110L, 105L, 
105L, 110L, 100L, 100L, 95L, 90L, 85L, 85L, 80L, 75L, 70L, 70L, 
70L, 60L, 55L, 45L, 35L, 40L, 45L, 40L, 35L, 40L, 45L, 50L, 55L, 
50L, 45L, 40L, 35L, 35L, 35L, 45L, 50L, 50L, 45L, 40L, 40L, 45L, 
50L, 50L, 50L, 45L, 40L, 35L, 35L, 35L, 35L, 40L, 45L, 50L, 55L, 
60L, 65L, 70L, 75L, 85L, 90L, 95L, 95L, 95L, 95L, 100L, 120L, 
125L, 120L, 100L, 90L, 85L, 85L, 80L), Cat = structure(c(5L, 
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
4L, 4L, 4L, 3L, 2L, 2L, 2L, 1L), .Label = c("Cat1", "Cat2", "Cat3", 
"Cat4", "Cat5", "TS"), class = "factor")), row.names = c(243L, 
244L, 245L, 246L, 247L, 248L, 249L, 250L, 251L, 252L, 253L, 254L, 
255L, 256L, 257L, 258L, 259L, 260L, 261L, 262L, 263L, 264L, 265L, 
266L, 267L, 268L, 269L, 270L, 271L, 272L, 273L, 274L, 275L, 276L, 
277L, 278L, 279L, 280L, 281L, 282L, 283L, 284L, 285L, 286L, 287L, 
288L, 289L, 290L, 291L, 292L, 293L, 294L, 295L, 296L, 297L, 298L, 
299L, 300L, 301L, 302L, 303L, 304L, 305L, 927L, 928L, 929L, 930L, 
931L, 932L, 933L, 934L, 935L, 936L, 937L, 938L, 939L, 940L, 941L, 
942L, 943L, 944L, 945L, 946L, 947L, 948L, 949L, 950L, 951L, 952L
), class = "data.frame")

dat给定观测数据框
dat
和网格数据框
df2
。对于
dat
中的每个观测值,在
df2
中找到最近的网格中心,以获得其所属的网格

这是基于一个假设,即观测值属于其最近的栅格中心。此外,对于每个
SN
,仅将报告的第一个条目作为唯一观察值

下图显示了您的完整数据集:


是否
x.Lon
y.Lat
指的是网格单元的中心???@humand\u 008,是的..它们是每个网格的中心Lat和Lon.Ok。为了清楚起见,数据中的多个数据观测值具有相同的
SN
。但是差异
lat
lon
。所以,当你说
unique
时,你想要这些观测中的哪一个??数据是关于热带气旋路径的。每个热带气旋都有一个独特的SN编号。每当一个热带气旋穿过网格盒时,应只计算一次。两个lat-lon对可以属于同一热带气旋(即,相同的SN编号),并位于一个网格盒中。所以我只想在每个网格框中只计算一次具有唯一序号的lat-lon对。我问这个问题的原因是因为对于单个
SN
相关的lat,lon对非常分散,可能位于不同的网格单元中。如果我理解正确的话。对于每个
SN
首先出现的条目,它是第一个报告的旋风分离器lat、lon,需要考虑计数。是。是。你是正确的。第一个条目是成因位置(地层位置)。有时旋风分离器可能会返回到更靠近前一个位置的位置,这就是为什么计数应基于SN column.Cool。我更正了我的代码。检查这是否符合你的目的。我只使用``duplicated`函数为每个
SN
保留第一个观察值。你的问题很有趣。已经投了赞成票。
latmin=0
latmax=50
lonmin=60
lonmax=180


dlat=2.5
dlon=2.5

latint=dlat*0.5
lonint=dlon*0.5

## derive center lat and lon points

x.Lon<-seq((lonmin+lonint),(lonmax-lonint),lonint)
y.Lat<-seq((latmin+latint),(latmax-latint),latint)

df2<-as.data.frame(expand.grid(x.Lon=x.Lon,y.Lat=y.Lat))
df2$count<-"0"

library(data.table)
library(expss)


setDT(dat)

dummy<-matrix(ncol=1,nrow=nrow(df2))

for (i in 1:nrow(df2)){
df_bounds<-data.frame(north=(df2[i,]$y.Lat+latint),south=(df2[i,]$y.Lat-latint),west=(df2[i,]$x.Lon-lonint),east=(df2[i,]$x.Lon+lonint))

dat[,inBounds := Lat >= (df2[i,]$y.Lat-latint) & Lat <= (df2[i,]$y.Lat+latint) & Lon >= (df2[i,]$x.Lon-lonint) & Lon >= (df2[i,]$x.Lon+lonint)]

dat1<-dat[SN %in% dat[inBounds == TRUE, unique(SN)],passesThroughBox := T]

#dat2<-dat[is.na(passesThroughBox),passesThroughBox := F]
#dat3<-dat1[which(passesThroughBox == TRUE),]

dummy[i,]<-count_if("TRUE",dat1$passesThroughBox)
 }
df2<-as.data.frame(expand.grid(x.Lon=x.Lon,y.Lat=y.Lat))         # grid box centers
df2$count<- 0L                                           # keeping count as integer

# function to calculate distance between two points 
dist <- function(x, y) {diff <- (y - x) ; sqrt(sum(diff^2))}     

# Filtering only the unique observation
dat <- dat[!duplicated(dat$SN), ]

# Finding closest grid center for every observation in dat
closest_grid <- apply(dat[,c('Lon','Lat')],1, function(x){
                      dist_grid <- apply(df2[,c('x.Lon','y.Lat')], 1, function(y) dist(x,y))
                                              return(which.min(dist_grid))
                                                         })

# Summarizing no of of counts for each grid center with non zero counts
df2[names(table(closest_grid)),'count'] <- as.integer(table(closest_grid))
df2[names(table(closest_grid)),]                              # the non-zero counts

#      x.Lon y.Lat count
#738  151.25 10.00     1
#1199 133.75 16.25     1
#1292 131.25 17.50     1
#1474 121.25 20.00     1
library(ggplot2)                                               # fill = counts data 
ggplot(data = df2, aes(x = x.Lon, y = y.Lat)) + geom_tile(aes(fill = count))