R-计算每天最接近X的时间

R-计算每天最接近X的时间,r,time,range,calculation,R,Time,Range,Calculation,我的数据集如下所示: structure(list(value = c(0.22, 0.68, 0.55, 0.68, 0.48, 0.94), date = structure(c(1583964300, 1583979060, 1583986500, 1583992140, 1584000540, 1584003300), tzone = "UTC", class = c("POSIXct", "POSIXt"))), row.names = c(NA, -6L

我的数据集如下所示:

structure(list(value = c(0.22, 0.68, 0.55, 0.68, 0.48, 0.94), 
    date = structure(c(1583964300, 1583979060, 1583986500, 1583992140, 
    1584000540, 1584003300), tzone = "UTC", class = c("POSIXct", 
    "POSIXt"))), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"))
其中一列包含值,第二列包含日期和时间。对于每一天,我想找到最接近某个时间点的值,比如06:00。但是,如果在4小时的范围内没有值(因此在04:00和08:00之间),我只希望脚本不返回任何内容,或者返回一个NaN

为此,我查看了 Stack Overflow 上的相关帖子,但据我所见,它们都是基于多个数据框(df)的,并且没有考虑某个时间范围。如果您对整个 df 感兴趣:

structure(list(value = c(0.22, 0.68, 0.55, 0.68, 0.48, 0.94, 
0.09, 0.87, 0.74, 0.42, 1.65, 1.78, 1.78, 2.89, 2.5, 12.39, 2.37, 
1.52, 0.29, 2.82, 2.17, 3.34, 3.67, 3.41, 0.16, 0.61, 1.13, 5.43, 
2.69, 4.32, 3.74, 6.14, 4.26, 3.48, 2.95, 1.78, 2.37, 1.46, 2.63, 
4, 2.69, 2.63, 2.5, 2.17, 2.04, 2.3, 3.41, 2.89, 2.76, 2.76, 
3.15, 3.8, 3.54, 3.74, 4.06, 3.87, 2.56, 1, 0.94, 2.76, 2.76, 
2.63, 3.02, 3.41, 2.56, 2.89, 3.74, 1.39, 0.55, 0.61, 0.29, 0.09, 
0.16, 1.65, 2.56, 3.54, 3.34, 3.74, 3.87, 4.45, 4, 4.06, 4.06, 
4.65, 5.36, 4.45, 4.84, 4.13, 4.26, 4, 3.93, 4.06, 4.19, 4.19, 
4.06, 3.74, 4.06, 3.67, 4.13, 4.19, 4.71, 4.71, 5.56, 2.76, 5.88, 
6.14, 4.91, 5.04, 5.95, 6.08, 5.23, 5.95, 5.43, 5.75, 6.47, 5.62, 
5.82, 5.82, 5.49, 5.88, 5.56, 4.84, 5.49, 5.43, 5.04, 4.58, 4.97, 
5.3, 5.56, 5.88, 5.62, 4.84, 4.84, 2.37, 3.87, 5.69, 5.04, 4.65, 
4.97, 4.84, 5.3, 4.65, 3.54, 3.93, 4.65, 4.97, 4, 4.52, 4.71, 
4.26, 4.84, 5.75, 4.97, 4.65, 4.52, 5.1, 4.97, 4.91, 4.71, 5.3, 
4.78, 4.52, 4.19, 3.34, 3.15, 3.15, 3.15, 2.89, 3.15, 2.95, 3.28, 
4.39, 5.1, 5.43, 2.17, 2.76, 0.74, 0.74, 0.48, 3.61, 3.48, 4.19, 
3.48, 3.21, 3.34, 3.34, 3.21, 3.41, 4, 3.93, 4.06, 3.48, 3.87, 
2.95, 3.61, 3.54, 3.41, 3.8, 3.48, 4.26, 4.58, 5.04, 4.52, 5.1, 
3.74, 4.45, 5.23, 5.04, 4.91, 4.97, 4.84, 5.23, 5.04, 4.58, 6.34, 
5.56, 3.93, 4.71, 4.58, 5.23, 1.85, 1.98, 3.87, 6.34, 4.84, 4.58, 
5.36, 4.58, 3.87, 3.93, 1.46, 3.21, 3.74, 3.67, 3.41, 4.39, 3.41, 
1.85, 5, 4, 3.67, 3.87, 3.61, 4.26, 4.45, 4.45, 5.49, 6.14, 0.42, 
1.2, -0.3, 1, 1.26, 0.55, 3.34, 4.19, 4.13, 4.39, 4.45, 3.67, 
3.61, 4, 4.97, 6.34, 5.69, 6.08, 5.3, 6.8, 6.34, 5.36, 2.76, 
3.34, 2.69, 3.67, 3.02, 3.41, 2.43, 2.82, 2.82, 3.02, 2.56, 2.76, 
2.76, 2.63, 2.37), date = structure(c(1583964300, 1583979060, 
1583986500, 1583992140, 1584000540, 1584003300, 1584006000, 1584086400, 
1584128880, 1584142680, 1584201600, 1584239460, 1584244800, 1584252840, 
1584260580, 1584266760, 1584280800, 1584310260, 1584333900, 1584367200, 
1584377400, 1584378000, 1584381480, 1584381600, 1584388800, 1584411540, 
1584417480, 1584426660, 1584550020, 1584568560, 1584576180, 1584590700, 
1584595380, 1584604740, 1584612000, 1584619080, 1584619200, 1584631320, 
1584633600, 1584639720, 1584648000, 1584654900, 1584662640, 1584670500, 
1584677400, 1584682920, 1584691200, 1584698400, 1584705600, 1584716400, 
1584721680, 1584730800, 1584735420, 1584741900, 1584748800, 1584755520, 
1584768180, 1584808560, 1584813600, 1584819300, 1584820380, 1584828600, 
1584836160, 1584842100, 1584850200, 1584854700, 1584858780, 1584878400, 
1584890760, 1584900000, 1584922320, 1584928620, 1584936420, 1584957600, 
1584964140, 1584972000, 1584978480, 1584980220, 1584986400, 1584989220, 
1584991740, 1584993600, 1585000260, 1585007340, 1585015200, 1585022220, 
1585030140, 1585037460, 1585038600, 1585040400, 1585041000, 1585044480, 
1585053360, 1585059360, 1585067400, 1585074300, 1585079580, 1585085160, 
1585094040, 1585101600, 1585108740, 1585116000, 1585122540, 1585127520, 
1585130880, 1585137360, 1585143840, 1585144800, 1585151760, 1585159200, 
1585166400, 1585173600, 1585180260, 1585188000, 1585195200, 1585202400, 
1585208400, 1585217160, 1585224180, 1585230540, 1585238400, 1585245600, 
1585251300, 1585251360, 1585252800, 1585267200, 1585274400, 1585281000, 
1585292100, 1585310280, 1585326540, 1585333740, 1585337760, 1585338120, 
1585338480, 1585340700, 1585342800, 1585355100, 1585355640, 1585355940, 
1585369920, 1585382400, 1585404000, 1585426140, 1585440000, 1585454400, 
1585468080, 1585482360, 1585497600, 1585512000, 1585525740, 1585536000, 
1585540200, 1585540260, 1585547100, 1585556280, 1585567440, 1585569420, 
1585574880, 1585584960, 1585597020, 1585612020, 1585620720, 1585634520, 
1585641360, 1585648800, 1585659600, 1585663200, 1585670400, 1585684800, 
1585699200, 1585702560, 1585706400, 1585708740, 1585715220, 1585719840, 
1585724760, 1585728000, 1585742400, 1585828800, 1585839600, 1585844220, 
1585845960, 1585848660, 1585855980, 1585867860, 1585872000, 1585885500, 
1585901160, 1585914780, 1585929600, 1585930860, 1585933200, 1585943100, 
1585950180, 1585958400, 1585972380, 1585987440, 1585995480, 1586001600, 
1586017500, 1586044200, 1586059200, 1586074980, 1586086380, 1586108820, 
1586111700, 1586111760, 1586131800, 1586147160, 1586160000, 1586172960, 
1586191380, 1586203920, 1586210340, 1586218740, 1586219520, 1586231220, 
1586241060, 1586247240, 1586262060, 1586262480, 1586275020, 1586289060, 
1586302920, 1586318520, 1586332800, 1586346360, 1586363280, 1586376000, 
1586383020, 1586390400, 1586397600, 1586404800, 1586412000, 1586419200, 
1586427180, 1586432340, 1586437200, 1586461740, 1586468520, 1586476380, 
1586505600, 1586517600, 1586520660, 1586534880, 1586548980, 1586556780, 
1586558400, 1586563200, 1586570400, 1586632080, 1586664000, 1586750400, 
1586792400, 1586793600, 1586807520, 1586814000, 1586822400, 1586835360, 
1586844900, 1586851200, 1586865240, 1586874960, 1586880000, 1586894400, 
1586908260, 1586922180, 1586931480, 1586937600, 1586952000, 1586962800, 
1586966400, 1586980800, 1586997060, 1587009000, 1587018180, 1587027600, 
1587040200, 1587052800, 1587066300, 1587080700, 1587094380, 1587109620, 
1587120300), tzone = "UTC", class = c("POSIXct", "POSIXt"))), row.names = c(NA, 
-285L), class = c("tbl_df", "tbl", "data.frame"))

这里有一种方法,使用
fuzzyjoin
包中的
difference_left_join
函数

首先,我们使用
distinct
从
df
中创建一个不重复日期的列表,并在每个日期后面粘贴上
06:00
。然后我们使用
difference_left_join
将该列表与 df 连接,其中
max_dist
设为2小时,这个时间差由
as.difftime
创建

这将为
max_dist
中的所有值创建行,因此我们可以使用
filter
选择最接近的值

NA==NA
的计算结果为
NA
,因此我们需要添加
| is.na(diff)

# dplyr supplies the pipe and the data verbs; fuzzyjoin supplies the
# distance-based join.
library(dplyr)
library(fuzzyjoin)

df %>%
  # One row per calendar day, stamped with that day's 06:00 UTC target time.
  distinct(
    date = as.POSIXct(paste0(as.Date(date), " 06:00"), tz = "UTC")
  ) %>%
  # Attach every observation lying within 2 hours of its day's target. Being
  # a left join, a target without any such observation survives as one row
  # whose joined columns are NA.
  difference_left_join(
    df,
    max_dist = as.difftime(2, units = "hours"),
    distance_col = "diff"
  ) %>%
  group_by(date.x) %>%
  # Comparing NA with `==` yields NA rather than TRUE, so the unmatched days
  # have to be retained explicitly via is.na().
  filter(is.na(diff) | diff == min(diff))
# A tibble: 38 x 4
# Groups:   date.x [38]
   date.x               value date.y              diff     
   <dttm>               <dbl> <dttm>              <drtn>   
 1 2020-03-11 06:00:00 NA     NA                    NA secs
 2 2020-03-12 06:00:00  0.68  2020-03-12 05:49:00  660 secs
 3 2020-03-13 06:00:00  0.87  2020-03-13 08:00:00 7200 secs
 4 2020-03-14 06:00:00 NA     NA                    NA secs
 5 2020-03-15 06:00:00  2.89  2020-03-15 06:14:00  840 secs
 6 2020-03-16 06:00:00  0.290 2020-03-16 04:45:00 4500 secs
 7 2020-03-17 06:00:00  5.43  2020-03-17 06:31:00 1860 secs
 8 2020-03-18 06:00:00 NA     NA                    NA secs
 9 2020-03-19 06:00:00  4.26  2020-03-19 05:23:00 2220 secs
10 2020-03-20 06:00:00  2.3   2020-03-20 05:42:00 1080 secs
# … with 28 more rows
library(dplyr)
library(fuzzyjoin)
df %>%
  distinct(date = as.POSIXct(paste0(as.Date(date)," 06:00"),tz="UTC")) %>%
  difference_left_join(df,max_dist = as.difftime(2,units = "hours"),distance_col = "diff") %>%
  group_by(date.x) %>%
  filter(diff==min(diff)| is.na(diff))
# A tibble: 38 x 4
# Groups:   date.x [38]
   date.x               value date.y              diff     
   <dttm>               <dbl> <dttm>              <drtn>   
 1 2020-03-11 06:00:00 NA     NA                    NA secs
 2 2020-03-12 06:00:00  0.68  2020-03-12 05:49:00  660 secs
 3 2020-03-13 06:00:00  0.87  2020-03-13 08:00:00 7200 secs
 4 2020-03-14 06:00:00 NA     NA                    NA secs
 5 2020-03-15 06:00:00  2.89  2020-03-15 06:14:00  840 secs
 6 2020-03-16 06:00:00  0.290 2020-03-16 04:45:00 4500 secs
 7 2020-03-17 06:00:00  5.43  2020-03-17 06:31:00 1860 secs
 8 2020-03-18 06:00:00 NA     NA                    NA secs
 9 2020-03-19 06:00:00  4.26  2020-03-19 05:23:00 2220 secs
10 2020-03-20 06:00:00  2.3   2020-03-20 05:42:00 1080 secs
# … with 28 more rows

对于以后找到此答案的任何人,必须确保您的
tz=
参数与数据的时区匹配。

这里有一个快速方法,它当然可以封装在函数中。假设您的数据集名为
df

## df <- .... the given data set
## NOTE(review): seconds-of-day are derived from the raw epoch value, i.e. in
## UTC. The sample data is UTC; confirm before using other time zones.
day_secs  <- 24 * 60 * 60
target    <- 6 * 60 * 60 # 06:00:00 converted to secs
tolerance <- 2 * 60 * 60 # accept observations within +/- 2 hours of target

df$secs <- as.numeric(df$date) %% day_secs # seconds elapsed since midnight
df$diff <- abs(df$secs - target)           # distance to the target time

## Choose the closest observation separately for every calendar day.
## Comparing each row only with its direct neighbours (the earlier approach)
## could never select the first or last row, required sorted input, and lost
## a day's best match whenever the next day's first observation was closer.
day      <- as.numeric(df$date) %/% day_secs
in_range <- which(df$diff <= tolerance) # which() also discards NA distances
## Sort the candidates by day, then distance, then original position, so the
## first entry of each day is its closest observation (earliest one on ties).
ranked   <- in_range[order(day[in_range], df$diff[in_range], in_range)]
closest  <- sort(ranked[!duplicated(day[ranked])])

## to see what happens
#df$pit <- seq_len(nrow(df)) %in% closest

df[closest, ]
伊恩·坎贝尔(Ian Campbell)的答案当然更为通用。很高兴了解到这种方法 :)
当然,这还取决于编程风格,以及特定任务是否需要依赖其他软件包。

非常感谢您抽出时间,这个方法效果非常好。非常感谢!
#' Find, for each calendar day, the observation closest to a time of day
#'
#' Every day contributes at most one index: the observation whose time of day
#' is nearest to `target`, provided it lies within `tolerance`. Days without
#' any observation inside the tolerance window contribute nothing. When two
#' observations of the same day are equally close, the one appearing first in
#' `date` is returned. The input does not need to be sorted.
#'
#' Day and time of day are derived from the numeric epoch value, i.e. they are
#' evaluated in UTC. NOTE(review): confirm this matches the time zone of the
#' data before using it on non-UTC timestamps.
#'
#' @param date A vector of timestamps convertible with `as.numeric()` to
#'   seconds since the epoch (e.g. `POSIXct`).
#' @param target Target time of day, in seconds after midnight.
#' @param tolerance Maximum accepted distance from `target`, in seconds.
#' @return An integer vector of positions in `date`, in ascending order;
#'   `integer(0)` when nothing qualifies.
select_time <- function(date, target, tolerance) {
  day_secs <- 24 * 60 * 60
  epoch <- as.numeric(date)
  day <- epoch %/% day_secs
  dist <- abs(epoch %% day_secs - target)

  # Only observations inside the tolerance window can be selected; NA
  # timestamps are excluded explicitly.
  candidates <- which(!is.na(dist) & dist <= tolerance)
  if (length(candidates) == 0L) {
    return(integer(0))
  }

  # Sort candidates by day, then distance, then original position: the first
  # entry of every day is then that day's closest observation.
  ranked <- candidates[order(day[candidates], dist[candidates], candidates)]
  sort(ranked[!duplicated(day[ranked])])
}

# Row positions of the selected observations: target 06:00 and a tolerance of
# 2 hours, both expressed in seconds.
ndx <- select_time(
  df$date,
  target = 6 * 60 * 60,
  tolerance = 2 * 60 * 60
)

# Show the matching rows of the data set.
df[ndx, ]