根据组内最近的时间戳在R中连接两个数据帧_R_Dataframe_Datetime

根据组内最近的时间戳在R中连接两个数据帧

r dataframe datetime

根据组内最近的时间戳在R中连接两个数据帧,r,dataframe,datetime,R,Dataframe,Datetime,我有以下数据帧 structure(list(id = c(1, 2, 3, 4, 5), time = structure(c(1484092800, 1485907200, 1490227200, 1490918400, 1491955200), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"))

我有以下数据帧

# dput() of the first data frame: one row per id (1-5) with a single
# POSIXct `time` value (tzone "UTC"); stored with tibble classes.
structure(list(id = c(1, 2, 3, 4, 5), time = structure(c(1484092800, 
1485907200, 1490227200, 1490918400, 1491955200), class = c("POSIXct", 
"POSIXt"), tzone = "UTC")), row.names = c(NA, -5L), class = c("tbl_df", 
"tbl", "data.frame"))

     id time               
  <dbl> <dttm>             
1     1 2017-01-11 00:00:00
2     2 2017-02-01 00:00:00
3     3 2017-03-23 00:00:00
4     4 2017-03-31 00:00:00
5     5 2017-04-12 00:00:00

# dput() of the second data frame: 25 rows, five per id (1-5), each with a
# POSIXct `time` (tzone "UTC") and a numeric `score`.
structure(list(id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 
3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5), time = structure(c(1466553600, 
1465948800, 1453420800, 1485302400, 1433030400, 1421712000, 1453852800, 
1485302400, 1485993600, 1517529600, 1400544000, 1434067200, 1466985600, 
1497484800, 1390003200, 1516060800, 1464825600, 1497916800, 1527638400, 
1454025600, 1390608000, 1421712000, 1466467200, 1453852800, 1485820800
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), score = c(3, 
2, 5, 4, 5, 24.2, 24.8, 25.4, 26, 26.6, 36.2, 36.8, 37.4, 38, 
38.6, 44, 44.6, 45.2, 45.8, 46.4, 59, 59.6, 60.2, 60.8, 61.4)), row.names = c(NA, 
-25L), class = c("tbl_df", "tbl", "data.frame"))

      id time                score
   <dbl> <dttm>              <dbl>
 1     1 2016-06-22 00:00:00   3  
 2     1 2016-06-15 00:00:00   2  
 3     1 2016-01-22 00:00:00   5  
 4     1 2017-01-25 00:00:00   4  
 5     1 2015-05-31 00:00:00   5  
 6     2 2015-01-20 00:00:00  24.2
 7     2 2016-01-27 00:00:00  24.8
 8     2 2017-01-25 00:00:00  25.4
 9     2 2017-02-02 00:00:00  26  
10     2 2018-02-02 00:00:00  26.6
# … with 15 more rows

我想从第二个数据帧（sdf，含 score 列）中取出 score，条件是其 time 与第一个数据帧（df）中对应的 time 最接近。但匹配时还必须考虑 id：只能在 id 相同的行之间比较时间。两个数据帧的 dput() 输出见上文。我已经试过了：

我们可以按 id 连接两个数据帧，然后计算时间差，并为每个 id 保留时间差最小的那条观测：
library(tidyverse)

# Pair every scored row of df2 with the reference time of the same id in df1.
# Both tables have a `time` column, so the join exposes them as time.x (df2)
# and time.y (df1).
left_join(df2, df1, by = "id") %>%
  # Absolute gap between the scored observation and the reference time.
  mutate(time_dif = abs(time.x - time.y)) %>%
  group_by(id) %>%
  # Keep the closest row(s) within each id; ties are all retained and the
  # result is still grouped by id.
  filter(time_dif == min(time_dif))

# A tibble: 5 x 5
# Groups:   id [5]
     id time.x              score time.y              time_dif
  <dbl> <dttm>              <dbl> <dttm>              <drtn>  
1     1 2017-01-25 00:00:00   4   2017-01-11 00:00:00 14 days 
2     2 2017-02-02 00:00:00  26   2017-02-01 00:00:00  1 days 
3     3 2017-06-15 00:00:00  38   2017-03-23 00:00:00 84 days 
4     4 2017-06-20 00:00:00  45.2 2017-03-31 00:00:00 81 days 
5     5 2017-01-31 00:00:00  61.4 2017-04-12 00:00:00 71 days 

library(tidyverse)
df2 %>% 
  left_join(df1, by = "id") %>% 
  mutate(time_dif = abs(time.x - time.y)) %>% 
  group_by(id) %>% 
  filter(time_dif == min(time_dif))

# A tibble: 5 x 5
# Groups:   id [5]
     id time.x              score time.y              time_dif
  <dbl> <dttm>              <dbl> <dttm>              <drtn>  
1     1 2017-01-25 00:00:00   4   2017-01-11 00:00:00 14 days 
2     2 2017-02-02 00:00:00  26   2017-02-01 00:00:00  1 days 
3     3 2017-06-15 00:00:00  38   2017-03-23 00:00:00 84 days 
4     4 2017-06-20 00:00:00  45.2 2017-03-31 00:00:00 81 days 
5     5 2017-01-31 00:00:00  61.4 2017-04-12 00:00:00 71 days 

数据
# Reference table: one target timestamp (seconds since epoch, UTC) per id.
df1 <- structure(
  list(
    id = c(1, 2, 3, 4, 5),
    time = structure(
      c(1484092800, 1485907200, 1490227200, 1490918400, 1491955200),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    )
  ),
  # Compact automatic row names for 5 rows.
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)



# Scored observations: five (time, score) rows for each id 1..5.
# Values are laid out one id per line; times are seconds since epoch, UTC.
df2 <- structure(
  list(
    id = rep(c(1, 2, 3, 4, 5), each = 5),
    time = structure(
      c(
        1466553600, 1465948800, 1453420800, 1485302400, 1433030400,
        1421712000, 1453852800, 1485302400, 1485993600, 1517529600,
        1400544000, 1434067200, 1466985600, 1497484800, 1390003200,
        1516060800, 1464825600, 1497916800, 1527638400, 1454025600,
        1390608000, 1421712000, 1466467200, 1453852800, 1485820800
      ),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    ),
    score = c(
      3, 2, 5, 4, 5,
      24.2, 24.8, 25.4, 26, 26.6,
      36.2, 36.8, 37.4, 38, 38.6,
      44, 44.6, 45.2, 45.8, 46.4,
      59, 59.6, 60.2, 60.8, 61.4
    )
  ),
  # Compact automatic row names for 25 rows.
  row.names = c(NA, -25L),
  class = c("tbl_df", "tbl", "data.frame")
)

df1
# Reference table: one target timestamp (seconds since epoch, UTC) per id.
df1 <- structure(
  list(
    id = c(1, 2, 3, 4, 5),
    time = structure(
      c(1484092800, 1485907200, 1490227200, 1490918400, 1491955200),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    )
  ),
  # Compact automatic row names for 5 rows.
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)



# Scored observations: five (time, score) rows for each id 1..5.
# Values are laid out one id per line; times are seconds since epoch, UTC.
df2 <- structure(
  list(
    id = rep(c(1, 2, 3, 4, 5), each = 5),
    time = structure(
      c(
        1466553600, 1465948800, 1453420800, 1485302400, 1433030400,
        1421712000, 1453852800, 1485302400, 1485993600, 1517529600,
        1400544000, 1434067200, 1466985600, 1497484800, 1390003200,
        1516060800, 1464825600, 1497916800, 1527638400, 1454025600,
        1390608000, 1421712000, 1466467200, 1453852800, 1485820800
      ),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    ),
    score = c(
      3, 2, 5, 4, 5,
      24.2, 24.8, 25.4, 26, 26.6,
      36.2, 36.8, 37.4, 38, 38.6,
      44, 44.6, 45.2, 45.8, 46.4,
      59, 59.6, 60.2, 60.8, 61.4
    )
  ),
  # Compact automatic row names for 25 rows.
  row.names = c(NA, -25L),
  class = c("tbl_df", "tbl", "data.frame")
)