根据组内最近的时间戳在R中连接两个数据帧
标签:r, dataframe, datetime
我有以下两个数据帧。第一个数据帧(每个 id 对应一个时间):
# First data frame as a reproducible literal: ids 1-5, one POSIXct `time`
# (tzone "UTC") per id, stored as a tibble.
structure(list(id = c(1, 2, 3, 4, 5), time = structure(c(1484092800,
1485907200, 1490227200, 1490918400, 1491955200), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
id time
<dbl> <dttm>
1 1 2017-01-11 00:00:00
2 2 2017-02-01 00:00:00
3 3 2017-03-23 00:00:00
4 4 2017-03-31 00:00:00
5 5 2017-04-12 00:00:00
# Second data frame as a reproducible literal: 25 rows, five per id (1-5),
# each with a POSIXct `time` (tzone "UTC") and a numeric `score`.
structure(list(id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5), time = structure(c(1466553600,
1465948800, 1453420800, 1485302400, 1433030400, 1421712000, 1453852800,
1485302400, 1485993600, 1517529600, 1400544000, 1434067200, 1466985600,
1497484800, 1390003200, 1516060800, 1464825600, 1497916800, 1527638400,
1454025600, 1390608000, 1421712000, 1466467200, 1453852800, 1485820800
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), score = c(3,
2, 5, 4, 5, 24.2, 24.8, 25.4, 26, 26.6, 36.2, 36.8, 37.4, 38,
38.6, 44, 44.6, 45.2, 45.8, 46.4, 59, 59.6, 60.2, 60.8, 61.4)), row.names = c(NA,
-25L), class = c("tbl_df", "tbl", "data.frame"))
id time score
<dbl> <dttm> <dbl>
1 1 2016-06-22 00:00:00 3
2 1 2016-06-15 00:00:00 2
3 1 2016-01-22 00:00:00 5
4 1 2017-01-25 00:00:00 4
5 1 2015-05-31 00:00:00 5
6 2 2015-01-20 00:00:00 24.2
7 2 2016-01-27 00:00:00 24.8
8 2 2017-01-25 00:00:00 25.4
9 2 2017-02-02 00:00:00 26
10 2 2018-02-02 00:00:00 26.6
# … with 15 more rows
structure(list(id = c(1, 2, 3, 4, 5), time = structure(c(1484092800,
1485907200, 1490227200, 1490918400, 1491955200), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
id time
1 1 2017-01-11 00:00:00
2 2 2017-02-01 00:00:00
3 3 2017-03-23 00:00:00
4 4 2017-03-31 00:00:00
5 5 2017-04-12 00:00:00
structure(list(id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5), time = structure(c(1466553600,
1465948800, 1453420800, 1485302400, 1433030400, 1421712000, 1453852800,
1485302400, 1485993600, 1517529600, 1400544000, 1434067200, 1466985600,
1497484800, 1390003200, 1516060800, 1464825600, 1497916800, 1527638400,
1454025600, 1390608000, 1421712000, 1466467200, 1453852800, 1485820800
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), score = c(3,
2, 5, 4, 5, 24.2, 24.8, 25.4, 26, 26.6, 36.2, 36.8, 37.4, 38,
38.6, 44, 44.6, 45.2, 45.8, 46.4, 59, 59.6, 60.2, 60.8, 61.4)), row.names = c(NA,
-25L), class = c("tbl_df", "tbl", "data.frame"))
id time score
1 1 2016-06-22 00:00:00 3
2 1 2016-06-15 00:00:00 2
3 1 2016-01-22 00:00:00 5
4 1 2017-01-25 00:00:00 4
5 1 2015-05-31 00:00:00 5
6 2 2015-01-20 00:00:00 24.2
7 2 2016-01-27 00:00:00 24.8
8 2 2017-01-25 00:00:00 25.4
9 2 2017-02-02 00:00:00 26
10 2 2018-02-02 00:00:00 26.6
#…还有15行
我想从第二个数据帧(含 score 列,下文称 df2)中取出分数,条件是其时间与第一个数据帧(下文称 df1)中的时间最接近。但同时还必须按 id 进行匹配!我已经试过了:
我们可以先按 id 连接两个数据帧,然后计算时间差,并为每个 id 保留时间差最小的观测:
# Nearest-timestamp join within each id: pair every scored row of df2 with
# the time df1 holds for the same id, then keep the closest row per id.
library(tidyverse)
df2 %>%
# Both tables have a `time` column, so after the join they appear as
# time.x (from df2, the left table) and time.y (from df1).
left_join(df1, by = "id") %>%
# Absolute gap between the two timestamps; abs() makes earlier and later
# observations comparable.
mutate(time_dif = abs(time.x - time.y)) %>%
# Evaluate the minimum separately for every id.
group_by(id) %>%
# Keep the row(s) whose gap equals the per-id minimum; ties are all kept,
# and the result is still grouped by id.
filter(time_dif == min(time_dif))
# A tibble: 5 x 5
# Groups: id [5]
id time.x score time.y time_dif
<dbl> <dttm> <dbl> <dttm> <drtn>
1 1 2017-01-25 00:00:00 4 2017-01-11 00:00:00 14 days
2 2 2017-02-02 00:00:00 26 2017-02-01 00:00:00 1 days
3 3 2017-06-15 00:00:00 38 2017-03-23 00:00:00 84 days
4 4 2017-06-20 00:00:00 45.2 2017-03-31 00:00:00 81 days
5 5 2017-01-31 00:00:00 61.4 2017-04-12 00:00:00 71 days
library(tidyverse)
df2 %>%
left_join(df1, by = "id") %>%
mutate(time_dif = abs(time.x - time.y)) %>%
group_by(id) %>%
filter(time_dif == min(time_dif))
# A tibble: 5 x 5
# Groups: id [5]
id time.x score time.y time_dif
<dbl> <dttm> <dbl> <dttm> <drtn>
1 1 2017-01-25 00:00:00 4 2017-01-11 00:00:00 14 days
2 2 2017-02-02 00:00:00 26 2017-02-01 00:00:00 1 days
3 3 2017-06-15 00:00:00 38 2017-03-23 00:00:00 84 days
4 4 2017-06-20 00:00:00 45.2 2017-03-31 00:00:00 81 days
5 5 2017-01-31 00:00:00 61.4 2017-04-12 00:00:00 71 days
数据
# df1: one row per id (1-5) with a single POSIXct `time` (tzone "UTC").
df1 <- structure(list(id = c(1, 2, 3, 4, 5), time = structure(c(1484092800,
1485907200, 1490227200, 1490918400, 1491955200), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
# df2: 25 rows, five per id (1-5), each with a POSIXct `time` (tzone "UTC")
# and a numeric `score`.
df2 <- structure(list(id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5), time = structure(c(1466553600,
1465948800, 1453420800, 1485302400, 1433030400, 1421712000, 1453852800,
1485302400, 1485993600, 1517529600, 1400544000, 1434067200, 1466985600,
1497484800, 1390003200, 1516060800, 1464825600, 1497916800, 1527638400,
1454025600, 1390608000, 1421712000, 1466467200, 1453852800, 1485820800
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), score = c(3,
2, 5, 4, 5, 24.2, 24.8, 25.4, 26, 26.6, 36.2, 36.8, 37.4, 38,
38.6, 44, 44.6, 45.2, 45.8, 46.4, 59, 59.6, 60.2, 60.8, 61.4)), row.names = c(NA,
-25L), class = c("tbl_df", "tbl", "data.frame"))
df1
# NOTE(review): these two assignments repeat the df1/df2 definitions that
# already appear earlier in this file; re-running them is harmless.
# df1: one row per id (1-5) with a single POSIXct `time` (tzone "UTC").
df1 <- structure(list(id = c(1, 2, 3, 4, 5), time = structure(c(1484092800,
1485907200, 1490227200, 1490918400, 1491955200), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
# df2: 25 rows, five per id (1-5), each with a POSIXct `time` (tzone "UTC")
# and a numeric `score`.
df2 <- structure(list(id = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5), time = structure(c(1466553600,
1465948800, 1453420800, 1485302400, 1433030400, 1421712000, 1453852800,
1485302400, 1485993600, 1517529600, 1400544000, 1434067200, 1466985600,
1497484800, 1390003200, 1516060800, 1464825600, 1497916800, 1527638400,
1454025600, 1390608000, 1421712000, 1466467200, 1453852800, 1485820800
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), score = c(3,
2, 5, 4, 5, 24.2, 24.8, 25.4, 26, 26.6, 36.2, 36.8, 37.4, 38,
38.6, 44, 44.6, 45.2, 45.8, 46.4, 59, 59.6, 60.2, 60.8, 61.4)), row.names = c(NA,
-25L), class = c("tbl_df", "tbl", "data.frame"))