R 如何基于多种条件将数据从一个数据帧复制到另一个数据帧

R 如何基于多种条件将数据从一个数据帧复制到另一个数据帧,r,dataframe,datetime,R,Dataframe,Datetime,在我的df1(包括df1$id,df1$datetime\u interval,df1$datetime\u事件和df1$event)中,我想根据以下条件放置df2(包括df2$id,df2$datetime\u事件)中的数据: 如果df1$id和df2$id匹配 和如果df2$datetime\u事件在df1$datetime\u间隔内 比我想把df2$datetime_事件的数据复制到df1$datetime_事件的df1中相应行的df2$datetime_事件的列中,和df1$event中

在我的
df1
(包括
df1$id
df1$datetime\u interval
df1$datetime\u事件
df1$event
)中,我想根据以下条件放置
df2
(包括
df2$id
df2$datetime\u事件
)中的数据:

如果
df1$id
df2$id
匹配

如果
df2$datetime\u事件
df1$datetime\u间隔内

我想把
df2$datetime_事件的数据
复制到
df1$datetime_事件
df1
中相应行的
df2$datetime_事件的列中,
df1$event
中的字符串(例如“yes”)

如果不满足条件,我不希望有结果(NA)

因此:

我试过一些方法,但效果并不理想。我仍然错过了一些步骤,我不知道如何继续前进。这就是我到目前为止所做的:

for(i in seq_along(df1$id)){
  for(j in seq_along(df2$id)){
    ifelse(df2$id[j] ==  df1$id[i]) {
       ifelse(df2$datetime_event[j] %within% df1$datetime_interval[i] == TRUE){
        df1$datetime_event <- df2$datetime_ic_corr[j]
       }
     }
   }
 } 
提前感谢您的所有新输入!因为我被卡住了

dput(df1)
structure(list(ID = c(1, 1, 2, 3, 3, 6), datetime_interval = c("2019-04-19 21:50:00 UTC--2019-04-20 21:31:00 UTC", 
"2019-07-02 04:23:00 UTC--2019-07-02 08:51:00 UTC", "2019-07-04 19:45:00 UTC--2019-07-05 00:30:00 UTC", 
"2019-06-07 08:55:00 UTC--2019-06-07 14:43:00 UTC", "2019-05-06 17:18:00 UTC--2019-05-06 23:18:00 UTC", 
"2019-08-02 22:00:00 UTC--2019-08-04 03:10:00 UTC"), datetime_event = c("NA", 
"NA", "NA", "NA", "NA", "NA"), event = c("NA", "NA", "NA", "NA", 
"NA", "NA")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame"))

dput(df2)
structure(list(ID = c(1, 3, 5, 6, 6), datetime_event = c("2019-04-19 21:55:00 UTC", 
"2019-05-06 21:23:00 UTC", "2019-07-04 19:45:00 UTC", "2019-05-06 17:18:00 UTC", 
"2019-08-03 10:10:00 UTC")), row.names = c(NA, -5L), class = c("tbl_df", 
"tbl", "data.frame"))

棘手的问题。我认为这是可行的:

library(dplyr)
library(tidyr)

# convert datetime_interval to datetime class start and end columns
# and add row IDs
df1 = df1 %>% 
  separate(datetime_interval, into = c("start", "end"), sep = "--") %>%
  mutate_at(vars(start, end), as.POSIXct) %>%
  select(-datetime_event, -event) %>%
  mutate(row_id = row_number())

# convert datetime event to datetime class
df2 = df2 %>%
  mutate(datetime_event = as.POSIXct(datetime_event))

# join and filter
df1 %>% left_join(df2, by = "ID") %>%
  mutate(
    datetime_event = ifelse(datetime_event >= start & datetime_event <= end, datetime_event, NA),
    event = ifelse(is.na(datetime_event), NA, "yes")
  ) %>%
  arrange(row_id, datetime_event) %>%
  group_by(row_id) %>%
  slice(1)
# # A tibble: 6 x 6
# # Groups:   row_id [6]
#      ID start               end                 row_id datetime_event event
#   <dbl> <dttm>              <dttm>               <int>          <dbl> <chr>
# 1     1 2019-04-19 21:50:00 2019-04-20 21:31:00      1     1555725300 yes  
# 2     1 2019-07-02 04:23:00 2019-07-02 08:51:00      2             NA NA   
# 3     2 2019-07-04 19:45:00 2019-07-05 00:30:00      3             NA NA   
# 4     3 2019-06-07 08:55:00 2019-06-07 14:43:00      4             NA NA   
# 5     3 2019-05-06 17:18:00 2019-05-06 23:18:00      5     1557192180 yes  
# 6     6 2019-08-02 22:00:00 2019-08-04 03:10:00      6     1564841400 yes  
库(dplyr)
图书馆(tidyr)
#将datetime\u interval转换为datetime类的开始列和结束列
#并添加行ID
df1=df1%>%
分开(日期时间间隔,分为=c(“开始”、“结束”)、sep=“-->%)
在(变量(开始,结束),如.POSIXct)%>%
选择(-datetime_event,-event)%>%
变异(行id=行编号())
#将datetime事件转换为datetime类
df2=df2%>%
mutate(datetime_事件=as.POSIXct(datetime_事件))
#加入并筛选
df1%>%左联合(df2,by=“ID”)%>%
变异(
datetime\u event=ifelse(datetime\u event>=start&datetime\u event%
排列(行id,日期时间事件)%>%
分组依据(行id)%>%
切片(1)
##tibble:6 x 6
##组:行id[6]
#ID开始结束行\u ID日期时间\u事件
#                                            
#1199-04-19 21:50:00 2019-04-20 21:31:00 1155725300是
#2019-07-02 04:23:00 2019-07-02 08:51:00 2不适用
#3 2 2019-07-04 19:45:00 2019-07-05 00:30:00 3北美
#2019-06-07 08:55:00 2019-06-07 14:43:00 4 NA
#5 3 2019-05-06 17:18:00 2019-05-06 23:18:00 5 1557192180是
#6 2019-08-02 22:00:00 2019-08-04 03:10:00 6 1564841400是

棘手的问题。我认为这是可行的:

library(dplyr)
library(tidyr)

# convert datetime_interval to datetime class start and end columns
# and add row IDs
df1 = df1 %>% 
  separate(datetime_interval, into = c("start", "end"), sep = "--") %>%
  mutate_at(vars(start, end), as.POSIXct) %>%
  select(-datetime_event, -event) %>%
  mutate(row_id = row_number())

# convert datetime event to datetime class
df2 = df2 %>%
  mutate(datetime_event = as.POSIXct(datetime_event))

# join and filter
df1 %>% left_join(df2, by = "ID") %>%
  mutate(
    datetime_event = ifelse(datetime_event >= start & datetime_event <= end, datetime_event, NA),
    event = ifelse(is.na(datetime_event), NA, "yes")
  ) %>%
  arrange(row_id, datetime_event) %>%
  group_by(row_id) %>%
  slice(1)
# # A tibble: 6 x 6
# # Groups:   row_id [6]
#      ID start               end                 row_id datetime_event event
#   <dbl> <dttm>              <dttm>               <int>          <dbl> <chr>
# 1     1 2019-04-19 21:50:00 2019-04-20 21:31:00      1     1555725300 yes  
# 2     1 2019-07-02 04:23:00 2019-07-02 08:51:00      2             NA NA   
# 3     2 2019-07-04 19:45:00 2019-07-05 00:30:00      3             NA NA   
# 4     3 2019-06-07 08:55:00 2019-06-07 14:43:00      4             NA NA   
# 5     3 2019-05-06 17:18:00 2019-05-06 23:18:00      5     1557192180 yes  
# 6     6 2019-08-02 22:00:00 2019-08-04 03:10:00      6     1564841400 yes  
库(dplyr)
图书馆(tidyr)
#将datetime\u interval转换为datetime类的开始列和结束列
#并添加行ID
df1=df1%>%
分开(日期时间间隔,分为=c(“开始”、“结束”)、sep=“-->%)
在(变量(开始,结束),如.POSIXct)%>%
选择(-datetime_event,-event)%>%
变异(行id=行编号())
#将datetime事件转换为datetime类
df2=df2%>%
mutate(datetime_事件=as.POSIXct(datetime_事件))
#加入并筛选
df1%>%左联合(df2,by=“ID”)%>%
变异(
datetime\u event=ifelse(datetime\u event>=start&datetime\u event%
排列(行id,日期时间事件)%>%
分组依据(行id)%>%
切片(1)
##tibble:6 x 6
##组:行id[6]
#ID开始结束行\u ID日期时间\u事件
#                                            
#1199-04-19 21:50:00 2019-04-20 21:31:00 1155725300是
#2019-07-02 04:23:00 2019-07-02 08:51:00 2不适用
#3 2 2019-07-04 19:45:00 2019-07-05 00:30:00 3北美
#2019-06-07 08:55:00 2019-06-07 14:43:00 4 NA
#5 3 2019-05-06 17:18:00 2019-05-06 23:18:00 5 1557192180是
#6 2019-08-02 22:00:00 2019-08-04 03:10:00 6 1564841400是

您能否与
dput
共享输入
df1
df2
?这将使它们可以复制/粘贴,并使人们更容易演示解决方案。如果您的实际数据大于您显示的数据,请将
dput(df1)
的输出放入
dput(head(df1))
进入你的问题。@GregorThomas谢谢!我已经把它们放入我的问题中了!你能将输入
df1
df2
dput
共享吗?这将使它们可以复制/粘贴,并使人们更容易演示解决方案。将
dput(df1)
的输出(或者
dput(head(df1))
,如果您的实际数据比您显示的数据大),请输入您的问题。@GregorThomas谢谢!我已经将它们输入到我的问题中了!
dput(df1)
structure(list(ID = c(1, 1, 2, 3, 3, 6), datetime_interval = c("2019-04-19 21:50:00 UTC--2019-04-20 21:31:00 UTC", 
"2019-07-02 04:23:00 UTC--2019-07-02 08:51:00 UTC", "2019-07-04 19:45:00 UTC--2019-07-05 00:30:00 UTC", 
"2019-06-07 08:55:00 UTC--2019-06-07 14:43:00 UTC", "2019-05-06 17:18:00 UTC--2019-05-06 23:18:00 UTC", 
"2019-08-02 22:00:00 UTC--2019-08-04 03:10:00 UTC"), datetime_event = c("NA", 
"NA", "NA", "NA", "NA", "NA"), event = c("NA", "NA", "NA", "NA", 
"NA", "NA")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame"))

dput(df2)
structure(list(ID = c(1, 3, 5, 6, 6), datetime_event = c("2019-04-19 21:55:00 UTC", 
"2019-05-06 21:23:00 UTC", "2019-07-04 19:45:00 UTC", "2019-05-06 17:18:00 UTC", 
"2019-08-03 10:10:00 UTC")), row.names = c(NA, -5L), class = c("tbl_df", 
"tbl", "data.frame"))
library(dplyr)
library(tidyr)

# convert datetime_interval to datetime class start and end columns
# and add row IDs
df1 = df1 %>% 
  separate(datetime_interval, into = c("start", "end"), sep = "--") %>%
  mutate_at(vars(start, end), as.POSIXct) %>%
  select(-datetime_event, -event) %>%
  mutate(row_id = row_number())

# convert datetime event to datetime class
df2 = df2 %>%
  mutate(datetime_event = as.POSIXct(datetime_event))

# join and filter
df1 %>% left_join(df2, by = "ID") %>%
  mutate(
    datetime_event = ifelse(datetime_event >= start & datetime_event <= end, datetime_event, NA),
    event = ifelse(is.na(datetime_event), NA, "yes")
  ) %>%
  arrange(row_id, datetime_event) %>%
  group_by(row_id) %>%
  slice(1)
# # A tibble: 6 x 6
# # Groups:   row_id [6]
#      ID start               end                 row_id datetime_event event
#   <dbl> <dttm>              <dttm>               <int>          <dbl> <chr>
# 1     1 2019-04-19 21:50:00 2019-04-20 21:31:00      1     1555725300 yes  
# 2     1 2019-07-02 04:23:00 2019-07-02 08:51:00      2             NA NA   
# 3     2 2019-07-04 19:45:00 2019-07-05 00:30:00      3             NA NA   
# 4     3 2019-06-07 08:55:00 2019-06-07 14:43:00      4             NA NA   
# 5     3 2019-05-06 17:18:00 2019-05-06 23:18:00      5     1557192180 yes  
# 6     6 2019-08-02 22:00:00 2019-08-04 03:10:00      6     1564841400 yes