R 如何映射两个数据集A&;B在数据集A的月份=数据集B的前一个月的情况下

R 如何映射两个数据集A&;B在数据集A的月份=数据集B的前一个月的情况下,r,R,有两个数据集-“开放的原始数据”和“交换率”。在Open_raw_数据中,我想获得当前日期列和货币列的汇率基础。但是,在必须使用前一个月的汇率进行映射的情况下 数据 Open_raw_data <- structure(list(Current_date = structure(c(3L, 1L, 2L), .Label = c("11-May-20", "15-Jun-20", "27-Apr-20"), class = &q

有两个数据集-“开放的原始数据”和“交换率”。在Open_raw_数据中,我想获得当前日期列和货币列的汇率基础。但是,在必须使用前一个月的汇率进行映射的情况下

数据

Open_raw_data <- structure(list(Current_date = structure(c(3L, 1L, 2L), .Label = c("11-May-20", 
"15-Jun-20", "27-Apr-20"), class = "factor"), INV_AMOUNT = c(7269.11, 
2367.51, 12694.02), CURRENCY = structure(c(1L, 1L, 1L), .Label = "EUR", class = "factor")), class = "data.frame", row.names = c(NA, 
-3L))

exchange_rate <- structure(list(Currency = structure(c(8L, 8L, 8L, 8L, 7L, 7L, 
7L, 7L, 13L, 13L, 13L, 13L, 20L, 20L, 20L, 20L, 5L, 5L, 5L, 5L, 
16L, 16L, 16L, 16L, 3L, 3L, 3L, 3L, 17L, 17L, 17L, 17L, 2L, 2L, 
2L, 2L, 15L, 15L, 15L, 15L, 4L, 4L, 4L, 4L, 8L, 7L, 13L, 20L, 
5L, 16L, 3L, 17L, 2L, 15L, 4L, 11L, 11L, 11L, 11L, 11L, 8L, 7L, 
13L, 20L, 5L, 16L, 3L, 17L, 2L, 15L, 4L, 11L, 14L, 14L, 14L, 
14L, 14L, 14L, 18L, 18L, 18L, 18L, 18L, 18L, 10L, 10L, 10L, 10L, 
10L, 10L, 12L, 12L, 12L, 12L, 12L, 12L, 21L, 21L, 21L, 21L, 21L, 
21L, 9L, 9L, 9L, 9L, 9L, 9L, 19L, 19L, 19L, 19L, 19L, 19L, 1L, 
1L, 1L, 1L, 1L, 1L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 11L, 13L, 15L, 16L, 17L, 20L, 14L, 18L, 10L, 12L, 
21L, 9L, 19L), .Label = c("AUD", "CNY", "CZK", "EUR", "GBP", 
"HKD", "IDR", "INR", "KZT", "MAD", "MXN", "NGN", "PLN", "RON", 
"RUB", "SEK", "SGD", "TRY", "UAH", "USD", "VND"), class = "factor"), 
    Year = c(2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 
    2020L, 2020L, 2020L, 2020L, 2020L), Month = c(1L, 2L, 3L, 
    4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 
    3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 
    2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 2L, 3L, 4L, 5L, 6L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 
    1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 
    4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 
    1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 
    4L, 5L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
    7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), Exchnage_Rate = c(0.01266, 
    0.01256, 0.01204, 0.01224, 6.6e-05, 6.4e-05, 5.5e-05, 6.16e-05, 
    0.23288, 0.23283, 0.2197, 0.22014, 0.90671, 0.90721, 0.91044, 
    0.91894, 1.18646, 1.16307, 1.12671, 1.14633, 0.09401, 0.09444, 
    0.09037, 0.09348, 0.0396, 0.0331, 0.0366, 0.03691, 0.66551, 
    0.65106, 0.63837, 0.65294, 0.13068, 0.12975, 0.12894, 0.13019, 
    0.01434, 0.01357, 0.0114, 0.01259, 1, 1, 1, 1, 0.01192, 6.1e-05, 
    0.22483, 0.90058, 1.11213, 0.09536, 0.03714, 0.63709, 0.12622, 
    0.01283, 1, 0.04824, 0.04625, 0.03803, 0.03885, 0.04053, 
    0.01192, 6.1e-05, 0.22483, 0.90058, 1.11213, 0.09536, 0.03714, 
    0.63709, 0.12622, 0.01283, 1, 0.03859, 0.20928, 0.20821, 
    0.207, 0.20657, 0.20638, 0.20652, 0.15065, 0.14644, 0.14303, 
    0.13266, 0.13203, 0.1297, 0.09419, 0.09447, 0.09344, 0.09283, 
    0.09197, 0.0919, 0.0025, 0.00253, 0.00233, 0.00237, 0.00232, 
    0.00232, 3.91e-05, 3.97e-05, 3.86e-05, 3.91e-05, 3.86e-05, 
    3.83e-05, 0.0024, 0.00237, 0.00204, 0.00214, 0.00221, 0.00222, 
    0.03693, 0.03708, 0.03263, 0.03406, 0.03341, 0.03325, 0.61695, 
    0.59838, 0.55659, 0.60261, 0.60036, 0.6116, 0.1168, 0.11828, 
    0.11845, 0.11863, 0.11616, 0.11474, 0.6114, 0.12609, 0.03737, 
    1, 1.09587, 0.11484, 6.3e-05, 0.0118, 0.03859, 0.22492, 0.01251, 
    0.0954, 0.63841, 0.89127, 0.20652, 0.1297, 0.0919, 0.00232, 
    3.83e-05, 0.00222, 0.03325)), class = "data.frame", row.names = c(NA, 
-147L))
打开原始数据试试这个

library(dplyr)
library(lubridate)

exchange_rate %>%
  mutate(Date = make_date(Year, Month), .keep = "unused") %>% 
  right_join(Open_raw_data %>% mutate(
               Current_date = dmy(Current_date),
               Prev_date = rollback(Current_date, roll_to_first = T) - months(1)),
             by = c("Date" = "Prev_date", "Currency" = "CURRENCY")) %>%
  select(-Date)

#   Currency Exchnage_Rate Current_date INV_AMOUNT
# 1      EUR             1   2020-04-27    7269.11
# 2      EUR             1   2020-05-11    2367.51
# 3      EUR             1   2020-06-15   12694.02
基本R解决方案:

merge(transform(exchange_rate, mon = as.Date(paste(Year, Month+1, "01", sep = "-"))),
      transform(Open_raw_data, mon = as.Date(gsub("^\\d+", "01", Current_date), "%d-%b-%y")), 
                by.x = c("mon", "Currency"), by.y = c("mon", "CURRENCY"), all.y = TRUE)

您在第二个
df
中有86个月,在第一个列表的
Current\u date
列中只有50个条目。你怎么知道哪些数据属于哪个月?由于数据量巨大,很难与你分享完整的数据集。如果可能的话,您可以构建一个逻辑代码,以便我能够轻松实现。例如,如果当前数据为2020年8月30日,货币为欧元,那么我需要获得上个月的汇率,即2020年7月的货币为欧元。这是我编辑的数据逻辑,我希望这将是有用的警告消息:所有格式无法解析。找不到格式。-得到这个警告,所有的编辑都完成了,实际上日期格式有问题。它应该是as.Date(打开原始数据$Current\u Date,格式=“%d-%b-%y”)。现在在我的系统中运行良好。谢谢你的帮助
merge(transform(exchange_rate, mon = as.Date(paste(Year, Month+1, "01", sep = "-"))),
      transform(Open_raw_data, mon = as.Date(gsub("^\\d+", "01", Current_date), "%d-%b-%y")), 
                by.x = c("mon", "Currency"), by.y = c("mon", "CURRENCY"), all.y = TRUE)