Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/72.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何根据一个dataframe中的列的值和R中另一个dataframe的列标题名称有条件地创建新列_R_Dataframe_Merge_Conditional_Names - Fatal编程技术网

如何根据一个dataframe中的列的值和R中另一个dataframe的列标题名称有条件地创建新列

如何根据一个dataframe中的列的值和R中另一个dataframe的列标题名称有条件地创建新列,r,dataframe,merge,conditional,names,R,Dataframe,Merge,Conditional,Names,我有一个数据帧df1,它有一个名为averageDate的列,其中包含日期,格式为%Y-%m 我还有另一个数据框df2,其中大多数列名是%Y-%m格式的日期值,这些列中的数据是经济指标的数值 我想在df1中填充一个新列(如df3所示),其中的值是在df2中找到的值,其中df`中的averageDate值与df2中的列名匹配 对于前面的问题,我已经成功地解决了基于独立数据帧中的两列的条件合并,但我在这里坚持的是第二个匹配条件是df2中的列名 我的数据帧的复制如下所示: df1 <- stru

我有一个数据帧
df1
,它有一个名为
averageDate
的列,其中包含日期,格式为%Y-%m

我还有另一个数据框
df2
,其中大多数列名是%Y-%m格式的日期值,这些列中的数据是经济指标的数值

我想在df1中填充一个新列(如
df3
所示),其中的值是在df2中找到的值,其中df1中的
averageDate
值与df2中的列名匹配

对于前面的问题,我已经成功地解决了基于独立数据帧中的两列的条件合并,但我在这里坚持的是第二个匹配条件是
df2
中的列名

我的数据帧的复制如下所示:

# df1: one row per zipcode with the month ("%Y-%m") to look up in df2.
# zipcode is a factor whose levels are sorted, while the rows themselves
# appear in the order 10019, 10027, 94117, 20009; the third row has no date.
df1 <- data.frame(
  zipcode = factor(
    c("10019", "10027", "94117", "20009"),
    levels = c("10019", "10027", "20009", "94117")
  ),
  averageDate = c("2017-08", "2017-04", NA, "2015-11"),
  stringsAsFactors = FALSE
)
# Keep the tibble class vector carried by the original dput() output.
class(df1) <- c("tbl_df", "tbl", "data.frame")

# df2: wide-format lookup table (dput() output).
# - RegionName: numeric zip codes (20009, 10019, 10027).
# - Every other column is named after a month in "%Y-%m" form, running from
#   "2015-01" through "2018-01", and holds one numeric value per region.
#   Per the question text these values are an economic indicator.
# - The third region (10027) is NA for every month before "2016-03".
# - Row names 38, 82, 226 are kept from the original dput() output.
df2 <-structure(list(RegionName = c(20009, 10019, 10027), `2015-01` = c(444500, 
1855000, NA), `2015-02` = c(439000, 1715000, NA), `2015-03` = c(437000, 
1775000, NA), `2015-04` = c(475000, 1855000, NA), `2015-05` = c(489000, 
1860000, NA), `2015-06` = c(489750, 1877500, NA), `2015-07` = c(479900, 
1957500, NA), `2015-08` = c(489900, 1950000, NA), `2015-09` = c(5e+05, 
1947500, NA), `2015-10` = c(512450, 1958000, NA), `2015-11` = c(503999.5, 
1990000, NA), `2015-12` = c(499900, 1995000, NA), `2016-01` = c(499500, 
1995000, NA), `2016-02` = c(529900, 1822500, NA), `2016-03` = c(5e+05, 
1820000, 872000), `2016-04` = c(5e+05, 1930000, 887000), `2016-05` = c(492500, 
1795500, 837000), `2016-06` = c(529000, 1750000, 819000), `2016-07` = c(549000, 
1832500, 725800), `2016-08` = c(577000, 1850000, 725000), `2016-09` = c(549900, 
1762500, 753500), `2016-10` = c(529000, 1777500, 737900), `2016-11` = c(519000, 
1787000, 750000), `2016-12` = c(499000, 1795000, 725800), `2017-01` = c(549000, 
1795000, 749000), `2017-02` = c(522450, 1833000, 845000), `2017-03` = c(546950, 
1836500, 867250), `2017-04` = c(572247.5, 1849450, 929000), `2017-05` = c(549900, 
1850000, 929000), `2017-06` = c(540000, 1875000, 899000), `2017-07` = c(519900, 
1895000, 899000), `2017-08` = c(525000, 1849990, 897000), `2017-09` = c(572450, 
1795000, 840000), `2017-10` = c(595000, 1795000, 882000), `2017-11` = c(555650, 
1825000, 949000), `2017-12` = c(525000, 1799950, 795000), `2018-01` = c(557000, 
1925000, 772500)), .Names = c("RegionName", "2015-01", "2015-02", 
"2015-03", "2015-04", "2015-05", "2015-06", "2015-07", "2015-08", 
"2015-09", "2015-10", "2015-11", "2015-12", "2016-01", "2016-02", 
"2016-03", "2016-04", "2016-05", "2016-06", "2016-07", "2016-08", 
"2016-09", "2016-10", "2016-11", "2016-12", "2017-01", "2017-02", 
"2017-03", "2017-04", "2017-05", "2017-06", "2017-07", "2017-08", 
"2017-09", "2017-10", "2017-11", "2017-12", "2018-01"), row.names = c(38L, 
82L, 226L), class = "data.frame")

# df3: the desired result (dput() output), one row per zipcode.
# - RegionName: zip codes as character strings.
# - variable: factor over the 37 month labels "2015-01".."2018-01"; the codes
#   32, 28, 11 select "2017-08", "2017-04", "2015-11", and the last row is NA.
# - value: the df2 entry for that zipcode/month pair (1849990, 929000,
#   503999.5); NA for 94117, which has no row in df2.
df3 <- structure(list(RegionName = c("10019", "10027", "20009", "94117"
    ), variable = structure(c(32L, 28L, 11L, NA), .Label = c("2015-01", 
    "2015-02", "2015-03", "2015-04", "2015-05", "2015-06", "2015-07", 
    "2015-08", "2015-09", "2015-10", "2015-11", "2015-12", "2016-01", 
    "2016-02", "2016-03", "2016-04", "2016-05", "2016-06", "2016-07", 
    "2016-08", "2016-09", "2016-10", "2016-11", "2016-12", "2017-01", 
    "2017-02", "2017-03", "2017-04", "2017-05", "2017-06", "2017-07", 
    "2017-08", "2017-09", "2017-10", "2017-11", "2017-12", "2018-01"
    ), class = "factor"), value = c(1849990, 929000, 503999.5, NA
    )), .Names = c("RegionName", "variable", "value"), row.names = c(NA, 
    -4L), class = "data.frame")
类似这样的东西

# library() fails loudly if tidyverse is missing; require() would only
# return FALSE and let the script die later with a confusing error.
library(tidyverse)

# Reshape df2 from wide (one column per month) to long (one row per
# zipcode/month pair) so the month becomes an ordinary join key.
# Every column except RegionName is a month column.
df2_long <- df2 %>%
  pivot_longer(-RegionName, names_to = "averageDate", values_to = "Value") %>%
  rename(zipcode = RegionName) %>%
  mutate(zipcode = as.character(zipcode))

# df1$zipcode is a factor and df2_long$zipcode is character; convert
# explicitly so the join does not depend on implicit key coercion.
# Rows of df1 with no match (NA date, or a zipcode absent from df2) get
# Value = NA.
df1 %>%
  mutate(zipcode = as.character(zipcode)) %>%
  left_join(df2_long, by = c("zipcode", "averageDate"))
## A tibble: 4 x 3
#  zipcode averageDate   Value
#  <chr>   <chr>         <dbl>
#1 10019   2017-08     1849990
#2 10027   2017-04      929000
#3 94117   NA               NA
#4 20009   2015-11      504000
require(tidyverse);
left_join(
    df1,
    df2 %>%
        gather(averageDate, Value, 2:ncol(df2)) %>%
        rename(zipcode = RegionName) %>%
        mutate(zipcode = as.character(zipcode)))
## A tibble: 4 x 3
#  zipcode averageDate   Value
#  <chr>   <chr>         <dbl>
#1 10019   2017-08     1849990
#2 10027   2017-04      929000
#3 94117   NA               NA
#4 20009   2015-11      504000

来自@Marko的见解

# Reshape df2 from wide to long: one row per RegionName/month pair, with the
# month label in `variable` and the number in `value` (the column names that
# df3 expects).
# NOTE(review): melt() is not base R and no package is loaded in this
# snippet; presumably reshape2::melt -- confirm and load it first.
long <- melt(df2, id.vars = "RegionName")

# Keep every row of df1 (all.y = TRUE) and attach the value whose region and
# month both match. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
df3 <- merge(
  long, df1,
  by.x = c("RegionName", "variable"),
  by.y = c("zipcode", "averageDate"),
  all.y = TRUE
)

先将df2从宽格式转换为长格式(long),然后根据“averageDate”与df1合并,这样可以满足您的需要吗?

是的,我刚刚尝试过,效果不错!