R 如果日期匹配,则将不同数据框中的列相乘
我有以下两个数据框:
# Per-category coefficients: one row for every Category/Date combination.
df1 <- data.frame(
  Category = rep(c("A", "B", "C"), each = 3),
  Date = rep(c(2001, 2002, 2003), times = 3),
  Beta1 = c(1, 3, 4, 4, 5, 3, 5, 3, 1),
  Beta2 = c(2, 4, 6, 1, 1, 2, 5, 4, 2)
)

# Per-date multipliers: one row per Date.
df2 <- data.frame(
  Date = c(2001, 2002, 2003),
  Column1 = c(10, 20, 30),
  Column2 = c(40, 50, 60)
)
若要在最终的数据框中保留 df1 的 Category 变量,
一种方法是:
# Attach df2's Column1/Column2 values to every df1 row through the shared Date key.
df3 <- df1 %>%
  left_join(df2, by = "Date")

# Multiply each ColumnN by each BetaN. The (Date, Category) grouping is kept,
# so df4 stays a grouped tibble, as the printed result shows.
df4 <- df3 %>%
  group_by(Date, Category) %>%
  mutate(
    Col1Bet1 = Column1 * Beta1,
    Col1Bet2 = Column1 * Beta2,
    Col2Bet1 = Column2 * Beta1,
    Col2Bet2 = Column2 * Beta2
  )
# A tibble: 9 x 10
# Groups: Date, Category [9]
Category Date Beta1 Beta2 Column1 Column2 Col1Bet1 Col1Bet2 Col2Bet1 Col2Bet2
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A 2001 1 2 10 40 10 20 40 80
2 A 2002 3 4 20 50 60 80 150 200
3 A 2003 4 6 30 60 120 180 240 360
4 B 2001 4 1 10 40 40 10 160 40
5 B 2002 5 1 20 50 100 20 250 50
6 B 2003 3 2 30 60 90 60 180 120
7 C 2001 5 5 10 40 50 50 200 200
8 C 2002 3 4 20 50 60 80 150 200
9 C 2003 1 2 30 60 30 60 60 120
df3 %>%
  mutate(Col1Bet1 = Column1 * Beta1, Col1Bet2 = Column1 * Beta2, Col2Bet1 = Column2 * Beta1, Col2Bet2 = Column2 * Beta2)
其中给出了以下内容:
# Join the per-date multipliers from df2 onto df1, matching rows on Date.
df3 <- df1 %>%
  left_join(df2, by = "Date")

# Build the four Column x Beta products. Rows remain grouped by
# (Date, Category), which is why the printed tibble reports 9 groups.
df4 <- df3 %>%
  group_by(Date, Category) %>%
  mutate(
    Col1Bet1 = Column1 * Beta1,
    Col1Bet2 = Column1 * Beta2,
    Col2Bet1 = Column2 * Beta1,
    Col2Bet2 = Column2 * Beta2
  )
# A tibble: 9 x 10
# Groups: Date, Category [9]
Category Date Beta1 Beta2 Column1 Column2 Col1Bet1 Col1Bet2 Col2Bet1 Col2Bet2
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A 2001 1 2 10 40 10 20 40 80
2 A 2002 3 4 20 50 60 80 150 200
3 A 2003 4 6 30 60 120 180 240 360
4 B 2001 4 1 10 40 40 10 160 40
5 B 2002 5 1 20 50 100 20 250 50
6 B 2003 3 2 30 60 90 60 180 120
7 C 2001 5 5 10 40 50 50 200 200
8 C 2002 3 4 20 50 60 80 150 200
9 C 2003 1 2 30 60 30 60 60 120
# A tibble: 9 x 10
# Groups: Date, Category [9]
Category Date Beta1 Beta2 Column1 Column2 Col1Bet1 Col1Bet2 Col2Bet1 Col2Bet2
1 A 2001 1 2 10 40 10 20 40 80
2 A 2002 3 4 20 50 60 80 150 200
3 A 2003 4 6 30 60 120 180 240 360
4 B 2001 4 1 10 40 40 10 160 40
5 B 2002 5 1 20 50 100 20 250 50
6 B 2003 3 2 30 60 90 60 180 120
7 C 2001 5 5 10 40 50 50 200 200
8 C 2002 3 4 20 50 60 80 150 200
9 C 2003 1 2 30 60 30 60 60 120
使用 tidyr
和 dplyr
进行一些数据整理,可以这样实现:
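# NOTE(review): the opening steps of this pipeline were lost in extraction. They are
# reconstructed from the printed result (columns `Column`/`Value`, Column1 paired with
# category "A" and Column2 with "C") -- confirm against the original answer.
df2 %>%
  pivot_longer(-Date, names_to = "Column", values_to = "Value") %>%
  mutate(Category = ifelse(Column == "Column1", "A", "C")) %>%
  left_join(df1) %>%
  mutate(Beta1 = Value * Beta1,
         Beta2 = Value * Beta2) %>%
  select(Date, Category, Column, Beta1, Beta2) %>%
  pivot_wider(id_cols = Date, names_from = c("Column", "Category"), values_from = c("Beta1", "Beta2"))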
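#> Joining, by = c("Date", "Category")
#> Warning: Column `Category` joining character vector and factor, coercing into
#> character vector
#> # A tibble: 3 x 5
#>    Date Beta1_Column1_A Beta1_Column2_C Beta2_Column1_A Beta2_Column2_C
#>   <dbl>           <dbl>           <dbl>           <dbl>           <dbl>
#> 1  2001              10             200              20             200
#> 2  2002              60             150              80             200
#> 3  2003             120              60             180             120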
由 reprex 包 (v0.3.0) 于 2020-04-14 创建。
这可能是一个开始。结果数据框 df3 以另一种格式包含了您需要的所有信息:
# Base-R variant: merge() joins df1 and df2 on the column name they share.
df3 <- merge(df1, df2)

# b1/b2 scale Beta1/Beta2 by Column1 for category "A" and by Column2 for
# category "C"; rows of any other category are left as NA.
df3$b1 <- with(df3, ifelse(Category == "A", Beta1 * Column1,
                    ifelse(Category == "C", Beta1 * Column2, NA)))
df3$b2 <- with(df3, ifelse(Category == "A", Beta2 * Column1,
                    ifelse(Category == "C", Beta2 * Column2, NA)))

df3
# Date Category Beta1 Beta2 Column1 Column2 b1 b2
# 1 2001 A 1 2 10 40 10 20
# 2 2001 C 5 5 10 40 200 200
# 3 2001 B 4 1 10 40 NA NA
# 4 2002 A 3 4 20 50 60 80
# 5 2002 B 5 1 20 50 NA NA
# 6 2002 C 3 4 20 50 150 200
# 7 2003 B 3 2 30 60 NA NA
# 8 2003 A 4 6 30 60 120 180
# 9 2003 C 1 2 30 60 60 120