R 根据所有可能的组合,将来自不同数据帧的数字相乘

R 根据所有可能的组合,将来自不同数据帧的数字相乘,r,data-wrangling,R,Data Wrangling,我有5个数据帧,如下所示: df_mon <- data.frame(mon = as.factor(c(6, 7, 8, 9, 10)), number = c(1.11, 1.02, 0.95, 0.92, 0.72)) df_year <- data.frame(year = as.factor(c(1, 2)), number = c(1.61, 0.4)) df_cat <- data.f

我有5个数据帧,如下所示:

# Five lookup tables. In each one the first column is the key (a factor or a
# character label) and `number` is the multiplier attached to that key.

# Multipliers keyed by `mon` (values 6 through 10).
df_mon <- data.frame(
  mon = factor(c(6, 7, 8, 9, 10)),
  number = c(1.11, 1.02, 0.95, 0.92, 0.72)
)

# Multipliers keyed by `year` (1 or 2).
df_year <- data.frame(
  year = factor(c(1, 2)),
  number = c(1.61, 0.4)
)

# Multipliers keyed by `cat` ("A", "B" or "C").
df_cat <- data.frame(
  cat = c("A", "B", "C"),
  number = c(1.11, 1.02, 0.44)
)

# Multipliers keyed by `bin` (1 or 2).
df_bin <- data.frame(
  bin = factor(c(1, 2)),
  number = c(1.42, 0.56)
)

# Multipliers keyed by `cat2` ("A", "B", "C", "D" or "AA").
df_cat2 <- data.frame(
  cat2 = c("A", "B", "C", "D", "AA"),
  number = c(0.11, 1.22, 1.34, 0.88, 0.75)
)

也许你可以尝试如下使用 `expand.grid`:

# Gather the lookup tables. Each one holds its key in column 1 and the
# multiplier in column 2.
lst <- list(df_mon, df_year, df_cat, df_bin, df_cat2)

# One row per combination of keys: a "_"-joined label and the product of the
# multipliers that belong to those keys.
results_df <- local({
  # Prefix every key with its column name, e.g. "mon6", "year1", "catA".
  key_labels <- lapply(lst, function(tbl) paste0(names(tbl)[1], tbl[[1]]))
  multipliers <- lapply(lst, function(tbl) tbl[[2]])

  # Both grids are expanded from `lst` in the same order, so row i of the
  # label grid and row i of the number grid describe the same combination.
  label_grid <- do.call(expand.grid, key_labels)
  number_grid <- do.call(expand.grid, multipliers)

  data.frame(
    # Paste the label columns together row by row.
    combi = do.call(paste, c(label_grid, sep = "_")),
    # Multiply the number columns together element-wise.
    final_number = Reduce(`*`, number_grid)
  )
})

下面是一种使用 `dplyr` 和 `tidyr` 的方法:

# Cross-join the five lookup tables left to right, then multiply the
# multiplier columns of every resulting row.
df_all <- Reduce(
  # by = character() ensures cross join
  function(joined, tbl) full_join(joined, tbl, by = character()),
  list(df_mon, df_year, df_cat, df_bin, df_cat2)
) %>%
  # Everything except the five key columns is a multiplier column; stack
  # them into a single `value` column.
  pivot_longer(cols = -c(mon, year, cat, bin, cat2)) %>%
  group_by(mon, year, cat, bin, cat2) %>%
  # .groups = "keep" leaves the result grouped by all five keys.
  summarise(final_number = prod(value), .groups = "keep")
# A tibble: 300 x 6
# Groups:   mon, year, cat, bin, cat2 [300]
   mon   year  cat   bin   cat2  final_number
   <fct> <fct> <chr> <fct> <chr>        <dbl>
 1 6     1     A     1     A            0.310
 2 6     1     A     1     AA           2.11 
 3 6     1     A     1     B            3.44 
 4 6     1     A     1     C            3.77 
 5 6     1     A     1     D            2.48 
 6 6     1     A     2     A            0.122
 7 6     1     A     2     AA           0.833
 8 6     1     A     2     B            1.36 
 9 6     1     A     2     C            1.49 
10 6     1     A     2     D            0.978
# ... with 290 more rows
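df_all <- df_mon %>%
  full_join(df_year, by = character()) %>%  # by = character() ensures cross join
  full_join(df_cat, by = character()) %>%
  full_join(df_bin, by = character()) %>%
  full_join(df_cat2, by = character()) %>%
  pivot_longer(cols = c(-mon, -year, -cat, -bin, -cat2)) %>%
  group_by(mon, year, cat, bin, cat2) %>%
  summarize(final_number = prod(value), .groups = "keep")
# A tibble: 300 x 6
# Groups:   mon, year, cat, bin, cat2 [300]
   mon   year  cat   bin   cat2  final_number
   <fct> <fct> <chr> <fct> <chr>        <dbl>
 1 6     1     A     1     A            0.310
 2 6     1     A     1     AA           2.11
 3 6     1     A     1     B            3.44
 4 6     1     A     1     C            3.77
 5 6     1     A     1     D            2.48
 6 6     1     A     2     A            0.122
 7 6     1     A     2     AA           0.833
 8 6     1     A     2     B            1.36
 9 6     1     A     2     C            1.49
10 6     1     A     2     D            0.978
# ... with 290 more rows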

这种方法把各个 data.frame 中的变量保留为列,便于进一步分析;如果需要 `combi` 列,只需再用一点 `paste()` 就可以创建。

非常有用,我喜欢 tidy 风格的解决方案,它们很容易理解。然而,我尝试了这个解决方案,但没有成功。我得到这个错误:`Error: by must specify variables to join by`

我使用的是 `dplyr` 1.0.1 版。如果您仍在使用 0.8 或更早的版本,那么 `by = character()` 可能是较新版本才有的行为。

是的,我使用的是旧版本的 dplyr。一切正常,谢谢 :)
> head(results_df)
                        combi final_number
1  mon6_year1_catA_bin1_cat2A   0.30985097
2  mon7_year1_catA_bin1_cat2A   0.28472792
3  mon8_year1_catA_bin1_cat2A   0.26518777
4  mon9_year1_catA_bin1_cat2A   0.25681342
5 mon10_year1_catA_bin1_cat2A   0.20098441
6  mon6_year2_catA_bin1_cat2A   0.07698161
# Cross-join the five lookup tables left to right, then multiply the
# multiplier columns of every resulting row.
df_all <- Reduce(
  # by = character() ensures cross join
  function(joined, tbl) full_join(joined, tbl, by = character()),
  list(df_mon, df_year, df_cat, df_bin, df_cat2)
) %>%
  # Everything except the five key columns is a multiplier column; stack
  # them into a single `value` column.
  pivot_longer(cols = -c(mon, year, cat, bin, cat2)) %>%
  group_by(mon, year, cat, bin, cat2) %>%
  # .groups = "keep" leaves the result grouped by all five keys.
  summarise(final_number = prod(value), .groups = "keep")
# A tibble: 300 x 6
# Groups:   mon, year, cat, bin, cat2 [300]
   mon   year  cat   bin   cat2  final_number
   <fct> <fct> <chr> <fct> <chr>        <dbl>
 1 6     1     A     1     A            0.310
 2 6     1     A     1     AA           2.11 
 3 6     1     A     1     B            3.44 
 4 6     1     A     1     C            3.77 
 5 6     1     A     1     D            2.48 
 6 6     1     A     2     A            0.122
 7 6     1     A     2     AA           0.833
 8 6     1     A     2     B            1.36 
 9 6     1     A     2     C            1.49 
10 6     1     A     2     D            0.978
# ... with 290 more rows