如何在两个data.tables（或data.frames）的行之间创建随机匹配_R_Data.table

如何在两个data.tables（或data.frames）的行之间创建随机匹配

如何在两个data.tables（或data.frames）的行之间创建随机匹配,r,data.table,R,Data.table,对于本例，我将使用data.table包假设你有一张教练桌 coaches <- data.table(CoachID=c(1,2,3), CoachName=c("Bob","Sue","John"), NumPlayers=c(2,3,0)) coaches CoachID CoachName NumPlayers 1: 1 Bob 2 2: 2 Sue 3 3: 3 Jo

对于本例，我将使用

data.table

包

假设你有一张教练表（coaches）

# Coaches table: one row per coach; NumPlayers is how many players each needs.
coaches <- data.table(
  CoachID    = c(1, 2, 3),
  CoachName  = c("Bob", "Sue", "John"),
  NumPlayers = c(2, 3, 0)
)
coaches
   CoachID CoachName NumPlayers
1:       1       Bob          2
2:       2       Sue          3
3:       3      John          0

您可以对球员ID进行无放回抽样，抽取的数量等于所有教练所需球员的总数：
# Fix the RNG state so the draw below is reproducible.
set.seed(144)
# Draw one player ID per requested slot, summed over all coaches.
selections <- sample(players$PlayerID, size = sum(coaches$NumPlayers))
print(selections)
# [1] 1 4 3 2 6

如果您想让没有分配到球员的教练显示NA值，可以执行以下操作：
# Stack two pieces: the coach/player pairs (each CoachID repeated once per
# requested player, lined up with the sampled IDs), followed by one NA row
# for every coach that requested no players.
with(coaches, rbind(
  data.frame(
    CoachID  = rep(CoachID, times = NumPlayers),
    PlayerID = selections
  ),
  data.frame(
    CoachID  = CoachID[NumPlayers == 0],
    PlayerID = rep(NA, sum(NumPlayers == 0))
  )
))
#   CoachID PlayerID
# 1       1        1
# 2       1        4
# 3       2        3
# 4       2        2
# 5       2        6
# 6       3       NA

可以说，从每一方获得需求和供给：
# Demand: each coach ID repeated once per player that coach needs.
demand <- rep(coaches$CoachID, times = coaches$NumPlayers)
# Supply: the IDs of all available players.
supply <- players[["PlayerID"]]

不过，我不确定这是否是OP想要涵盖的情况

对于OP的期望输出
# Match demand to supply, then attach coach details. all = TRUE keeps coaches
# that received no match; they appear with NA in the player column s.
m <- randmatch(demand, supply)
merge(
  x    = m,
  y    = coaches,
  by.x = "d",
  by.y = "CoachID",
  all  = TRUE
)
#   d  s CoachName NumPlayers
# 1 1  2       Bob          2
# 2 1  6       Bob          2
# 3 2  3       Sue          3
# 4 2  4       Sue          3
# 5 2  1       Sue          3
# 6 3 NA      John          0

下面是一个使用dplyr的简单答案。首先按NumPlayers展开教练的需求，然后随机抽取相同数量的球员，最后将两者按列合并：
library(dplyr)

# Fix the RNG state; every sampling call below draws from this stream, so the
# order of the statements determines the result.
set.seed(1234)

# Expand the coaches table to one row per requested player: within each
# CoachID group, sample_n() with replace=TRUE draws NumPlayers rows, so a coach
# with NumPlayers == 0 contributes no rows.
# Assumes exactly one row per CoachID, so .$NumPlayers is a single number.
# NOTE(review): the result printed for this code still shows a CoachID column
# even though select(-CoachID) drops it here; presumably dplyr keeps grouping
# columns on grouped data. Confirm.
coach_needs <- coaches %>%
  group_by( CoachID ) %>%
  do( sample_n(., size=.$NumPlayers, replace=TRUE) ) %>%
  select( -CoachID ) %>% ungroup()

# Draw as many player rows at random as there are coach slots to fill.
player_needs <- players %>%
  sample_n( size = nrow(coach_needs))

# Pair slot i with sampled player i by binding the two tables column-wise.
result <- cbind(coach_needs, player_needs)

result

更新：如果需要为NumPlayers==0的教练填入NA，那么只需简单的一行：
# Append the coaches that need no players; fill=TRUE lets those rows lack the
# player columns.
# NOTE(review): fill= is not an argument of base rbind(); presumably this
# relies on data.table's rbind method being dispatched. Confirm.
result <- cbind(coach_needs, player_needs) %>%
  rbind( coaches %>% filter(NumPlayers == 0), fill=TRUE )

result

你的最终结果中不是PlayerID 6，而是NA？ @Frank，是的。这是因为CoachID 3（John）的NumPlayers==0，因此不应将任何人分配给他。
#' Randomly match demand slots to supply items, one-to-one.
#'
#' The shorter side is used in full and in its given order. The longer side is
#' randomly subsampled without replacement down to the same length, so the
#' surplus on the longer side stays unmatched.
#'
#' @param demand Vector with one element per requested slot (e.g. a coach ID
#'   repeated once per player that coach needs).
#' @param supply Vector of available items (e.g. player IDs).
#' @return A data.frame with columns `d` (demand) and `s` (supply) and
#'   `min(length(demand), length(supply))` rows.
randmatch <- function(demand, supply) {
  n_demand <- length(demand)
  n_supply <- length(supply)
  n_matches <- min(n_demand, n_supply)

  # Sample positions rather than values: sample(x, k) treats a length-one
  # numeric x >= 1 as 1:x, which would return IDs that are not in the input.
  # For longer vectors this draws exactly the same random stream as sample().
  if (n_demand >= n_supply) {
    data.frame(d = demand[sample.int(n_demand, n_matches)], s = supply)
  } else {
    data.frame(d = demand, s = supply[sample.int(n_supply, n_matches)])
  }
}

# Reproducible demonstration of both surplus directions.
set.seed(1)
randmatch(demand, supply)              # some players unmatched, OP's example
randmatch(rep(1:3, times = 1:3), 1:4)  # some coaches unmatched

# One random matching, viewed from each side. all = TRUE keeps unmatched rows,
# which show NA in the other side's key column.
m <- randmatch(demand, supply)

# Coach view: every coach appears, including one with no player assigned.
merge(
  x    = m,
  y    = coaches,
  by.x = "d",
  by.y = "CoachID",
  all  = TRUE
)
#   d  s CoachName NumPlayers
# 1 1  2       Bob          2
# 2 1  6       Bob          2
# 3 2  3       Sue          3
# 4 2  4       Sue          3
# 5 2  1       Sue          3
# 6 3 NA      John          0

# Player view: every player appears, including one left without a coach.
merge(
  x    = m,
  y    = players,
  by.x = "s",
  by.y = "PlayerID",
  all  = TRUE
)
#   s  d PlayerName
# 1 1  2        Abe
# 2 2  1       Bart
# 3 3  2       Chad
# 4 4  2     Dalton
# 5 5 NA       Egor
# 6 6  1      Frank

library(dplyr)

# Fix the RNG state; every sampling call below draws from this stream, so the
# order of the statements determines the result.
set.seed(1234)

# Expand the coaches table to one row per requested player: within each
# CoachID group, sample_n() with replace=TRUE draws NumPlayers rows, so a coach
# with NumPlayers == 0 contributes no rows.
# Assumes exactly one row per CoachID, so .$NumPlayers is a single number.
# NOTE(review): the printed result below still shows a CoachID column even
# though select(-CoachID) drops it here; presumably dplyr keeps grouping
# columns on grouped data. Confirm.
coach_needs <- coaches %>%
  group_by( CoachID ) %>%
  do( sample_n(., size=.$NumPlayers, replace=TRUE) ) %>%
  select( -CoachID ) %>% ungroup()

# Draw as many player rows at random as there are coach slots to fill.
player_needs <- players %>%
  sample_n( size = nrow(coach_needs))

# Pair slot i with sampled player i by binding the two tables column-wise.
result <- cbind(coach_needs, player_needs)

result

   CoachID CoachName NumPlayers PlayerID PlayerName
1:       1       Bob          2        4     Dalton
2:       1       Bob          2        1        Abe
3:       2       Sue          3        5       Egor
4:       2       Sue          3        2       Bart
5:       2       Sue          3        3       Chad

# Append the coaches that need no players; fill=TRUE lets those rows lack the
# player columns, which then print as NA.
# NOTE(review): fill= is not an argument of base rbind(); presumably this
# relies on data.table's rbind method being dispatched. Confirm.
result <- cbind(coach_needs, player_needs) %>%
  rbind( coaches %>% filter(NumPlayers == 0), fill=TRUE )

result

   CoachID CoachName NumPlayers PlayerID PlayerName
1:       1       Bob          2        4     Dalton
2:       1       Bob          2        1        Abe
3:       2       Sue          3        5       Egor
4:       2       Sue          3        2       Bart
5:       2       Sue          3        3       Chad
6:       3      John          0       NA         NA