R 如何从数据帧的上一个样本创建样本_R

R 如何从数据帧的上一个样本创建样本

R 如何从数据帧的上一个样本创建样本,r,R,我创建了一个包含两个变量的数据框：一个包含字符（团队）和一个数字。我想做一个完整的随机抽样，选择两个团队，然后在两个团队中选择另一个样本，只得到一个。最后，我想在前两个当选团队无法复制的情况下重复这一点。我已经用这个代码试过了。然而，当涉及到第二个样本时，选举不是来自两个当选的团队，而是来自另外两个团队 teams <- c('madrid','barcelona','psg','mancunited','mancity','juve') mean <- c(14, 14.5,

我创建了一个包含两个变量的数据框：一列是字符型（球队名称），另一列是数值型。我想先做一次完全随机抽样，选出两支球队，然后在这两支球队中再抽样一次，只保留一支。最后，我想在前面已被选中的两支球队不能再次被抽到的前提下重复这一过程。我已经用下面的代码尝试过。然而，在进行第二步抽样时，选出的球队并不是来自已选中的两支球队，而是来自另外的球队。

# Example data: six teams and one numeric score per team.
teams <- c("madrid", "barcelona", "psg", "mancunited", "mancity", "juve")
mean  <- c(14, 14.5, 13, 10, 13.4, 13.7)
# Build the data frame from the vectors defined above. The score vector is
# named `mean`; no object called `stats` exists at this point.
df    <- data.frame(teams, mean)
# Row positions of `df`, used below as the population for sample().
# seq_len() is used so an empty data frame yields an empty index vector.
x     <- seq_len(nrow(df))


# Phase 1: draw two rows of `df` at random (sample() draws without
# replacement by default), using the row positions stored in `x`.
a1 <- df[sample((x),2),]
# Pick one of the two drawn column-1 values, weighting each by its row's share
# of the two rows' column-2 total.
y  <- sample(c(a1[1,1], a1[2,1]), 1, 
           prob = c((a1[1,2]/(a1[1,2]+a1[2,2])), (a1[2,2]/(a1[1,2]+a1[2,2]))))
# NOTE(review): `y` is a value taken from column 1 of `a1`, not a row position,
# yet it is used here as a row index into `df`. This looks like the cause of
# the mismatch described in the question; filtering on the column instead
# (e.g. df[df$teams == y, ]) is presumably what was intended -- confirm.
A1 <- df[y,]
# Print the selected row.
A1

# Phase 2 setup: drop the rows whose team was one of the two phase-1
# candidates in `a1`, then rebuild the row-position vector for what is left.
df <- df[!(df$teams==a1[1,1] | df$teams==a1[2,1]),]
x  <- 1:nrow(df)

# Draw two of the remaining rows at random.
b1 <- df[sample((x),2),]
# Pick one of the two drawn column-1 values, weighted by each row's share of
# the two rows' column-2 total, and use the result to subset `df`.
# NOTE(review): the sampled column-1 value is used directly as a row index
# into `df` rather than being matched against `df$teams` -- confirm this
# selects the intended row.
B1 <- df[sample(c(b1[1,1], b1[2,1]), 1,  
                prob = c((b1[1,2]/(b1[1,2]+b1[2,2])), (b1[2,2]/(b1[1,2]+b1[2,2])))),]
# Print the selected row.
B1

团队您可以使用：
# Phase 1: draw two row indices from `x` ...
random_2_x <- sample(x, size = 2)
# ... and keep a single one of that pair.
random_2_1_x <- sample(random_2_x, size = 1)

# Phase 2: draw two indices from those not picked in phase 1 ...
random_2_y <- sample(x[-random_2_x], size = 2)
# ... and again keep a single one of that pair.
random_2_y_1 <- sample(random_2_y, size = 1)

数据
# Assemble the example data frame from the `teams` and `mean` vectors.
df <- data.frame(teams = teams, mean = mean)

df

如果您想使用 `stats` 列来加权第二次抽签时的选择概率（即从已经选出的 2 支球队中选择 1 支），可以使用以下函数。`sample` 的 `prob` 参数可以是一个概率权重向量，权重之和不必等于 1。因此，您不需要手动计算实际比例——只需提供 `stats` 列，R 就会完成您想要的操作。
#' Simulate one two-phase weighted draw of teams
#'
#' Phase 1 picks two rows of `df` at random and keeps one of the two teams
#' with probability proportional to its `stats` value. Phase 2 repeats the
#' same draw on the rows whose team was not a phase-1 candidate.
#'
#' @param df A data frame with a `teams` column (team names, character or
#'   factor) and a numeric `stats` column used as sampling weights. It must
#'   have at least 4 rows, and at least 2 rows must remain once the two
#'   phase-1 candidates are removed.
#' @return A character vector of length 2: the phase-1 winner followed by the
#'   phase-2 winner.
game <- function(df) {
  stopifnot(is.data.frame(df))
  if (nrow(df) < 4L) {
    stop("`df` must contain at least 4 teams.", call. = FALSE)
  }

  # Phase 1: two random candidates, then a weighted pick between them.
  # sample() normalises `prob`, so the raw `stats` values can be passed as-is.
  a1 <- df[sample(seq_len(nrow(df)), 2), ]
  # as.character() guarantees team names are returned even when `teams` is a
  # factor; it is a no-op for character columns.
  y1 <- sample(as.character(a1$teams), 1, prob = a1$stats)

  # Phase 2: exclude both phase-1 candidates so neither can be drawn again.
  df2 <- df[!(df$teams %in% a1$teams), ]
  if (nrow(df2) < 2L) {
    stop(
      "Fewer than 2 teams remain after removing the phase-1 candidates.",
      call. = FALSE
    )
  }
  b1 <- df2[sample(seq_len(nrow(df2)), 2), ]
  y2 <- sample(as.character(b1$teams), 1, prob = b1$stats)

  c(y1, y2)
}

#' Simulate one two-phase weighted draw of teams
#'
#' Phase 1 picks two rows of `df` at random and keeps one of the two teams
#' with probability proportional to its `stats` value. Phase 2 repeats the
#' same draw on the rows whose team was not a phase-1 candidate.
#'
#' @param df A data frame with a `teams` column (team names, character or
#'   factor) and a numeric `stats` column used as sampling weights. It must
#'   have at least 4 rows, and at least 2 rows must remain once the two
#'   phase-1 candidates are removed.
#' @return A character vector of length 2: the phase-1 winner followed by the
#'   phase-2 winner.
game <- function(df) {
  stopifnot(is.data.frame(df))
  if (nrow(df) < 4L) {
    stop("`df` must contain at least 4 teams.", call. = FALSE)
  }

  # Phase 1: two random candidates, then a weighted pick between them.
  # sample() normalises `prob`, so the raw `stats` values can be passed as-is.
  a1 <- df[sample(seq_len(nrow(df)), 2), ]
  # as.character() guarantees team names are returned even when `teams` is a
  # factor; it is a no-op for character columns.
  y1 <- sample(as.character(a1$teams), 1, prob = a1$stats)

  # Phase 2: exclude both phase-1 candidates so neither can be drawn again.
  df2 <- df[!(df$teams %in% a1$teams), ]
  if (nrow(df2) < 2L) {
    stop(
      "Fewer than 2 teams remain after removing the phase-1 candidates.",
      call. = FALSE
    )
  }
  b1 <- df2[sample(seq_len(nrow(df2)), 2), ]
  y2 <- sample(as.character(b1$teams), 1, prob = b1$stats)

  c(y1, y2)
}

# Example data: team names and the per-team weight used for sampling.
teams <- c("madrid", "barcelona", "psg", "mancunited", "mancity", "juve")
stats <- c(14, 14.5, 13, 10, 13.4, 13.7)
# Since R 4.0.0 strings stay character here; no factor conversion is needed.
df <- data.frame(teams = teams, stats = stats)

# Run 10,000 independent two-phase draws. replicate() returns one draw per
# column, so transpose to get one draw per row:
# column 1 = phase-1 winner, column 2 = phase-2 winner.
games <- t(replicate(10000, game(df)))

head(games)
#      [,1]        [,2]        
# [1,] "barcelona" "mancity"   
# [2,] "madrid"    "mancunited"
# [3,] "madrid"    "psg"       
# [4,] "juve"      "psg"       
# [5,] "mancity"   "barcelona" 
# [6,] "mancity"   "juve"

# Share of simulations won by each team in phase 1 (first column).
sort(prop.table(table(games[, 1])), decreasing = TRUE) # phase 1

# barcelona     madrid        psg       juve    mancity mancunited 
#    0.1797     0.1787     0.1687     0.1677     0.1663     0.1389

# Share of simulations won by each team in phase 2. This must read the second
# column; the first column holds the phase-1 winners summarised above.
sort(prop.table(table(games[, 2])), decreasing = TRUE) # phase 2

# madrid  barcelona       juve    mancity        psg mancunited 
# 0.1826     0.1755     0.1691     0.1670     0.1663     0.1395