R 计算满足特定条件的唯一组合
问题:R 计算满足特定条件的唯一组合,r,group-by,count,unique,combinations,R,Group By,Count,Unique,Combinations,问题: TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B") PLAYER &
TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
df <- data.frame(TEAM,PLAYER,LP,POS)
TEAM UNIQUE
A n
B n
我想使用以下数据统计每个团队满足以下标准的独特5人组合的数量
数据:
TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
df <- data.frame(TEAM,PLAYER,LP,POS)
TEAM UNIQUE
A n
B n
编辑:LP
列与输出无关。这并不像我在原始帖子中希望的那样清楚
标准:
TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
df <- data.frame(TEAM,PLAYER,LP,POS)
TEAM UNIQUE
A n
B n
PLAYER
(一名球员将始终被排除在外,因为每个队有六名球员可用)POS
只能使用一次,但的除外,该位置最多可以使用的三次。请考虑几个争论步骤:
将新列指定为PLAYER
和POS
的串联
运行by
以按团队拆分数据帧,并对拆分运行操作(规则3)
在PLAYER\u POS
上运行combn
,选择5个列表
运行ave
查看类似播放器的运行计数
运行Filter
以保留5行数据帧、5个唯一的播放器,并遵守位置标准(规则1和规则2)
基R码
# HELPER COLUMN
df$PLAYER_POS <- with(df, paste(PLAYER, POS, sep="_"))
# BUILD LIST OF DFs BY TEAM
df_list <- by(df, df$TEAM, function(sub){
combn(sub$PLAYER_POS, 5, FUN = function(p)
transform(subset(sub, PLAYER_POS %in% p),
PLAYER_NUM = ave(LP, PLAYER, FUN=seq_along)),
simplify = FALSE)
})
# FILTER LIST OF DFs BY TEAM
df_list <- lapply(df_list, function(dfs)
Filter(function(df)
nrow(df) == 5 &
max(df$PLAYER_NUM)==1 &
length(df$POS[df$POS == "OF"]) <= 3 &
length(df$POS[df$POS != "OF"]) == length(unique(df$POS[df$POS != "OF"])),
dfs)
)
# COUNT REMAINING DFs BY TEAM FOR UNIQUE n
lengths(df_list)
# A B
# 18 20
data.frame(TEAMS=names(df_list), UNIQUE=lengths(df_list), row.names=NULL)
# TEAMS UNIQUE
# 1 A 18
# 2 B 20
更混乱的解决方案,
我整个上午都在做这件事,刚刚找到了我的解决方案(只看到有一个更优雅的方案贴出来。但无论如何,我把这件事提供给你,让你分享我的思考过程,我是如何找到解决方案的
library(tidyverse)
TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
df <- data.frame(TEAM,PLAYER,LP,POS)
rm(TEAM, PLAYER, LP, POS)
# Each team has 6 players and I want to find the groups of 5 that are possible.
posible_player_combinations <- combn(1:6, 5) %>% as_tibble()
team = "A"
make_2nd_column <- function(first_stage, mydata_byteam, pcomp){
mydf <- mydata_byteam %>% filter(LP == pcomp[2])
col2_filter <- tibble(
col1LP = rep(first_stage$LP, each = nrow(mydf)),
col1POS = rep(first_stage$POS, each = nrow(mydf)))
helper <- tibble(
col2LP = rep(mydf$LP, nrow(first_stage)),
col2POS = rep(mydf$POS, nrow(first_stage))
)
col2_filter <- cbind(col2_filter, helper)
second_stage <- col2_filter %>% filter(col1POS != col2POS)
return(second_stage)
}
make_3rd_column <- function(second_stage, mydata_byteam, pcomp){
mydf <- mydata_byteam %>% filter(LP == pcomp[3])
col3_filter <- tibble(
col1LP = rep(second_stage$col1LP, each = nrow(mydf)),
col1POS = rep(second_stage$col1POS, each = nrow(mydf)),
col2LP = rep(second_stage$col2LP, each = nrow(mydf)),
col2POS = rep(second_stage$col2POS, each = nrow(mydf)))
helper <- tibble(
col3LP = rep(mydf$LP, nrow(second_stage)),
col3POS = rep(mydf$POS, nrow(second_stage))
)
col3_filter <- cbind(col3_filter, helper)
third_stage <- col3_filter %>% filter(col1POS != col2POS,
col2POS != col3POS,
col3POS != col1POS)
return(third_stage)
}
make_4th_column <- function(third_stage, mydata_byteam, pcomp){
mydf <- mydata_byteam %>% filter(LP == pcomp[4])
col4_filter <- tibble(
col1LP = rep(third_stage$col1LP, each = nrow(mydf)),
col1POS = rep(third_stage$col1POS, each = nrow(mydf)),
col2LP = rep(third_stage$col2LP, each = nrow(mydf)),
col2POS = rep(third_stage$col2POS, each = nrow(mydf)),
col3LP = rep(third_stage$col3LP, each = nrow(mydf)),
col3POS = rep(third_stage$col3POS, each = nrow(mydf)))
helper <- tibble(
col4LP = rep(mydf$LP, nrow(third_stage)),
col4POS = rep(mydf$POS, nrow(third_stage))
)
col4_filter <- cbind(col4_filter, helper)
fourth_stage <- col4_filter %>% filter(col1POS != col2POS,
col1POS != col3POS,
col1POS != col4POS,
col2POS != col3POS,
col2POS != col4POS,
col3POS != col4POS)
return(fourth_stage)
}
make_5th_column <- function(fourth_stage, mydata_byteam, pcomp){
mydf <- mydata_byteam %>% filter(LP == pcomp[5])
col5_filter <- tibble(
col1LP = rep(fourth_stage$col1LP, each = nrow(mydf)),
col1POS = rep(fourth_stage$col1POS, each = nrow(mydf)),
col2LP = rep(fourth_stage$col2LP, each = nrow(mydf)),
col2POS = rep(fourth_stage$col2POS, each = nrow(mydf)),
col3LP = rep(fourth_stage$col3LP, each = nrow(mydf)),
col3POS = rep(fourth_stage$col3POS, each = nrow(mydf)),
col4LP = rep(fourth_stage$col4LP, each = nrow(mydf)),
col4POS = rep(fourth_stage$col4POS, each = nrow(mydf)))
helper <- tibble(
col5LP = rep(mydf$LP, nrow(fourth_stage)),
col5POS = rep(mydf$POS, nrow(fourth_stage))
)
col5_filter <- cbind(col5_filter, helper)
final_stage_prefilter <- col5_filter %>% filter(
col1POS != col2POS,
col1POS != col3POS,
col1POS != col4POS,
col1POS != col5POS,
col2POS != col3POS,
col2POS != col4POS,
col2POS != col5POS,
col3POS != col4POS,
col3POS != col5POS,
col4POS != col5POS)
return(final_stage_prefilter)
}
make_final <- function(final_stage_prefilter){
final_stage_prefilter %>% mutate(
Player1 = paste(col1LP, str_remove_all(col1POS, "-.*")),
Player2 = paste(col2LP, str_remove_all(col2POS, "-.*")),
Player3 = paste(col3LP, str_remove_all(col3POS, "-.*")),
Player4 = paste(col4LP, str_remove_all(col4POS, "-.*")),
Player5 = paste(col5LP, str_remove_all(col5POS, "-.*"))
) %>% select(
11:15
) %>% distinct()
}
make_teams <- function(posible_player_combinations, mydata, k){
pcomp <- posible_player_combinations[k] %>% as_vector() %>% unname()
mydata_byteam <- mydata %>% filter(LP %in% pcomp)
first_stage <- mydata_byteam %>% filter(LP == pcomp[1])
second_stage <- make_2nd_column(first_stage, mydata_byteam, pcomp)
third_stage <- make_3rd_column(second_stage, mydata_byteam, pcomp)
fourth_stage <- make_4th_column(third_stage, mydata_byteam, pcomp)
final_stage_prefilter <- make_5th_column(fourth_stage, mydata_byteam, pcomp)
final_stage <- make_final(final_stage_prefilter)
return(final_stage)
}
make_all_combinations <- function(df, team, posible_player_combinations) {
mydata <- df %>% filter(TEAM == team) %>% select(LP, POS)
of_p <- mydata %>% filter(POS == "OF") %>% select(LP) %>% as_vector()
# I want to treat 3 possible "OF"s as separate positions
# so that that a later restirction on POS can occur.
# Later I will need to filter out non-unique results
# by separating the strings with "-" and dropping the letter.
of_df <- bind_rows(lapply(
seq_along(of_p),
function(x, k){
of_df <- tibble(
LP = rep(of_p[k], 3),
POS = c("OF-a", "OF-b", "OF-c")
)
},
x = of_p
))
mydata <- rbind(mydata %>% filter(POS != "OF"), of_df)
all_combinations <- bind_rows(lapply(
X = seq_along(posible_player_combinations),
FUN = make_teams,
posible_player_combinations = posible_player_combinations,
mydata = mydata
))
}
mydata_a <- make_all_combinations(df, "A", posible_player_combinations)
mydata_b <- make_all_combinations(df, "B", posible_player_combinations)
tail(mydata_a)
tail(mydata_b)
# > tail(mydata_a)
# Player1 Player2 Player3 Player4 Player5
# 13 1 3B 2 OF 4 OF 5 C 6 OF
# 14 1 OF 2 1B 4 OF 5 C 6 OF
# 15 1 3B 3 SS 4 OF 5 C 6 OF
# 16 1 OF 3 SS 4 OF 5 C 6 OF
# 17 2 1B 3 SS 4 OF 5 C 6 OF
# 18 2 OF 3 SS 4 OF 5 C 6 OF
# > tail(mydata_b)
# Player1 Player2 Player3 Player4 Player5
# 15 1 2B 3 C 4 3B 5 1B 6 SS
# 16 1 2B 3 C 4 3B 5 OF 6 SS
# 17 1 OF 3 C 4 3B 5 1B 6 SS
# 18 1 OF 3 C 4 3B 5 OF 6 SS
# 19 2 OF 3 C 4 3B 5 1B 6 SS
# 20 2 OF 3 C 4 3B 5 OF 6 SS
库(tidyverse)
团队所需结果中的n
您需要df%>%unite(LPPOS,LP,POS)%%>%groupby(TEAM)%%>%summary(UNIQUE=nrow(RcppAlgos::comboGeneral(LPPOS,n()-1))
n
是可以使用所述标准创建的唯一5人/位置组合的总数(以示例的格式)。团队中除了的其他POS是否可以复制?@CPak,我不理解你的问题。你所说的有效是什么意思?你所说的组是什么意思?不过,我知道你已经在回答中显示了每个团队的n