Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/mongodb/13.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 计算满足特定条件的唯一组合_R_Group By_Count_Unique_Combinations - Fatal编程技术网

R 计算满足特定条件的唯一组合

R 计算满足特定条件的唯一组合,r,group-by,count,unique,combinations,R,Group By,Count,Unique,Combinations,问题: TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B") PLAYER &

问题:

TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
df <- data.frame(TEAM,PLAYER,LP,POS)
TEAM  UNIQUE
A     n
B     n
我想使用以下数据统计每个团队满足以下标准的独特5人组合的数量

数据:

TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
df <- data.frame(TEAM,PLAYER,LP,POS)
TEAM  UNIQUE
A     n
B     n
编辑:
LP
列与输出无关。这并不像我在原始帖子中希望的那样清楚

标准:

TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
df <- data.frame(TEAM,PLAYER,LP,POS)
TEAM  UNIQUE
A     n
B     n
  • 必须使用五名独特的球员
    PLAYER
    (一名球员将始终被排除在外,因为每个队有六名球员可用)

  • 每个位置
    POS
    只能使用一次,但
    除外,该位置最多可以使用的三次
    。请考虑几个争论步骤:

  • 将新列指定为
    PLAYER
    POS
    的串联
  • 运行
    by
    以按团队拆分数据帧,并对拆分运行操作(规则3)
  • PLAYER\u POS
    上运行
    combn
    ,选择5个列表
  • 运行
    ave
    查看类似
    播放器的运行计数
  • 运行
    Filter
    以保留5行数据帧、5个唯一的播放器,并遵守位置标准(规则1和规则2)
  • 基R码

    # HELPER COLUMN
    df$PLAYER_POS <- with(df, paste(PLAYER, POS, sep="_"))
    
    # BUILD LIST OF DFs BY TEAM
    df_list <- by(df, df$TEAM, function(sub){
      combn(sub$PLAYER_POS, 5, FUN = function(p) 
        transform(subset(sub, PLAYER_POS %in% p),
                  PLAYER_NUM = ave(LP, PLAYER, FUN=seq_along)), 
        simplify = FALSE)
    })
      
    # FILTER LIST OF DFs BY TEAM
    df_list <- lapply(df_list, function(dfs) 
      Filter(function(df) 
               nrow(df) == 5 & 
               max(df$PLAYER_NUM)==1 &
               length(df$POS[df$POS == "OF"]) <= 3 &
               length(df$POS[df$POS != "OF"]) == length(unique(df$POS[df$POS != "OF"])), 
             dfs)
    )
    
    # COUNT REMAINING DFs BY TEAM FOR UNIQUE n
    lengths(df_list)
    #  A  B 
    # 18 20 
    
    data.frame(TEAMS=names(df_list), UNIQUE=lengths(df_list), row.names=NULL)
    #   TEAMS UNIQUE
    # 1     A     18
    # 2     B     20
    
    更混乱的解决方案, 我整个上午都在做这件事,刚刚找到了我的解决方案(只看到有一个更优雅的方案贴出来。但无论如何,我把这件事提供给你,让你分享我的思考过程,我是如何找到解决方案的

            library(tidyverse)
        
        TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
        PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
        LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
        POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
        df <- data.frame(TEAM,PLAYER,LP,POS)
        rm(TEAM, PLAYER, LP, POS)
        
        # Each team has 6 players and I want to find the groups of 5 that are possible.
        posible_player_combinations <- combn(1:6, 5) %>% as_tibble() 
        team = "A"
        
        make_2nd_column <- function(first_stage, mydata_byteam, pcomp){
          mydf <- mydata_byteam %>% filter(LP == pcomp[2])
          col2_filter <- tibble(
            col1LP =  rep(first_stage$LP, each = nrow(mydf)),
            col1POS = rep(first_stage$POS, each = nrow(mydf)))
          helper <- tibble(
            col2LP = rep(mydf$LP, nrow(first_stage)),
            col2POS = rep(mydf$POS, nrow(first_stage))
          )
          col2_filter <- cbind(col2_filter, helper)
          second_stage <- col2_filter %>% filter(col1POS != col2POS)
          return(second_stage)
        }
        make_3rd_column <- function(second_stage, mydata_byteam, pcomp){
          mydf <- mydata_byteam %>% filter(LP == pcomp[3])
          col3_filter <- tibble(
            col1LP =  rep(second_stage$col1LP, each = nrow(mydf)),
            col1POS = rep(second_stage$col1POS, each = nrow(mydf)),
            col2LP =  rep(second_stage$col2LP, each = nrow(mydf)),
            col2POS = rep(second_stage$col2POS, each = nrow(mydf)))
          helper <- tibble(
            col3LP = rep(mydf$LP, nrow(second_stage)),
            col3POS = rep(mydf$POS, nrow(second_stage))
          )
          col3_filter <- cbind(col3_filter, helper)
          third_stage <- col3_filter %>% filter(col1POS != col2POS,
                                                col2POS != col3POS,
                                                col3POS != col1POS)
          return(third_stage)
        }
        make_4th_column <- function(third_stage, mydata_byteam, pcomp){
          mydf <- mydata_byteam %>% filter(LP == pcomp[4])
          col4_filter <- tibble(
            col1LP =  rep(third_stage$col1LP, each = nrow(mydf)),
            col1POS = rep(third_stage$col1POS, each = nrow(mydf)),
            col2LP =  rep(third_stage$col2LP, each = nrow(mydf)),
            col2POS = rep(third_stage$col2POS, each = nrow(mydf)),
            col3LP =  rep(third_stage$col3LP, each = nrow(mydf)),
            col3POS = rep(third_stage$col3POS, each = nrow(mydf)))
          helper <- tibble(
            col4LP = rep(mydf$LP, nrow(third_stage)),
            col4POS = rep(mydf$POS, nrow(third_stage))
          )
          col4_filter <- cbind(col4_filter, helper)
          fourth_stage <- col4_filter %>% filter(col1POS != col2POS,
                                                 col1POS != col3POS,
                                                 col1POS != col4POS,
                                                 col2POS != col3POS,
                                                 col2POS != col4POS,
                                                 col3POS != col4POS)
          return(fourth_stage)
        }
        make_5th_column <- function(fourth_stage, mydata_byteam, pcomp){
          mydf <- mydata_byteam %>% filter(LP == pcomp[5])
          col5_filter <- tibble(
            col1LP =  rep(fourth_stage$col1LP, each = nrow(mydf)),
            col1POS = rep(fourth_stage$col1POS, each = nrow(mydf)),
            col2LP =  rep(fourth_stage$col2LP, each = nrow(mydf)),
            col2POS = rep(fourth_stage$col2POS, each = nrow(mydf)),
            col3LP =  rep(fourth_stage$col3LP, each = nrow(mydf)),
            col3POS = rep(fourth_stage$col3POS, each = nrow(mydf)),
            col4LP =  rep(fourth_stage$col4LP, each = nrow(mydf)),
            col4POS = rep(fourth_stage$col4POS, each = nrow(mydf)))
          helper <- tibble(
            col5LP = rep(mydf$LP, nrow(fourth_stage)),
            col5POS = rep(mydf$POS, nrow(fourth_stage))
          )
          col5_filter <- cbind(col5_filter, helper)
          final_stage_prefilter <- col5_filter %>% filter(
            col1POS != col2POS,
            col1POS != col3POS,
            col1POS != col4POS,
            col1POS != col5POS,
            col2POS != col3POS,
            col2POS != col4POS,
            col2POS != col5POS,
            col3POS != col4POS,
            col3POS != col5POS,
            col4POS != col5POS)
          return(final_stage_prefilter)
        }
        make_final <- function(final_stage_prefilter){
          final_stage_prefilter %>% mutate(
            Player1 = paste(col1LP, str_remove_all(col1POS, "-.*")),
            Player2 = paste(col2LP, str_remove_all(col2POS, "-.*")),
            Player3 = paste(col3LP, str_remove_all(col3POS, "-.*")),
            Player4 = paste(col4LP, str_remove_all(col4POS, "-.*")),
            Player5 = paste(col5LP, str_remove_all(col5POS, "-.*"))
          ) %>% select(
            11:15
          ) %>% distinct()
        }
        
        make_teams <- function(posible_player_combinations, mydata, k){
          pcomp  <- posible_player_combinations[k] %>% as_vector() %>% unname()
          mydata_byteam <- mydata %>% filter(LP %in% pcomp)
          first_stage            <- mydata_byteam %>% filter(LP == pcomp[1])
          second_stage           <- make_2nd_column(first_stage, mydata_byteam, pcomp)
          third_stage            <- make_3rd_column(second_stage, mydata_byteam, pcomp)
          fourth_stage           <- make_4th_column(third_stage, mydata_byteam, pcomp)
          final_stage_prefilter  <- make_5th_column(fourth_stage, mydata_byteam, pcomp)
          final_stage            <- make_final(final_stage_prefilter)
          return(final_stage)
        }
        
        
        make_all_combinations <- function(df, team, posible_player_combinations) {
          mydata <- df %>% filter(TEAM == team) %>% select(LP, POS)
          of_p <- mydata %>% filter(POS == "OF") %>% select(LP) %>% as_vector()
          # I want to treat 3 possible "OF"s as separate positions
          # so that that a later restirction on POS can occur.
          # Later I will need to filter out non-unique results
          # by separating the strings with "-" and dropping the letter.
          of_df <- bind_rows(lapply(
            seq_along(of_p),
            function(x, k){
              of_df <- tibble(
                LP = rep(of_p[k], 3),
                POS = c("OF-a", "OF-b", "OF-c")
              )
            },
            x = of_p
          ))
          mydata <- rbind(mydata %>% filter(POS != "OF"), of_df)
          all_combinations <- bind_rows(lapply(
            X = seq_along(posible_player_combinations),
            FUN = make_teams,
            posible_player_combinations = posible_player_combinations,
            mydata = mydata
          ))
        }
    mydata_a <- make_all_combinations(df, "A", posible_player_combinations)
    mydata_b <- make_all_combinations(df, "B", posible_player_combinations)
    
    tail(mydata_a)
    tail(mydata_b)
    
    # > tail(mydata_a)
    #      Player1 Player2 Player3 Player4 Player5
    # 13    1 3B    2 OF    4 OF     5 C    6 OF
    # 14    1 OF    2 1B    4 OF     5 C    6 OF
    # 15    1 3B    3 SS    4 OF     5 C    6 OF
    # 16    1 OF    3 SS    4 OF     5 C    6 OF
    # 17    2 1B    3 SS    4 OF     5 C    6 OF
    # 18    2 OF    3 SS    4 OF     5 C    6 OF
    # > tail(mydata_b)
    #      Player1 Player2 Player3 Player4 Player5
    # 15    1 2B     3 C    4 3B    5 1B    6 SS
    # 16    1 2B     3 C    4 3B    5 OF    6 SS
    # 17    1 OF     3 C    4 3B    5 1B    6 SS
    # 18    1 OF     3 C    4 3B    5 OF    6 SS
    # 19    2 OF     3 C    4 3B    5 1B    6 SS
    # 20    2 OF     3 C    4 3B    5 OF    6 SS
    
    库(tidyverse)
    
    团队所需结果中的
    n
    您需要
    df%>%unite(LPPOS,LP,POS)%%>%groupby(TEAM)%%>%summary(UNIQUE=nrow(RcppAlgos::comboGeneral(LPPOS,n()-1))
    n
    是可以使用所述标准创建的唯一5人/位置组合的总数(以示例的格式)。团队中除了的其他POS是否可以复制?@CPak,我不理解你的问题。你所说的有效是什么意思?你所说的组是什么意思?不过,我知道你已经在回答中显示了每个
    团队
    n