在R中,如果一个新列是有条件的,如何使用多个列来表示一列的值?

在R中,如果一个新列是有条件的,如何使用多个列来表示一列的值?标签:r、dplyr。摘要:我查看了类似的问题,但没有找到符合我需求的答案。如果已有问题包含解决方案,请分享其链接。我的数据由 dput 生成(完整内容见下文)。

我查看了类似的问题,但没有找到符合我需求的答案。如果已有问题包含解决方案,请分享其链接。

我有此dput生成的数据:

structure(list(Player = c("Seth Lugo", "Jacob deGrom", "Rick Porcello", 
"David Peterson", "Michael Wacha", "Seth Lugo", "Jacob deGrom", 
"Rick Porcello", "David Peterson", "Steven Matz", "Seth Lugo", 
"Jacob deGrom", "Rick Porcello", "David Peterson", "Seth Lugo", 
"Jacob deGrom", "Rick Porcello", "Michael Wacha", "David Peterson", 
"Jacob deGrom", "Seth Lugo", "Rick Porcello", "Robert Gsellman", 
"Michael Wacha", "Ariel Jurado", "Jacob deGrom", "Rick Porcello", 
"Seth Lugo", "Robert Gsellman", "David Peterson"), Date = structure(c(1601164800, 
1601078400, 1601078400, 1600905600, 1600819200, 1600732800, 1600646400, 
1600560000, 1600473600, 1600387200, 1600300800, 1600214400, 1600128000, 
1599955200, 1599868800, 1599782400, 1599609600, 1599523200, 1599436800, 
1599350400, 1599264000, 1599177600, 1599091200, 1599004800, 1598918400, 
1598832000, 1598745600, 1598745600, 1598659200, 1598572800), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), DblHdr = c(0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 2), DateStr = c("09/27/2020", 
"09/26/2020", "09/26/2020", "09/24/2020", "09/23/2020", "09/22/2020", 
"09/21/2020", "09/20/2020", "09/19/2020", "09/18/2020", "09/17/2020", 
"09/16/2020", "09/15/2020", "09/13/2020", "09/12/2020", "09/11/2020", 
"09/09/2020", "09/08/2020", "09/07/2020", "09/06/2020", "09/05/2020", 
"09/04/2020", "09/03/2020", "09/02/2020", "09/01/2020", "08/31/2020", 
"08/30/2020", "08/30/2020", "08/29/2020", "08/28/2020"), Month = c("09", 
"09", "09", "09", "09", "09", "09", "09", "09", "09", "09", "09", 
"09", "09", "09", "09", "09", "09", "09", "09", "09", "09", "09", 
"09", "09", "08", "08", "08", "08", "08"), Tm = c("NYM", "NYM", 
"NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", 
"NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", 
"NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", 
"NYM"), Opp = c("WSN", "WSN", "WSN", "WSN", "TBR", "TBR", "TBR", 
"ATL", "ATL", "ATL", "PHI", "PHI", "PHI", "TOR", "TOR", "TOR", 
"BAL", "BAL", "PHI", "PHI", "PHI", "PHI", "NYY", "BAL", "BAL", 
"MIA", "NYY", "NYY", "NYY", "NYY"), Rslt = c("L 5-15", "L 3-4", 
"L 3-5", "W 3-2", "L 5-8", "W 5-2", "L 1-2", "L 0-7", "W 7-2", 
"L 2-15", "W 10-6", "W 5-4", "L 1-4", "L 3-7", "L 2-3", "W 18-1", 
"W 7-6", "L 2-11", "L 8-9", "W 14-1", "W 5-1", "L 3-5", "W 9-7", 
"W 9-4", "L 5-9", "L 3-5", "L 7-8", "L 2-5", "L 1-2", "W 4-3"
), W_L = c("L", "L", "L", "W", "L", "W", "L", "L", "W", "L", 
"W", "W", "L", "L", "L", "W", "W", "L", "L", "W", "W", "L", "W", 
"W", "L", "L", "L", "L", "L", "W"), temp = c("L 5", "L 3", "L 3", 
"W 3", "L 5", "W 5", "L 1", "L 0", "W 7", "L 2", "W 10", "W 5", 
"L 1", "L 3", "L 2", "W 18", "W 7", "L 2", "L 8", "W 14", "W 5", 
"L 3", "W 9", "W 9", "L 5", "L 3", "L 7", "L 2", "L 1", "W 4"
), RS = c(5, 3, 3, 3, 5, 5, 1, 0, 7, 2, 10, 5, 1, 3, 2, 18, 7, 
2, 8, 14, 5, 3, 9, 9, 5, 3, 7, 2, 1, 4), RA = c(15, 4, 5, 2, 
8, 2, 2, 7, 2, 15, 6, 4, 4, 7, 3, 1, 6, 11, 9, 1, 1, 5, 7, 4, 
9, 5, 8, 5, 2, 3), Rdiff = c(-10, -1, -2, 1, -3, 3, -1, -7, 5, 
-13, 4, 1, -3, -4, -1, 17, 1, -9, -1, 13, 4, -2, 2, 5, -4, -2, 
-1, -3, -1, 1), absV = c(10, 1, 2, 1, 3, 3, 1, 7, 5, 13, 4, 1, 
3, 4, 1, 17, 1, 9, 1, 13, 4, 2, 2, 5, 4, 2, 1, 3, 1, 1), App_Dec = c("GS-2, L", 
"GS-5", "GS-3, L", "GS-7, W", "GS-6, L", "GS-7, W", "GS-7, L", 
"GS-7, L", "GS-6, W", "GS-3, L", "GS-2", "GS-2", "GS-6, L", "GS-5, L", 
"GS-6, L", "GS-6, W", "GS-4", "GS-4, L", "GS-2", "GS-7, W", "GS-5, W", 
"GS-6", "GS-2", "GS-3", "GS-4", "GS-6, L", "GS-5", "GS-4", "GS-4", 
"GS-4"), IP = c(1.1, 5, 3, 7, 6, 6.1, 7, 7, 6, 2.2, 1.2, 2, 6, 
5, 5.1, 6, 4, 4, 2, 7, 5, 6, 1.2, 3, 4, 6, 5, 3.2, 4, 4), H = c(5, 
5, 8, 4, 6, 4, 4, 3, 3, 8, 8, 4, 6, 3, 7, 3, 10, 7, 3, 3, 4, 
3, 4, 4, 9, 6, 4, 4, 4, 4), R = c(6, 3, 5, 1, 4, 2, 2, 1, 1, 
6, 6, 3, 4, 2, 3, 1, 5, 5, 5, 1, 1, 2, 4, 2, 5, 4, 2, 1, 1, 3
), ER = c(6, 3, 3, 1, 4, 1, 2, 1, 1, 6, 6, 3, 4, 2, 3, 1, 5, 
4, 5, 1, 1, 2, 4, 2, 5, 1, 2, 1, 1, 3), BB = c(2, 2, 1, 1, 0, 
1, 2, 2, 4, 3, 0, 1, 2, 2, 1, 2, 0, 0, 4, 2, 2, 2, 4, 1, 0, 2, 
2, 2, 0, 3), SO = c(1, 10, 3, 4, 4, 7, 14, 10, 10, 5, 3, 1, 5, 
2, 5, 9, 3, 3, 3, 12, 8, 6, 0, 2, 2, 9, 2, 7, 4, 3), HR = c(0, 
2, 1, 0, 2, 1, 1, 1, 1, 2, 4, 0, 1, 1, 0, 0, 0, 2, 1, 1, 1, 0, 
0, 0, 1, 1, 0, 1, 1, 0), UER = c(0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0), 
    Pit = c(38, 113, 67, 107, 66, 95, 112, 100, 102, 76, 52, 
    40, 94, 81, 91, 102, 66, 71, 70, 108, 81, 100, 52, 69, 84, 
    103, 86, 60, 57, 70), Str = c(24, 78, 42, 68, 45, 66, 70, 
    70, 62, 45, 30, 25, 66, 52, 60, 68, 45, 49, 37, 74, 50, 65, 
    22, 41, 53, 72, 55, 39, 33, 37), GSc = c(19, 53, 29, 68, 
    48, 65, 73, 75, 68, 20, 18, 36, 47, 53, 46, 69, 25, 33, 29, 
    77, 61, 62, 27, 44, 26, 57, 51, 54, 54, 42), BF = c(12, 22, 
    19, 26, 23, 24, 26, 26, 24, 18, 14, 11, 26, 20, 24, 23, 21, 
    20, 14, 26, 21, 23, 13, 15, 21, 27, 20, 16, 15, 18), AB = c(8, 
    20, 18, 24, 23, 23, 23, 23, 20, 15, 13, 9, 24, 18, 22, 21, 
    21, 20, 9, 24, 19, 21, 8, 13, 20, 25, 18, 14, 15, 15), H2B = c(2, 
    0, 1, 1, 1, 0, 2, 0, 2, 2, 1, 2, 1, 0, 2, 1, 1, 1, 1, 1, 
    0, 0, 1, 0, 2, 2, 2, 0, 1, 0), H3B = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 
    0, 0, 0, 1, 0), IBB = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), 
    HBP = c(1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
    0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), SH = c(0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 1, 0, 0, 0, 0, 0), SF = c(1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
    0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
    0), GDP = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1), SB = c(0, 1, 
    1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 0, 
    1, 0, 0, 0, 3, 0, 0, 0, 0), CS = c(0, 0, 0, 0, 1, 0, 1, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0), PO = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), BK = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), WP = c(0, 1, 1, 1, 0, 0, 0, 
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 
    0, 1, 0, 0), ERA = c("40.5", "5.4", "9", "1.29", "6", "1.42", 
    "2.57", "1.29", "1.5", "20.25", "32.4", "13.5", "6", "3.6", 
    "5.0599999999999996", "1.5", "11.25", "9", "22.5", "1.29", 
    "1.8", "3", "21.6", "6", "11.25", "1.5", "3.6", "2.4500000000000002", 
    "2.25", "6.75"), WPA = c(-0.471, -0.087, -0.256, 0.34, -0.22, 
    0.18, 0.107, 0.219, 0.229, -0.358, -0.487, -0.186, -0.156, 
    0.036, -0.047, 0.049, -0.329, -0.321, -0.34, 0.193, 0.156, 
    0.07, -0.312, -0.042, -0.278, -0.271, 0.029, 0.02, 0.092, 
    -0.174), RE24 = c(-5.122, -0.193, -3.316, 2.931, -1.08, 1.509, 
    1.406, 2.406, 1.92, -4.641, -5.444, -1.919, -0.758, 0.679, 
    0.245, 2.215, -3.054, -3.054, -4.027, 2.406, 1.433, 0.92, 
    -3.788, -0.359, -2.812, -1.08, 0.707, 0.364, 1.166, -0.834
    ), aLI = c(1.45, 1.244, 0.974, 1.271, 0.965, 0.921, 0.955, 
    0.888, 1.066, 0.962, 0.767, 1.073, 0.941, 0.852, 1.353, 0.392, 
    0.857, 0.805, 0.904, 0.75, 1.037, 0.861, 1.232, 1.355, 0.914, 
    1.239, 1.213, 1.28, 0.748, 1.407)), row.names = c(NA, -30L
), class = c("tbl_df", "tbl", "data.frame"))
期望输出:

从第二列开始,各列中的数字是每位球员在该列对应的 absV 总值。最后一列是每位球员所有 absV > 5 的值之和。下面只展示前3行作为示例,其中的数值只是占位用的填充数字。

| Player | 1 | 2 | 3 | 4 | 5 | >5 |
|--------|---|---|---|---|---|----|
| deGrom | 2 | 3 | 5 | 0 | 1 | 3 |
| Matz | 2 | 3 | 5 | 0 | 1 | 3 |
以下是我已经尝试的代码,我需要帮助才能在此基础上继续推进。如果可能,我更希望解决方案使用 dplyr:

# Count how many games each player has at every absV value, sorted by
# player and then by absV. summarize() keeps only the grouping columns plus
# numG, so no explicit select() is needed beforehand.
starter %>%
  group_by(Player, absV) %>%
  summarize(numG = n()) %>%
  arrange(Player, absV)

要做到这一点,您需要把每名球员的数据表拆分成两部分:前5行,以及第5行之后的行。


我只需要七列。在第七列,即名为“>5”的列中,我需要所有包含absV值的列的总和,从6到最高的一列,我认为是44。请参见编辑后的答案以及代码解释。它是否达到了目的?@AnilGoyal,这非常有帮助。如果我想让每个球员只显示每个absV值(如1、2、3等)出现的次数,代码需要改动多少?例如,对于Seth Lugo,absV值3出现三次,而对于Jacob deGrom,absV值4出现八次。如果这应该作为另一个问题单独提出,我可以理解。请参阅编辑后的答案。请按照惯例接受并投票,以便将来参考。请参阅前面的评论。@akrun:我只需要七列。在第七列,即名为“>5”的列中,我需要所有包含absV值的列的总和,从6到最高的一列,我认为是44。
library(dplyr) 
library(tidyr) 

# Number each player's games in the order they appear in `starter`, keeping
# only the columns needed for the reshape. `df` stays grouped by Player.
df <- starter %>%
  group_by(Player) %>%
  mutate(row = row_number()) %>%
  select(Player, absV, row) %>%
  arrange(Player)

# Games 1-5 each become their own column; all later games are collapsed into
# a single ">5" total per player. `row` is converted to character so both
# pieces share one column type before stacking.
# bind_rows() is used instead of rbind(): it is the dplyr way to stack
# tibbles and does not depend on base R's internal rbind method dispatch
# when a grouped tibble is combined with a plain one.
# Players with five or fewer games have no ">5" row, so they get NA there.
bind_rows(
  df %>%
    filter(row <= 5) %>%
    mutate(row = as.character(row)),
  df %>%
    filter(row > 5) %>%
    summarise(absV = sum(absV)) %>%
    mutate(row = ">5")
) %>%
  pivot_wider(id_cols = Player, names_from = row, values_from = absV)

# A tibble: 8 x 7
# Groups:   Player [8]
  Player            `1`   `2`   `3`   `4`   `5`  `>5`
  <chr>           <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Ariel Jurado        4    NA    NA    NA    NA    NA
2 David Peterson      1     5     4     1     1    NA
3 Jacob deGrom        1     1     1    17    13     2
4 Michael Wacha       3     9     5    NA    NA    NA
5 Rick Porcello       2     7     3     1     2     1
6 Robert Gsellman     2     1    NA    NA    NA    NA
7 Seth Lugo          10     3     4     1     4     3
8 Steven Matz        13    NA    NA    NA    NA    NA
# Same reshape as before, but sort by Player and absV first so that the row
# numbers follow ascending absV within each player rather than game order.
df <- starter %>%
  group_by(Player) %>%
  arrange(Player, absV) %>%
  mutate(row = row_number()) %>%
  select(Player, absV, row)

# The five smallest absV values per player each get their own column; the
# remaining values are summed into ">5". `row` is converted to character so
# both pieces share one column type before stacking.
# bind_rows() is used instead of rbind(): it is the dplyr way to stack
# tibbles and does not depend on base R's internal rbind method dispatch
# when a grouped tibble is combined with a plain one.
bind_rows(
  df %>%
    filter(row <= 5) %>%
    mutate(row = as.character(row)),
  df %>%
    filter(row > 5) %>%
    summarise(absV = sum(absV)) %>%
    mutate(row = ">5")
) %>%
  pivot_wider(id_cols = Player, names_from = row, values_from = absV)

# This gives the following, different output

# A tibble: 8 x 7
# Groups:   Player [8]
  Player            `1`   `2`   `3`   `4`   `5`  `>5`
  <chr>           <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Ariel Jurado        4    NA    NA    NA    NA    NA
2 David Peterson      1     1     1     4     5    NA
3 Jacob deGrom        1     1     1     2    13    17
4 Michael Wacha       3     5     9    NA    NA    NA
5 Rick Porcello       1     1     2     2     3     7
6 Robert Gsellman     1     2    NA    NA    NA    NA
7 Seth Lugo           1     3     3     4     4    10
8 Steven Matz        13    NA    NA    NA    NA    NA
# Attach to every row the number of times its (Player, absV) pair occurs,
# then drop the grouping.
df %>%
  group_by(Player, absV) %>%
  mutate(freq = n()) %>%
  ungroup()

# Check it: same computation, keeping only the columns needed to inspect
# the frequencies.
df %>%
  group_by(Player, absV) %>%
  mutate(freq = n()) %>%
  ungroup() %>%
  select(Player, absV, freq)
   Player          absV  freq
   <chr>          <dbl> <int>
 1 Seth Lugo         10     1
 2 Jacob deGrom       1     3
 3 Rick Porcello      2     2
 4 David Peterson     1     3
 5 Michael Wacha      3     1
 6 Seth Lugo          3     2
 7 Jacob deGrom       1     3
 8 Rick Porcello      7     1
 9 David Peterson     5     1
10 Steven Matz       13     1
# ... with 20 more rows
library(data.table)
# Spread each player's absV values into one column per appearance, numbered
# with rowid(Player). Note: setDT() converts `starter` to a data.table by
# reference, so `starter` itself is a data.table after this call.
dcast(
  setDT(starter),
  Player ~ rowid(Player),
  value.var = "absV"
)