Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/ms-access/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 如何在数据帧中仅保留最高的重复值?_R_Dataframe_Dplyr_Tidyverse - Fatal编程技术网

R 如何在数据帧中仅保留最高的重复值?

R 如何在数据帧中仅保留最高的重复值?,r,dataframe,dplyr,tidyverse,R,Dataframe,Dplyr,Tidyverse,我有以下代码: library(tidyverse) astronauts %>% group_by(name, nationality, total_hrs_sum) %>% summarise() name nationality total_hrs_sum <chr> <chr>

我有以下代码:

library(tidyverse)
astronauts %>% 
  group_by(name, nationality, total_hrs_sum) %>% 
  summarise() 
name                                 nationality         total_hrs_sum
<chr>                                <chr>               <dbl>
Acaba, Joseph M.                     U.S.                7272.23        
Acton, Loren Wilbur                  U.S.                190.94     
Adamson, James C.                    U.S.                334.00     
Afanasyev, Viktor Mikhaylovich       U.S.S.R/Russia      13338.55       
Aidyn (Aydyn) Akanovich Aimbetov     Kazakhstan          236.23     
Akers, Thomas D.                     U.S.                814.00     
Akiyama, Toyohiro                    Japan               189.90     
Aksyonov, Vladimir                   U.S.S.R/Russia      284.18     
Al Mansoori, Hazzaa                  UAE                 189.00     
Al-saud, Sultan bin Salman           Saudi Arabia        170.00
structure(list(name = c("Acaba, Joseph M.", "Acton, Loren Wilbur", 
"Adamson, James C.", "Afanasyev, Viktor Mikhaylovich", "Aidyn (Aydyn) Akanovich Aimbetov", 
"Akers, Thomas D.", "Akiyama, Toyohiro", "Aksyonov, Vladimir", 
"Al Mansoori, Hazzaa", "Al-saud, Sultan bin Salman", "Aldrin, Edwin Eugene, Jr.", 
"Aleksandrov, Aleksandr", "Aleksandrov, Aleksandr", "Allen, Andrew M.", 
"Allen, Joseph P.", "Altman, Scott D.", "Anders, William Alison", 
"Anderson, Clayton C.", "Anderson, Michael P.", "André-Deshays, Claudie (Haigneré)", 
"Ansari, Anousheh", "Antonelli, Dominic A.", "Apt, Jerome", "Archambault, Lee J.", 
"Armstrong, Neil A.", "Arnaldo Tamayo Mendez", "Arnold, Richard R., II", 
"Artemyev, Oleg", "Artsebarsky, Anatoly", "Artyukhin, Yuri", 
"Ashby, Jeffrey S.", "Atkov, Oleg", "Aubakirov, Toktar", "Auñón-Chancellor, Serena", 
"Avdeyev, Sergei", "Bagian, James P.", "Baker, Ellen S.", "Baker, Michael A.", 
"Balandin, Aleksandr", "Barratt, Michael R.", "Barry, Daniel T.", 
"Bartoe, John-David Francis", "Baturin, Yuri", "Baudry, Patrick", 
"Bean, Alan Lavern", "Behnken, Robert L.", "Bella, Ivan", "Belyayev, Pavel", 
"Beregovoi, Georgi", "Berezovoy, Anatoly"), nationality = c("U.S.", 
"U.S.", "U.S.", "U.S.S.R/Russia", "Kazakhstan", "U.S.", "Japan", 
"U.S.S.R/Russia", "UAE", "Saudi Arabia", "U.S.", "Bulgaria", 
"U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", 
"France", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "Cuba", "U.S.", 
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", 
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", "U.S.S.R/Russia", 
"U.S.", "U.S.", "U.S.", "U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", 
"U.S.S.R/Russia", "France", "U.S.", "U.S.", "Slovakia", "U.S.S.R/Russia", 
"U.S.S.R/Russia", "U.S.S.R/Russia"), total_hrs_sum = c(7272.23, 
190.94, 334, 13338.55, 236.23, 814, 189.9, 284.18, 189, 170, 
289, 47, 7434.03, 904, 314, 1224, 147, 4046, 593, 614.37, 261.525, 
579, 847, 639.5, 206, 188.71, 307, 8784, 3471.35, 377.5, 664, 
5686.82, 190.2, 4722, 17942.23, 338, 686, 965, 4297.28, 5085, 
734, 190.94, 473.75, 169.63, 1671.75, 708, 190, 26.03, 94.83, 
5073.07)), row.names = c(NA, -50L), groups = structure(list(name = c("Acaba, Joseph M.", 
"Acton, Loren Wilbur", "Adamson, James C.", "Afanasyev, Viktor Mikhaylovich", 
"Aidyn (Aydyn) Akanovich Aimbetov", "Akers, Thomas D.", "Akiyama, Toyohiro", 
"Aksyonov, Vladimir", "Al Mansoori, Hazzaa", "Al-saud, Sultan bin Salman", 
"Aldrin, Edwin Eugene, Jr.", "Aleksandrov, Aleksandr", "Aleksandrov, Aleksandr", 
"Allen, Andrew M.", "Allen, Joseph P.", "Altman, Scott D.", "Anders, William Alison", 
"Anderson, Clayton C.", "Anderson, Michael P.", "André-Deshays, Claudie (Haigneré)", 
"Ansari, Anousheh", "Antonelli, Dominic A.", "Apt, Jerome", "Archambault, Lee J.", 
"Armstrong, Neil A.", "Arnaldo Tamayo Mendez", "Arnold, Richard R., II", 
"Artemyev, Oleg", "Artsebarsky, Anatoly", "Artyukhin, Yuri", 
"Ashby, Jeffrey S.", "Atkov, Oleg", "Aubakirov, Toktar", "Auñón-Chancellor, Serena", 
"Avdeyev, Sergei", "Bagian, James P.", "Baker, Ellen S.", "Baker, Michael A.", 
"Balandin, Aleksandr", "Barratt, Michael R.", "Barry, Daniel T.", 
"Bartoe, John-David Francis", "Baturin, Yuri", "Baudry, Patrick", 
"Bean, Alan Lavern", "Behnken, Robert L.", "Bella, Ivan", "Belyayev, Pavel", 
"Beregovoi, Georgi", "Berezovoy, Anatoly"), nationality = c("U.S.", 
"U.S.", "U.S.", "U.S.S.R/Russia", "Kazakhstan", "U.S.", "Japan", 
"U.S.S.R/Russia", "UAE", "Saudi Arabia", "U.S.", "Bulgaria", 
"U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", 
"France", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "Cuba", "U.S.", 
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", 
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", "U.S.S.R/Russia", 
"U.S.", "U.S.", "U.S.", "U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", 
"U.S.S.R/Russia", "France", "U.S.", "U.S.", "Slovakia", "U.S.S.R/Russia", 
"U.S.S.R/Russia", "U.S.S.R/Russia"), .rows = structure(list(1L, 
    2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 
    15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 
    27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 
    39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, 50L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))
    
输出:

library(tidyverse)
astronauts %>% 
  group_by(name, nationality, total_hrs_sum) %>% 
  summarise() 
name                                 nationality         total_hrs_sum
<chr>                                <chr>               <dbl>
Acaba, Joseph M.                     U.S.                7272.23        
Acton, Loren Wilbur                  U.S.                190.94     
Adamson, James C.                    U.S.                334.00     
Afanasyev, Viktor Mikhaylovich       U.S.S.R/Russia      13338.55       
Aidyn (Aydyn) Akanovich Aimbetov     Kazakhstan          236.23     
Akers, Thomas D.                     U.S.                814.00     
Akiyama, Toyohiro                    Japan               189.90     
Aksyonov, Vladimir                   U.S.S.R/Russia      284.18     
Al Mansoori, Hazzaa                  UAE                 189.00     
Al-saud, Sultan bin Salman           Saudi Arabia        170.00
structure(list(name = c("Acaba, Joseph M.", "Acton, Loren Wilbur", 
"Adamson, James C.", "Afanasyev, Viktor Mikhaylovich", "Aidyn (Aydyn) Akanovich Aimbetov", 
"Akers, Thomas D.", "Akiyama, Toyohiro", "Aksyonov, Vladimir", 
"Al Mansoori, Hazzaa", "Al-saud, Sultan bin Salman", "Aldrin, Edwin Eugene, Jr.", 
"Aleksandrov, Aleksandr", "Aleksandrov, Aleksandr", "Allen, Andrew M.", 
"Allen, Joseph P.", "Altman, Scott D.", "Anders, William Alison", 
"Anderson, Clayton C.", "Anderson, Michael P.", "André-Deshays, Claudie (Haigneré)", 
"Ansari, Anousheh", "Antonelli, Dominic A.", "Apt, Jerome", "Archambault, Lee J.", 
"Armstrong, Neil A.", "Arnaldo Tamayo Mendez", "Arnold, Richard R., II", 
"Artemyev, Oleg", "Artsebarsky, Anatoly", "Artyukhin, Yuri", 
"Ashby, Jeffrey S.", "Atkov, Oleg", "Aubakirov, Toktar", "Auñón-Chancellor, Serena", 
"Avdeyev, Sergei", "Bagian, James P.", "Baker, Ellen S.", "Baker, Michael A.", 
"Balandin, Aleksandr", "Barratt, Michael R.", "Barry, Daniel T.", 
"Bartoe, John-David Francis", "Baturin, Yuri", "Baudry, Patrick", 
"Bean, Alan Lavern", "Behnken, Robert L.", "Bella, Ivan", "Belyayev, Pavel", 
"Beregovoi, Georgi", "Berezovoy, Anatoly"), nationality = c("U.S.", 
"U.S.", "U.S.", "U.S.S.R/Russia", "Kazakhstan", "U.S.", "Japan", 
"U.S.S.R/Russia", "UAE", "Saudi Arabia", "U.S.", "Bulgaria", 
"U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", 
"France", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "Cuba", "U.S.", 
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", 
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", "U.S.S.R/Russia", 
"U.S.", "U.S.", "U.S.", "U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", 
"U.S.S.R/Russia", "France", "U.S.", "U.S.", "Slovakia", "U.S.S.R/Russia", 
"U.S.S.R/Russia", "U.S.S.R/Russia"), total_hrs_sum = c(7272.23, 
190.94, 334, 13338.55, 236.23, 814, 189.9, 284.18, 189, 170, 
289, 47, 7434.03, 904, 314, 1224, 147, 4046, 593, 614.37, 261.525, 
579, 847, 639.5, 206, 188.71, 307, 8784, 3471.35, 377.5, 664, 
5686.82, 190.2, 4722, 17942.23, 338, 686, 965, 4297.28, 5085, 
734, 190.94, 473.75, 169.63, 1671.75, 708, 190, 26.03, 94.83, 
5073.07)), row.names = c(NA, -50L), groups = structure(list(name = c("Acaba, Joseph M.", 
"Acton, Loren Wilbur", "Adamson, James C.", "Afanasyev, Viktor Mikhaylovich", 
"Aidyn (Aydyn) Akanovich Aimbetov", "Akers, Thomas D.", "Akiyama, Toyohiro", 
"Aksyonov, Vladimir", "Al Mansoori, Hazzaa", "Al-saud, Sultan bin Salman", 
"Aldrin, Edwin Eugene, Jr.", "Aleksandrov, Aleksandr", "Aleksandrov, Aleksandr", 
"Allen, Andrew M.", "Allen, Joseph P.", "Altman, Scott D.", "Anders, William Alison", 
"Anderson, Clayton C.", "Anderson, Michael P.", "André-Deshays, Claudie (Haigneré)", 
"Ansari, Anousheh", "Antonelli, Dominic A.", "Apt, Jerome", "Archambault, Lee J.", 
"Armstrong, Neil A.", "Arnaldo Tamayo Mendez", "Arnold, Richard R., II", 
"Artemyev, Oleg", "Artsebarsky, Anatoly", "Artyukhin, Yuri", 
"Ashby, Jeffrey S.", "Atkov, Oleg", "Aubakirov, Toktar", "Auñón-Chancellor, Serena", 
"Avdeyev, Sergei", "Bagian, James P.", "Baker, Ellen S.", "Baker, Michael A.", 
"Balandin, Aleksandr", "Barratt, Michael R.", "Barry, Daniel T.", 
"Bartoe, John-David Francis", "Baturin, Yuri", "Baudry, Patrick", 
"Bean, Alan Lavern", "Behnken, Robert L.", "Bella, Ivan", "Belyayev, Pavel", 
"Beregovoi, Georgi", "Berezovoy, Anatoly"), nationality = c("U.S.", 
"U.S.", "U.S.", "U.S.S.R/Russia", "Kazakhstan", "U.S.", "Japan", 
"U.S.S.R/Russia", "UAE", "Saudi Arabia", "U.S.", "Bulgaria", 
"U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", 
"France", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "Cuba", "U.S.", 
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", 
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", "U.S.S.R/Russia", 
"U.S.", "U.S.", "U.S.", "U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", 
"U.S.S.R/Russia", "France", "U.S.", "U.S.", "Slovakia", "U.S.S.R/Russia", 
"U.S.S.R/Russia", "U.S.S.R/Russia"), .rows = structure(list(1L, 
    2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 
    15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 
    27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 
    39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, 50L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))
    
试试这个:

library(dplyr)

df %>% group_by(nationality) %>% arrange(desc(total_hrs_sum)) %>% filter(!duplicated(nationality))

# A tibble: 10 x 3
# Groups:   nationality [10]
   name                              nationality    total_hrs_sum
   <chr>                             <chr>                  <dbl>
 1 Avdeyev, Sergei                   U.S.S.R/Russia        17942.
 2 Acaba, Joseph M.                  U.S.                   7272.
 3 André-Deshays, Claudie (Haigneré) France                  614.
 4 Aidyn (Aydyn) Akanovich Aimbetov  Kazakhstan              236.
 5 Bella, Ivan                       Slovakia                190 
 6 Akiyama, Toyohiro                 Japan                   190.
 7 Al Mansoori, Hazzaa               UAE                     189 
 8 Arnaldo Tamayo Mendez             Cuba                    189.
 9 Al-saud, Sultan bin Salman        Saudi Arabia            170 
10 Aleksandrov, Aleksandr            Bulgaria                 47 
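library(dplyr)
df %>% group_by(nationality) %>% arrange(desc(total_hrs_sum)) %>% filter(!duplicated(nationality))
# A tibble: 10 x 3
# Groups:   nationality [10]
   name                              nationality    total_hrs_sum
 1 Avdeyev, Sergei                   U.S.S.R/Russia        17942.
 2 Acaba, Joseph M.                  U.S.                   7272.
 3 André-Deshays, Claudie (Haigneré) France                  614.
 4 Aidyn (Aydyn) Akanovich Aimbetov  Kazakhstan              236.
 5 Bella, Ivan                       Slovakia                190 
 6 Akiyama, Toyohiro                 Japan                   190.
 7 Al Mansoori, Hazzaa               UAE                     189 
 8 Arnaldo Tamayo Mendez             Cuba                    189.
 9 Al-saud, Sultan bin Salman        Saudi Arabia            170 
10 Aleksandrov, Aleksandr            Bulgaria                 47 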

您可以这样过滤:

astronauts %>%
  group_by(nationality) %>%
  filter(total_hrs_sum == max(total_hrs_sum))
说明:用 `group_by` 对数据框分组之后,`max(total_hrs_sum)` 是在每个组内分别计算的,因此得到的是每组(每个国籍)的最大值,`filter` 只保留等于该最大值的行。

这是输出:

# A tibble: 10 x 3
# Groups:   nationality [10]
   name                              nationality    total_hrs_sum
   <chr>                             <chr>                  <dbl>
 1 Acaba, Joseph M.                  U.S.                   7272.
 2 Aidyn (Aydyn) Akanovich Aimbetov  Kazakhstan              236.
 3 Akiyama, Toyohiro                 Japan                   190.
 4 Al Mansoori, Hazzaa               UAE                     189 
 5 Al-saud, Sultan bin Salman        Saudi Arabia            170 
 6 Aleksandrov, Aleksandr            Bulgaria                 47 
 7 André-Deshays, Claudie (Haigneré) France                  614.
 8 Arnaldo Tamayo Mendez             Cuba                    189.
 9 Avdeyev, Sergei                   U.S.S.R/Russia        17942.
10 Bella, Ivan                       Slovakia                190 
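# A tibble: 10 x 3
# Groups:   nationality [10]
   name                              nationality    total_hrs_sum
 1 Acaba, Joseph M.                  U.S.                   7272.
 2 Aidyn (Aydyn) Akanovich Aimbetov  Kazakhstan              236.
 3 Akiyama, Toyohiro                 Japan                   190.
 4 Al Mansoori, Hazzaa               UAE                     189 
 5 Al-saud, Sultan bin Salman        Saudi Arabia            170 
 6 Aleksandrov, Aleksandr            Bulgaria                 47 
 7 André-Deshays, Claudie (Haigneré) France                  614.
 8 Arnaldo Tamayo Mendez             Cuba                    189.
 9 Avdeyev, Sergei                   U.S.S.R/Russia        17942.
10 Bella, Ivan                       Slovakia                190 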

我们可以先用 `arrange` 按 `nationality` 以及 `total_hrs_sum` 的降序排列,再按 `nationality` 分组,然后用 `slice(1)` 取出每组的第一行:

library(dplyr)
df %>%        
    arrange(nationality, desc(total_hrs_sum)) %>%
    group_by(nationality) %>%
    slice(1)

或者使用 `top_n`(在 dplyr 1.0.0 及以后的版本中,`top_n` 已被 `slice_max` 取代):

df %>%
    group_by(nationality) %>%
    top_n(n=1, total_hrs_sum)
# A tibble: 10 x 3
# Groups:   nationality [10]
#   name                              nationality    total_hrs_sum
#   <chr>                             <chr>                  <dbl>
# 1 Acaba, Joseph M.                  U.S.                   7272.
# 2 Aidyn (Aydyn) Akanovich Aimbetov  Kazakhstan              236.
# 3 Akiyama, Toyohiro                 Japan                   190.
# 4 Al Mansoori, Hazzaa               UAE                     189 
# 5 Al-saud, Sultan bin Salman        Saudi Arabia            170 
# 6 Aleksandrov, Aleksandr            Bulgaria                 47 
# 7 André-Deshays, Claudie (Haigneré) France                  614.
# 8 Arnaldo Tamayo Mendez             Cuba                    189.
# 9 Avdeyev, Sergei                   U.S.S.R/Russia        17942.
#10 Bella, Ivan                       Slovakia                190 
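df %>%
    group_by(nationality) %>%
    top_n(n=1, total_hrs_sum)
# A tibble: 10 x 3
# Groups:   nationality [10]
#   name                              nationality    total_hrs_sum
#   <chr>                             <chr>                  <dbl>
# 1 Acaba, Joseph M.                  U.S.                   7272.
# 2 Aidyn (Aydyn) Akanovich Aimbetov  Kazakhstan              236.
# 3 Akiyama, Toyohiro                 Japan                   190.
# 4 Al Mansoori, Hazzaa               UAE                     189 
# 5 Al-saud, Sultan bin Salman        Saudi Arabia            170 
# 6 Aleksandrov, Aleksandr            Bulgaria                 47 
# 7 André-Deshays, Claudie (Haigneré) France                  614.
# 8 Arnaldo Tamayo Mendez             Cuba                    189.
# 9 Avdeyev, Sergei                   U.S.S.R/Russia        17942.
#10 Bella, Ivan                       Slovakia                190 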