R 如何在数据帧中仅保留最高的重复值?
标签:r, dataframe, dplyr, tidyverse
我有以下代码:
library(tidyverse)
# Collapse the data to one row per distinct (name, nationality,
# total_hrs_sum) combination: summarise() with no summary expressions
# returns only the grouping columns.
astronauts %>%
group_by(name, nationality, total_hrs_sum) %>%
# summarise() peels off the last grouping level by default, so the result
# stays grouped by name and nationality.
summarise()
name nationality total_hrs_sum
<chr> <chr> <dbl>
Acaba, Joseph M. U.S. 7272.23
Acton, Loren Wilbur U.S. 190.94
Adamson, James C. U.S. 334.00
Afanasyev, Viktor Mikhaylovich U.S.S.R/Russia 13338.55
Aidyn (Aydyn) Akanovich Aimbetov Kazakhstan 236.23
Akers, Thomas D. U.S. 814.00
Akiyama, Toyohiro Japan 189.90
Aksyonov, Vladimir U.S.S.R/Russia 284.18
Al Mansoori, Hazzaa UAE 189.00
Al-saud, Sultan bin Salman Saudi Arabia 170.00
structure(list(name = c("Acaba, Joseph M.", "Acton, Loren Wilbur",
"Adamson, James C.", "Afanasyev, Viktor Mikhaylovich", "Aidyn (Aydyn) Akanovich Aimbetov",
"Akers, Thomas D.", "Akiyama, Toyohiro", "Aksyonov, Vladimir",
"Al Mansoori, Hazzaa", "Al-saud, Sultan bin Salman", "Aldrin, Edwin Eugene, Jr.",
"Aleksandrov, Aleksandr", "Aleksandrov, Aleksandr", "Allen, Andrew M.",
"Allen, Joseph P.", "Altman, Scott D.", "Anders, William Alison",
"Anderson, Clayton C.", "Anderson, Michael P.", "André-Deshays, Claudie (Haigneré)",
"Ansari, Anousheh", "Antonelli, Dominic A.", "Apt, Jerome", "Archambault, Lee J.",
"Armstrong, Neil A.", "Arnaldo Tamayo Mendez", "Arnold, Richard R., II",
"Artemyev, Oleg", "Artsebarsky, Anatoly", "Artyukhin, Yuri",
"Ashby, Jeffrey S.", "Atkov, Oleg", "Aubakirov, Toktar", "Auñón-Chancellor, Serena",
"Avdeyev, Sergei", "Bagian, James P.", "Baker, Ellen S.", "Baker, Michael A.",
"Balandin, Aleksandr", "Barratt, Michael R.", "Barry, Daniel T.",
"Bartoe, John-David Francis", "Baturin, Yuri", "Baudry, Patrick",
"Bean, Alan Lavern", "Behnken, Robert L.", "Bella, Ivan", "Belyayev, Pavel",
"Beregovoi, Georgi", "Berezovoy, Anatoly"), nationality = c("U.S.",
"U.S.", "U.S.", "U.S.S.R/Russia", "Kazakhstan", "U.S.", "Japan",
"U.S.S.R/Russia", "UAE", "Saudi Arabia", "U.S.", "Bulgaria",
"U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"France", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "Cuba", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", "U.S.S.R/Russia",
"U.S.", "U.S.", "U.S.", "U.S.S.R/Russia", "U.S.", "U.S.", "U.S.",
"U.S.S.R/Russia", "France", "U.S.", "U.S.", "Slovakia", "U.S.S.R/Russia",
"U.S.S.R/Russia", "U.S.S.R/Russia"), total_hrs_sum = c(7272.23,
190.94, 334, 13338.55, 236.23, 814, 189.9, 284.18, 189, 170,
289, 47, 7434.03, 904, 314, 1224, 147, 4046, 593, 614.37, 261.525,
579, 847, 639.5, 206, 188.71, 307, 8784, 3471.35, 377.5, 664,
5686.82, 190.2, 4722, 17942.23, 338, 686, 965, 4297.28, 5085,
734, 190.94, 473.75, 169.63, 1671.75, 708, 190, 26.03, 94.83,
5073.07)), row.names = c(NA, -50L), groups = structure(list(name = c("Acaba, Joseph M.",
"Acton, Loren Wilbur", "Adamson, James C.", "Afanasyev, Viktor Mikhaylovich",
"Aidyn (Aydyn) Akanovich Aimbetov", "Akers, Thomas D.", "Akiyama, Toyohiro",
"Aksyonov, Vladimir", "Al Mansoori, Hazzaa", "Al-saud, Sultan bin Salman",
"Aldrin, Edwin Eugene, Jr.", "Aleksandrov, Aleksandr", "Aleksandrov, Aleksandr",
"Allen, Andrew M.", "Allen, Joseph P.", "Altman, Scott D.", "Anders, William Alison",
"Anderson, Clayton C.", "Anderson, Michael P.", "André-Deshays, Claudie (Haigneré)",
"Ansari, Anousheh", "Antonelli, Dominic A.", "Apt, Jerome", "Archambault, Lee J.",
"Armstrong, Neil A.", "Arnaldo Tamayo Mendez", "Arnold, Richard R., II",
"Artemyev, Oleg", "Artsebarsky, Anatoly", "Artyukhin, Yuri",
"Ashby, Jeffrey S.", "Atkov, Oleg", "Aubakirov, Toktar", "Auñón-Chancellor, Serena",
"Avdeyev, Sergei", "Bagian, James P.", "Baker, Ellen S.", "Baker, Michael A.",
"Balandin, Aleksandr", "Barratt, Michael R.", "Barry, Daniel T.",
"Bartoe, John-David Francis", "Baturin, Yuri", "Baudry, Patrick",
"Bean, Alan Lavern", "Behnken, Robert L.", "Bella, Ivan", "Belyayev, Pavel",
"Beregovoi, Georgi", "Berezovoy, Anatoly"), nationality = c("U.S.",
"U.S.", "U.S.", "U.S.S.R/Russia", "Kazakhstan", "U.S.", "Japan",
"U.S.S.R/Russia", "UAE", "Saudi Arabia", "U.S.", "Bulgaria",
"U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"France", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "Cuba", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", "U.S.S.R/Russia",
"U.S.", "U.S.", "U.S.", "U.S.S.R/Russia", "U.S.", "U.S.", "U.S.",
"U.S.S.R/Russia", "France", "U.S.", "U.S.", "Slovakia", "U.S.S.R/Russia",
"U.S.S.R/Russia", "U.S.S.R/Russia"), .rows = structure(list(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 50L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
输出:
library(tidyverse)
# Collapse the data to one row per distinct (name, nationality,
# total_hrs_sum) combination: summarise() with no summary expressions
# returns only the grouping columns.
astronauts %>%
group_by(name, nationality, total_hrs_sum) %>%
# summarise() peels off the last grouping level by default, so the result
# stays grouped by name and nationality.
summarise()
name nationality total_hrs_sum
<chr> <chr> <dbl>
Acaba, Joseph M. U.S. 7272.23
Acton, Loren Wilbur U.S. 190.94
Adamson, James C. U.S. 334.00
Afanasyev, Viktor Mikhaylovich U.S.S.R/Russia 13338.55
Aidyn (Aydyn) Akanovich Aimbetov Kazakhstan 236.23
Akers, Thomas D. U.S. 814.00
Akiyama, Toyohiro Japan 189.90
Aksyonov, Vladimir U.S.S.R/Russia 284.18
Al Mansoori, Hazzaa UAE 189.00
Al-saud, Sultan bin Salman Saudi Arabia 170.00
structure(list(name = c("Acaba, Joseph M.", "Acton, Loren Wilbur",
"Adamson, James C.", "Afanasyev, Viktor Mikhaylovich", "Aidyn (Aydyn) Akanovich Aimbetov",
"Akers, Thomas D.", "Akiyama, Toyohiro", "Aksyonov, Vladimir",
"Al Mansoori, Hazzaa", "Al-saud, Sultan bin Salman", "Aldrin, Edwin Eugene, Jr.",
"Aleksandrov, Aleksandr", "Aleksandrov, Aleksandr", "Allen, Andrew M.",
"Allen, Joseph P.", "Altman, Scott D.", "Anders, William Alison",
"Anderson, Clayton C.", "Anderson, Michael P.", "André-Deshays, Claudie (Haigneré)",
"Ansari, Anousheh", "Antonelli, Dominic A.", "Apt, Jerome", "Archambault, Lee J.",
"Armstrong, Neil A.", "Arnaldo Tamayo Mendez", "Arnold, Richard R., II",
"Artemyev, Oleg", "Artsebarsky, Anatoly", "Artyukhin, Yuri",
"Ashby, Jeffrey S.", "Atkov, Oleg", "Aubakirov, Toktar", "Auñón-Chancellor, Serena",
"Avdeyev, Sergei", "Bagian, James P.", "Baker, Ellen S.", "Baker, Michael A.",
"Balandin, Aleksandr", "Barratt, Michael R.", "Barry, Daniel T.",
"Bartoe, John-David Francis", "Baturin, Yuri", "Baudry, Patrick",
"Bean, Alan Lavern", "Behnken, Robert L.", "Bella, Ivan", "Belyayev, Pavel",
"Beregovoi, Georgi", "Berezovoy, Anatoly"), nationality = c("U.S.",
"U.S.", "U.S.", "U.S.S.R/Russia", "Kazakhstan", "U.S.", "Japan",
"U.S.S.R/Russia", "UAE", "Saudi Arabia", "U.S.", "Bulgaria",
"U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"France", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "Cuba", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", "U.S.S.R/Russia",
"U.S.", "U.S.", "U.S.", "U.S.S.R/Russia", "U.S.", "U.S.", "U.S.",
"U.S.S.R/Russia", "France", "U.S.", "U.S.", "Slovakia", "U.S.S.R/Russia",
"U.S.S.R/Russia", "U.S.S.R/Russia"), total_hrs_sum = c(7272.23,
190.94, 334, 13338.55, 236.23, 814, 189.9, 284.18, 189, 170,
289, 47, 7434.03, 904, 314, 1224, 147, 4046, 593, 614.37, 261.525,
579, 847, 639.5, 206, 188.71, 307, 8784, 3471.35, 377.5, 664,
5686.82, 190.2, 4722, 17942.23, 338, 686, 965, 4297.28, 5085,
734, 190.94, 473.75, 169.63, 1671.75, 708, 190, 26.03, 94.83,
5073.07)), row.names = c(NA, -50L), groups = structure(list(name = c("Acaba, Joseph M.",
"Acton, Loren Wilbur", "Adamson, James C.", "Afanasyev, Viktor Mikhaylovich",
"Aidyn (Aydyn) Akanovich Aimbetov", "Akers, Thomas D.", "Akiyama, Toyohiro",
"Aksyonov, Vladimir", "Al Mansoori, Hazzaa", "Al-saud, Sultan bin Salman",
"Aldrin, Edwin Eugene, Jr.", "Aleksandrov, Aleksandr", "Aleksandrov, Aleksandr",
"Allen, Andrew M.", "Allen, Joseph P.", "Altman, Scott D.", "Anders, William Alison",
"Anderson, Clayton C.", "Anderson, Michael P.", "André-Deshays, Claudie (Haigneré)",
"Ansari, Anousheh", "Antonelli, Dominic A.", "Apt, Jerome", "Archambault, Lee J.",
"Armstrong, Neil A.", "Arnaldo Tamayo Mendez", "Arnold, Richard R., II",
"Artemyev, Oleg", "Artsebarsky, Anatoly", "Artyukhin, Yuri",
"Ashby, Jeffrey S.", "Atkov, Oleg", "Aubakirov, Toktar", "Auñón-Chancellor, Serena",
"Avdeyev, Sergei", "Bagian, James P.", "Baker, Ellen S.", "Baker, Michael A.",
"Balandin, Aleksandr", "Barratt, Michael R.", "Barry, Daniel T.",
"Bartoe, John-David Francis", "Baturin, Yuri", "Baudry, Patrick",
"Bean, Alan Lavern", "Behnken, Robert L.", "Bella, Ivan", "Belyayev, Pavel",
"Beregovoi, Georgi", "Berezovoy, Anatoly"), nationality = c("U.S.",
"U.S.", "U.S.", "U.S.S.R/Russia", "Kazakhstan", "U.S.", "Japan",
"U.S.S.R/Russia", "UAE", "Saudi Arabia", "U.S.", "Bulgaria",
"U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"France", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "Cuba", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", "U.S.S.R/Russia",
"U.S.", "U.S.", "U.S.", "U.S.S.R/Russia", "U.S.", "U.S.", "U.S.",
"U.S.S.R/Russia", "France", "U.S.", "U.S.", "Slovakia", "U.S.S.R/Russia",
"U.S.S.R/Russia", "U.S.S.R/Russia"), .rows = structure(list(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 50L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
试试这个:
library(dplyr)
# Sort the whole table by descending total hours, then keep the first row
# encountered for each nationality, i.e. the row with that nationality's
# largest total. The result stays grouped by nationality and keeps the
# global descending order.
df %>%
  arrange(desc(total_hrs_sum)) %>%
  group_by(nationality) %>%
  filter(row_number() == 1L)
# A tibble: 10 x 3
# Groups: nationality [10]
name nationality total_hrs_sum
<chr> <chr> <dbl>
1 Avdeyev, Sergei U.S.S.R/Russia 17942.
2 Acaba, Joseph M. U.S. 7272.
3 André-Deshays, Claudie (Haigneré) France 614.
4 Aidyn (Aydyn) Akanovich Aimbetov Kazakhstan 236.
5 Bella, Ivan Slovakia 190
6 Akiyama, Toyohiro Japan 190.
7 Al Mansoori, Hazzaa UAE 189
8 Arnaldo Tamayo Mendez Cuba 189.
9 Al-saud, Sultan bin Salman Saudi Arabia 170
10 Aleksandrov, Aleksandr Bulgaria 47
library(dplyr)
df %>% group_by(nationality) %>% arrange(desc(total_hrs_sum)) %>% filter(!duplicated(nationality))
#一个tibble:10x3
#团体:国籍[10]
姓名国籍总时数总和
1阿夫德耶夫,谢尔盖苏联/俄罗斯17942。
2阿卡巴,约瑟夫M.U.S.7272。
3安德烈-德赛,克劳迪(海涅)法国614。
4 Aidyn(Aydyn)Akanovich Aimbetov哈萨克斯坦236。
5贝拉,伊万斯洛伐克190
6秋山,日本丰田章男190。
7 Al Mansoori,阿拉伯联合酋长国哈扎阿189
8阿纳尔多·塔马约·门德斯古巴189。
9沙特阿拉伯苏丹本·萨勒曼沙特170
10阿列克山德罗夫,阿列克山德罗保加利亚47
您可以这样过滤:
# For every nationality keep the row(s) whose total_hrs_sum equals that
# nationality's maximum; tied rows are all retained and the original row
# order is preserved.
astronauts %>%
  group_by(nationality) %>%
  # na.rm = TRUE: a single missing total would otherwise make max() return
  # NA, turn every comparison into NA, and cause filter() to silently drop
  # the whole nationality.
  filter(total_hrs_sum == max(total_hrs_sum, na.rm = TRUE))
在经过 `group_by` 分组的数据框中,`max` 返回的是每个组内的最大值。
这是输出:
# A tibble: 10 x 3
# Groups: nationality [10]
name nationality total_hrs_sum
<chr> <chr> <dbl>
1 Acaba, Joseph M. U.S. 7272.
2 Aidyn (Aydyn) Akanovich Aimbetov Kazakhstan 236.
3 Akiyama, Toyohiro Japan 190.
4 Al Mansoori, Hazzaa UAE 189
5 Al-saud, Sultan bin Salman Saudi Arabia 170
6 Aleksandrov, Aleksandr Bulgaria 47
7 André-Deshays, Claudie (Haigneré) France 614.
8 Arnaldo Tamayo Mendez Cuba 189.
9 Avdeyev, Sergei U.S.S.R/Russia 17942.
10 Bella, Ivan Slovakia 190
#一个tible:10 x 3
#团体:国籍[10]
姓名国籍总时数总和
1阿卡巴,约瑟夫M.U.S.7272。
2 Aidyn(Aydyn)Akanovich Aimbetov哈萨克斯坦236。
3秋山,日本丰田章男190。
4阿拉伯联合酋长国哈扎阿Al Mansoori 189
5沙特阿拉伯苏丹本·萨勒曼·沙特170
6阿列克山德罗夫,阿列克山德罗保加利亚47
7安德烈-德赛,克劳迪(海涅)法国614。
8阿纳尔多·塔马约·门德斯古巴189。
9阿夫德耶夫,谢尔盖苏联/俄罗斯17942。
10贝拉,伊万斯洛伐克190
我们可以先按 `nationality` 升序、`total_hrs_sum` 降序排列,再按 `nationality` 分组,用 `slice` 取每组的第一行:
library(dplyr)
# Order rows so that, within each nationality, the largest total comes
# first; then keep only the leading row of every nationality group.
df %>%
  arrange(nationality, desc(total_hrs_sum)) %>%
  group_by(nationality) %>%
  slice_head(n = 1)
或者使用 `top_n`:
# Within each nationality keep the row(s) with the largest total_hrs_sum.
# top_n() acts as a filter, so the surviving rows stay in their original
# order.
# NOTE(review): dplyr >= 1.0.0 marks top_n() as superseded by slice_max();
# switching would change the row order of the printed result.
df %>%
  group_by(nationality) %>%
  top_n(n = 1, wt = total_hrs_sum)
# A tibble: 10 x 3
# Groups: nationality [10]
# name nationality total_hrs_sum
# <chr> <chr> <dbl>
# 1 Acaba, Joseph M. U.S. 7272.
# 2 Aidyn (Aydyn) Akanovich Aimbetov Kazakhstan 236.
# 3 Akiyama, Toyohiro Japan 190.
# 4 Al Mansoori, Hazzaa UAE 189
# 5 Al-saud, Sultan bin Salman Saudi Arabia 170
# 6 Aleksandrov, Aleksandr Bulgaria 47
# 7 André-Deshays, Claudie (Haigneré) France 614.
# 8 Arnaldo Tamayo Mendez Cuba 189.
# 9 Avdeyev, Sergei U.S.S.R/Russia 17942.
#10 Bella, Ivan Slovakia 190
df %>%
group_by(nationality) %>%
top_n(n=1, total_hrs_sum)
#一个tibble:10x3
#团体:国籍[10]
#姓名国籍总时数总和
#
#1阿卡巴,约瑟夫M.U.S.7272。
#2 Aidyn(Aydyn)Akanovich Aimbetov哈萨克斯坦236。
#3秋山,日本丰田章男190。
#4阿拉伯联合酋长国哈扎阿Al Mansoori 189
#5沙特阿拉伯苏丹本·萨勒曼·沙特170
#6阿列克山德罗夫,阿列克山德罗保加利亚47
#7安德烈-德赛,克劳迪(海涅)法国614。
#8阿纳尔多·塔马约·门德斯古巴189。
#9阿夫德耶夫,谢尔盖苏联/俄罗斯17942。
#10贝拉,伊万斯洛伐克190