R 数据整理:重塑

R 数据整理:重塑,r,dplyr,data.table,reshape2,R,Dplyr,Data.table,Reshape2,需要将数据从df1转换为df2吗 a <- c("New Zealand","Afghanistan","Afghanistan" , "New Zealand", "Afghanistan", "Australia" ) b <- c("Sri Lanka", "Zimbabwe" , "Zimbabwe", "Sri Lanka", "Zimbabwe" , "India" ) d <- c("no result" , "Zimbabwe" , "Zimbabwe

我需要将数据从 df1 转换为 df2:

# Input: one row per match, giving both teams and the winner.
# A Winner of "no result" marks a match without a winner; those matches are
# the ones counted in the Draw column of df2 below.
a <- c(
  "New Zealand", "Afghanistan", "Afghanistan",
  "New Zealand", "Afghanistan", "Australia"
)
b <- c("Sri Lanka", "Zimbabwe", "Zimbabwe", "Sri Lanka", "Zimbabwe", "India")
d <- c(
  "no result", "Zimbabwe", "Zimbabwe",
  "New Zealand", "Afghanistan", "Australia"
)

# stringsAsFactors = FALSE keeps the team names as character vectors on
# R < 4.0 as well, consistent with the other df1 definitions in this file.
df1 <- data.frame(Team1 = a, Team2 = b, Winner = d, stringsAsFactors = FALSE)

# Desired output: one row per country with matches played, wins, losses
# and draws.
Country <- c(
  "New Zealand", "Sri Lanka", "Afghanistan", "Zimbabwe", "Australia", "India"
)
Match <- c(2, 2, 3, 3, 1, 1)
Win <- c(1, 0, 1, 2, 1, 0)
Loss <- c(0, 1, 2, 1, 0, 1)
Draw <- c(1, 1, 0, 0, 0, 0)

df2 <- data.frame(Country, Match, Win, Loss, Draw, stringsAsFactors = FALSE)

提前感谢。

下面是一个使用 data.table 的大致思路:


下面是一个使用dplyr的方法

#' Tally one team's record from a match-level table
#'
#' @param team Name of the team to summarise.
#' @param df Data frame with columns `Team1`, `Team2` and `Winner`, one row
#'   per match. A `Winner` of "no result" is counted as a draw; any winner
#'   other than `team` or "no result" is counted as a loss.
#' @return A tibble with columns `country`, `match`, `win`, `loss` and `draw`.
tableresults <- function(team, df) {
  # Fail loudly when dplyr is missing. require() only returned FALSE in that
  # case and attached the whole tidyverse as a side effect of every call.
  if (!requireNamespace("dplyr", quietly = TRUE)) {
    stop("Package 'dplyr' is required by tableresults().", call. = FALSE)
  }

  # Matches in which the team took part, on either side.
  played <- dplyr::filter(df, Team1 == team | Team2 == team)

  # Flag the outcome of each match from this team's point of view.
  scored <- dplyr::mutate(
    played,
    win = ifelse(Winner == team, 1, 0),
    draw = ifelse(Winner == "no result", 1, 0),
    loss = ifelse(!Winner %in% c("no result", team), 1, 0),
    country = team
  )

  totals <- dplyr::summarize(
    dplyr::group_by(scored, country),
    match = dplyr::n(),
    win = sum(win),
    loss = sum(loss),
    draw = sum(draw)
  )

  dplyr::ungroup(totals)
}

# Every team that appears in either column, listed exactly once. Gathering the
# distinct (Team1, Team2) pairs repeated a team once per distinct opponent,
# which duplicated that team's row in results_tbl.
countries <- unique(c(as.character(df1$Team1), as.character(df1$Team2)))

# One summary per team, stacked into a single table. Iterating over the names
# with lapply() also copes with an empty `countries`, where 1:length() would
# have produced c(1, 0), and avoids growing the table inside a loop.
results_tbl <- dplyr::bind_rows(lapply(countries, tableresults, df = df1))
library(tidyverse)

# Match-level input: one row per match with both teams and the winner.
a <- c(
  "New Zealand", "Afghanistan", "Afghanistan",
  "New Zealand", "Afghanistan", "Australia"
)
b <- c("Sri Lanka", "Zimbabwe", "Zimbabwe", "Sri Lanka", "Zimbabwe", "India")
d <- c(
  "no result", "Zimbabwe", "Zimbabwe",
  "New Zealand", "Afghanistan", "Australia"
)

df1 <- data.frame(Team1 = a, Team2 = b, Winner = d, stringsAsFactors = FALSE)

# Per-country record, returned as an ungrouped tibble with integer counts.
# Each outcome is tallied directly, so Win, Loss and Draw always exist even
# when no match in the data had that outcome. Spreading a Result column only
# created columns for outcomes that occurred, which broke Win + Loss + Draw.
df1 %>%
  # One row per (match, participating team).
  pivot_longer(c(Team1, Team2), names_to = "Team", values_to = "Country") %>%
  group_by(Country) %>%
  summarise(
    Match = n(),
    Win = sum(Winner == Country),
    Loss = sum(Winner != Country & Winner != "no result"),
    Draw = sum(Winner == "no result")
  )


# A tibble: 6 x 5
# Groups:   Country [6]
    Country     Match   Win  Loss  Draw
    <chr>       <dbl>  <dbl> <dbl> <dbl>
1 Afghanistan     3     1     2     0
2   Australia     1     1     0     0
3       India     1     0     1     0
4 New Zealand     2     1     0     1
5   Sri Lanka     2     0     1     1
6    Zimbabwe     3     2     1     0
结果:

> results_tbl
# A tibble: 6 x 5
  country     match   win  loss  draw
  <chr>       <int> <dbl> <dbl> <dbl>
1 New Zealand     2     1     0     1
2 Afghanistan     3     1     2     0
3 Australia       1     1     0     0
4 Sri Lanka       2     0     1     1
5 Zimbabwe        3     2     1     0
6 India           1     0     1     0

使用dplyr得到相同的结果

#' Tally one team's record from a match-level table
#'
#' @param team Name of the team to summarise.
#' @param df Data frame with columns `Team1`, `Team2` and `Winner`, one row
#'   per match. A `Winner` of "no result" is counted as a draw; any winner
#'   other than `team` or "no result" is counted as a loss.
#' @return A tibble with columns `country`, `match`, `win`, `loss` and `draw`.
tableresults <- function(team, df) {
  # Fail loudly when dplyr is missing. require() only returned FALSE in that
  # case and attached the whole tidyverse as a side effect of every call.
  if (!requireNamespace("dplyr", quietly = TRUE)) {
    stop("Package 'dplyr' is required by tableresults().", call. = FALSE)
  }

  # Matches in which the team took part, on either side.
  played <- dplyr::filter(df, Team1 == team | Team2 == team)

  # Flag the outcome of each match from this team's point of view.
  scored <- dplyr::mutate(
    played,
    win = ifelse(Winner == team, 1, 0),
    draw = ifelse(Winner == "no result", 1, 0),
    loss = ifelse(!Winner %in% c("no result", team), 1, 0),
    country = team
  )

  totals <- dplyr::summarize(
    dplyr::group_by(scored, country),
    match = dplyr::n(),
    win = sum(win),
    loss = sum(loss),
    draw = sum(draw)
  )

  dplyr::ungroup(totals)
}

# Every team that appears in either column, listed exactly once. Gathering the
# distinct (Team1, Team2) pairs repeated a team once per distinct opponent,
# which duplicated that team's row in results_tbl.
countries <- unique(c(as.character(df1$Team1), as.character(df1$Team2)))

# One summary per team, stacked into a single table. Iterating over the names
# with lapply() also copes with an empty `countries`, where 1:length() would
# have produced c(1, 0), and avoids growing the table inside a loop.
results_tbl <- dplyr::bind_rows(lapply(countries, tableresults, df = df1))
library(tidyverse)

# Match-level input: one row per match with both teams and the winner.
a <- c(
  "New Zealand", "Afghanistan", "Afghanistan",
  "New Zealand", "Afghanistan", "Australia"
)
b <- c("Sri Lanka", "Zimbabwe", "Zimbabwe", "Sri Lanka", "Zimbabwe", "India")
d <- c(
  "no result", "Zimbabwe", "Zimbabwe",
  "New Zealand", "Afghanistan", "Australia"
)

df1 <- data.frame(Team1 = a, Team2 = b, Winner = d, stringsAsFactors = FALSE)

# Per-country record, returned as an ungrouped tibble with integer counts.
# Each outcome is tallied directly, so Win, Loss and Draw always exist even
# when no match in the data had that outcome. Spreading a Result column only
# created columns for outcomes that occurred, which broke Win + Loss + Draw.
df1 %>%
  # One row per (match, participating team).
  pivot_longer(c(Team1, Team2), names_to = "Team", values_to = "Country") %>%
  group_by(Country) %>%
  summarise(
    Match = n(),
    Win = sum(Winner == Country),
    Loss = sum(Winner != Country & Winner != "no result"),
    Draw = sum(Winner == "no result")
  )


# A tibble: 6 x 5
# Groups:   Country [6]
    Country     Match   Win  Loss  Draw
    <chr>       <dbl>  <dbl> <dbl> <dbl>
1 Afghanistan     3     1     2     0
2   Australia     1     1     0     0
3       India     1     0     1     0
4 New Zealand     2     1     0     1
5   Sri Lanka     2     0     1     1
6    Zimbabwe     3     2     1     0

你尝试过什么方法?