使用tidyverse根据分类列计算差异
标签:r, dplyr, tidyverse
我有以下数据框:
library(tidyverse)

# Example data: two variables ("a", "b"), each measured once in the phases
# "pre", "post1" and "post2". `phase` is a factor whose levels follow that
# order, so sorting by it yields pre -> post1 -> post2 within each variable.
df <- arrange(
  data.frame(
    vars = rep(c("a", "b"), times = 3),
    value = c(10, 12, 15, 19, 22, 23),
    phase = rep(
      factor(c("pre", "post1", "post2"), levels = c("pre", "post1", "post2")),
      times = 2
    )
  ),
  vars, phase
)
您可以使用 tidyr 中的 `spread` 和 `gather`:先把 phase 的各个取值展开成列,计算出差值之后,再转换回长格式:
library(dplyr)
library(tidyr)

# Expects `df` (columns `vars`, `value`, `phase`) to already exist in the
# session. The result is printed, not assigned.
df %>%
  # Wide layout: one column per phase level, one row per `vars`.
  spread(key = phase, value = value) %>%
  # Pairwise differences between the phase columns.
  mutate(
    `post1 - pre` = post1 - pre,
    `post2 - post1` = post2 - post1,
    `post2 - pre` = post2 - pre
  ) %>%
  # Keep only `vars` and the three difference columns.
  select(-c(pre, post1, post2)) %>%
  # Back to long format: difference label in `x`, its value in `diff`.
  gather(key = "x", value = "diff", -vars)
您可以使用 tidyr 中的 `spread` 和 `gather`:先把 phase 的各个取值展开成列,计算出差值之后,再转换回长格式:
library(dplyr)
library(tidyr)

# Expects `df` (columns `vars`, `value`, `phase`) to already exist in the
# session. The result is printed, not assigned.
df %>%
  # Wide layout: one column per phase level, one row per `vars`.
  spread(key = phase, value = value) %>%
  # Pairwise differences between the phase columns.
  mutate(
    `post1 - pre` = post1 - pre,
    `post2 - post1` = post2 - post1,
    `post2 - pre` = post2 - pre
  ) %>%
  # Keep only `vars` and the three difference columns.
  select(-c(pre, post1, post2)) %>%
  # Back to long format: difference label in `x`, its value in `diff`.
  gather(key = "x", value = "diff", -vars)
下面是一种更自动化的方法:只需通过因子水平的顺序指定相减的方向,就可以得到所有需要的两两组合的差值:
library(tidyverse)

# example dataset
df <- data.frame(
  vars = rep(letters[1:2], 3),
  value = c(10, 12, 15, 19, 22, 23),
  phase = rep(
    factor(c("pre", "post1", "post2"), levels = c("pre", "post1", "post2")),
    2
  )
) %>%
  arrange(vars, phase)

# Set the levels in the order the differences should be taken: for every pair
# of levels, the earlier level is the minuend and the later one the subtrahend.
df$phase <- factor(df$phase, levels = c("post2", "post1", "pre"))

# Build every (X1, X2) pair of phases, cross it with every `vars` value, and
# look up value(X1) - value(X2) for each row.
# Assumes each vars/phase combination occurs exactly once in `df`; otherwise
# map_dbl() stops with an error instead of silently returning several values.
diff_table <- data.frame(
  t(combn(as.character(sort(unique(df$phase))), 2)),
  stringsAsFactors = FALSE
) %>%
  mutate(vars = list(unique(df$vars))) %>% # attach all vars to every pair
  unnest(vars) %>% # one row per (pair, vars)
  group_by(id = row_number()) %>% # treat each row on its own
  nest() %>%
  mutate(
    diffs = map_dbl(
      data,
      ~ df$value[df$vars == .x$vars & df$phase == .x$X1] -
        df$value[df$vars == .x$vars & df$phase == .x$X2]
    ),
    x = map_chr(data, ~ paste0(c(.x$X1, .x$X2), collapse = " - "))
  ) %>%
  unnest(data) %>%
  # nest() keeps the `id` grouping on tidyr >= 1.0; without ungroup() the
  # select() below would silently re-add `id` as a grouping column.
  ungroup() %>%
  select(vars, x, diffs) # keep relevant columns

diff_table
# # A tibble: 6 x 3
# vars x diffs
# <fct> <chr> <dbl>
# 1 a post2 - post1 -7
# 2 b post2 - post1 11
# 3 a post2 - pre 5
# 4 b post2 - pre 4
# 5 a post1 - pre 12
# 6 b post1 - pre -7
# Note: `vars` prints as <fct> on R < 4.0 and as <chr> on R >= 4.0, where
# data.frame() no longer converts strings to factors by default.
下面是一种更自动化的方法:只需通过因子水平的顺序指定相减的方向,就可以得到所有需要的两两组合的差值:
library(tidyverse)

# example dataset
df <- data.frame(
  vars = rep(letters[1:2], 3),
  value = c(10, 12, 15, 19, 22, 23),
  phase = rep(
    factor(c("pre", "post1", "post2"), levels = c("pre", "post1", "post2")),
    2
  )
) %>%
  arrange(vars, phase)

# Set the levels in the order the differences should be taken: for every pair
# of levels, the earlier level is the minuend and the later one the subtrahend.
df$phase <- factor(df$phase, levels = c("post2", "post1", "pre"))

# Build every (X1, X2) pair of phases, cross it with every `vars` value, and
# look up value(X1) - value(X2) for each row.
# Assumes each vars/phase combination occurs exactly once in `df`; otherwise
# map_dbl() stops with an error instead of silently returning several values.
diff_table <- data.frame(
  t(combn(as.character(sort(unique(df$phase))), 2)),
  stringsAsFactors = FALSE
) %>%
  mutate(vars = list(unique(df$vars))) %>% # attach all vars to every pair
  unnest(vars) %>% # one row per (pair, vars)
  group_by(id = row_number()) %>% # treat each row on its own
  nest() %>%
  mutate(
    diffs = map_dbl(
      data,
      ~ df$value[df$vars == .x$vars & df$phase == .x$X1] -
        df$value[df$vars == .x$vars & df$phase == .x$X2]
    ),
    x = map_chr(data, ~ paste0(c(.x$X1, .x$X2), collapse = " - "))
  ) %>%
  unnest(data) %>%
  # nest() keeps the `id` grouping on tidyr >= 1.0; without ungroup() the
  # select() below would silently re-add `id` as a grouping column.
  ungroup() %>%
  select(vars, x, diffs) # keep relevant columns

diff_table
# # A tibble: 6 x 3
# vars x diffs
# <fct> <chr> <dbl>
# 1 a post2 - post1 -7
# 2 b post2 - post1 11
# 3 a post2 - pre 5
# 4 b post2 - pre 4
# 5 a post1 - pre 12
# 6 b post1 - pre -7
# Note: `vars` prints as <fct> on R < 4.0 and as <chr> on R >= 4.0, where
# data.frame() no longer converts strings to factors by default.