在 R 中按多个变量将数据从宽格式重塑为长格式
当前我的数据如下所示:
# Example data in wide format: one row per participant (ID) holding four
# regional brain measurements plus a score and a group indicator.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
wide.df <- read.table(header = TRUE, sep = ",", text = "
ID, left.mid.brain, right.mid.brain, left.lat.brain, right.lat.brain, score, group
100, 18 , 4, 29, 30, 40, 0
101, 19, 7, 33, 40, 29, 0
103, 19, 19, 22, 30, 33, 0
200, 29, 30, 22, 33, 11, 1
233, 100, 33, 22, 44, 55, 1")
ID group left.or.right mid.or.lat brain score
100 0 0 0 29 40 # 0 = left, 0=lat
100 0 1 0 30 40 # 1 = right, 0=lat
100 0 0 1 18 40 # 0 = left, 1 = mid
100 0 1 1 4 40 # 1 = right, 1 = mid
101 0 0 0 33 29 # 0 = left, 0 = lat
.
.
.
.
.
233 1 1 1 33 55 # 1= right, 1= mid
library(tidyverse)
# Wide -> long: stack the four regional measurements into one `brain` column,
# then recover the two 0/1 indicators from the name of the source column.
long.df <- wide.df %>%
  gather(
    key = "region", value = "brain",
    left.mid.brain, right.mid.brain, left.lat.brain, right.lat.brain
  ) %>%
  mutate(
    # 0 when the source column name contains "left", 1 otherwise (right).
    left.or.right = as.numeric(!grepl("left", region)),
    # 0 when the source column name contains "lat", 1 otherwise (mid).
    mid.or.lat = as.numeric(!grepl("lat", region))
  ) %>%
  # Drop the helper name column and put the columns in the requested order.
  select(ID, group, left.or.right, mid.or.lat, brain, score) %>%
  arrange(ID)
ID group left.or.right mid.or.lat brain score
1 100 0 0 1 18 40
2 100 0 1 1 4 40
3 100 0 0 0 29 40
4 100 0 1 0 30 40
5 101 0 0 1 19 29
6 101 0 1 1 7 29
7 101 0 0 0 33 29
8 101 0 1 0 40 29
9 103 0 0 1 19 33
10 103 0 1 1 19 33
其中,left.mid.brain、right.mid.brain、left.lat.brain、right.lat.brain 被转换为因子,但它们的取值仍然保留,每个参与者各占四行。

tidyverse(特别是 dplyr 和 tidyr 包)非常擅长这类操作:
# Example data in wide format: one row per participant (ID) holding four
# regional brain measurements plus a score and a group indicator.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
wide.df <- read.table(header = TRUE, sep = ",", text = "
ID, left.mid.brain, right.mid.brain, left.lat.brain, right.lat.brain, score, group
100, 18 , 4, 29, 30, 40, 0
101, 19, 7, 33, 40, 29, 0
103, 19, 19, 22, 30, 33, 0
200, 29, 30, 22, 33, 11, 1
233, 100, 33, 22, 44, 55, 1")
ID group left.or.right mid.or.lat brain score
100 0 0 0 29 40 # 0 = left, 0=lat
100 0 1 0 30 40 # 1 = right, 0=lat
100 0 0 1 18 40 # 0 = left, 1 = mid
100 0 1 1 4 40 # 1 = right, 1 = mid
101 0 0 0 33 29 # 0 = left, 0 = lat
.
.
.
.
.
233 1 1 1 33 55 # 1= right, 1= mid
library(tidyverse)
# Wide -> long: stack the four regional measurements into one `brain` column,
# then recover the two 0/1 indicators from the name of the source column.
long.df <- wide.df %>%
  gather(
    key = "region", value = "brain",
    left.mid.brain, right.mid.brain, left.lat.brain, right.lat.brain
  ) %>%
  mutate(
    # 0 when the source column name contains "left", 1 otherwise (right).
    left.or.right = as.numeric(!grepl("left", region)),
    # 0 when the source column name contains "lat", 1 otherwise (mid).
    mid.or.lat = as.numeric(!grepl("lat", region))
  ) %>%
  # Drop the helper name column and put the columns in the requested order.
  select(ID, group, left.or.right, mid.or.lat, brain, score) %>%
  arrange(ID)
ID group left.or.right mid.or.lat brain score
1 100 0 0 1 18 40
2 100 0 1 1 4 40
3 100 0 0 0 29 40
4 100 0 1 0 30 40
5 101 0 0 1 19 29
6 101 0 1 1 7 29
7 101 0 0 0 33 29
8 101 0 1 0 40 29
9 103 0 0 1 19 33
10 103 0 1 1 19 33
library(tidyverse)

# Stack the four regional columns into (variable, brain) pairs, derive the
# 0/1 indicators from the source column name, then keep the requested
# columns sorted by participant.
long.df <- wide.df %>%
  gather(variable, brain,
         left.mid.brain, right.mid.brain, left.lat.brain, right.lat.brain) %>%
  mutate(
    left.or.right = ifelse(grepl("left", variable), 0, 1),  # 0 = left, 1 = right
    mid.or.lat = ifelse(grepl("lat", variable), 0, 1)       # 0 = lat, 1 = mid
  ) %>%
  select(ID, group, left.or.right, mid.or.lat, brain, score) %>%
  arrange(ID)
ID group left.or.right mid.or.lat brain score
1 100 0 0 1 18 40
2 100 0 1 1 4 40
3 100 0 0 0 29 40
4 100 0 1 0 30 40
5 101 0 0 1 19 29
6 101 0 1 1 7 29
7 101 0 0 0 33 29
8 101 0 1 0 40 29
9 103 0 0 1 19 33
10 103 0 1 1 19 33
tidyverse(特别是dplyr和tidyr软件包)非常擅长这样的操作:
# Example data in wide format: one row per participant (ID) holding four
# regional brain measurements plus a score and a group indicator.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
wide.df <- read.table(header = TRUE, sep = ",", text = "
ID, left.mid.brain, right.mid.brain, left.lat.brain, right.lat.brain, score, group
100, 18 , 4, 29, 30, 40, 0
101, 19, 7, 33, 40, 29, 0
103, 19, 19, 22, 30, 33, 0
200, 29, 30, 22, 33, 11, 1
233, 100, 33, 22, 44, 55, 1")
ID group left.or.right mid.or.lat brain score
100 0 0 0 29 40 # 0 = left, 0=lat
100 0 1 0 30 40 # 1 = right, 0=lat
100 0 0 1 18 40 # 0 = left, 1 = mid
100 0 1 1 4 40 # 1 = right, 1 = mid
101 0 0 0 33 29 # 0 = left, 0 = lat
.
.
.
.
.
233 1 1 1 33 55 # 1= right, 1= mid
library(tidyverse)
# Wide -> long: stack the four regional measurements into one `brain` column,
# then recover the two 0/1 indicators from the name of the source column.
long.df <- wide.df %>%
  gather(
    key = "region", value = "brain",
    left.mid.brain, right.mid.brain, left.lat.brain, right.lat.brain
  ) %>%
  mutate(
    # 0 when the source column name contains "left", 1 otherwise (right).
    left.or.right = as.numeric(!grepl("left", region)),
    # 0 when the source column name contains "lat", 1 otherwise (mid).
    mid.or.lat = as.numeric(!grepl("lat", region))
  ) %>%
  # Drop the helper name column and put the columns in the requested order.
  select(ID, group, left.or.right, mid.or.lat, brain, score) %>%
  arrange(ID)
ID group left.or.right mid.or.lat brain score
1 100 0 0 1 18 40
2 100 0 1 1 4 40
3 100 0 0 0 29 40
4 100 0 1 0 30 40
5 101 0 0 1 19 29
6 101 0 1 1 7 29
7 101 0 0 0 33 29
8 101 0 1 0 40 29
9 103 0 0 1 19 33
10 103 0 1 1 19 33
library(tidyverse)

# Stack the four regional columns into (variable, brain) pairs, derive the
# 0/1 indicators from the source column name, then keep the requested
# columns sorted by participant.
long.df <- wide.df %>%
  gather(variable, brain,
         left.mid.brain, right.mid.brain, left.lat.brain, right.lat.brain) %>%
  mutate(
    left.or.right = ifelse(grepl("left", variable), 0, 1),  # 0 = left, 1 = right
    mid.or.lat = ifelse(grepl("lat", variable), 0, 1)       # 0 = lat, 1 = mid
  ) %>%
  select(ID, group, left.or.right, mid.or.lat, brain, score) %>%
  arrange(ID)
ID group left.or.right mid.or.lat brain score
1 100 0 0 1 18 40
2 100 0 1 1 4 40
3 100 0 0 0 29 40
4 100 0 1 0 30 40
5 101 0 0 1 19 29
6 101 0 1 1 7 29
7 101 0 0 0 33 29
8 101 0 1 0 40 29
9 103 0 0 1 19 33
10 103 0 1 1 19 33
另一种基于 dplyr/tidyr 的方法,应该可以很好地扩展。创建长格式数据后,会有一列的值类似于 "right.mid.brain",您希望把它拆分为 "right" 和 "mid"——使用 tidyr::separate 很容易做到:按 "\\."(转义后的点号)拆分,可以避免过多的硬编码。这样会多出一个虚拟(dummy)列,稍后我会把它删除。
此时,您将看到:
library(dplyr)
library(tidyr)

# 0 = left, 0 = lat

# Reshape to long form, then split names such as "right.mid.brain" on the
# literal dot into side / position / a leftover "dummy" piece.
wide.df %>%
  gather(key, value = brain, -ID, -score, -group) %>%
  separate(
    key,
    into = c("left.or.right", "mid.or.lat", "dummy"),
    sep = "\\."  # escaped: an unescaped "." is a regex wildcard
  ) %>%
  head()
#>    ID score group left.or.right mid.or.lat dummy brain
#> 1 100    40     0          left        mid brain    18
#> 2 101    29     0          left        mid brain    19
#> 3 103    33     0          left        mid brain    19
#> 4 200    11     1          left        mid brain    29
#> 5 233    55     1          left        mid brain   100
#> 6 100    40     0         right        mid brain     4
如果需要进行更复杂的重新编码,可以使用一些 forcats 函数来重新编码因子水平。在这种情况下,只需根据 left.or.right == "right" 等条件转换列就足够简单了:条件为 TRUE 时转换为 1,为 FALSE(即为 left)时转换为 0。然后按所需顺序选择列。
# Same reshape as the preview, but the side / position labels are recoded
# to 0/1 and the leftover "dummy" column is dropped by select().
long <- wide.df %>%
  gather(key, value = brain, -ID, -score, -group) %>%
  separate(
    key,
    into = c("left.or.right", "mid.or.lat", "dummy"),
    sep = "\\."  # escaped: an unescaped "." is a regex wildcard
  ) %>%
  mutate(
    left.or.right = as.numeric(left.or.right == "right"),  # right = 1, left = 0
    mid.or.lat = as.numeric(mid.or.lat == "mid")           # mid = 1, lat = 0
  ) %>%
  select(ID, group, left.or.right, mid.or.lat, brain, score) %>%
  arrange(ID)

head(long)
#>    ID group left.or.right mid.or.lat brain score
#> 1 100 0 0 1 18 40
#> 2 100 0 1 1 4 40
#> 3 100 0 0 0 29 40
#> 4 100 0 1 0 30 40
#> 5 101 0 0 1 19 29
#> 6 101 0 1 1 7 29
另一种基于 dplyr/tidyr 的方法,应该可以很好地扩展。创建长格式数据后,会有一列的值类似于 "right.mid.brain",您希望把它拆分为 "right" 和 "mid"——使用 tidyr::separate 很容易做到:按 "\\."(转义后的点号)拆分,可以避免过多的硬编码。这样会多出一个虚拟(dummy)列,稍后我会把它删除。
此时,您将看到:
library(dplyr)
library(tidyr)

# 0 = left, 0 = lat

# Reshape to long form, then split names such as "right.mid.brain" on the
# literal dot into side / position / a leftover "dummy" piece.
wide.df %>%
  gather(key, value = brain, -ID, -score, -group) %>%
  separate(
    key,
    into = c("left.or.right", "mid.or.lat", "dummy"),
    sep = "\\."  # escaped: an unescaped "." is a regex wildcard
  ) %>%
  head()
#>    ID score group left.or.right mid.or.lat dummy brain
#> 1 100    40     0          left        mid brain    18
#> 2 101    29     0          left        mid brain    19
#> 3 103    33     0          left        mid brain    19
#> 4 200    11     1          left        mid brain    29
#> 5 233    55     1          left        mid brain   100
#> 6 100    40     0         right        mid brain     4
如果需要进行更复杂的重新编码,可以使用一些 forcats 函数来重新编码因子水平。在这种情况下,只需根据 left.or.right == "right" 等条件转换列就足够简单了