R 更改数据帧结构(将两个数据帧更改为一个)
标签:r, join, merge, dplyr, reshape
我有两个 data.frame(data1 和 data2),想把它们合并成一个数据帧。
有谁能给我一个如何实现这一点的提示吗?
您必须先重塑 data2,然后将其与 data1 连接:
library(tidyverse)

# Widen data2 so every data1Id occupies a single row, then attach the result
# to data1. Each measurement column gets a "_<n>" suffix, where n is the
# position of the source row within its data1Id group.
data2 %>%
  select(-id) %>%
  group_by(data1Id) %>%
  mutate(row_idx = row_number()) %>% # per-data1Id running index
  ungroup() %>%
  gather(key = "name", value = "value", -data1Id, -row_idx) %>% # wide -> long
  unite(col = "name", name, row_idx) %>% # e.g. "var3" + 1 -> "var3_1"
  spread(key = name, value = value) %>% # long -> wide, one column per name
  right_join(data1, by = c("data1Id" = "id")) %>% # keep every row of data1
  select(id = data1Id, var1, var2, everything()) # key and data1 columns first
# # A tibble: 3 x 9
# id var1 var2 var3_1 var3_2 var3_3 var4_1 var4_2 var4_3
# <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 x y 0 3 NA 1 6 NA
# 2 2 x y 5 2 5 3 6 2
# 3 3 x y 2 9 8 8 7 5
library(tidyverse)
data2 %>%
select(-id) %>%
group_by(data1Id) %>% # 针对每个 data1Id
mutate(indx = row_number()) %>% # 创建行索引(有助于重塑)
ungroup() %>%
gather(var, value, -data1Id, -indx) %>% # 重塑数据集
unite(var, var, indx) %>% # 将这两列合并,生成重塑所需的新列名
spread(var, value) %>% # 再次重塑
right_join(data1, by = c("data1Id" = "id")) %>% # 连接到 data1
select(id = data1Id, var1, var2, everything()) # 调整列顺序
# # A tibble: 3 x 9
# id var1 var2 var3_1 var3_2 var3_3 var4_1 var4_2 var4_3
# <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 x y 0 3 NA 1 6 NA
# 2 2 x y 5 2 5 3 6 2
# 3 3 x y 2 9 8 8 7 5
这是另一种使用 inner_join 的 tidyverse 方法:
library(tidyverse)

# Join first, then reshape: every non-key column coming from data2 is stacked
# into long form and spread back out with a numeric suffix.
# NOTE: the suffix is numbered per id only, across all stacked measures, so
# the column names do not restart at 1 for each measure (see the printed
# result: var4_3 ... var4_6). The result also stays grouped by id.
data1 %>%
  inner_join(data2, by = c("id" = "data1Id")) %>%
  gather(measure, value, -(1:3)) %>% # keep the first three columns as-is
  filter(measure != "id.y") %>% # drop data2's own id column
  group_by(id) %>%
  mutate(measure = paste(measure, row_number(), sep = "_")) %>%
  spread(measure, value)
# A tibble: 3 x 10
# Groups: id [3]
# id var1 var2 var3_1 var3_2 var3_3 var4_3 var4_4 var4_5 var4_6
# <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 x y 0 3 NA 1 6 NA NA
#2 2 x y 5 2 5 NA 3 6 2
#3 3 x y 2 9 8 NA 8 7 5
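library(tidyverse)
inner_join(data1, data2, by = c("id" = "data1Id")) %>%
gather(key, value, -(1:3)) %>%
filter(key != "id.y") %>%
group_by(id) %>%
mutate(key = paste(key, row_number(), sep = "_")) %>%
spread(key, value)
# A tibble: 3 x 10
# Groups: id [3]
# id var1 var2 var3_1 var3_2 var3_3 var4_3 var4_4 var4_5 var4_6
# <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 x y 0 3 NA 1 6 NA NA
#2 2 x y 5 2 5 NA 3 6 2
#3 3 x y 2 9 8 NA 8 7 5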
上面的方法会打乱列名的后缀(例如得到 var4_3 … var4_6),可以事后手动修改,也可以改用下面的方法(如 @AntoniosK 所建议的),同时按 id 和 key 分组:
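inner_join(data1, data2, by = c("id" = "data1Id")) %>%
gather(key, value, -(1:3)) %>%
filter(key != "id.y") %>%
group_by(id, key) %>%
mutate(key1 = paste(key, row_number(), sep = "_")) %>%
ungroup() %>%
select(-key) %>%
spread(key1, value)
# A tibble: 3 x 9
# id var1 var2 var3_1 var3_2 var3_3 var4_1 var4_2 var4_3
# <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 x y 0 3 NA 1 6 NA
#2 2 x y 5 2 5 3 6 2
#3 3 x y 2 9 8 8 7 5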
我们还可以使用 data.table 的 melt/dcast:
library(data.table)
# Reshape data2 to one row per data1Id and join it onto data1 in a single
# nested expression. Reading from the innermost call outwards:
#   1. setDT(data2)[, id := rowid(data1Id)] replaces data2's `id` column with
#      a running index within each data1Id. Both setDT() and `:=` work by
#      reference, so data2 (and data1, via setDT(data1)) are modified in place.
#   2. melt(...) stacks the remaining columns into variable/value pairs,
#      keeping `id` and `data1Id` as identifiers. `id.var` is matched
#      partially to melt's `id.vars` argument.
#   3. variable := paste(variable, id, sep = "_") builds the wide column
#      names, e.g. "var3_1".
#   4. dcast(..., data1Id ~ variable) spreads those names back into columns.
#   5. The outer [..., on = .(id = data1Id)] joins data1 to the reshaped
#      table, matching data1's `id` to its `data1Id`.
setDT(data1)[dcast(melt(setDT(data2)[, id := rowid(data1Id)],
id.var = c('id', 'data1Id'))[, variable := paste(variable, id,
sep="_")], data1Id ~ variable, value.var = 'value'), on = .(id = data1Id)]
# id var1 var2 var3_1 var3_2 var3_3 var4_1 var4_2 var4_3
#1: 1 x y 0 3 NA 1 6 NA
#2: 2 x y 5 2 5 3 6 2
#3: 3 x y 2 9 8 8 7 5
library(tidyverse)

# Join first, then reshape: every non-key column coming from data2 is stacked
# into long form and spread back out with a numeric suffix.
# NOTE: the suffix is numbered per id only, across all stacked measures, so
# the column names do not restart at 1 for each measure (see the printed
# result: var4_3 ... var4_6). The result also stays grouped by id.
data1 %>%
  inner_join(data2, by = c("id" = "data1Id")) %>%
  gather(measure, value, -(1:3)) %>% # keep the first three columns as-is
  filter(measure != "id.y") %>% # drop data2's own id column
  group_by(id) %>%
  mutate(measure = paste(measure, row_number(), sep = "_")) %>%
  spread(measure, value)
# A tibble: 3 x 10
# Groups: id [3]
# id var1 var2 var3_1 var3_2 var3_3 var4_3 var4_4 var4_5 var4_6
# <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 x y 0 3 NA 1 6 NA NA
#2 2 x y 5 2 5 NA 3 6 2
#3 3 x y 2 9 8 NA 8 7 5
# Join first, then reshape. Numbering the rows within each (id, measure) pair
# makes the suffix restart at 1 for every measure, giving var3_1..var3_3 and
# var4_1..var4_3.
data1 %>%
  inner_join(data2, by = c("id" = "data1Id")) %>%
  gather(measure, value, -(1:3)) %>% # keep the first three columns as-is
  filter(measure != "id.y") %>% # drop data2's own id column
  group_by(id, measure) %>%
  mutate(wide_name = paste(measure, row_number(), sep = "_")) %>%
  ungroup() %>%
  select(-measure) %>% # only the suffixed name is needed for spreading
  spread(wide_name, value)
# A tibble: 3 x 9
# id var1 var2 var3_1 var3_2 var3_3 var4_1 var4_2 var4_3
# <dbl> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 x y 0 3 NA 1 6 NA
#2 2 x y 5 2 5 3 6 2
#3 3 x y 2 9 8 8 7 5
library(data.table)
# Reshape data2 to one row per data1Id and join it onto data1 in a single
# nested expression. Reading from the innermost call outwards:
#   1. setDT(data2)[, id := rowid(data1Id)] replaces data2's `id` column with
#      a running index within each data1Id. Both setDT() and `:=` work by
#      reference, so data2 (and data1, via setDT(data1)) are modified in place.
#   2. melt(...) stacks the remaining columns into variable/value pairs,
#      keeping `id` and `data1Id` as identifiers. `id.var` is matched
#      partially to melt's `id.vars` argument.
#   3. variable := paste(variable, id, sep = "_") builds the wide column
#      names, e.g. "var3_1".
#   4. dcast(..., data1Id ~ variable) spreads those names back into columns.
#   5. The outer [..., on = .(id = data1Id)] joins data1 to the reshaped
#      table, matching data1's `id` to its `data1Id`.
setDT(data1)[dcast(melt(setDT(data2)[, id := rowid(data1Id)],
id.var = c('id', 'data1Id'))[, variable := paste(variable, id,
sep="_")], data1Id ~ variable, value.var = 'value'), on = .(id = data1Id)]
# id var1 var2 var3_1 var3_2 var3_3 var4_1 var4_2 var4_3
#1: 1 x y 0 3 NA 1 6 NA
#2: 2 x y 5 2 5 3 6 2
#3: 3 x y 2 9 8 8 7 5