在 R 中按某一列匹配两个数据框,并对匹配行的另一列做减法
我正在努力做到以下几点:按一列匹配两个数据帧,并从R中另一列中匹配的行中减去,r,R,我正在努力做到以下几点: 将R数据帧拆分为控件和rest(示例) 匹配一列控件和示例数据帧(单词匹配) 对于该列中的每个匹配项,减去另一列中的样本控制值 打印输出数据框中的所有匹配行和非匹配行 我试过: # input My.Data <- structure(list(V1 = structure(1:9, .Label = c("a1", "a2", "a3", "a
# input
# Example data frame with nine rows: V1 holds the row label ("a*" rows and
# "control*" rows), V2 is the key the two groups are matched on, V3 is a
# second label column and V4 is the integer value to be subtracted.
# Factor levels are spelled out explicitly so they do not depend on sorting.
My.Data <- data.frame(
  V1 = factor(
    c("a1", "a2", "a3", "a4", "a5",
      "control1", "control2", "control3", "control4"),
    levels = c("a1", "a2", "a3", "a4", "a5",
               "control1", "control2", "control3", "control4")
  ),
  V2 = factor(
    c("otu1", "otu1", "otu2", "otu3", "otu6",
      "otu1", "otu2", "otu3", "otu4"),
    levels = c("otu1", "otu2", "otu3", "otu4", "otu6")
  ),
  V3 = factor(
    c("xx", "xxx", "xy", "zz", "z44", "w", "ee", "tt", "yy"),
    levels = c("ee", "tt", "w", "xx", "xxx", "xy", "yy", "z44", "zz")
  ),
  V4 = c(44L, 52L, 11L, 22L, 91L, 4L, 34L, 33L, 11L)
)
# split groups
# Rows whose V1 label starts with "control" form the control set; every other
# row is a sample. The pattern is "^control" rather than "^control*": the "*"
# quantified only the final "l" (i.e. "contro" followed by zero or more "l"),
# so labels such as "controX" would have been treated as controls too.
# A single logical mask is used for both subsets so they are complementary
# by construction.
# NOTE: `sample` masks base::sample() in this session; the name is kept because
# the snippets further down refer to it.
is_control <- grepl("^control", My.Data$V1)
control <- My.Data[is_control, ]
sample <- My.Data[!is_control, ]
# match V2 in control and samples (example match: otu1 with otu1..)
?
# Whenever match found in V2 (multiple match is possible), subtract sample-control values in V4
?
# print all matched (and non-matched) rows in a dataframe
谢谢。
我们可以按 `V2` 列连接这两个数据集,再从样本的 `V4` 列中减去对照(control)的 `V4` 值:
library(data.table)
# Convert `sample` to a data.table in place, then run an update join:
# for every `control` row, the `sample` rows with the same V2 get the control's
# V4 (available as `i.V4`) subtracted from their own V4, by reference.
# Sample rows without a matching control are left unchanged.
setDT(sample)
sample[control, on = "V2", V4 := V4 - i.V4]
sample
# V1 V2 V3 V4
#1: a1 otu1 xx 40
#2: a2 otu1 xxx 48
#3: a3 otu2 xy -23
#4: a4 otu3 zz -11
#5: a5 otu6 z44 91
如果还想把 `control` 中没有匹配到任何样本的行追加到结果中:
# Anti-join: keep only the control rows whose V2 does not occur in `sample`,
# then stack them underneath the sample rows.
setDT(control)
unmatched_controls <- control[!sample, on = "V2"]
rbind(sample, unmatched_controls)
# V1 V2 V3 V4
#1: a1 otu1 xx 40
#2: a2 otu1 xxx 48
#3: a3 otu2 xy -23
#4: a4 otu3 zz -11
#5: a5 otu6 z44 91
#6: control4 otu4 yy 11
在 tidyverse 中,我们可以使用 `left_join`,再配合 `bind_rows` 和 `anti_join`:
library(dplyr)
# Lookup table holding only the join key and the control value.
control_values <- select(control, V2, V4)
# Attach the control value to every sample row (V4.x = sample, V4.y = control),
# subtract it where a control exists and otherwise keep the sample value, then
# append the control rows whose V2 has no sample at all.
sample %>%
  left_join(control_values, by = "V2") %>%
  transmute(V1, V2, V3, V4 = coalesce(V4.x - V4.y, V4.x)) %>%
  bind_rows(anti_join(control, sample, by = "V2"))
一个
dplyr
选项可以是:
# Subtract each V2 group's control V4 from that group's sample rows, then keep
# every sample row plus any control row whose V2 has no sample.
# Control and sample rows are flagged explicitly instead of inferring "has a
# control" from the group size: a group with two or more samples and no
# control no longer makes mutate() fail, and several controls for one V2 are
# reported as an error rather than being recycled silently.
My.Data %>%
  mutate(is_control = grepl("^control", V1)) %>%
  group_by(V2) %>%
  mutate(
    n_control = sum(is_control),
    has_sample = !all(is_control),
    V4 = if (!has_sample[1L] || n_control[1L] == 0L) {
      # Nothing to subtract: control-only group or group without a control.
      V4
    } else if (n_control[1L] == 1L) {
      # The control row itself becomes 0 here but is dropped by filter() below.
      V4 - V4[is_control]
    } else {
      stop(
        "several control rows share V2 = ", as.character(V2[1L]),
        call. = FALSE
      )
    }
  ) %>%
  ungroup() %>%
  # Drop controls that were used for a subtraction; keep unmatched controls.
  filter(!(is_control & has_sample)) %>%
  select(-is_control, -n_control, -has_sample)
V1 V2 V3 V4
<fct> <fct> <fct> <int>
1 a1 otu1 xx 40
2 a2 otu1 xxx 48
3 a3 otu2 xy -23
4 a4 otu3 zz -11
5 a5 otu6 z44 91
6 control4 otu4 yy 11
# Restored from a machine-translated copy in which the dplyr verbs had been
# translated, the quotes turned into curly quotes and `==` into `=`, so the
# snippet was not valid R.
# Subtract each V2 group's control V4 from that group's sample rows, then keep
# every sample row plus any control row whose V2 has no sample.
# Control and sample rows are flagged explicitly instead of inferring "has a
# control" from the group size, so groups with several samples and no control
# are handled, and several controls for one V2 are reported as an error.
My.Data %>%
  mutate(is_control = grepl("^control", V1)) %>%
  group_by(V2) %>%
  mutate(
    n_control = sum(is_control),
    has_sample = !all(is_control),
    V4 = if (!has_sample[1L] || n_control[1L] == 0L) {
      # Nothing to subtract: control-only group or group without a control.
      V4
    } else if (n_control[1L] == 1L) {
      # The control row itself becomes 0 here but is dropped by filter() below.
      V4 - V4[is_control]
    } else {
      stop(
        "several control rows share V2 = ", as.character(V2[1L]),
        call. = FALSE
      )
    }
  ) %>%
  ungroup() %>%
  # Drop controls that were used for a subtraction; keep unmatched controls.
  filter(!(is_control & has_sample)) %>%
  select(-is_control, -n_control, -has_sample)
V1 V2 V3 V4
1 a1 otu1 xx 40
2 a2 otu1 xxx 48
3 a3 otu2 xy -23
4 a4 otu3 zz -11
5 a5 otu6 z44 91
6 control4 otu4 yy 11
# Subtract each V2 group's control V4 from that group's sample rows, then keep
# every sample row plus any control row whose V2 has no sample.
# Control and sample rows are flagged explicitly instead of inferring "has a
# control" from the group size: a group with two or more samples and no
# control no longer makes mutate() fail, and several controls for one V2 are
# reported as an error rather than being recycled silently.
My.Data %>%
  mutate(is_control = grepl("^control", V1)) %>%
  group_by(V2) %>%
  mutate(
    n_control = sum(is_control),
    has_sample = !all(is_control),
    V4 = if (!has_sample[1L] || n_control[1L] == 0L) {
      # Nothing to subtract: control-only group or group without a control.
      V4
    } else if (n_control[1L] == 1L) {
      # The control row itself becomes 0 here but is dropped by filter() below.
      V4 - V4[is_control]
    } else {
      stop(
        "several control rows share V2 = ", as.character(V2[1L]),
        call. = FALSE
      )
    }
  ) %>%
  ungroup() %>%
  # Drop controls that were used for a subtraction; keep unmatched controls.
  filter(!(is_control & has_sample)) %>%
  select(-is_control, -n_control, -has_sample)
V1 V2 V3 V4
<fct> <fct> <fct> <int>
1 a1 otu1 xx 40
2 a2 otu1 xxx 48
3 a3 otu2 xy -23
4 a4 otu3 zz -11
5 a5 otu6 z44 91
6 control4 otu4 yy 11