Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/67.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 按多个条件合并两个表_R - Fatal编程技术网

R 按多个条件合并两个表

R 按多个条件合并两个表,r,R,我找不到一个能准确回答我的问题 df1 df2 这是我的数据的一般格式,有多个其他列与合并无关,但需要保留 我想要的是严格合并具有相同“chr”和“position”的行,但还要确保df1中的“effect.exposure”与df2中的“effect.output”或“other”匹配。重要的是,如果“effect.exposure”与“effect.outcome”或“other”不匹配,我希望该行被删除 “chr”和“position”可以组合在一起,使结果数据中的每一列只有一列,但我希望

我找不到一个能准确回答我的问题

df1

df2

这是我的数据的一般格式,有多个其他列与合并无关,但需要保留

我想要的是严格合并具有相同“chr”和“position”的行,但还要确保df1中的“effect.exposure”与df2中的“effect.outcome”或“other”匹配。重要的是,如果“effect.exposure”与“effect.outcome”和“other”都不匹配,我希望该行被删除

“chr”和“position”可以各自合并,使结果数据中它们各只保留一列,但我希望“effect”列和“other”列在最终数据表中保持独立

更新:

找到了解决问题的方法。 我的方法是通过“chr”和“position”合并两个数据帧


new.df

这是旧答案之一的扩展:先执行两次 `merge`,再用 `rbind` 合并每次 merge 的结果。这里的难点在于如何合并列数不同的结果。您可以使用 `tidyr::gather` 和 `tidyr::spread` 来处理这个问题

你的数据

# Example input tables from the question (dot-separated column names).
# stringsAsFactors = FALSE keeps the effect columns as plain character
# vectors on every R version.
df1 <- data.frame(
  chr = c(1L, 2L, 2L, 3L),
  position = c(12345L, 54321L, 6789L, 9876L),
  effect.exposure = c("A", "G", "C", "D"),
  misc = c("a", "b", "c", "d"),
  stringsAsFactors = FALSE
)

df2 <- data.frame(
  chr = c(1L, 2L, 3L, 5L),
  position = c(12345L, 54321L, 12314L, 12321L),
  effect.outcome = c("A", "T", "C", "C"),
  other = c("C", "G", "A", "D"),
  stringsAsFactors = FALSE
)
df1

希望这有帮助

library(dplyr)

# Join df1 and df2 on chr/position, then keep only the rows whose
# effect_exposure equals effect_outcome or other.
# Expects the underscore-named columns effect_exposure, effect_outcome, other.
# as.character() lets the comparison work when the columns are factors with
# different level sets. Filtering directly avoids a temporary 0/1 flag column,
# which would overwrite (and then drop) any existing column named Resp_final.
final_df <- df1 %>%
  inner_join(df2, by = c("chr", "position")) %>%
  filter(
    as.character(effect_exposure) == as.character(effect_outcome) |
      as.character(effect_exposure) == as.character(other)
  )
final_df


可能重复

感谢您的回复。事实上,我已经想出了一个办法来做我想做的事。可能不是最有效/最高效的,但它可用于此目的。我已经编辑了我的原始问题,以便其他人可以看到。

谢谢你的回答。事实证明,我自己已经找到了解决这个问题的方法。如果您有兴趣,请查看我对原帖的编辑。
# Merge the two tables on the shared keys, then keep only the rows whose
# effect.exposure equals effect.outcome or other.
new.df <- merge(df1, df2, by = c("chr", "position"))
# which() turns the logical mask into row positions and skips NA comparisons;
# indexing with a logical vector that contains NA would insert all-NA rows
# instead of dropping the non-matching ones.
final.df <- new.df[which(
  new.df$effect.exposure == new.df$effect.outcome |
    new.df$effect.exposure == new.df$other
), ]
# Example input tables for the tidyr-based answer (dot-separated column
# names). stringsAsFactors = FALSE keeps the effect columns as character.
df1 <- data.frame(
  chr = c(1L, 2L, 2L, 3L),
  position = c(12345L, 54321L, 6789L, 9876L),
  effect.exposure = c("A", "G", "C", "D"),
  misc = c("a", "b", "c", "d"),
  stringsAsFactors = FALSE
)

df2 <- data.frame(
  chr = c(1L, 2L, 3L, 5L),
  position = c(12345L, 54321L, 12314L, 12321L),
  effect.outcome = c("A", "T", "C", "C"),
  other = c("C", "G", "A", "D"),
  stringsAsFactors = FALSE
)
library(dplyr)
library(tidyr)

# Rows where df1's effect.exposure equals df2's effect.outcome.
# The join uses effect.outcome as a key, so only misc/other are left to
# reshape. pivot_longer() replaces the superseded gather(); values are coerced
# to character so columns of different types can share one value column.
result1 <- inner_join(
  df1, df2,
  by = c("chr", "position", "effect.exposure" = "effect.outcome")
) %>%
  pivot_longer(
    cols = -c(chr, position, effect.exposure),
    names_to = "key",
    values_to = "value",
    values_transform = list(value = as.character)
  ) %>%
  as.data.frame()

#   chr position effect.exposure   key value
# 1   1    12345               A  misc     a
# 2   1    12345               A other     C

# Rows where df1's effect.exposure equals df2's other.
result2 <- inner_join(
  df1, df2,
  by = c("chr", "position", "effect.exposure" = "other")
) %>%
  pivot_longer(
    cols = -c(chr, position, effect.exposure),
    names_to = "key",
    values_to = "value",
    values_transform = list(value = as.character)
  ) %>%
  as.data.frame()

#   chr position effect.exposure            key value
# 1   2    54321               G           misc     b
# 2   2    54321               G effect.outcome     T

# Stack both long results and widen again; keys absent from one result become
# NA. names_sort = TRUE and arrange() reproduce spread()'s alphabetical column
# order and sorted row order; as.data.frame() keeps a plain data.frame.
ans <- bind_rows(result1, result2) %>%
  pivot_wider(names_from = key, values_from = value, names_sort = TRUE) %>%
  arrange(chr, position, effect.exposure) %>%
  as.data.frame()

#   chr position effect.exposure effect.outcome misc other
# 1   1    12345               A           <NA>    a     C
# 2   2    54321               G              T    b  <NA>
library(dplyr)

# Join df1 and df2 on chr/position, then keep only the rows whose
# effect_exposure equals effect_outcome or other.
# Expects the underscore-named columns effect_exposure, effect_outcome, other.
# as.character() lets the comparison work when the columns are factors with
# different level sets. Filtering directly avoids a temporary 0/1 flag column,
# which would overwrite (and then drop) any existing column named Resp_final.
final_df <- df1 %>%
  inner_join(df2, by = c("chr", "position")) %>%
  filter(
    as.character(effect_exposure) == as.character(effect_outcome) |
      as.character(effect_exposure) == as.character(other)
  )
final_df
  chr position effect_exposure col4 effect_outcome other col5
1   1    12345               A Asdf              A     C 1234
2   2    54321               G  Abc              T     G  987
# Sample data (underscore-named columns, factor-typed text columns).
# Written as runnable assignments rather than a console transcript of
# dput(); the values and factor levels are the same as in the dput() output.
df1 <- data.frame(
  chr = c(1L, 2L, 2L, 3L),
  position = c(12345L, 54321L, 6789L, 9876L),
  effect_exposure = factor(
    c("A", "G", "C", "D"),
    levels = c("A", "C", "D", "G")
  ),
  col4 = factor(
    c("Asdf", "Abc", "xyz", "qwerty"),
    levels = c("Abc", "Asdf", "qwerty", "xyz")
  )
)

df2 <- data.frame(
  chr = c(1L, 2L, 3L, 5L),
  position = c(12345L, 54321L, 12314L, 12321L),
  effect_outcome = factor(
    c("A", "T", "C", "C"),
    levels = c("A", "C", "T")
  ),
  other = factor(
    c("C", "G", "A", "D"),
    levels = c("A", "C", "D", "G")
  ),
  col5 = c(1234L, 987L, 675L, 3456L)
)