与r中的条件合并

与r中的条件合并,r,dataframe,dplyr,R,Dataframe,Dplyr,大家好,我需要帮助,以便合并两个数据帧之间的条件信息 以下是2个df: df1 Groups Species sub_group Name 1 G1 B s1 KO 2 G1 B s1 KO 3 G1 B s1 KO 4 G1 B s1 AL 5 G1 B s1 AL 6 G1

大家好,我需要帮助,以便合并两个数据帧之间的条件信息

以下是2个df:

df1

   Groups Species sub_group Name
1      G1       B        s1   KO
2      G1       B        s1   KO
3      G1       B        s1   KO
4      G1       B        s1   AL
5      G1       B        s1   AL
6      G1       B        s1   AL
7      G2       B        s1   KO
8      G2       B        s1   KO
9      G3       A        s2  ZIP
10     G3       A        s2  ZIP
11     G3       A        s3  ZIP
12     G4       C        s4  LOP
13     G4       C        s4  AKA
structure(list(Groups = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 3L, 3L, 3L, 4L, 4L), .Label = c("G1", "G2", "G3", "G4"), class = "factor"), 
    Species = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
    1L, 1L, 3L, 3L), .Label = c("A", "B", "C"), class = "factor"), 
    sub_group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 3L, 4L, 4L), .Label = c("s1", "s2", "s3", "s4"), class = "factor"), 
    Name = structure(c(3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 5L, 5L, 
    5L, 4L, 1L), .Label = c("AKA", "AL", "KO", "LOP", "ZIP"), class = "factor")), class = "data.frame", row.names = c(NA, 
-13L))
df2

 Group Species sub_group Value
1    G1       B        s1 BLOC1
2    G3       A        s2 BLOC2
3    G4       C        s4 BLOC3
structure(list(Group = structure(1:3, .Label = c("G1", "G3", 
"G4"), class = "factor"), Species = structure(c(2L, 1L, 3L), .Label = c("A", 
"B", "C"), class = "factor"), sub_group = structure(1:3, .Label = c("s1", 
"s2", "s4"), class = "factor"), Value = structure(1:3, .Label = c("BLOC1", 
"BLOC2", "BLOC3"), class = "factor")), class = "data.frame", row.names = c(NA, 
-3L))
我的想法是:当 df1 与 df2 的 Groups(df2 中为 Group)、Species 和 sub_group 列都相同时,把 df2$Value 中的 "BLOCN" 值添加到 df1 的新列 df1$New_Col 中;但前提是在该分组(Groups、Species、sub_group)内,df1$Name 存在重复的值。

如果我们遵循这些规则,我会得到:

   Groups Species sub_group Name New_Col
1      G1       B        s1   KO   BLOC1
2      G1       B        s1   KO   BLOC1
3      G1       B        s1   KO   BLOC1
4      G1       B        s1   AL   BLOC1
5      G1       B        s1   AL   BLOC1
6      G1       B        s1   AL   BLOC1
7      G2       B        s1   KO        
8      G2       B        s1   KO        
9      G3       A        s2  ZIP   BLOC2
10     G3       A        s2  ZIP   BLOC2
11     G3       A        s3  ZIP        
12     G4       C        s4  LOP        
13     G4       C        s4  AKA     
例如,在 G4、C、s4 这一组中,df1$Name 中没有重复的值,所以 New_Col 留空。

有人有什么想法吗?谢谢你的帮助和时间

以下是dput数据

df1

   Groups Species sub_group Name
1      G1       B        s1   KO
2      G1       B        s1   KO
3      G1       B        s1   KO
4      G1       B        s1   AL
5      G1       B        s1   AL
6      G1       B        s1   AL
7      G2       B        s1   KO
8      G2       B        s1   KO
9      G3       A        s2  ZIP
10     G3       A        s2  ZIP
11     G3       A        s3  ZIP
12     G4       C        s4  LOP
13     G4       C        s4  AKA
structure(list(Groups = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 3L, 3L, 3L, 4L, 4L), .Label = c("G1", "G2", "G3", "G4"), class = "factor"), 
    Species = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
    1L, 1L, 3L, 3L), .Label = c("A", "B", "C"), class = "factor"), 
    sub_group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 3L, 4L, 4L), .Label = c("s1", "s2", "s3", "s4"), class = "factor"), 
    Name = structure(c(3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 5L, 5L, 
    5L, 4L, 1L), .Label = c("AKA", "AL", "KO", "LOP", "ZIP"), class = "factor")), class = "data.frame", row.names = c(NA, 
-13L))
df2

 Group Species sub_group Value
1    G1       B        s1 BLOC1
2    G3       A        s2 BLOC2
3    G4       C        s4 BLOC3
structure(list(Group = structure(1:3, .Label = c("G1", "G3", 
"G4"), class = "factor"), Species = structure(c(2L, 1L, 3L), .Label = c("A", 
"B", "C"), class = "factor"), sub_group = structure(1:3, .Label = c("s1", 
"s2", "s4"), class = "factor"), Value = structure(1:3, .Label = c("BLOC1", 
"BLOC2", "BLOC3"), class = "factor")), class = "data.frame", row.names = c(NA, 
-3L))

下面是一种使用 join 的方案:先用 left_join 连接两个数据框,然后按 Groups、Species、sub_group、Name 分组,用 replace 把行数为 1 的分组在新建列中的值替换为 NA。

library(dplyr)
# Bring df2's Value onto df1 by matching Groups (called Group in df2),
# Species and sub_group; rows of df1 without a match receive NA from the
# left join.
df1 %>%
  left_join(df2, by = c("Groups" = "Group", "Species", "sub_group")) %>%
  # Count rows per (Groups, Species, sub_group, Name): a count of 1 means the
  # Name is not duplicated, so the joined value is blanked for that group.
  group_by(Groups, Species, sub_group, Name) %>%
  mutate(New_Col = replace(Value, n() == 1, NA_character_)) %>%
  ungroup() %>%
  # Keep only the masked copy of the joined column.
  select(-Value)
-输出

# A tibble: 13 x 5
#   Groups Species sub_group Name  New_Col
#   <fct>  <fct>   <fct>     <fct> <fct>  
# 1 G1     B       s1        KO    BLOC1  
# 2 G1     B       s1        KO    BLOC1  
# 3 G1     B       s1        KO    BLOC1  
# 4 G1     B       s1        AL    BLOC1  
# 5 G1     B       s1        AL    BLOC1  
# 6 G1     B       s1        AL    BLOC1  
# 7 G2     B       s1        KO    <NA>   
# 8 G2     B       s1        KO    <NA>   
# 9 G3     A       s2        ZIP   BLOC2  
#10 G3     A       s2        ZIP   BLOC2  
#11 G3     A       s3        ZIP   <NA>   
#12 G4     C       s4        LOP   <NA>   
#13 G4     C       s4        AKA   <NA>   
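# A tibble: 13 x 5
#   Groups Species sub_group Name  New_Col
#   <fct>  <fct>   <fct>     <fct> <fct>
# 1 G1     B       s1        KO    BLOC1
# 2 G1     B       s1        KO    BLOC1
# 3 G1     B       s1        KO    BLOC1
# 4 G1     B       s1        AL    BLOC1
# 5 G1     B       s1        AL    BLOC1
# 6 G1     B       s1        AL    BLOC1
# 7 G2     B       s1        KO    <NA>
# 8 G2     B       s1        KO    <NA>
# 9 G3     A       s2        ZIP   BLOC2
#10 G3     A       s2        ZIP   BLOC2
#11 G3     A       s3        ZIP   <NA>
#12 G4     C       s4        LOP   <NA>
#13 G4     C       s4        AKA   <NA>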

如果第14行为G4、C、s4、AKA,输出应该是什么?因为现在grp中有两个值,一个有重复项,另一个没有重复项