R 中的条件合并
标签:r, dataframe, dplyr
大家好,我需要帮助:想按条件合并两个数据框中的信息。以下是两个数据框:
df1
Groups Species sub_group Name
1 G1 B s1 KO
2 G1 B s1 KO
3 G1 B s1 KO
4 G1 B s1 AL
5 G1 B s1 AL
6 G1 B s1 AL
7 G2 B s1 KO
8 G2 B s1 KO
9 G3 A s2 ZIP
10 G3 A s2 ZIP
11 G3 A s3 ZIP
12 G4 C s4 LOP
13 G4 C s4 AKA
structure(list(Groups = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 3L, 3L, 3L, 4L, 4L), .Label = c("G1", "G2", "G3", "G4"), class = "factor"),
Species = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 3L, 3L), .Label = c("A", "B", "C"), class = "factor"),
sub_group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 3L, 4L, 4L), .Label = c("s1", "s2", "s3", "s4"), class = "factor"),
Name = structure(c(3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 5L, 5L,
5L, 4L, 1L), .Label = c("AKA", "AL", "KO", "LOP", "ZIP"), class = "factor")), class = "data.frame", row.names = c(NA,
-13L))
df2
Group Species sub_group Value
1 G1 B s1 BLOC1
2 G3 A s2 BLOC2
3 G4 C s4 BLOC3
structure(list(Group = structure(1:3, .Label = c("G1", "G3",
"G4"), class = "factor"), Species = structure(c(2L, 1L, 3L), .Label = c("A",
"B", "C"), class = "factor"), sub_group = structure(1:3, .Label = c("s1",
"s2", "s4"), class = "factor"), Value = structure(1:3, .Label = c("BLOC1",
"BLOC2", "BLOC3"), class = "factor")), class = "data.frame", row.names = c(NA,
-3L))
我的想法是:当 Groups、Species、sub_group 三列的取值都相同,并且在同一分组(Groups、Species、sub_group)内 df1$Name 存在重复值时,我想在 df1 中新增一列 df1$New_Col,其值取自 df2$Value(即 "BLOCn")。
如果我们遵循这些规则,我会得到:
Groups Species sub_group Name New_Col
1 G1 B s1 KO BLOC1
2 G1 B s1 KO BLOC1
3 G1 B s1 KO BLOC1
4 G1 B s1 AL BLOC1
5 G1 B s1 AL BLOC1
6 G1 B s1 AL BLOC1
7 G2 B s1 KO
8 G2 B s1 KO
9 G3 A s2 ZIP BLOC2
10 G3 A s2 ZIP BLOC2
11 G3 A s3 ZIP
12 G4 C s4 LOP
13 G4 C s4 AKA
例如,在 G4、C、s4 这一组中,df1$Name 没有重复值,所以第 12、13 行的 New_Col 为空。
有人有什么想法吗?谢谢你的帮助和时间
以下是dput数据
df1
Groups Species sub_group Name
1 G1 B s1 KO
2 G1 B s1 KO
3 G1 B s1 KO
4 G1 B s1 AL
5 G1 B s1 AL
6 G1 B s1 AL
7 G2 B s1 KO
8 G2 B s1 KO
9 G3 A s2 ZIP
10 G3 A s2 ZIP
11 G3 A s3 ZIP
12 G4 C s4 LOP
13 G4 C s4 AKA
structure(list(Groups = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 3L, 3L, 3L, 4L, 4L), .Label = c("G1", "G2", "G3", "G4"), class = "factor"),
Species = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 3L, 3L), .Label = c("A", "B", "C"), class = "factor"),
sub_group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 3L, 4L, 4L), .Label = c("s1", "s2", "s3", "s4"), class = "factor"),
Name = structure(c(3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 5L, 5L,
5L, 4L, 1L), .Label = c("AKA", "AL", "KO", "LOP", "ZIP"), class = "factor")), class = "data.frame", row.names = c(NA,
-13L))
df2
Group Species sub_group Value
1 G1 B s1 BLOC1
2 G3 A s2 BLOC2
3 G4 C s4 BLOC3
structure(list(Group = structure(1:3, .Label = c("G1", "G3",
"G4"), class = "factor"), Species = structure(c(2L, 1L, 3L), .Label = c("A",
"B", "C"), class = "factor"), sub_group = structure(1:3, .Label = c("s1",
"s2", "s4"), class = "factor"), Value = structure(1:3, .Label = c("BLOC1",
"BLOC2", "BLOC3"), class = "factor")), class = "data.frame", row.names = c(NA,
-3L))
下面是一种使用 join 的做法:先用 left_join 连接两个数据框,然后用 replace 把新建列中每个分组只有一行的那些值替换为 NA。
library(dplyr)
# Attach df2's Value to each df1 row by matching Groups/Group, Species and
# sub_group; df1 rows with no match in df2 receive NA from the left join.
df1 %>%
  left_join(df2, by = c("Groups" = "Group", "Species", "sub_group")) %>%
  # Group down to Name so that n() counts how often a Name occurs within its
  # (Groups, Species, sub_group) combination.
  group_by(Groups, Species, sub_group, Name) %>%
  # A Name occurring only once in its group is blanked to NA; the scalar
  # condition n() == 1 is recycled over the whole group by replace().
  mutate(New_Col = replace(Value, n() == 1, NA_character_)) %>%
  ungroup() %>%
  # Drop the joined helper column, leaving New_Col as the last column.
  select(-Value)
输出:
# A tibble: 13 x 5
# Groups Species sub_group Name New_Col
# <fct> <fct> <fct> <fct> <fct>
# 1 G1 B s1 KO BLOC1
# 2 G1 B s1 KO BLOC1
# 3 G1 B s1 KO BLOC1
# 4 G1 B s1 AL BLOC1
# 5 G1 B s1 AL BLOC1
# 6 G1 B s1 AL BLOC1
# 7 G2 B s1 KO <NA>
# 8 G2 B s1 KO <NA>
# 9 G3 A s2 ZIP BLOC2
#10 G3 A s2 ZIP BLOC2
#11 G3 A s3 ZIP <NA>
#12 G4 C s4 LOP <NA>
#13 G4 C s4 AKA <NA>
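# A tibble: 13 x 5
# Groups Species sub_group Name New_Col
# <fct> <fct> <fct> <fct> <fct>
#1 G1 B s1 KO BLOC1
#2 G1 B s1 KO BLOC1
#3 G1 B s1 KO BLOC1
#4 G1 B s1 AL BLOC1
#5 G1 B s1 AL BLOC1
#6 G1 B s1 AL BLOC1
#7 G2 B s1 KO <NA>
#8 G2 B s1 KO <NA>
#9 G3 A s2 ZIP BLOC2
#10 G3 A s2 ZIP BLOC2
#11 G3 A s3 ZIP <NA>
#12 G4 C s4 LOP <NA>
#13 G4 C s4 AKA <NA>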
如果再增加第 14 行 G4、C、s4、AKA,输出应该是什么?因为这样该分组中就有两个 Name 值:一个有重复,另一个没有重复。