R 基于行号将列/行从一个数据帧映射到另一个数据帧
我有两个数据帧（标签：r、indexing、rows）：
# df1: ten rows for Name "sub7" / StimulusName "Alpha1" with one PupilLeft
# reading per row, stored with tibble classes (tbl_df, tbl, data.frame).
df1<-structure(list(Name = c("sub7", "sub7", "sub7", "sub7", "sub7",
"sub7", "sub7", "sub7", "sub7", "sub7"), StimulusName = c("Alpha1",
"Alpha1", "Alpha1", "Alpha1", "Alpha1", "Alpha1", "Alpha1", "Alpha1",
"Alpha1", "Alpha1"), PupilLeft = c(10.046, 10.05, 10.062, 10.072,
10.072, 10.056, 10.056, 10.056, 10.066, 10.066)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
# df2: two labelled row numbers (Row_Num 1 = "Onset", Row_Num 3 = "Offset").
# Besides the tibble classes it carries the grouped_df class and grouping
# attributes (vars = "Name", indices, labels, ...).
# NOTE(review): the `labels` attribute holds Name = "Guilty Subject 07" while
# the Name column holds "sub7" -- presumably left over from renaming; confirm.
df2<-structure(list(Name = c("sub7", "sub7"), StimulusName = c("Alpha1",
"Alpha1"), Row_Num = c(1, 3), Label = c("Onset", "Offset")), row.names = c(NA,
-2L), vars = "Name", drop = TRUE, indices = list(0:1), group_sizes = 2L, biggest_group_size = 2L, labels = structure(list(
Name = "Guilty Subject 07"), row.names = c(NA, -1L), class = "data.frame", vars = "Name", drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
用 merge() 合并怎么样？
# Rebuild df2 as a plain data frame before handing it to base merge().
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
df2 <- data.frame(df2, stringsAsFactors = FALSE)
# With no `by`, merge() joins on every column the two frames share (Name and
# StimulusName). Those values are the same in all rows, so each df1 row is
# paired with every df2 row (10 x 2 = 20 rows).
df3 <- merge(df1, df2)
> df3
Name StimulusName PupilLeft Row_Num Label
1 sub7 Alpha1 10.046 1 Onset
2 sub7 Alpha1 10.046 3 Offset
3 sub7 Alpha1 10.050 1 Onset
4 sub7 Alpha1 10.050 3 Offset
5 sub7 Alpha1 10.062 1 Onset
6 sub7 Alpha1 10.062 3 Offset
7 sub7 Alpha1 10.072 1 Onset
8 sub7 Alpha1 10.072 3 Offset
9 sub7 Alpha1 10.072 1 Onset
10 sub7 Alpha1 10.072 3 Offset
11 sub7 Alpha1 10.056 1 Onset
12 sub7 Alpha1 10.056 3 Offset
13 sub7 Alpha1 10.056 1 Onset
14 sub7 Alpha1 10.056 3 Offset
15 sub7 Alpha1 10.056 1 Onset
16 sub7 Alpha1 10.056 3 Offset
17 sub7 Alpha1 10.066 1 Onset
18 sub7 Alpha1 10.066 3 Offset
19 sub7 Alpha1 10.066 1 Onset
20 sub7 Alpha1 10.066 3 Offset
另一个选项是在合并之前先在 df1 中创建 Row_Num 列：
# Tag every df1 row with its position so the position can act as a join key.
df1_indexed <- transform(df1, Row_Num = seq_len(nrow(df1)))
# Left join: keep all df1 rows and attach Label where all three keys match.
join_keys <- c("Name", "StimulusName", "Row_Num")
df_out <- merge(df1_indexed, df2, by = join_keys, all.x = TRUE)
# Show Row_Num only for the rows that df2 lists; blank out the others.
is_mapped <- df_out$Row_Num %in% df2$Row_Num
df_out$Row_Num <- ifelse(is_mapped, df_out$Row_Num, NA)
df_out
# Name StimulusName Row_Num PupilLeft Label
#1 sub7 Alpha1 1 10.046 Onset
#2 sub7 Alpha1 NA 10.050 <NA>
#3 sub7 Alpha1 3 10.062 Offset
#4 sub7 Alpha1 NA 10.072 <NA>
#5 sub7 Alpha1 NA 10.072 <NA>
#6 sub7 Alpha1 NA 10.056 <NA>
#7 sub7 Alpha1 NA 10.056 <NA>
#8 sub7 Alpha1 NA 10.056 <NA>
#9 sub7 Alpha1 NA 10.066 <NA>
#10 sub7 Alpha1 NA 10.066 <NA>
使用 tidyverse：
library(tidyverse)
# Number the rows of df1, left-join the labels from df2, then keep Row_Num
# only on the rows for which df2 supplied one.
df1 %>%
  mutate(Row_Num = row_number()) %>%
  # Explicit keys: the same shared columns an implicit join would pick up.
  left_join(df2, by = c("Name", "StimulusName", "Row_Num")) %>%
  # Take the row numbers to keep from df2 instead of hard-coding c(1, 3).
  mutate(Row_Num = replace(Row_Num, !Row_Num %in% df2$Row_Num, NA))
# A tibble: 10 x 5
# Name StimulusName PupilLeft Row_Num Label
# <chr> <chr> <dbl> <dbl> <chr>
# 1 sub7 Alpha1 10.0 1 Onset
# 2 sub7 Alpha1 10.0 NA <NA>
# 3 sub7 Alpha1 10.1 3 Offset
# 4 sub7 Alpha1 10.1 NA <NA>
# 5 sub7 Alpha1 10.1 NA <NA>
# 6 sub7 Alpha1 10.1 NA <NA>
# 7 sub7 Alpha1 10.1 NA <NA>
# 8 sub7 Alpha1 10.1 NA <NA>
# 9 sub7 Alpha1 10.1 NA <NA>
#10 sub7 Alpha1 10.1 NA <NA>
或者使用 base R 中的 match：
# Match on row positions rather than row.names(df1): row names are character,
# so matching them against the numeric Row_Num goes through string conversion
# and misses values such as 1e5 ("100000" vs "1e+05").
i1 <- match(seq_len(nrow(df1)), df2$Row_Num)
# Copy the mapped columns by name (not by position); rows of df1 without a
# matching Row_Num in df2 receive NA.
mapped_cols <- c("Row_Num", "Label")
df1[mapped_cols] <- lapply(mapped_cols, function(col) df2[[col]][i1])
df1
# A tibble: 10 x 5
# Name StimulusName PupilLeft Row_Num Label
# <chr> <chr> <dbl> <dbl> <chr>
# 1 sub7 Alpha1 10.0 1 Onset
# 2 sub7 Alpha1 10.0 NA <NA>
# 3 sub7 Alpha1 10.1 3 Offset
# 4 sub7 Alpha1 10.1 NA <NA>
# 5 sub7 Alpha1 10.1 NA <NA>
# 6 sub7 Alpha1 10.1 NA <NA>
# 7 sub7 Alpha1 10.1 NA <NA>
# 8 sub7 Alpha1 10.1 NA <NA>
# 9 sub7 Alpha1 10.1 NA <NA>
#10 sub7 Alpha1 10.1 NA <NA>
如果我们只按 Row_Num 连接，那么可以：
# Expose Row_Num as df2's row names so both frames can be joined on row names.
row.names(df2) <- df2$Row_Num
# by = 0 tells merge() to join on row names; all.x = TRUE keeps every df1 row.
merge(x = df1, y = df2, by = 0, all.x = TRUE)
Row.names Name.x StimulusName.x PupilLeft Name.y StimulusName.y Row_Num Label
1 1 sub7 Alpha1 10.046 sub7 Alpha1 1 Onset
2 10 sub7 Alpha1 10.066 <NA> <NA> NA <NA>
3 2 sub7 Alpha1 10.050 <NA> <NA> NA <NA>
4 3 sub7 Alpha1 10.062 sub7 Alpha1 3 Offset
5 4 sub7 Alpha1 10.072 <NA> <NA> NA <NA>
6 5 sub7 Alpha1 10.072 <NA> <NA> NA <NA>
7 6 sub7 Alpha1 10.056 <NA> <NA> NA <NA>
8 7 sub7 Alpha1 10.056 <NA> <NA> NA <NA>
9 8 sub7 Alpha1 10.056 <NA> <NA> NA <NA>
10 9 sub7 Alpha1 10.066 <NA> <NA> NA <NA>
为了让参数不那么隐晦，可以把 by = 0 写成 by = "row.names"。
谢谢你花时间考虑我的请求，@GPURE。但是，我希望用 df2 中 "Row_Num" 列的数值映射到 df1 中相应的行。请看我示例中的最终数据帧，它与你的结果不同。
我喜欢这个想法，它是一个简单的解决方案；不过，我希望只把 df2 中 "Row_Num" 的值显示在 df1 的新列 "Row_Num" 中，以便进行目视检查/确认。谢谢。
这是通过 StimulusName 和 Name 进行连接的，而这并不是 OP 想要的。例如，如果 df2 的 StimulusName 中有 "Alpha2" 这样的值，就会出问题。
我回答时参考的是他的预期输出。
嗨 @akrun，谢谢你的回答。这个解决方案是否依赖于手动把行号写进 dplyr 链？
@Docconcoct 不幸的是，我们必须创建一列，因为如果查看这两个数据集，它们共有的 "Name" 和 "StimulusName" 列的值是重复的。如果按这些列连接，由于连接条件不够具体，会导致行重复。
@akrun 也许我看错了，但看起来 OP 只是想按行号连接，而不是按 Name 和 StimulusName 连接。那样的话，我们会得到类似 Name.y 和 StimulusName.y 的列。
@Docconcoct 你可以看看基于 match 的方法，它可能符合你评论里所说的逻辑。
你是否也按 Name 和 StimulusName 连接，而不仅仅是 Row_Num？
不，我只是想把 "Row_Num" 中的值与 df1 中相应的行号进行匹配。我不介意 "Name" 和 "StimulusName" 是否被保留，但这不是必须的。
# Turn df1's row names into a numeric Row_Num key, join only Row_Num/Label
# from the ungrouped df2, then keep Row_Num only where df2 supplied one.
rownames_to_column(df1, "Row_Num") %>%
  mutate(Row_Num = as.numeric(Row_Num)) %>%
  left_join(
    df2 %>%
      ungroup() %>%
      select(Row_Num, Label),
    by = "Row_Num"
  ) %>%
  # Take the row numbers to keep from df2 instead of hard-coding c(1, 3).
  mutate(Row_Num = replace(Row_Num, !Row_Num %in% df2$Row_Num, NA))
# Match on row positions rather than row.names(df1): row names are character,
# so matching them against the numeric Row_Num goes through string conversion
# and misses values such as 1e5 ("100000" vs "1e+05").
i1 <- match(seq_len(nrow(df1)), df2$Row_Num)
# Copy the mapped columns by name (not by position); rows of df1 without a
# matching Row_Num in df2 receive NA.
mapped_cols <- c("Row_Num", "Label")
df1[mapped_cols] <- lapply(mapped_cols, function(col) df2[[col]][i1])
df1
# A tibble: 10 x 5
# Name StimulusName PupilLeft Row_Num Label
# <chr> <chr> <dbl> <dbl> <chr>
# 1 sub7 Alpha1 10.0 1 Onset
# 2 sub7 Alpha1 10.0 NA <NA>
# 3 sub7 Alpha1 10.1 3 Offset
# 4 sub7 Alpha1 10.1 NA <NA>
# 5 sub7 Alpha1 10.1 NA <NA>
# 6 sub7 Alpha1 10.1 NA <NA>
# 7 sub7 Alpha1 10.1 NA <NA>
# 8 sub7 Alpha1 10.1 NA <NA>
# 9 sub7 Alpha1 10.1 NA <NA>
#10 sub7 Alpha1 10.1 NA <NA>
# Expose Row_Num as df2's row names so both frames can be joined on row names.
row.names(df2) <- df2$Row_Num
# by = 0 tells merge() to join on row names; all.x = TRUE keeps every df1 row.
merge(x = df1, y = df2, by = 0, all.x = TRUE)
Row.names Name.x StimulusName.x PupilLeft Name.y StimulusName.y Row_Num Label
1 1 sub7 Alpha1 10.046 sub7 Alpha1 1 Onset
2 10 sub7 Alpha1 10.066 <NA> <NA> NA <NA>
3 2 sub7 Alpha1 10.050 <NA> <NA> NA <NA>
4 3 sub7 Alpha1 10.062 sub7 Alpha1 3 Offset
5 4 sub7 Alpha1 10.072 <NA> <NA> NA <NA>
6 5 sub7 Alpha1 10.072 <NA> <NA> NA <NA>
7 6 sub7 Alpha1 10.056 <NA> <NA> NA <NA>
8 7 sub7 Alpha1 10.056 <NA> <NA> NA <NA>
9 8 sub7 Alpha1 10.056 <NA> <NA> NA <NA>
10 9 sub7 Alpha1 10.066 <NA> <NA> NA <NA>
# Row-name join with the key spelled out as "row.names" instead of by = 0.
merge(x = df1, y = df2, by = "row.names", all.x = TRUE)