R 根据另一个df的值,为一个df中的多个列填写NA值

R 根据另一个df的值,为一个df中的多个列填写NA值,r,dplyr,R,Dplyr,我有两个dfs。在一个数据帧中有两个变量的NA值,我想用另一个df中的值替换它们。以下是我的示例数据: df1 id Sex Race Income 1 M White 1 2 NA Hispanic 2 3 NA NA 3 我希望数据看起来像这样,性别和种族的df1的NA值由df2的值填充 df2 id Sex Race Income 1 M White

我有两个dfs。在一个数据帧中有两个变量的NA值,我想用另一个df中的值替换它们。以下是我的示例数据:

df1
id    Sex    Race     Income
1     M      White      1
2     NA     Hispanic   2
3     NA     NA         3
我希望数据看起来像这样,性别和种族的df1的NA值由df2的值填充

df2
id    Sex    Race      Income
1     M      White       1
2     F      Hispanic    2
3     M      White       3
4     F      Black       NA
有人能帮忙吗?

我们可以在这里加入

library(data.table)
setDT(df2)[df1, Income := Income, on = .(id)]
-输出

df2
#   id Sex     Race Income
#1:  1   M    White      1
#2:  2   F Hispanic      2
#3:  3   M    White      3
#4:  4   F    Black     NA
df2
#   id Sex     Race Income
#1:  1   M    White      1
#2:  2   F Hispanic      2
#3:  3   M    White      3
#4:  4   F    Black     NA
#  id Sex     Race Income
#1  1   M    White      1
#2  2   F Hispanic      2
#3  3   M    White      3
#4  4   F    Black     NA

如果我们需要在非NA元素之间选择“性别”、“种族”

nm1 <- names(df2)[-1]
setDT(df2)[df1, c(nm1, 'Income') := c(Map(fcoalesce, 
  .SD[, nm1, with = FALSE], mget(paste0('i.', nm1))), list(Income)), on = .(id)]

或者使用
tidyverse
,只使用
dplyr
功能

library(dplyr)
left_join(df2, df1, by = 'id') %>% 
  transmute(id,  Sex = coalesce(Sex.x, Sex.y),
                Race = coalesce(Race.x, Race.y),
           Income)
-输出

df2
#   id Sex     Race Income
#1:  1   M    White      1
#2:  2   F Hispanic      2
#3:  3   M    White      3
#4:  4   F    Black     NA
df2
#   id Sex     Race Income
#1:  1   M    White      1
#2:  2   F Hispanic      2
#3:  3   M    White      3
#4:  4   F    Black     NA
#  id Sex     Race Income
#1  1   M    White      1
#2  2   F Hispanic      2
#3  3   M    White      3
#4  4   F    Black     NA
数据
df1我们可以在这里使用连接

library(data.table)
setDT(df2)[df1, Income := Income, on = .(id)]
-输出

df2
#   id Sex     Race Income
#1:  1   M    White      1
#2:  2   F Hispanic      2
#3:  3   M    White      3
#4:  4   F    Black     NA
df2
#   id Sex     Race Income
#1:  1   M    White      1
#2:  2   F Hispanic      2
#3:  3   M    White      3
#4:  4   F    Black     NA
#  id Sex     Race Income
#1  1   M    White      1
#2  2   F Hispanic      2
#3  3   M    White      3
#4  4   F    Black     NA

如果我们需要在非NA元素之间选择“性别”、“种族”

nm1 <- names(df2)[-1]
setDT(df2)[df1, c(nm1, 'Income') := c(Map(fcoalesce, 
  .SD[, nm1, with = FALSE], mget(paste0('i.', nm1))), list(Income)), on = .(id)]

或者使用
tidyverse
,只使用
dplyr
功能

library(dplyr)
left_join(df2, df1, by = 'id') %>% 
  transmute(id,  Sex = coalesce(Sex.x, Sex.y),
                Race = coalesce(Race.x, Race.y),
           Income)
-输出

df2
#   id Sex     Race Income
#1:  1   M    White      1
#2:  2   F Hispanic      2
#3:  3   M    White      3
#4:  4   F    Black     NA
df2
#   id Sex     Race Income
#1:  1   M    White      1
#2:  2   F Hispanic      2
#3:  3   M    White      3
#4:  4   F    Black     NA
#  id Sex     Race Income
#1  1   M    White      1
#2  2   F Hispanic      2
#3  3   M    White      3
#4  4   F    Black     NA
数据
df1A
tidyverse
方法可以在将两个数据帧整形为长(使用众所周知的
pivot\u longer()
)然后再整形为宽(使用
pivot\u wider()
)以获得预期结果后使用连接。代码如下:

library(tidyverse)
#Code
newdf <- df2 %>% 
  mutate(across(-id,~as.character(.))) %>%
  pivot_longer(-id) %>%
  full_join(df1 %>% 
              mutate(across(-id,~as.character(.))) %>%
              pivot_longer(-id) %>% rename(value2=value)) %>%
  mutate(value=ifelse(is.na(value),value2,value)) %>% select(-value2) %>%
  pivot_wider(names_from = name,values_from=value) %>%
  mutate(Income=as.numeric(Income))
库(tidyverse)
#代码
新DF%
变异(跨(-id,~as.character(.))%>%
枢轴长度(-id)%>%
完全联接(df1%>%
变异(跨(-id,~as.character(.))%>%
pivot_longer(-id)%%>%重命名(value2=value))%%>%
mutate(value=ifelse(is.na(value),value2,value))%%>%select(-value2)%%>%
枢轴宽度(名称从=名称,值从=值)%>%
变化(收入=数字(收入))
输出:

# A tibble: 4 x 4
     id Sex   Race     Income
  <int> <chr> <chr>     <dbl>
1     1 M     White         1
2     2 F     Hispanic      2
3     3 M     White         3
4     4 F     Black        NA
#一个tible:4 x 4
性别种族收入
1米白色1
2 F西班牙裔2
3米白色3
4 F黑色NA
使用的一些数据:

#Data 1
df1 <- structure(list(id = 1:3, Sex = c("M", NA, NA), Race = c("White", 
"Hispanic", NA), Income = 1:3), class = "data.frame", row.names = c(NA, 
-3L))

#Data 2
df2 <- structure(list(id = 1:4, Sex = c("M", "F", "M", "F"), Race = c("White", 
"Hispanic", "White", "Black")), class = "data.frame", row.names = c(NA, 
-4L))
#数据1

df1A
tidyverse
方法可以在将两个数据帧整形为长(使用众所周知的
pivot\u longer()
)然后再整形为宽(使用
pivot\u wider()
)以获得预期结果后使用连接。代码如下:

library(tidyverse)
#Code
newdf <- df2 %>% 
  mutate(across(-id,~as.character(.))) %>%
  pivot_longer(-id) %>%
  full_join(df1 %>% 
              mutate(across(-id,~as.character(.))) %>%
              pivot_longer(-id) %>% rename(value2=value)) %>%
  mutate(value=ifelse(is.na(value),value2,value)) %>% select(-value2) %>%
  pivot_wider(names_from = name,values_from=value) %>%
  mutate(Income=as.numeric(Income))
库(tidyverse)
#代码
新DF%
变异(跨(-id,~as.character(.))%>%
枢轴长度(-id)%>%
完全联接(df1%>%
变异(跨(-id,~as.character(.))%>%
pivot_longer(-id)%%>%重命名(value2=value))%%>%
mutate(value=ifelse(is.na(value),value2,value))%%>%select(-value2)%%>%
枢轴宽度(名称从=名称,值从=值)%>%
变化(收入=数字(收入))
输出:

# A tibble: 4 x 4
     id Sex   Race     Income
  <int> <chr> <chr>     <dbl>
1     1 M     White         1
2     2 F     Hispanic      2
3     3 M     White         3
4     4 F     Black        NA
#一个tible:4 x 4
性别种族收入
1米白色1
2 F西班牙裔2
3米白色3
4 F黑色NA
使用的一些数据:

#Data 1
df1 <- structure(list(id = 1:3, Sex = c("M", NA, NA), Race = c("White", 
"Hispanic", NA), Income = 1:3), class = "data.frame", row.names = c(NA, 
-3L))

#Data 2
df2 <- structure(list(id = 1:4, Sex = c("M", "F", "M", "F"), Race = c("White", 
"Hispanic", "White", "Black")), class = "data.frame", row.names = c(NA, 
-4L))
#数据1

df1使用
merge

subset(
  merge(df1, df2, by = "id", all.y = TRUE),
  select = c("id", "Sex.y", "Race.y", "Income")
)

  id Sex.y   Race.y Income
1  1     M    White      1
2  2     F Hispanic      2
3  3     M    White      3
4  4     F    Black     NA

使用
merge

subset(
  merge(df1, df2, by = "id", all.y = TRUE),
  select = c("id", "Sex.y", "Race.y", "Income")
)

  id Sex.y   Race.y Income
1  1     M    White      1
2  2     F Hispanic      2
3  3     M    White      3
4  4     F    Black     NA

这不是由
'id'(基于您显示的数据)单独进行的连接吗?这不是由
'id'(基于您显示的数据)单独进行的连接吗?