R 根据另一个df的值,为一个df中的多个列填写NA值
我有两个dfs。在一个数据帧中有两个变量的NA值,我想用另一个df中的值替换它们。以下是我的示例数据:R 根据另一个df的值,为一个df中的多个列填写NA值,r,dplyr,R,Dplyr,我有两个dfs。在一个数据帧中有两个变量的NA值,我想用另一个df中的值替换它们。以下是我的示例数据: df1 id Sex Race Income 1 M White 1 2 NA Hispanic 2 3 NA NA 3 我希望数据看起来像这样,性别和种族的df1的NA值由df2的值填充 df2 id Sex Race Income 1 M White
df1
id Sex Race Income
1 M White 1
2 NA Hispanic 2
3 NA NA 3
我希望数据看起来像这样,性别和种族的df1的NA值由df2的值填充
df2
id Sex Race Income
1 M White 1
2 F Hispanic 2
3 M White 3
4 F Black NA
有人能帮忙吗?
我们可以在这里使用连接(join)
library(data.table)

# Update join: for every df2 row whose `id` matches a row in df1, copy df1's
# Income into df2 by reference; ids without a match in df1 end up with NA.
# The `i.` prefix pins the right-hand side to df1's column. A bare `Income`
# would resolve to df2's own column whenever df2 already has one, turning the
# assignment into a silent no-op.
setDT(df2)[df1, Income := i.Income, on = .(id)]
-输出
df2
# id Sex Race Income
#1: 1 M White 1
#2: 2 F Hispanic 2
#3: 3 M White 3
#4: 4 F Black NA
df2
# id Sex Race Income
#1: 1 M White 1
#2: 2 F Hispanic 2
#3: 3 M White 3
#4: 4 F Black NA
# id Sex Race Income
#1 1 M White 1
#2 2 F Hispanic 2
#3 3 M White 3
#4 4 F Black NA
如果我们需要在非NA元素之间选择“性别”、“种族”
# Columns to fill: everything except the join key and Income. Excluding Income
# explicitly keeps this correct even if df2 already has an Income column (for
# example from an earlier update join); `names(df2)[-1]` would pick it up and
# then duplicate it on the left-hand side of `:=`.
nm1 <- setdiff(names(df2), c("id", "Income"))

# For matched ids, take the first non-NA of df2's value and df1's value
# (`i.`-prefixed) for each column in nm1, and pull Income from df1.
setDT(df2)[
  df1,
  c(nm1, "Income") := c(
    Map(fcoalesce, .SD[, nm1, with = FALSE], mget(paste0("i.", nm1))),
    list(i.Income)
  ),
  on = .(id)
]
或者使用
tidyverse
,只使用dplyr
功能
library(dplyr)

# Attach df1's columns to df2 by id. The overlapping Sex/Race columns come back
# suffixed .x (from df2) and .y (from df1); keep the first non-NA of each pair
# together with id and Income.
df2 %>%
  left_join(df1, by = "id") %>%
  transmute(
    id,
    Sex = coalesce(Sex.x, Sex.y),
    Race = coalesce(Race.x, Race.y),
    Income
  )
-输出
df2
# id Sex Race Income
#1: 1 M White 1
#2: 2 F Hispanic 2
#3: 3 M White 3
#4: 4 F Black NA
df2
# id Sex Race Income
#1: 1 M White 1
#2: 2 F Hispanic 2
#3: 3 M White 3
#4: 4 F Black NA
# id Sex Race Income
#1 1 M White 1
#2 2 F Hispanic 2
#3 3 M White 3
#4 4 F Black NA
数据
df1我们可以在这里使用连接
library(data.table)

# Update join: for every df2 row whose `id` matches a row in df1, copy df1's
# Income into df2 by reference; ids without a match in df1 end up with NA.
# The `i.` prefix pins the right-hand side to df1's column. A bare `Income`
# would resolve to df2's own column whenever df2 already has one, turning the
# assignment into a silent no-op.
setDT(df2)[df1, Income := i.Income, on = .(id)]
-输出
df2
# id Sex Race Income
#1: 1 M White 1
#2: 2 F Hispanic 2
#3: 3 M White 3
#4: 4 F Black NA
df2
# id Sex Race Income
#1: 1 M White 1
#2: 2 F Hispanic 2
#3: 3 M White 3
#4: 4 F Black NA
# id Sex Race Income
#1 1 M White 1
#2 2 F Hispanic 2
#3 3 M White 3
#4 4 F Black NA
如果我们需要在非NA元素之间选择“性别”、“种族”
# Columns to fill: everything except the join key and Income. Excluding Income
# explicitly keeps this correct even if df2 already has an Income column (for
# example from an earlier update join); `names(df2)[-1]` would pick it up and
# then duplicate it on the left-hand side of `:=`.
nm1 <- setdiff(names(df2), c("id", "Income"))

# For matched ids, take the first non-NA of df2's value and df1's value
# (`i.`-prefixed) for each column in nm1, and pull Income from df1.
setDT(df2)[
  df1,
  c(nm1, "Income") := c(
    Map(fcoalesce, .SD[, nm1, with = FALSE], mget(paste0("i.", nm1))),
    list(i.Income)
  ),
  on = .(id)
]
或者使用tidyverse
,只使用dplyr
功能
library(dplyr)

# Attach df1's columns to df2 by id. The overlapping Sex/Race columns come back
# suffixed .x (from df2) and .y (from df1); keep the first non-NA of each pair
# together with id and Income.
df2 %>%
  left_join(df1, by = "id") %>%
  transmute(
    id,
    Sex = coalesce(Sex.x, Sex.y),
    Race = coalesce(Race.x, Race.y),
    Income
  )
-输出
df2
# id Sex Race Income
#1: 1 M White 1
#2: 2 F Hispanic 2
#3: 3 M White 3
#4: 4 F Black NA
df2
# id Sex Race Income
#1: 1 M White 1
#2: 2 F Hispanic 2
#3: 3 M White 3
#4: 4 F Black NA
# id Sex Race Income
#1 1 M White 1
#2 2 F Hispanic 2
#3 3 M White 3
#4 4 F Black NA
数据
df1
一种 tidyverse 方法:先将两个数据帧整形为长格式(使用众所周知的 pivot_longer()),进行连接,然后再整形为宽格式(使用 pivot_wider())以获得预期结果。代码如下:
library(tidyverse)

# Fill the gaps in df2 from df1 by reshaping both to long form, joining on
# (id, name), and reshaping the result back to wide form.
newdf <- df2 %>%
  # Cast every non-id column to character so they can share one `value` column.
  mutate(across(-id, as.character)) %>%
  pivot_longer(-id) %>%
  full_join(
    df1 %>%
      mutate(across(-id, as.character)) %>%
      pivot_longer(-id, values_to = "value2"),
    # Explicit keys: silences the "Joining with `by = ...`" message and guards
    # against an unintended extra key if the inputs gain shared columns.
    by = c("id", "name")
  ) %>%
  # Prefer df2's value; fall back to df1's when df2's is missing.
  mutate(value = coalesce(value, value2)) %>%
  select(-value2) %>%
  pivot_wider(names_from = name, values_from = value) %>%
  # Income went through character for the reshape; restore it to numeric.
  mutate(Income = as.numeric(Income))
库(tidyverse)
#代码
新DF%
变异(跨(-id,~as.character(.))%>%
枢轴长度(-id)%>%
完全联接(df1%>%
变异(跨(-id,~as.character(.))%>%
pivot_longer(-id)%%>%重命名(value2=value))%%>%
mutate(value=ifelse(is.na(value),value2,value))%%>%select(-value2)%%>%
枢轴宽度(名称从=名称,值从=值)%>%
变化(收入=数字(收入))
输出:
# A tibble: 4 x 4
id Sex Race Income
<int> <chr> <chr> <dbl>
1 1 M White 1
2 2 F Hispanic 2
3 3 M White 3
4 4 F Black NA
#一个tible:4 x 4
性别种族收入
1米白色1
2 F西班牙裔2
3米白色3
4 F黑色NA
使用的一些数据:
# Data 1: three ids; Sex and Race contain the NA values to be filled.
df1 <- data.frame(
  id = 1:3,
  Sex = c("M", NA, NA),
  Race = c("White", "Hispanic", NA),
  Income = 1:3,
  stringsAsFactors = FALSE
)

# Data 2: four ids with complete Sex and Race; there is no Income column.
df2 <- data.frame(
  id = 1:4,
  Sex = c("M", "F", "M", "F"),
  Race = c("White", "Hispanic", "White", "Black"),
  stringsAsFactors = FALSE
)
#数据1
df1
一种 tidyverse 方法:先将两个数据帧整形为长格式(使用众所周知的 pivot_longer()),进行连接,然后再整形为宽格式(使用 pivot_wider())以获得预期结果。代码如下:
library(tidyverse)

# Fill the gaps in df2 from df1 by reshaping both to long form, joining on
# (id, name), and reshaping the result back to wide form.
newdf <- df2 %>%
  # Cast every non-id column to character so they can share one `value` column.
  mutate(across(-id, as.character)) %>%
  pivot_longer(-id) %>%
  full_join(
    df1 %>%
      mutate(across(-id, as.character)) %>%
      pivot_longer(-id, values_to = "value2"),
    # Explicit keys: silences the "Joining with `by = ...`" message and guards
    # against an unintended extra key if the inputs gain shared columns.
    by = c("id", "name")
  ) %>%
  # Prefer df2's value; fall back to df1's when df2's is missing.
  mutate(value = coalesce(value, value2)) %>%
  select(-value2) %>%
  pivot_wider(names_from = name, values_from = value) %>%
  # Income went through character for the reshape; restore it to numeric.
  mutate(Income = as.numeric(Income))
库(tidyverse)
#代码
新DF%
变异(跨(-id,~as.character(.))%>%
枢轴长度(-id)%>%
完全联接(df1%>%
变异(跨(-id,~as.character(.))%>%
pivot_longer(-id)%%>%重命名(value2=value))%%>%
mutate(value=ifelse(is.na(value),value2,value))%%>%select(-value2)%%>%
枢轴宽度(名称从=名称,值从=值)%>%
变化(收入=数字(收入))
输出:
# A tibble: 4 x 4
id Sex Race Income
<int> <chr> <chr> <dbl>
1 1 M White 1
2 2 F Hispanic 2
3 3 M White 3
4 4 F Black NA
#一个tible:4 x 4
性别种族收入
1米白色1
2 F西班牙裔2
3米白色3
4 F黑色NA
使用的一些数据:
# Data 1: three ids; Sex and Race contain the NA values to be filled.
df1 <- data.frame(
  id = 1:3,
  Sex = c("M", NA, NA),
  Race = c("White", "Hispanic", NA),
  Income = 1:3,
  stringsAsFactors = FALSE
)

# Data 2: four ids with complete Sex and Race; there is no Income column.
df2 <- data.frame(
  id = 1:4,
  Sex = c("M", "F", "M", "F"),
  Race = c("White", "Hispanic", "White", "Black"),
  stringsAsFactors = FALSE
)
#数据1
df1使用merge
# Merge on id keeping every df2 row (all.y = TRUE), then keep only id, the
# Sex/Race columns that came from df2 (suffixed .y by merge) and Income.
merge(df1, df2, by = "id", all.y = TRUE)[
  , c("id", "Sex.y", "Race.y", "Income"),
  drop = FALSE
]
给
id Sex.y Race.y Income
1 1 M White 1
2 2 F Hispanic 2
3 3 M White 3
4 4 F Black NA
使用merge
# Merge on id keeping every df2 row (all.y = TRUE), then keep only id, the
# Sex/Race columns that came from df2 (suffixed .y by merge) and Income.
merge(df1, df2, by = "id", all.y = TRUE)[
  , c("id", "Sex.y", "Race.y", "Income"),
  drop = FALSE
]
给
id Sex.y Race.y Income
1 1 M White 1
2 2 F Hispanic 2
3 3 M White 3
4 4 F Black NA
这不是由
'id'(基于您显示的数据)单独进行的连接吗?这不是由
'id'(基于您显示的数据)单独进行的连接吗?