R 将具有NAs的长格式数据与宽格式完整数据合并以覆盖NAs_R_Merge_Melt

R 将具有NAs的长格式数据与宽格式完整数据合并以覆盖NAs

r merge

R 将具有NAs的长格式数据与宽格式完整数据合并以覆盖NAs,r,merge,melt,R,Merge,Melt,所以我有三个数据集需要合并。其中包含学校数据以及4年级和5年级的阅读/数学成绩。其中一个是长格式的数据集，在一些变量中有很多缺失是的，我确实需要长格式的数据，而另外两个则有宽格式的完整缺失数据。所有这些数据帧都包含一列，该列对于数据库中的每个个体都具有唯一的ID号这里是一个完整的可复制示例，它生成了一个数据类型的小示例。我正在处理的帧。。。我需要使用的三个数据帧如下：school\u lf、school 4和school 5。school\u lf具有NAs的长格式数据，school 4和sc

我有三个数据集需要合并，其中包含学校数据以及4年级和5年级的阅读/数学成绩。其中一个是长格式的数据集，在一些变量中有很多缺失值（是的，我确实需要长格式的数据），而另外两个则是宽格式的数据集，包含前者所缺失的完整数据。所有这些数据框都包含一列，该列对于数据库中的每个个体都具有唯一的ID号

下面是一个完整的可复现示例，它会生成与我正在处理的数据框类型相同的小样本……我需要使用的三个数据框如下：school_lf、school4 和 school5。school_lf 是含有 NA 的长格式数据，school4 和 school5 是我需要用来按 id 和年级填充该长格式数据中 NA 的数据框

非常感谢您的帮助！我已经尝试解决这个问题好几个小时了，但没有取得任何进展，所以我想在这里问一下

您可以使用 dplyr 中的 coalesce 函数。如果第一个向量中的值为 NA，它会查看第二个向量中相同位置的值；若该值不是 NA，就选用它。如果仍然是 NA，则继续查看第三个向量

library(dplyr)
# coalesce() returns, position by position, the first non-NA value among its
# arguments, so NAs in mathscore/readscore are replaced by the value from
# math4/read4, or from math5/read5 when that is NA as well.
sch %>%
  mutate(
    mathscore = coalesce(mathscore, math4, math5),
    readscore = coalesce(readscore, read4, read5)
  ) %>%
  # Keep only the columns from id through readscore.
  select(id:readscore)

编辑：我只是尝试在实际数据上使用这种方法，但它不起作用，因为替换数据也有一些NAs，因此，我尝试合并的dfs具有不同的行数。。。回到原点

我能够用下面的代码来理解这一点，尽管它不是最优雅或最直接的代码，@Edwin的回答帮助我找到了正确的方向。任何关于如何使这段代码更加优雅和高效的建议都是非常受欢迎的

# Idea: fill the NAs in the long-format `school_lf` from the complete
# per-grade tables by joining on the (id, grade) key, instead of pairing rows
# by position (which silently mismatches or errors when the tables differ in
# row count or ordering).
library(dplyr)

# Stack the grade-4 and grade-5 tables into a single lookup table. `selected`
# is not needed for the fill, so only the key and the two score columns are
# kept.
# NOTE(review): assumes school4/school5 hold one row per (id, grade) with the
# scores in `readscore`/`mathscore` -- confirm against the real data.
sch_full <- bind_rows(school4, school5) %>%
  select(id, grade, readscore, mathscore)

# Attach the replacement scores to the matching long-format rows. A left join
# keeps every row of `school_lf`; rows without a match get NA in the `*_fill`
# columns and are therefore left unchanged. coalesce() keeps the existing
# score and falls back to the fill value only where the existing one is NA.
final_df <- school_lf %>%
  left_join(sch_full, by = c("id", "grade"), suffix = c("", "_fill")) %>%
  mutate(
    mathscore = coalesce(mathscore, mathscore_fill),
    readscore = coalesce(readscore, readscore_fill)
  ) %>%
  # Drop the helper columns so `final_df` has the same columns as `school_lf`.
  select(-mathscore_fill, -readscore_fill)

谢谢你的回复！这个函数看起来很简洁，但我很难理解它在做什么……我试着用样本数据运行它，它给了我这个错误：Error in mutate_impl(.data, dots) : object 'math4' not found。那么你可能是在另一个数据集上运行它。该错误只是说明 math4 不在应用该函数的数据集中。

library(dplyr)
# For each row, take the first non-NA value among (mathscore, math4, math5)
# and among (readscore, read4, read5), then keep the columns from id through
# readscore.
select(
  mutate(
    sch,
    mathscore = coalesce(mathscore, math4, math5),
    readscore = coalesce(readscore, read4, read5)
  ),
  id:readscore
)

# Idea: look up the missing long-format scores by (id, grade) in the complete
# grade-4 / grade-5 tables. Matching on the key rather than on row position
# stays correct when the tables have different row counts or orderings.
library(dplyr)

# One lookup table covering both grades; only the join key and the score
# columns are needed (`selected` is dropped).
# NOTE(review): assumes each (id, grade) pair occurs at most once in
# school4/school5 and that their scores live in `readscore`/`mathscore` --
# confirm against the real data.
sch_full <- bind_rows(school4, school5) %>%
  select(id, grade, readscore, mathscore)

# Left join so every row of `school_lf` is kept; unmatched rows receive NA in
# the `*_fill` columns and so keep their original values. coalesce() only
# substitutes the fill value where the original score is NA.
final_df <- school_lf %>%
  left_join(sch_full, by = c("id", "grade"), suffix = c("", "_fill")) %>%
  mutate(
    mathscore = coalesce(mathscore, mathscore_fill),
    readscore = coalesce(readscore, readscore_fill)
  ) %>%
  # Remove the temporary fill columns.
  select(-mathscore_fill, -readscore_fill)