合并两个data.frame,将所有匹配行保留在R中
我正在努力合并两个data.frame,其中一个df中出现na值合并两个data.frame,将所有匹配行保留在R中,r,R,我正在努力合并两个data.frame,其中一个df中出现na值 sampleA <- structure(list(Nom_xp = "A1MRJ", Rep = 1L, GB05 = 102L, GB05.1 = 102L, GB18 = 177L, GB18.1 = 177L, GB06 = 240L, GB06.1 = 240L, GB27 = 169L, GB27.1 = 169L, GB24 = 240L, GB24.1 = 242L, GB28 = NA_
sampleA <- structure(list(Nom_xp = "A1MRJ", Rep = 1L, GB05 = 102L, GB05.1 = 102L,
GB18 = 177L, GB18.1 = 177L, GB06 = 240L, GB06.1 = 240L, GB27 = 169L,
GB27.1 = 169L, GB24 = 240L, GB24.1 = 242L, GB28 = NA_integer_,
GB28.1 = NA_integer_, GB15 = 142L, GB15.1 = 144L, GB02 = 197L,
GB02.1 = 197L, GB10 = 126L, GB10.1 = 134L, GB14 = 181L, GB14.1 = 193L), .Names = c("Nom_xp",
"Rep", "GB05", "GB05.1", "GB18", "GB18.1", "GB06", "GB06.1",
"GB27", "GB27.1", "GB24", "GB24.1", "GB28", "GB28.1", "GB15",
"GB15.1", "GB02", "GB02.1", "GB10", "GB10.1", "GB14", "GB14.1"
), row.names = 32L, class = "data.frame")
sampleB <- structure(list(Nom_xp = "A1MRJ", Rep = 2L, GB05 = NA, GB05.1 = NA,
GB18 = 177L, GB18.1 = 177L, GB06 = 240L, GB06.1 = 240L, GB27 = 169L,
GB27.1 = 169L, GB24 = 240L, GB24.1 = 242L, GB28 = 390L, GB28.1 = 390L,
GB15 = 142L, GB15.1 = 144L, GB02 = 197L, GB02.1 = 197L, GB10 = 126L,
GB10.1 = 134L, GB14 = 181L, GB14.1 = 193L), .Names = c("Nom_xp",
"Rep", "GB05", "GB05.1", "GB18", "GB18.1", "GB06", "GB06.1",
"GB27", "GB27.1", "GB24", "GB24.1", "GB28", "GB28.1", "GB15",
"GB15.1", "GB02", "GB02.1", "GB10", "GB10.1", "GB14", "GB14.1"
), row.names = 33L, class = "data.frame")
我会认为:
output <- merge(A,B,by="Nom_xp",all.x=T,all.y=T)
output您只有一行吗?那么,这还不够吗?您可以在sampleB
中获得以下结果:
sampleB[, is.na(sampleB)] <- sampleA[, is.na(sampleB)]
sampleB[,is.na(sampleB)]不能完全确定您的整个数据集是什么样子,但我假设您可以有几个具有相同“Nom_xp”的样本,而不仅仅是2个?你可能把所有的数据都放在一个大数据框里
如果是这样的话,也许这段代码是一个好的开始(也许有人可以帮忙,重新编写这段代码会更有效率?)。无论如何:
不,我有很多行。我没有想过使用其他东西,但加入或合并。。。我可能走错方向了…如果Samam辩诉和sampleB在相同的顺序中有相同的行,这就行了?是的,你是对的。我建议这样做是因为OP之前对执行apply
感兴趣,这让我认为他是在逐行比较替换值。我认为逐行比较是可能的,因为两个数据帧具有相同顺序的相同行数。明天早上我会尽快试一试。又来了一堆!如果忽略NA的话,Sam辩诉和sampleB中的行是否完全相同?
output <- join(A,B,by="Nom_xp",match="all")
sampleB[, is.na(sampleB)] <- sampleA[, is.na(sampleB)]
sampleB[is.na(sampleB)] <- sampleA[is.na(sampleB)]
sampleA <- structure(list(Nom_xp = "A1MRJ", Rep = 1L, GB05 = 102L, GB05.1 = 102L,
GB18 = 177L, GB18.1 = 177L, GB06 = 240L, GB06.1 = 240L, GB27 = 169L,
GB27.1 = 169L, GB24 = 240L, GB24.1 = 242L, GB28 = NA_integer_,
GB28.1 = NA_integer_, GB15 = 142L, GB15.1 = 144L, GB02 = 197L,
GB02.1 = 197L, GB10 = 126L, GB10.1 = 134L, GB14 = 181L, GB14.1 = 193L), .Names = c("Nom_xp", "Rep", "GB05", "GB05.1", "GB18", "GB18.1", "GB06", "GB06.1","GB27", "GB27.1", "GB24", "GB24.1", "GB28", "GB28.1", "GB15","GB15.1", "GB02", "GB02.1", "GB10", "GB10.1", "GB14", "GB14.1"), row.names = 32L, class = "data.frame")
sampleB <- structure(list(Nom_xp = "A1MRJ", Rep = 2L, GB05 = NA, GB05.1 = NA,
GB18 = 177L, GB18.1 = 177L, GB06 = 240L, GB06.1 = 240L, GB27 = 169L,
GB27.1 = 169L, GB24 = 240L, GB24.1 = 242L, GB28 = 390L, GB28.1 = 390L,
GB15 = 142L, GB15.1 = 144L, GB02 = 197L, GB02.1 = 197L, GB10 = 126L,
GB10.1 = 134L, GB14 = 181L, GB14.1 = 193L), .Names = c("Nom_xp","Rep", "GB05", "GB05.1", "GB18", "GB18.1", "GB06", "GB06.1", "GB27", "GB27.1", "GB24", "GB24.1", "GB28", "GB28.1", "GB15", "GB15.1", "GB02", "GB02.1", "GB10", "GB10.1", "GB14", "GB14.1" ), row.names = 33L, class = "data.frame")
sampleC <- structure(list(Nom_xp = "ASDF", Rep = 2L, GB05 = NA, GB05.1 = NA,
GB18 = 177L, GB18.1 = 177L, GB06 = 240L, GB06.1 = 240L, GB27 = 12349L,
GB27.1 = 3, GB24 = 234112, GB24.1 = 242L, GB28 = 234, GB28.1 = 390L,
GB15 = NA, GB15.1 = 144L, GB02 = 197L, GB02.1 = 197L, GB10 = 126L,
GB10.1 = 134L, GB14 = NA, GB14.1 = 193L), .Names = c("Nom_xp", "Rep", "GB05", "GB05.1", "GB18", "GB18.1", "GB06", "GB06.1", "GB27", "GB27.1", "GB24", "GB24.1", "GB28", "GB28.1", "GB15", "GB15.1", "GB02", "GB02.1", "GB10", "GB10.1", "GB14", "GB14.1"), row.names = 34L, class = "data.frame")
sampleD <- structure(list(Nom_xp = "ASDF", Rep = 2L, GB05 = 214, GB05.1 = 34,
GB18 = 177L, GB18.1 = 177L, GB06 = 240L, GB06.1 = 240L, GB27 = 169L,
GB27.1 = 3, GB24 = NA, GB24.1 = 242L, GB28 = 234, GB28.1 = 390L,
GB15 = 56, GB15.1 = 144L, GB02 = 197L, GB02.1 = 197L, GB10 = 15466L,
GB10.1 = 134L, GB14 = 34, GB14.1 = 193L), .Names = c("Nom_xp", "Rep", "GB05", "GB05.1", "GB18", "GB18.1", "GB06", "GB06.1", "GB27", "GB27.1", "GB24", "GB24.1", "GB28", "GB28.1", "GB15", "GB15.1", "GB02", "GB02.1", "GB10", "GB10.1", "GB14", "GB14.1"), row.names = 35L, class = "data.frame")
cdat<-rbind(sampleA,sampleB,sampleC,sampleD) #simulating your data set (?)
dcols<-dim(cdat)[2]
mat<-matrix(nrow=length(unique(cdat$Nom_xp)),ncol=dcols)
colnames(mat)<-colnames(cdat)
for (j in 1:length(unique(cdat$Nom_xp)))
{
g<-grep(unique(cdat$Nom_xp)[j],cdat$Nom_xp) #Get the Nom_xp rows that match
mat[j,1]<-cdat[g[1],1] #Fill in the "Nom_xp"
mat[j,2]<-paste(g,collapse=" ") #Fill in the "rep"
mat[j,3:dcols]<-apply(cdat[g,3:dcols],2, #Calculate a mean for each column
function(x){as.numeric(mean(x,na.rm=T))})
}