R 高效填充矩阵

R 高效填充矩阵,r,matrix,dataframe,dplyr,reshape2,R,Matrix,Dataframe,Dplyr,Reshape2,我有数据显示,在一组医院中,对一组受试者使用一组药物治疗的结果进行了测量。(#药物>受试者>医院) 现在我想构建一个矩阵,其中每一行都是唯一的医院受试者组合,每一列都是唯一的医院药物组合。以下可能是构建此矩阵的一种效率不高的方法: df$hospital.subject <- paste(df$hospital,df$subject,sep=":") df$hospital.drug <- paste(df$hospital,df$drug,sep=":") hospital.su

我有数据显示,在一组医院中,对一组受试者使用一组药物治疗的结果进行了测量。(#药物>受试者>医院)

现在我想构建一个矩阵,其中每一行都是唯一的医院-受试者组合,每一列都是唯一的医院-药物组合。以下可能是构建此矩阵的一种效率不高的方法:

# Label every row of df with its composite "hospital:subject" and
# "hospital:drug" keys.
df$hospital.subject <- paste(df$hospital, df$subject, sep = ":")
df$hospital.drug <- paste(df$hospital, df$drug, sep = ":")

# Distinct keys in order of first appearance; they become the row and
# column labels of the matrix.
hospital.subject <- unique(df$hospital.subject)
hospital.drug <- unique(df$hospital.drug)

# Build the matrix one row at a time (deliberately naive): for each
# hospital:subject key, look up that subject's outcome for every
# hospital:drug key and leave NA where no measurement exists.
mat <- do.call(rbind, lapply(hospital.subject, function(x) {
  # Inside filter(), `hospital.subject` is the df column, `x` the current key.
  rows <- dplyr::filter(df, hospital.subject == x)
  res <- rep(NA, length(hospital.drug))
  pos <- match(hospital.drug, rows$hospital.drug)
  found <- !is.na(pos)
  res[found] <- rows$outcome[pos[found]]
  res
}))
rownames(mat) <- hospital.subject
colnames(mat) <- hospital.drug
有没有更有效、更优雅的方法来实现这一点?

还有一件事,我的真实数据没有本例中的数据组织得那么好,因为每家医院的受试者人数不尽相同,而且还有多家医院的受试者使用相同的药物治疗。

这就是你想要的吗?

# Build one key per hospital/subject pair (rows) and one per hospital/drug
# pair (columns); paste() joins the two parts with its default single space.
df$hos.sub <- paste(df$hospital, df$subject)
df$hos.dru <- paste(df$hospital, df$drug)

# Cross-tabulate outcome by (row key, column key). Combinations that never
# occur in df come out as NA; repeated combinations are averaged by mean().
# df[["outcome"]] extracts a plain vector even when df is a tibble, where
# df[, "outcome"] would stay a one-column table and make tapply() fail.
ind1 <- list(factor(df$hos.sub), factor(df$hos.dru))
res <- tapply(df[["outcome"]], ind1, mean)
# Preview the first rows of the first nine columns.
head(res[, 1:9])

> head(res[,1:9])
           H_1 D_1  H_1 D_10 H_1 D_100 H_1 D_1000 H_1 D_101  H_1 D_102 H_1 D_103 H_1 D_104 H_1 D_105
H_1 S_1  26.550866 83.189899  6.516364   45.77171  6.471249 26.6257392  81.14044  9.088058  67.64499
H_1 S_10  6.178627  4.288589 45.675309   77.90078  3.338293 95.5751769  92.02642 49.810641  14.31814
H_1 S_2  37.212390 76.684275 27.743618   21.32599 67.661240 66.0476814  82.46891 97.271288  88.86986
H_1 S_3  57.285336 27.278032 60.041069   55.22206 73.537169 21.2416518  91.60083 85.267414  95.01507
H_1 S_4  90.820779 18.816330 27.314448   13.21052 11.129967  0.5266102  72.34151 49.899330  91.69972
H_1 S_5  20.168193 22.576183 94.148905   44.60504  4.665462 10.2902506  91.02545 27.440370  90.51900
df$hos.sub=paste(df$hospital,df$subject)
df$hos.dru=paste(df$hospital,df$drug)
这就是你想要的吗?

# Build one key per hospital/subject pair (rows) and one per hospital/drug
# pair (columns); paste() joins the two parts with its default single space.
df$hos.sub <- paste(df$hospital, df$subject)
df$hos.dru <- paste(df$hospital, df$drug)

# Cross-tabulate outcome by (row key, column key). Combinations that never
# occur in df come out as NA; repeated combinations are averaged by mean().
# df[["outcome"]] extracts a plain vector even when df is a tibble, where
# df[, "outcome"] would stay a one-column table and make tapply() fail.
ind1 <- list(factor(df$hos.sub), factor(df$hos.dru))
res <- tapply(df[["outcome"]], ind1, mean)
# Preview the first rows of the first nine columns.
head(res[, 1:9])

> head(res[,1:9])
           H_1 D_1  H_1 D_10 H_1 D_100 H_1 D_1000 H_1 D_101  H_1 D_102 H_1 D_103 H_1 D_104 H_1 D_105
H_1 S_1  26.550866 83.189899  6.516364   45.77171  6.471249 26.6257392  81.14044  9.088058  67.64499
H_1 S_10  6.178627  4.288589 45.675309   77.90078  3.338293 95.5751769  92.02642 49.810641  14.31814
H_1 S_2  37.212390 76.684275 27.743618   21.32599 67.661240 66.0476814  82.46891 97.271288  88.86986
H_1 S_3  57.285336 27.278032 60.041069   55.22206 73.537169 21.2416518  91.60083 85.267414  95.01507
H_1 S_4  90.820779 18.816330 27.314448   13.21052 11.129967  0.5266102  72.34151 49.899330  91.69972
H_1 S_5  20.168193 22.576183 94.148905   44.60504  4.665462 10.2902506  91.02545 27.440370  90.51900
df$hos.sub=paste(df$hospital,df$subject)
df$hos.dru=paste(df$hospital,df$drug)

我不认为这是我在问题中描述的方式。
# Cross-hospital imputation. For every drug measured at a hospital, fill that
# hospital's subjects (rows of `mat`) in every other column for the same drug
# with random normal draws whose mean is the median, and whose sd is the MAD,
# of the hospital's own measurements for that drug.
# Reads `hospitals`, `hospital.subject`, `hospital.drug`; updates `mat` in
# place. Values come from rnorm(), so call set.seed() first if the result
# must be reproducible.
for (h in seq_along(hospitals)) {
  # Literal prefix match on "<hospital>:" so that a hospital name is never
  # matched as a substring of another one or interpreted as a regex.
  prefix <- paste0(hospitals[h], ":")
  row.idx <- which(startsWith(hospital.subject, prefix))
  col.idx <- which(startsWith(hospital.drug, prefix))
  for (i in seq_along(col.idx)) {
    src.col <- col.idx[i]
    drug <- strsplit(hospital.drug[src.col], split = ":")[[1]][2]
    # All columns for the same drug, minus the source column itself. The
    # source column must be dropped by value: negative indexing with
    # `-src.col` would remove by position in this vector, not by column.
    same.drug <- which(endsWith(hospital.drug, paste0(":", drug)))
    impute.idx <- same.drug[same.drug != src.col]
    observed <- mat[row.idx, src.col]
    # na.rm = TRUE: subjects without this drug are NA in the source column
    # and would otherwise turn every imputed value into NaN.
    mat[row.idx, impute.idx] <- rnorm(
      length(row.idx) * length(impute.idx),
      mean = median(observed, na.rm = TRUE),
      sd = mad(observed, na.rm = TRUE)
    )
  }
}
# Build one key per hospital/subject pair (rows) and one per hospital/drug
# pair (columns); paste() joins the two parts with its default single space.
df$hos.sub <- paste(df$hospital, df$subject)
df$hos.dru <- paste(df$hospital, df$drug)

# Cross-tabulate outcome by (row key, column key). Combinations that never
# occur in df come out as NA; repeated combinations are averaged by mean().
# df[["outcome"]] extracts a plain vector even when df is a tibble, where
# df[, "outcome"] would stay a one-column table and make tapply() fail.
ind1 <- list(factor(df$hos.sub), factor(df$hos.dru))
res <- tapply(df[["outcome"]], ind1, mean)
# Preview the first rows of the first nine columns.
head(res[, 1:9])

> head(res[,1:9])
           H_1 D_1  H_1 D_10 H_1 D_100 H_1 D_1000 H_1 D_101  H_1 D_102 H_1 D_103 H_1 D_104 H_1 D_105
H_1 S_1  26.550866 83.189899  6.516364   45.77171  6.471249 26.6257392  81.14044  9.088058  67.64499
H_1 S_10  6.178627  4.288589 45.675309   77.90078  3.338293 95.5751769  92.02642 49.810641  14.31814
H_1 S_2  37.212390 76.684275 27.743618   21.32599 67.661240 66.0476814  82.46891 97.271288  88.86986
H_1 S_3  57.285336 27.278032 60.041069   55.22206 73.537169 21.2416518  91.60083 85.267414  95.01507
H_1 S_4  90.820779 18.816330 27.314448   13.21052 11.129967  0.5266102  72.34151 49.899330  91.69972
H_1 S_5  20.168193 22.576183 94.148905   44.60504  4.665462 10.2902506  91.02545 27.440370  90.51900