循环R矢量化的有效方法
如何将以下R代码矢量化以减少计算时间循环R矢量化的有效方法,r,vectorization,R,Vectorization,如何将以下R代码矢量化以减少计算时间 q = matrix(0,n,p) for(u in 1 : n){ q1 <- matrix(0,p,1) for(iprime in 1 : n){ for(i in 1 : n){ if(cause[iprime]==1 & cause[i]>1 & (time[i]<time[u]) & (time[u] <= time[iprime])){ q1 =
q = matrix(0,n,p)
for(u in 1 : n){
q1 <- matrix(0,p,1)
for(iprime in 1 : n){
for(i in 1 : n){
if(cause[iprime]==1 & cause[i]>1 & (time[i]<time[u]) & (time[u] <= time[iprime])){
q1 = q1 + (covs[i,] - S1byS0hat[iprime,])*G[iprime]/G[i]*expz[i]/S0hat[iprime]
}
}
}
q[u,] = q1/(m*m)
}
对您的解决方案进行基准测试:
coeff <- 10
n = 20 * coeff
m = 500
p = 3
G = runif(n)
time = runif(n, 0.01, 5)
cause = c(rep(0, 6 * coeff), rep(1, 10 * coeff), rep(2, 4 * coeff))
covs = matrix(rnorm(n * p), n, p)
S1byS0hat = matrix(rnorm(n * p), n, p)
S0hat = rnorm(n)
expz = rnorm(n)
system.time({
q = matrix(0,n,p)
for(u in 1 : n){
q1 <- matrix(0,p,1)
for(iprime in 1 : n){
for(i in 1 : n){
if(cause[iprime]==1 & cause[i]>1 & (time[i]<time[u]) & (time[u] <= time[iprime])){
q1 = q1 + (covs[i,] - S1byS0hat[iprime,])*G[iprime]/G[i]*expz[i]/S0hat[iprime]
}
}
}
q[u,] = q1/(m*m)
}
})
对于系数=10,这需要0.3秒,对于系数=100,这需要6分钟
然后,可以对至少一个循环进行矢量化:
q3 <- matrix(0, n, p)
c1 <- G / S0hat
c2 <- expz / G
covs_c2 <- sweep(covs, 1, c2, '*')
S1byS0hat_c1 <- sweep(S1byS0hat, 1, c1, '*')
for (u in 1:n) {
q1 <- rep(0, p)
ind_iprime <- which(cause == 1 & time[u] <= time)
ind_i <- which(cause > 1 & time < time[u])
for (iprime in ind_iprime) {
q1 <- q1 + colSums(covs_c2[ind_i, , drop = FALSE]) * c1[iprime] -
S1byS0hat_c1[iprime, ] * sum(c2[ind_i])
}
q3[u, ] <- q1
}
q3 <- q3 / (m * m)
q3@F.Prive非常好,谢谢。我确实在Rcpp中编写了代码。这是@F.Prive链接,我用上面的代码和我的代码(在Rcpp中)比较了计算时间。对于coeff=100,您的代码在我的机器上花费了大约11秒,而我的代码在我的机器上花费了大约16秒。如果您对这个答案感到满意,您可以接受它。明天我可以从另一个问题快速查看Rcpp代码。当然可以。如果我们能进一步改进,那就太好了
q2 = matrix(0, n, p)
c1 <- G / S0hat
c2 <- expz / G
for (u in 1:n) {
q1 <- rep(0, p)
ind_iprime <- which(cause == 1 & time[u] <= time)
ind_i <- which(cause > 1 & time < time[u])
for (iprime in ind_iprime) {
for (i in ind_i) {
q1 = q1 + (covs[i, ] - S1byS0hat[iprime, ]) * c1[iprime] * c2[i]
}
}
q2[u, ] = q1
}
q2 <- q2 / (m * m)
q3 <- matrix(0, n, p)
c1 <- G / S0hat
c2 <- expz / G
covs_c2 <- sweep(covs, 1, c2, '*')
S1byS0hat_c1 <- sweep(S1byS0hat, 1, c1, '*')
for (u in 1:n) {
q1 <- rep(0, p)
ind_iprime <- which(cause == 1 & time[u] <= time)
ind_i <- which(cause > 1 & time < time[u])
for (iprime in ind_iprime) {
q1 <- q1 + colSums(covs_c2[ind_i, , drop = FALSE]) * c1[iprime] -
S1byS0hat_c1[iprime, ] * sum(c2[ind_i])
}
q3[u, ] <- q1
}
q3 <- q3 / (m * m)