在R中创建此矩阵的最快方法
我有两个向量在R中创建此矩阵的最快方法,r,R,我有两个向量r和s。我想找出这两个数组的外部差异,而不是像下面这样的负值 r = rnorm(100000) s = c(0.02, 0.04, 0.3, 0.43, 0.5, 0.7, 0.8, 0.9) res = t(pmax(outer(r, s, "-"), 0)) system.time({ res = t(pmax(outer(r, s, "-"), 0)) }) ## system elapsed ## 0.05 0.00 0.05 或 如何使R中的结果x更快?
r
和s
。我想找出这两个数组的外部差异,而不是像下面这样的负值
r = rnorm(100000)
s = c(0.02, 0.04, 0.3, 0.43, 0.5, 0.7, 0.8, 0.9)
res = t(pmax(outer(r, s, "-"), 0))
system.time({
res = t(pmax(outer(r, s, "-"), 0))
})
## system elapsed
## 0.05 0.00 0.05
或
如何使R中的结果x更快?通过分别运行
outer
函数和下面这样的子设置zero<0
项,我可以获得稍快的性能
res1 <- t( outer( r , s , "-" ) )
res1[ res1 < 0 ] <- 0
然后使用如下函数:
gtzero( r , s )
这比使用outer
和pmax
快约6倍,比outer
然后[
子集设置快3倍:
require( microbenchmark )
bm <- microbenchmark( eval( rose.baseR ) , eval( simon.baseR ) , eval( simon.Rcpp ) )
print( bm , "relative" , order = "median" , digits = 2 )
#Unit: relative
# expr min lq median uq max neval
# eval(simon.Rcpp) 1 1.0 1.0 1.0 1.0 100
# eval(simon.baseR) 3 3.1 3.2 3.2 1.5 100
# eval(rose.baseR) 3 3.4 6.0 5.9 1.8 100
计算了以下表达式:
set.seed(123)
r = rnorm(100000)
s = c(0.02, 0.04, 0.3, 0.43, 0.5, 0.7, 0.8, 0.9)
rose.baseR <- quote({
res0 <- t(pmax(outer(r, s, "-"), 0))
})
simon.baseR <- quote({
res1 <- outer( r , s , "-" )
res1[ res1 < 0 ] <- 0
})
simon.Rcpp <- quote({
res2 <- gtzero(r,s)
})
set.seed(123)
r=rnorm(100000)
s=c(0.02,0.04,0.3,0.43,0.5,0.7,0.8,0.9)
rose.baseR以下是@thelatemail的评论:
fun1 <- function(r,s) t(pmax(outer(r, s, "-"), 0))
fun2 <- function(r,s) {
x = pmax(r - rep(s, each = length(r)), 0)
matrix(x, nrow = length(s), byrow = TRUE)
}
fun3 <- function(r,s) {
dr <- length(r)
ds <- length(s)
R <- rep(s, rep.int(length(r), length(s)))
S <- rep(r, times = ceiling(length(s)/length(r)))
res <- pmax(S - R, 0)
dim(res) <- c(dr, ds)
t(res)
}
library(microbenchmark)
microbenchmark(res1 <- fun1(r,s),
res2 <- fun2(r,s),
res3 <- fun3(r,s),
times=20)
# Unit: milliseconds
# expr min lq median uq max neval
# res1 <- fun1(r, s) 43.28387 46.68182 66.03417 78.78109 83.75569 20
# res2 <- fun2(r, s) 50.52941 54.36576 56.77067 60.87218 91.14043 20
# res3 <- fun3(r, s) 34.18374 35.37835 37.97405 40.10642 70.78626 20
identical(res1, res3)
#[1] TRUE
fun1也许你可以用文字来描述你想做什么你可能可以在outer
的内部挖掘,只提取那些实际上进行计算的位,而不需要检查。不确定它是否真的值得你节省几秒钟的时间。Rcpp解决方案(封装在返回结果的R函数调用中)似乎比fun3
快3倍。
identical( res0 , res2 )
#[1] TRUE
set.seed(123)
r = rnorm(100000)
s = c(0.02, 0.04, 0.3, 0.43, 0.5, 0.7, 0.8, 0.9)
rose.baseR <- quote({
res0 <- t(pmax(outer(r, s, "-"), 0))
})
simon.baseR <- quote({
res1 <- outer( r , s , "-" )
res1[ res1 < 0 ] <- 0
})
simon.Rcpp <- quote({
res2 <- gtzero(r,s)
})
fun1 <- function(r,s) t(pmax(outer(r, s, "-"), 0))
fun2 <- function(r,s) {
x = pmax(r - rep(s, each = length(r)), 0)
matrix(x, nrow = length(s), byrow = TRUE)
}
fun3 <- function(r,s) {
dr <- length(r)
ds <- length(s)
R <- rep(s, rep.int(length(r), length(s)))
S <- rep(r, times = ceiling(length(s)/length(r)))
res <- pmax(S - R, 0)
dim(res) <- c(dr, ds)
t(res)
}
library(microbenchmark)
microbenchmark(res1 <- fun1(r,s),
res2 <- fun2(r,s),
res3 <- fun3(r,s),
times=20)
# Unit: milliseconds
# expr min lq median uq max neval
# res1 <- fun1(r, s) 43.28387 46.68182 66.03417 78.78109 83.75569 20
# res2 <- fun2(r, s) 50.52941 54.36576 56.77067 60.87218 91.14043 20
# res3 <- fun3(r, s) 34.18374 35.37835 37.97405 40.10642 70.78626 20
identical(res1, res3)
#[1] TRUE