R 向量元素上的向量化循环_R_Loops_Vector_Vectorization

R 向量元素上的向量化循环

r loops vector

R 向量元素上的向量化循环,r,loops,vector,vectorization,R,Loops,Vector,Vectorization,我发现很难找到以下问题的快速解决方案：我有一个观察向量，它表示观察某些现象的时间 example <- c(0,0,0,1,0,1,1,0,0,0,-1,0,0,-1,-1,0,0,1,0,0); example我确信有人会找到更好的pure-R解决方案，但我的第一次尝试是只使用1个循环，如下所示： x <- c(0,0,0,1,0,1,1,0,0,0,-1,0,0,-1,-1,0,0,1,0,0) last <- x[1] for (i in seq_along(x))

我发现很难找到以下问题的快速解决方案：

我有一个观察向量，它表示观察某些现象的时间

# Observation vector: 0 means "nothing observed", +1 / -1 are the observed values.
example <- c(0, 0, 0, 1, 0, 1, 1, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, 1, 0, 0)

我确信有人会找到更好的纯 R 解决方案，但我的第一次尝试是只使用一个循环，如下所示：
x <- c(0, 0, 0, 1, 0, 1, 1, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, 1, 0, 0)

# Single pass over the vector: remember the most recent non-zero value and
# write it over every zero that follows it. The carried value starts as x[1],
# so zeros before the first non-zero observation are left as they are.
last <- x[1]
for (i in seq_along(x)) {
  if (x[i] != 0) {
    last <- x[i]
  } else {
    x[i] <- last
  }
}

x
## [1]  0  0  0  1  1  1  1  1  1  1 -1 -1 -1 -1 -1 -1 -1  1  1  1

我怀疑您的 0 值实际上是 NA 值。在这里，我先把它们设为 NA，然后使用 zoo 包的 na.locf（last observation carried forward，即“末次观测值结转”）：
example <- c(0, 0, 0, 1, 0, 1, 1, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, 1, 0, 0)
library(zoo)

# Treat zeros as "no observation" so they can be filled from the last real one.
# The explicit assignment replaces the `res / res * res` trick: that expression
# produces NaN (not NA) for zeros and would also turn Inf / -Inf into NaN,
# silently discarding genuine observations.
res <- example
res[res == 0] <- NA

# Last observation carried forward. na.rm = FALSE keeps the leading NAs so the
# result has the same length as the input.
res <- na.locf(res, na.rm = FALSE)

# Leading positions have no earlier observation to carry; put their 0 back.
res[is.na(res)] <- 0

cbind(example, res)
#       example res
#  [1,]       0   0
#  [2,]       0   0
#  [3,]       0   0
#  [4,]       1   1
#  [5,]       0   1
#  [6,]       1   1
#  [7,]       1   1
#  [8,]       0   1
#  [9,]       0   1
# [10,]       0   1
# [11,]      -1  -1
# [12,]       0  -1
# [13,]       0  -1
# [14,]      -1  -1
# [15,]      -1  -1
# [16,]       0  -1
# [17,]       0  -1
# [18,]       1   1
# [19,]       0   1
# [20,]       0   1

我将尝试提供纯 R 解决方案：
example <- c(0, 0, 0, 1, 0, 1, 1, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, 1, 0, 0)

# Running count of non-zero observations: a non-zero value and the zeros that
# follow it all share the same count.
run_id <- cumsum(example != 0)

# match() reports the first position carrying each count, which is the index
# of the value to carry forward (position 1 for the leading zeros).
first_pos <- match(run_id, run_id)

desired.output <- example[first_pos]

print(cbind(example, desired.output))

UPD2：我喜欢@Roland的答案。它可以缩短为两行：
# Logical mask of the actual (non-zero) observations.
nonzero <- example != 0
# cumsum(nonzero) counts the observations seen so far. Prepending example[1]
# shifts the lookup by one, so positions with a count of 0 (before the first
# observation) pick up example[1] instead of indexing at 0.
desired.output <- c(example[1], example[nonzero])[cumsum(nonzero) + 1L]

很棒的基准测试。虽然我不指望能打败 Rcpp，但能否请你也测试一下我最后的代码？谢谢。很棒的两行程序，真的很好。我建议不要使用“L”作为对象名，因为肯定会有人把 L 和 1L 混淆 :-) FWIW，根据 microbenchmark，你使用 findInterval 的“UPD”版本比“UPD2”快 4% 左右。
# Fixed seed so the random benchmark input is reproducible.
set.seed(123L)
x <- sample(c(-1, 0, 1), size = 100000, replace = TRUE)
# ... (presumably the definitions of the functions compared below were elided)
# unit = "relative" reports timings relative to the fastest expression.
microbenchmark::microbenchmark(
  gagolews(x),
  gagolews_Rcpp(x),
  Roland(x),
  AndreyShabalin_match(x),
  AndreyShabalin_findInterval(x),
  AndreyShabalin_cumsum(x),
  unit = "relative"
)
## Unit: relative
##                            expr        min         lq     median         uq        max neval
##                     gagolews(x) 167.264538 163.172532 162.703810 171.186482 110.604258   100
##                gagolews_Rcpp(x)   1.000000   1.000000   1.000000   1.000000   1.000000   100
##                       Roland(x)  33.817744  34.374521  34.544877  35.633136  52.825091   100
##         AndreyShabalin_match(x)  45.217805  43.819050  44.105279  44.800612  58.375625   100
##  AndreyShabalin_findInterval(x)  45.191419  43.832256  44.283284  45.094304  23.819259   100
##        AndreyShabalin_cumsum(x)   8.701682   8.367212   8.413992   9.938748   5.676467   100

example <- c(0, 0, 0, 1, 0, 1, 1, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, 1, 0, 0)
library(zoo)

# Treat zeros as "no observation" so they can be filled from the last real one.
# The explicit assignment replaces the `res / res * res` trick: that expression
# produces NaN (not NA) for zeros and would also turn Inf / -Inf into NaN,
# silently discarding genuine observations.
res <- example
res[res == 0] <- NA

# Last observation carried forward. na.rm = FALSE keeps the leading NAs so the
# result has the same length as the input.
res <- na.locf(res, na.rm = FALSE)

# Leading positions have no earlier observation to carry; put their 0 back.
res[is.na(res)] <- 0

cbind(example, res)
#       example res
#  [1,]       0   0
#  [2,]       0   0
#  [3,]       0   0
#  [4,]       1   1
#  [5,]       0   1
#  [6,]       1   1
#  [7,]       1   1
#  [8,]       0   1
#  [9,]       0   1
# [10,]       0   1
# [11,]      -1  -1
# [12,]       0  -1
# [13,]       0  -1
# [14,]      -1  -1
# [15,]      -1  -1
# [16,]       0  -1
# [17,]       0  -1
# [18,]       1   1
# [19,]       0   1
# [20,]       0   1

example <- c(0, 0, 0, 1, 0, 1, 1, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, 1, 0, 0)

# Variant 1 - match(): cs is the running count of non-zero observations, and
# match(cs, cs) gives the first position carrying each count, i.e. the index
# of the value to carry forward.
cs <- cumsum(example != 0)
mch <- match(cs, cs)
desired.output <- example[mch]

print(cbind(example, desired.output))

# Variant 2 - findInterval(): cs is non-decreasing, so the number of entries
# <= cs - 1, plus one, is again the first position carrying each count.
mch <- findInterval(cs - 1, cs) + 1

# Variant 3 - cumsum() lookup: index straight into the non-zero values, with
# example[1] prepended to serve positions before the first observation.
NN <- example != 0
desired.output <- c(example[1], example[NN])[cumsum(NN) + 1L]