在R中更新foreach循环中的光栅值?(光栅时间序列NA插补)

在R中更新foreach循环中的光栅值?(光栅时间序列NA插补),r,foreach,parallel-processing,gis,parallel-foreach,R,Foreach,Parallel Processing,Gis,Parallel Foreach,我试图在时间序列光栅中输入NA值。以下是我的数据的可复制示例: library(raster) library(rgdal) library(doParallel) library(foreach) r1 <- r2 <- r3 <- r4 <- r5 <- raster(nrow=100, ncol=100) values(r1) <- runif(ncell(r1)) values(r2) <- runif(ncell(r2)) values(r3

我试图对时间序列栅格中的NA值进行插补(imputation)。以下是我的数据的可复现示例:

library(raster)
library(rgdal)
library(doParallel)
library(foreach)

# Create five 100 x 100 rasters from the same template, then give each one its
# own set of uniform random values.
# NOTE(review): there is no set.seed() call, so the values differ on every run.
r1 <- r2 <- r3 <- r4 <- r5 <- raster(nrow=100, ncol=100)
values(r1) <- runif(ncell(r1))
values(r2) <- runif(ncell(r2))
values(r3) <- runif(ncell(r3))
values(r4) <- runif(ncell(r4))
values(r5) <- runif(ncell(r5))

# Stack the five layers and turn the stack into a brick (the time series).
s <- stack(r1, r2, r3, r4, r5)
time_series <- brick(s)
# Punch four NA gaps into the series. [row, col] addresses a cell; the trailing
# index presumably selects the layer within that cell -- TODO confirm.
time_series[1, 30][2] <- NA
time_series[3, 20][3] <- NA
time_series[5, 10][5] <- NA
time_series[8, 40][4] <- NA

如果我不把foreach循环的结果赋值给一个对象,它只会把结果打印出来。我希望最终能够更新栅格对象,但找不到解决办法。

我不知道如何把矩阵中的值写入栅格对象——setValues()似乎不起作用,可能是因为两者的维度不同,如下所示:

> dim(time_series_new)
[1] 100 100   5

> dim(time_series_new2)
[1] 10000     5

我知道foreach循环的工作方式不同。有没有一种方法可以在foreach循环中更新time_series_new2对象,以便在最后更新光栅对象

编辑:

setValues()实际上可以工作!作为:

# Load the matrix produced by the foreach loop back into a brick.
# `time_series` only serves as the geometry template here: setValues() returns
# a new object holding the supplied cell-by-layer matrix, so copying the brick
# and blanking it with NA first is unnecessary work.
time_series_new3 <- setValues(time_series, time_series_new2)

> time_series_new3
class      : RasterBrick 
dimensions : 100, 100, 10000, 5  (nrow, ncol, ncell, nlayers)
resolution : 3.6, 1.8  (x, y)
extent     : -180, 180, -90, 90  (xmin, xmax, ymin, ymax)
crs        : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
source     : memory
names      :      layer.1,      layer.2,      layer.3,      layer.4,      layer.5 
min values : 1.468023e-04, 3.525158e-04, 9.689084e-05, 5.349121e-05, 4.214607e-05 
max values :    0.9999564,    0.9999854,    0.9997795,    0.9999780,    0.9997880 

> time_series_new2
class      : RasterBrick 
dimensions : 100, 100, 10000, 5  (nrow, ncol, ncell, nlayers)
resolution : 3.6, 1.8  (x, y)
extent     : -180, 180, -90, 90  (xmin, xmax, ymin, ymax)
crs        : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
source     : memory
names      :      layer.1,      layer.2,      layer.3,      layer.4,      layer.5 
min values : 1.468023e-04, 3.525158e-04, 9.689084e-05, 5.349121e-05, 4.214607e-05 
max values :    0.9999564,    0.9999854,    0.9997795,    0.9999780,    0.9997880

> all.equal(time_series_new2, time_series_new3)
[1] TRUE

> all.equal(time_series_new2, time_series_new3)
[1] TRUE

尽管如此,我还是想知道如何在foreach中进行更新。

在`foreach`循环中,您无法依靠副作用来更新rasterBrick对象`time_series_new`。也就是说,`time_series_new`知道自己是什么——一个`Raster`类型的对象;而`rbind`组合函数会把非data.frame的对象强制转换为`matrix`。这就是100x100x5变成10000x5的原因。

我假设您打算进行并行(`parallel`)计算,因为`for`循环的速度很慢。如果是这样,我建议换一种方式来处理这个问题,尤其是在缺失值不多的情况下。

我们首先可以看到实际上有多少行缺少数据:

# Rows of the cell-by-layer value matrix that contain at least one NA.
# is.na() already yields a logical matrix, so it is passed to which() directly
# (no comparison against the reassignable shorthand `T`).
missing_dat_rows <- which(is.na(getValues(time_series)), arr.ind = TRUE)[, 1]
missing_dat_rows <- unique(missing_dat_rows)

missing_dat_rows
#[1]  30 220 740 410

嗨,谢谢你详尽的回答!不幸的是,我处理的是大型卫星图像,它们将有巨大的差距,因此需要并行方法。我将尝试在更方便的时间将您的方法转换为并行方法。请参见以下答案:
> dim(time_series_new)
[1] 100 100   5

> dim(time_series_new2)
[1] 10000     5

# Load the matrix produced by the foreach loop back into a brick.
# `time_series` only serves as the geometry template here: setValues() returns
# a new object holding the supplied cell-by-layer matrix, so copying the brick
# and blanking it with NA first is unnecessary work.
time_series_new3 <- setValues(time_series, time_series_new2)

> time_series_new3
class      : RasterBrick 
dimensions : 100, 100, 10000, 5  (nrow, ncol, ncell, nlayers)
resolution : 3.6, 1.8  (x, y)
extent     : -180, 180, -90, 90  (xmin, xmax, ymin, ymax)
crs        : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
source     : memory
names      :      layer.1,      layer.2,      layer.3,      layer.4,      layer.5 
min values : 1.468023e-04, 3.525158e-04, 9.689084e-05, 5.349121e-05, 4.214607e-05 
max values :    0.9999564,    0.9999854,    0.9997795,    0.9999780,    0.9997880 

> time_series_new2
class      : RasterBrick 
dimensions : 100, 100, 10000, 5  (nrow, ncol, ncell, nlayers)
resolution : 3.6, 1.8  (x, y)
extent     : -180, 180, -90, 90  (xmin, xmax, ymin, ymax)
crs        : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
source     : memory
names      :      layer.1,      layer.2,      layer.3,      layer.4,      layer.5 
min values : 1.468023e-04, 3.525158e-04, 9.689084e-05, 5.349121e-05, 4.214607e-05 
max values :    0.9999564,    0.9999854,    0.9997795,    0.9999780,    0.9997880

> all.equal(time_series_new2, time_series_new3)
[1] TRUE

# Rows of the cell-by-layer value matrix that contain at least one NA.
# is.na() already yields a logical matrix, so it is passed to which() directly.
missing_dat_rows <- which(is.na(getValues(time_series)), arr.ind = TRUE)[, 1]
missing_dat_rows <- unique(missing_dat_rows)

missing_dat_rows
#[1]  30 220 740 410

# Impute only the affected rows. The values are pulled out of the brick once,
# patched in a plain matrix, and written back once; running the extract/assign
# round trip inside the loop would rebuild the whole brick on every iteration.
# impute.loess() is defined further down in this file and must be sourced
# before this loop runs.
time_series3 <- time_series
ts_values <- getValues(time_series3)
for (mis_row in missing_dat_rows) {
  ts_values[mis_row, ] <- impute.loess(ts_values[mis_row, ])
}
values(time_series3) <- ts_values
#' Fill missing values in a series with a loess fit
#'
#' Fits `loess(y ~ x)` over the positions `1..x.length` and uses the fit either
#' to replace only the `NA` entries of `y` or to replace the whole series.
#'
#' @param y Numeric vector, possibly containing `NA`.
#' @param x.length Number of positions to fit over; defaults to `length(y)`.
#' @param s Span handed to `loess()`.
#' @param smooth.data If `TRUE`, return the fitted value at every position
#'   instead of only filling the `NA` entries.
#' @param ... Accepted for backward compatibility; not used.
#' @return `y` unchanged when it is entirely `NA` or has no `NA` at all;
#'   otherwise `y` with its `NA` entries (or, with `smooth.data = TRUE`, every
#'   entry) replaced by loess predictions.
impute.loess <- function(y, x.length = NULL, s = 0.80,
                         smooth.data = FALSE, ...) {
  if (is.null(x.length)) {
    x.length <- length(y)
  }

  # Nothing to fit against, or nothing to fill: hand the input back untouched.
  if (all(is.na(y)) || !anyNA(y)) {
    return(y)
  }

  # Warnings are silenced while fitting, as before, but the caller's setting is
  # restored on exit instead of leaving `warn = -1` switched on globally.
  old_opts <- options(warn = -1)
  on.exit(options(old_opts), add = TRUE)

  x <- seq_len(x.length)
  p <- loess(y ~ x, span = s, data.frame(x = x, y = y))

  if (isTRUE(smooth.data)) {
    # Replace the whole series by the fitted curve.
    y <- predict(p, x)
  } else {
    # Replace every NA (including a single one) by its predicted value.
    na.idx <- which(is.na(y))
    y[na.idx] <- predict(p, data.frame(x = na.idx))
  }
  y
}