foreach和dopar不创建对象,但返回NULL
我在用Maxent做一些空间分析。我有一个很长的脚本,其中包含许多在列表中收集的输出。它与for循环和光栅堆栈中的低分辨率气候预测器(在我的core i5,6gb笔记本中)配合得很好。但是我需要使用一组高分辨率的光栅,所有的问题都来自这个问题。即使使用16核、32gb的虚拟机,处理过程也非常缓慢,3天后,内存不足,在我的循环中运行了大约50圈(共92种)后,运行就结束了。我试图通过收集垃圾来清理内存并使用doParallel来改进这个脚本。在新脚本使用低分辨率预测器干净地运行之后,我将尝试使用高分辨率预测器 因此,我将脚本更改为使用foreach和dopar不创建对象,但返回NULL,r,memory,foreach,parallel-processing,doparallel,R,Memory,Foreach,Parallel Processing,Doparallel,我在用Maxent做一些空间分析。我有一个很长的脚本,其中包含许多在列表中收集的输出。它与for循环和光栅堆栈中的低分辨率气候预测器(在我的core i5,6gb笔记本中)配合得很好。但是我需要使用一组高分辨率的光栅,所有的问题都来自这个问题。即使使用16核、32gb的虚拟机,处理过程也非常缓慢,3天后,内存不足,在我的循环中运行了大约50圈(共92种)后,运行就结束了。我试图通过收集垃圾来清理内存并使用doParallel来改进这个脚本。在新脚本使用低分辨率预测器干净地运行之后,我将尝试使用高
foreach
而不是for
,并使用%dopar%
但到目前为止,我得到的结果是:
[[1]]
NULL
[[2]]
NULL
[[3]]
NULL
[[4]]
NULL
我看到了关于同一问题的另一个问题,但这家伙需要的非常简单的解决方案不适用于我。。所以,任何提示都是非常受欢迎的
#install.packages("dismo")
library(dismo)
#install.packages("scales")
library(scales)
#install.packages("rgdal")
library(rgdal)
#install.packages("rgeos")
library(rgeos)
#install.packages("rJava")
library(rJava)
#install.packages("foreach")
library(foreach)
#install.packages("doParallel")
library(doParallel)
#Colors to use in the plots
MyRbw2<-c('#f4f4f4','#3288bd','#66c2a5','#e6f598','#fee08b','#f46d43','#9e0142')
colfunc_myrbw2<-colorRampPalette(MyRbw2)
#Create empty lists to recieve outputs
xm_list<-list()
xm_spc_list<-list()
e_spc_list<-list()
px_spc_list<-list()
tr_spc_list<-list()
spc_pol1<-list()
spc_pol5<-list()
tr<-list()
#Create empty data frame to recieve treshold values for each species
tr_df<-data.frame(matrix(NA, nrow=92, ncol=7))
tr_df[,1]<-as.character(tree_list)
names(tr_df)<- c('spp',"kappa","spec_sens","no_omission","prevalence","equal_sens_spec","sensitivity")
# Assigning objects to run Maxent
data_points <- tree_cd_points # this is a list with SpatialPoints for 92 species
data_list <- tree_list # list with the species names
counts_data<- counts_tree_cd # number of points for each species
predictors2<-predictors_low # rasterStack of Bioclim layers (climatic variables), low resolution
#Stablishing extent for Maxent predictions
xmin=-120; xmax=-35; ymin2=-40; ymax=35
limits2 <- c(xmin, xmax, ymin2, ymax)
# Making the cluster for doParallel
cores<-detectCores() # I have 16
cl <- makeCluster(cores[1]-1) #not to overload your computer
registerDoParallel(cl)
#Just to keep track of time
ptime1 <- proc.time()
pdf("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/treesp_maxent_20170823.pdf",
paper = "letter", height = 11, width=8,5, pointsize=12,pagecentre = TRUE)
#I have 92 species, but I'll run just the first 4 to test
foreach(i=1:4, .packages=c("dismo","scales","rgdal","rgeos","rJava")) %dopar% { #Runs only species with 5 or more points to avoid maxent problems
if (counts_data$n[i]>4) { #If the species has more than 4 occurrence points, run maxent
tryCatch({ #makes the loop go on despite errors
#Sets train, test and total points for Maxent
group <- kfold(x=data_points[[i]], 5)
pres_train<- data_points[[i]][group != 1, ]
pres_test <- data_points[[i]][group == 1, ]
spoints<- data_points[[i]]
#Sets background points for Maxent
backg <- randomPoints(predictors2, n=20000, ext=limits2, extf = 1.25)
colnames(backg) = c('lon', 'lat')
group <- kfold(backg, 5)
backg_train <- backg[group != 1, ]
backg_test <- backg[group == 1, ]
#The maxent itself (put the xm in the empty list that I created earlier to store all xms)
xm_spc_list[[i]] <- maxent(x=predictors2, p=spoints, a=backg ,
factors='ecoreg',
args=c('visible=true',
'betamultiplier=1',
'randomtestpoints=20',
'randomseed=true',
'linear=true',
'quadratic=true',
'product=true',
'hinge=true',
'threads=4',
'responsecurves=true',
'jackknife=true',
'removeduplicates=false',
'extrapolate=true',
'pictures=true',
'cache=true',
'maximumiterations=5000',
'askoverwrite=false'),
path=paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i]), overwrite=TRUE)
par(mfrow=c(1,1),mar = c(2,2, 2, 2))
plot(xm_spc_list[[i]], main=paste(data_list[i]))
response(xm_spc_list[[i]])
#Evaluating how good is the model and putting the evaluation values in a list
e_spc_list[[i]] <- evaluate(pres_test, backg_test, xm_spc_list[[i]], predictors2)
#Predicting the climatic envelopes and Sending to a list os predictions
px_spc_list[[i]] <- predict(predictors2, xm_spc_list[[i]], ext=limits2, progress='text',
filename=paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i],"/",gsub('\\s+', '_', data_list[i]),"_pred.grd"), overwrite=TRUE)
tr_df[i,2:7]<-threshold(e_spc_list[[i]])
tr[[i]]<-threshold(e_spc_list[[i]], 'spec_sens')
#Pol 1 will be the regular polygon, default treshold
spc_pol1[[i]] <- rasterToPolygons(px_spc_list[[i]]>tr[[i]],function(x) x == 1,dissolve=T)
writeOGR(obj = spc_pol1[[i]], dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm1/",data_list[i]), driver = "ESRI Shapefile",
layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )
#Pol 5 will be a 100km^2 circle around the occurrence points
circ <- circles(spoints, d=5642,lonlat=TRUE)
circ <- circ@polygons
crs(circ)<-crs(wrld_cropped)
circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)
#To write de polygon to a file, the function writeOGR needs an object SPDF, so...
#Getting Polygon IDs
circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
#Making the IDs row names
row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
# Make spatial polygon data frame
circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)
#Save the polygon, finally
writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i]), driver = "ESRI Shapefile",
layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )
spc_pol5[[i]]<-circ_SPDF
#Now the plots
par(mfrow=c(2,3),mar = c(2,1, 1, 1))
plot(px_spc_list[[i]], axes=FALSE, legend=TRUE, legend.shrink=1, col=colfunc_myrbw2(20), main=paste((data_list[i]),' - Maxent'))
plot(wrld_cropped,add=TRUE, border='dark grey',axes=FALSE)
points(data_points[[i]], pch=21,col="white", bg='hotpink', lwd=0.5, cex=0.7)
plot(wrld_cropped, border='dark grey', col="#f9f9f9",axes=FALSE, main='px>tr')
plot(spc_pol1[[i]] , main=paste((data_list[i]),' - Range'), add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8),axes=FALSE)
points(data_points[[i]], pch="°",col="black", cex=0.7)
plot(wrld_cropped, border='dark grey', col="#f9f9f9",axes=FALSE, main=paste(data_list[i],"circles"))
plot(circ, add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
}, error=function(e){cat("Warning message:",conditionMessage(e), "\n")})
#But sometimes, even with >4 occurrence points, Maxent fails...
#So I'll make sure that if I have >4 points but maxent didn't work, I get the circles anyway
f<-paste("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i],"/",gsub('\\s+', '_', data_list[i]),"_pred.grd", sep="")
gc() #Just collecting garbage to speed up the process
if (!file.exists(f)){ # then, if f (maxent output) doesn't exist, create the circles at least
spoints<- data_points[[i]]
circ <- circles(spoints, d=5642,lonlat=TRUE)
circ <- circ@polygons
crs(circ)<-crs(wrld_cropped)
circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)
#To write de polygon to a file, the function writeOGR needs an object SPDF, so...
#Getting Polygon IDs
circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
#Making the IDs row names
row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
# Make spatial polygon data frame
circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)
#Save the polygon, finally
#dir.create(paste("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""))
writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""), driver = "ESRI Shapefile",
layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )
spc_pol5[[i]]<-circ_SPDF
plot(wrld_cropped, border='dark grey', col="#f9f9f9",axes=FALSE, main=data_list[i])
plot(circ, add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
#plot(spoints,pch=21,col="white", bg='hotpink', lwd=0.1, cex=0.5, add=TRUE)
}
} else { #If the species does not have more than 4 points,
#do not run maxent, but create a circles polygon
spoints<- data_points[[i]]
#For the circle to have 100km2, d should be 5641.9 ...
circ <- circles(spoints, d=5642,lonlat=TRUE)
circ <- circ@polygons
crs(circ)<-crs(wrld_cropped)
circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)
circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)
writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""), driver = "ESRI Shapefile",
layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )
par(mfrow=c(1,1),mar = c(2,2, 2, 2))
plot(wrld_cropped, border='dark grey', col="#f9f9f9",axes=FALSE, main=data_list[i])
plot(circ, add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
spc_pol5[[i]]<-circ_SPDF
gc() #collecting garbage before a nuw run
}
}
dev.off()
dev.off() #to close that pdf I started before the loop
ptime2<- proc.time() - ptime1 #just checking the time
ptime2
#安装程序包(“dismo”)
图书馆(dismo)
#安装软件包(“天平”)
图书馆(比例尺)
#安装程序包(“rgdal”)
图书馆(rgdal)
#安装程序包(“rgeos”)
图书馆(rgeos)
#安装软件包(“rJava”)
图书馆(rJava)
#install.packages(“foreach”)
图书馆(foreach)
#安装软件包(“doParallel”)
图书馆(双平行)
#要在绘图中使用的颜色
MyRbw2您可以调用foreach
指定“收集器”变量,如:
results您可以调用foreach
指定“收集器”变量,如:
results在请求帮助时,您应该提供一个可用于运行和测试的数据。这是太多的代码,要求别人看。尽量简化你的问题。您过去是否成功使用过foreach
?也许从小事做起。您希望从每次迭代中保存什么?我不确定我是否看到你的循环返回任何东西的地方。不,我以前从未使用过foreach。。。我花了20个小时来学习,因为我对R很陌生,但我什么都没有得到。。。对于for循环,每次迭代都会给我一个maxent对象、一个maxent求值、一个针对该maxent的tresholds集、一个预测、预测的多边形和第二个多边形(更简单)。所有这些内容都被发送到列表中,并调用相应的绘图,以便在循环之前以PDF格式打印出来。我的脚本在for循环中运行良好,但在foreach中没有成功。。。我将尝试简化我的问题,并找出如何使其重现。谢谢大家!@泰语如果你不懂foreach,这可能会让你感兴趣。很好,这是一个学习foreach的好方法!谢谢你,Privé!在请求帮助时,您应该提供一个可用于运行和测试的数据。这是太多的代码,要求别人看。尽量简化你的问题。您过去是否成功使用过foreach
?也许从小事做起。您希望从每次迭代中保存什么?我不确定我是否看到你的循环返回任何东西的地方。不,我以前从未使用过foreach。。。我花了20个小时来学习,因为我对R很陌生,但我什么都没有得到。。。对于for循环,每次迭代都会给我一个maxent对象、一个maxent求值、一个针对该maxent的tresholds集、一个预测、预测的多边形和第二个多边形(更简单)。所有这些内容都被发送到列表中,并调用相应的绘图,以便在循环之前以PDF格式打印出来。我的脚本在for循环中运行良好,但在foreach中没有成功。。。我将尝试简化我的问题,并找出如何使其重现。谢谢大家!@泰语如果你不懂foreach,这可能会让你感兴趣。很好,这是一个学习foreach的好方法!谢谢你,Privé!非常感谢你,罗布。。。有了你的提示,我可以收集我的输出!非常感谢你,罗布。。。有了你的提示,我可以收集我的输出!
out <- list(xm_spc_list= xm_spc_list,
e_spc_list = e_spc_list,
px_spc_list = px_spc_list,
... = ...,
... = ....)
return(out)
}
xm_spc_list <- data.table::rbindlist(do.call(c,lapply(results, "[", 1)))
e_spc_list <- data.table::rbindlist(do.call(c,lapply(results, "[", 2)))
....
....