仅当使用plyr和doParallel时，在win平台上找不到函数%do%_R_Parallel Processing

仅当使用plyr和doParallel时，在win平台上找不到函数%do%

r parallel-processing

仅当使用plyr和doParallel时，在win平台上找不到函数%do%,r,parallel-processing,R,Parallel Processing,我用R做一些分析。在此之前，我已经编写了自己的函数，并且R脚本在mac OS上完美运行然而，当我尝试在64位Windows上运行相同的R脚本时，我遇到了一些奇怪的问题。例如，在我安装并加载了plyr包之后，我实际上可以直接运行laply函数。但当我运行自己的函数（由函数laply组成）时，它返回一个错误，指出“找不到函数laply” 另外，由于我尝试执行并行计算，我加载了doParallel包，并将其与库foreach一起使用。但是，我的一个函数返回错误，该错误表示找不到函数%do%，而其他函

我用R做一些分析。在此之前，我已经编写了自己的函数，并且R脚本在mac OS上完美运行

然而，当我尝试在64位Windows上运行相同的R脚本时，我遇到了一些奇怪的问题。例如，在我安装并加载了plyr包之后，我可以直接运行laply函数。但当我运行自己编写的、内部调用了laply的函数时，它却返回一个错误，指出“找不到函数laply”

另外，由于我想进行并行计算，我加载了doParallel包，并将其与foreach库一起使用。但是，我的一个函数返回错误，提示找不到函数%do%，而其他函数则没有这个问题。这让我觉得非常奇怪，我很想解决它，却毫无头绪

错误发生在名为Func.prune的函数中。基本上，它检查关联规则，并根据提升度（lift）查找冗余规则。函数如下所示。这里我还提供了一些输入数据

# Sample input for Func.prune.
# `rules` holds five association rules: Ant[[k]] is the antecedent item set of
# rule k and Con[[k]] its consequent item set. Items are written either as
# "NAME = value" or as "NAME in [low; high]".
rules <- list(
  Ant = list(
    c("CDWP = 3", "CT in [369.38; 450.629]"),
    c("CDWP = 3", "Month = 3"),
    c("Month = 3", "PCHWP = 3"),
    c("CDWP = 3", "Month = 3"),
    c("CDWP = 3", "Month = 3", "PCHWP = 3")
  ),
  Con = list(
    "PCHWP = 3",
    "WCC in [1040.528; 1882.797]",
    "WCC in [1040.528; 1882.797]",
    c("PCHWP = 3", "WCC in [1040.528; 1882.797]"),
    "WCC in [1040.528; 1882.797]"
  )
)

# Per-rule measures; row k belongs to rule k.
rules.m <- data.frame(
  Freq = rep(c(1760, 1740), times = c(1, 4)),
  Supp = rep(c(0.2821, 0.2788), times = c(1, 4)),
  Conf = rep(1, times = 5),
  Lift = c(1.814250, 1.946198, 1.946198, 2.028336, 1.946198)
)

# Step to which interval bounds are rounded before they are compared.
accuracy <- 50

# Func.prune: for every association rule, look for a more general rule that
# makes it redundant.
#
# Arguments
#   rules     list with elements `Ant` and `Con`; Ant[[k]] / Con[[k]] are the
#             character vectors of antecedent / consequent items of rule k.
#             Items look like "NAME = value" or "NAME in [low; high]".
#   rules.m   table with one row per rule and a "Lift" column.
#   accuracy  rounding step passed to plyr::round_any(); interval bounds are
#             rounded outward (low -> floor, high -> ceiling) before comparison.
#
# Value
#   A list with one element per rule: 0 when no qualifying sub-rule exists,
#   otherwise the index of the first sub-rule that
#     (1) has strictly fewer antecedent item names, all contained in the rule,
#     (2) has the same consequent item names with matching values / ranges,
#     (3) has antecedent values equal to, or ranges inside, those of the rule,
#     (4) has a lift >= the lift of the rule.
#
# Side effect: registers a doParallel backend with cores = 12.
Func.prune <- function(rules, rules.m, accuracy) {
  # library() instead of require(): a missing package fails here, not later
  # with a confusing "could not find function" error.
  library(foreach)
  library(doParallel)
  library(plyr)
  registerDoParallel(cores = 12)

  # Item name = first whitespace-separated token of each item string.
  first.token <- function(items) {
    sapply(strsplit(x = items, split = " "), FUN = function(x) x[1])
  }
  item.ant <- llply(.data = rules$Ant, .fun = first.token)
  item.con <- llply(.data = rules$Con, .fun = first.token)

  # Nothing to prune; also avoids iterating over 1:0.
  if (length(item.ant) == 0) {
    return(list())
  }

  # .packages is required for socket (PSOCK) workers, e.g. on Windows: they do
  # not inherit the packages attached in the master session, so without it the
  # nested %do% and plyr's round_any() are not found on the workers.
  res.prune <- foreach(i = seq_along(item.ant),
                       .packages = c("foreach", "plyr")) %dopar% {
    # k-th piece of each item after splitting on `sep`.
    split.part <- function(items, sep, k) {
      sapply(strsplit(x = items, split = sep), FUN = function(x) x[k])
    }

    # Parse "NAME in [low; high]" items into bounds rounded outward.
    range.bounds <- function(items, accuracy) {
      bounds <- strsplit(x = split.part(items, " in ", 2), split = "; ")
      low <- sapply(bounds, FUN = function(x) x[1])
      high <- sapply(bounds, FUN = function(x) x[2])
      list(
        # low is "[<number>": drop the leading bracket.
        low = round_any(as.numeric(substr(x = low, start = 2, stop = nchar(low))),
                        accuracy = accuracy, f = floor),
        # high is "<number>]": drop the trailing bracket only.
        high = round_any(as.numeric(substr(x = high, start = 1, stop = nchar(high) - 1)),
                         accuracy = accuracy, f = ceiling)
      )
    }

    ant.ori <- rules$Ant[[i]]
    con.ori <- rules$Con[[i]]
    ant <- item.ant[[i]]
    con <- item.con[[i]]

    # Candidates: strictly fewer antecedent names, all of them in rule i ...
    res.1 <- sapply(X = item.ant, FUN = function(x) {
      if ((length(x) < length(ant)) && all(x %in% ant)) 1 else 0
    })
    # ... and exactly the same consequent names.
    res.2 <- sapply(X = item.con, FUN = function(x) {
      if (length(x) == length(con) && all(x %in% con)) 1 else 0
    })
    ind.sub.cand <- which(res.1 == 1 & res.2 == 1)

    if (length(ind.sub.cand) == 0) {
      final.upd <- 0
    } else {
      # Keep candidates whose consequent values / rounded ranges equal those
      # of rule i.
      ind.filt <- foreach(j = seq_along(ind.sub.cand), .combine = c) %do% {
        con.cand <- rules$Con[[ind.sub.cand[j]]]
        con.cand.ind <- foreach(m = seq_along(con.cand), .combine = c) %do% {
          if (length(grep(pattern = "=", x = con.cand[m])) == 1) {
            name <- split.part(con.cand[m], " = ", 1)
            con.ori.value <- con.ori[grep(pattern = name, x = con.ori)]
            out.ind <- split.part(con.cand[m], " = ", 2) ==
              split.part(con.ori.value, " = ", 2)
          } else {
            name <- split.part(con.cand[m], " in ", 1)
            ori <- range.bounds(con.ori[grep(pattern = name, x = con.ori)], accuracy)
            cand <- range.bounds(con.cand[m], accuracy)
            out.ind <- (cand$low == ori$low) && (cand$high == ori$high)
          }
          out.ind
        }
        if (length(which(con.cand.ind == TRUE)) == length(con.cand)) 1 else 0
      }
      ind.sub.upd <- ind.sub.cand[which(ind.filt == 1)]

      if (length(ind.sub.upd) == 0) {
        final.upd <- 0
      } else {
        # Keep candidates whose antecedent items are equal to ("=" items) or
        # lie inside (range items) the matching items of rule i.
        out.final <- foreach(q = seq_along(ind.sub.upd), .combine = c) %do% {
          ant.filt <- rules$Ant[[ind.sub.upd[q]]]
          ant.ind <- foreach(p = seq_along(ant.filt), .combine = c) %do% {
            if (length(grep(pattern = " = ", x = ant.filt[p])) == 1) {
              name <- split.part(ant.filt[p], " = ", 1)
              ant.ori.value <- ant.ori[grep(pattern = name, x = ant.ori)]
              res.ind <- split.part(ant.filt[p], " = ", 2) ==
                split.part(ant.ori.value, " = ", 2)
            } else {
              name <- split.part(ant.filt[p], " in ", 1)
              ori <- range.bounds(ant.ori[grep(pattern = name, x = ant.ori)], accuracy)
              cand <- range.bounds(ant.filt[p], accuracy)
              res.ind <- (cand$low >= ori$low) && (cand$high <= ori$high)
            }
            res.ind
          }
          if (length(which(ant.ind == TRUE)) == length(ant.filt)) 1 else 0
        }
        ind.sub.final <- ind.sub.upd[which(out.final == 1)]

        if (length(ind.sub.final) == 0) {
          final.upd <- 0
        } else {
          # Lift check: the sub-rule must be at least as strong as rule i.
          final <- rules.m[ind.sub.final, "Lift"] >= rules.m[i, "Lift"]
          hits <- ind.sub.final[which(final == TRUE)]
          # NOTE(review): only the first qualifying index is reported. This
          # matches the previous scalar ifelse(); confirm whether all indices
          # were intended.
          final.upd <- if (length(hits) == 0) 0 else hits[1]
        }
      }
    }
    final.upd
  }
  return(res.prune)
}

我得到了以下错误：Error in { : task 5 failed - "could not find function "%do%""（任务5失败——找不到函数%do%）

非常感谢您的帮助。提前感谢您的帮助。

下面是一个简单的例子，重现了这个问题：

library(doParallel)
cl <- makePSOCKcluster(6)
registerDoParallel(cl)

# Fails: the nested %do% runs on the cluster workers, which have not loaded
# foreach, so the operator cannot be found there.
foreach(i=1:10) %dopar% {
  foreach(j=1:10) %do% j
}

# Works: .packages='foreach' loads foreach on every worker before the body
# of the outer loop is evaluated.
foreach(i=1:10, .packages='foreach') %dopar% {
  foreach(j=1:10) %do% j
}

# Shut the worker processes down once the parallel work is finished.
stopCluster(cl)

将

.packages='foreach'

选项添加到外层foreach循环即可解决这个问题：

library(doParallel)
cl <- makePSOCKcluster(6)
registerDoParallel(cl)

# Fails: the nested %do% runs on the cluster workers, which have not loaded
# foreach, so the operator cannot be found there.
foreach(i=1:10) %dopar% {
  foreach(j=1:10) %do% j
}

# Works: .packages='foreach' loads foreach on every worker before the body
# of the outer loop is evaluated.
foreach(i=1:10, .packages='foreach') %dopar% {
  foreach(j=1:10) %do% j
}

# Shut the worker processes down once the parallel work is finished.
stopCluster(cl)

请注意，如果用以下方式注册doParallel：

registerDoParallel(6)

那么该示例在Windows上会失败，但在Mac OS X和Linux上会成功。这是因为在这种情况下，doParallel在Mac OS X和Linux上使用mclapply，工作进程（worker）是由已加载foreach的R会话fork出来的，因此它们已经加载了foreach。这也是该示例在使用doMC时能够正常运行的原因

关于registerDoParallel的题外话
registerDoParallel的参数有点混乱，因为
cl
和
cores
之间的区别不清楚。我认为目的是用
cl
指定集群对象，或者用
cores
指定核心数，但也可以用
cl
指定核心数。如果
cl
在Windows上是一个数字，那么会为您隐式创建一个集群对象，因为
mclapply
在Windows上不并行运行。我认为如果使用
cores
，在Windows上也应该如此，但在doParallel 1.0.6（CRAN上的当前版本）中，我这里的结果并非如此：

> packageVersion('doParallel')
[1] ‘1.0.6’
> registerDoParallel(cores=6)
> getDoParWorkers()
[1] 3

我认为这是一个bug，并已将其报告给包的维护者。
无论如何，我都不会使用registerDoParallel(cl=makeCluster(6))，因为这样就没有办法关闭集群对象，而关闭集群是一种良好的做法。我会使用：

# Keep a handle on the cluster so that it can be shut down explicitly.
cl <- makeCluster(6)
registerDoParallel(cl)
# do stuff in parallel
stopCluster(cl)

如果集群对象是隐式为您创建的，它将被软件包的
.onUnload
函数关闭。
您是否
用require()加载了该软件包？@hd1，是的，我使用library()来加载包，并且可以直接运行该函数。但是当该函数嵌入到我自己的函数中时，就会返回错误。此外，require()和library()之间有什么区别？我以为它们是相同的。感谢您的回复。一个可重现的示例（用代码展示哪些能运行、哪些不能运行）将帮助我们诊断您的问题。@BlueMagister，谢谢。我已经相应地编辑了这个问题。一个最小的可重现示例（描述问题的最简洁的代码）会更有帮助。非常感谢您的回答。它非常详细和有用。我还有一个问题。对于Windows操作系统，在“registerDoParallel”中指定“cl”和“cores”参数有什么区别？我尝试了registerDoParallel(cl=makeCluster(12))和registerDoParallel(cores=12)，之后调用getDoParWorkers()都返回了12。这是否意味着这两个参数是等效的，我们可以任选其一？提前感谢您的帮助。@rajafan希望我更新后的答案会有所帮助。 registerDoParallel(6)