使LOOCV函数在R中工作得更快_R_Matlab

使LOOCV函数在R中工作得更快

r matlab

使LOOCV函数在R中工作得更快,r,matlab,R,Matlab,我编写了这个函数来执行LOOCV，以便对数据集中的样本进行分类。问题是它花了太长时间。我需要使用它来引导150次迭代。我怎样才能让它工作得更快 DoLOOCVa2 <- function(X,Class,S,MTHD,LV) { # # Leave-one-out cross validation (LOO-CV) with either the pls.lda function (MTHD = 1) # or SVMs (MTHD = 2) # X is the inpu

我编写了这个函数来执行留一法交叉验证（LOOCV），以便对数据集中的样本进行分类。问题是它运行时间太长。我需要用它进行 150 次迭代的自助法（bootstrap）。我怎样才能让它运行得更快？

DoLOOCVa2 <- function(X,Class,S,MTHD,LV) {
  #
  # Leave-one-out cross validation (LOO-CV) with the pls.lda function (MTHD = 1),
  # SVMs (MTHD = 2) or random forests (MTHD = 3)
  # X is the input matrix (one sample per row)
  # Class is the class vector (one numeric label per row of X; the predicted
  # label is compared with it by subtraction)
  # S is the scaling to be performed (passed straight through to pretreat()):
  # 0 = None; 1 = MC; 2 = AS; 3 = RS1; 4 = RS2; 5 = Norm
  # MTHD is the classification function (1, 2 or 3, see above)
  # LV is the vector of settings to evaluate, one LOO-CV run per element:
  # the number of latent variables for PLS-LDA, the number of trees for
  # random forests (= 0 if SVM is used, where the value itself is ignored)
  # Differs from DoLOOCV2 in that only the minimum LV is outputted corresponding to the maximum %Overall
  #
  # Returns:
  #   MTHD = 1: list(OptLVs = position in LV of the first best success rate,
  #                  Overall = % success for every element of LV)
  #   MTHD = 3: list(OptLVs = the LV value (number of trees) of the first best
  #                  success rate, Overall = as above)
  #   MTHD = 2: the % success vector only
  # NOTE(review): OptLVs is a position for MTHD = 1 but an LV value for
  # MTHD = 3; the two agree only when LV is 1:k. Kept as-is for callers.
  #
  # Stops with an error if MTHD is not 1, 2 or 3, if LV is empty, or if
  # Class does not have one entry per row of X.
  #

  # Source pretreat() only when the calling script has not already defined it,
  # so repeated calls (e.g. inside a bootstrap loop) do not re-read the file.
  if (!exists("pretreat", mode = "function")) {
    source("pretreat.r")
  }

  # Fail early with a clear message instead of a late "object not found".
  if (length(MTHD) != 1 || !(MTHD %in% c(1, 2, 3))) {
    stop("MTHD must be 1 (PLS-LDA), 2 (SVM) or 3 (random forests)", call. = FALSE)
  }
  n_obs <- nrow(X)
  if (is.null(n_obs) || n_obs < 1) {
    stop("X must have at least one row", call. = FALSE)
  }
  if (length(Class) != n_obs) {
    stop("Class must have one element per row of X", call. = FALSE)
  }
  n_lv <- length(LV)
  if (n_lv < 1) {
    stop("LV must contain at least one value", call. = FALSE)
  }

  cat(paste("\nSetting number of LVs to ", LV, "...\n"))

  # The pre-treated training/test pair for a left-out row does not depend on
  # LV, so build each pair once and reuse it for every LV instead of calling
  # pretreat() again inside the LV loop.
  # NOTE(review): assumes pretreat() is deterministic for given inputs, and
  # trades memory (all folds are held at once) for speed - confirm both are
  # acceptable for the data sizes in use.
  folds <- lapply(seq_len(n_obs), function(j) {
    train_raw <- X[-j, ]                 # "training set": X without row j
    test_raw <- t(as.matrix(X[j, ]))     # "test set": row j as a 1-row matrix
    scaled <- pretreat(train_raw, test_raw, S)
    list(train = scaled$trDATAscaled,
         test = scaled$tstDATAscaled,
         train_class = Class[-j])
  })

  overall <- numeric(n_lv)   # % success for each element of LV
  for (i in seq_len(n_lv)) {
    cat(paste("\nLOO-CV for LV",i,"\n"))
    miss <- numeric(n_obs)   # predicted minus actual class; 0 means a hit
    for (j in seq_len(n_obs)) {
      fold <- folds[[j]]
      if (MTHD == 1) {
        # PLS-LDA with LV[i] latent variables
        fit <- pls.lda(fold$train, fold$train_class, fold$test, LV[i])
        predicted <- as.numeric(fit$predclass)
      } else if (MTHD == 2) {
        # SVM (LV is not used)
        fit <- svm(fold$train, as.factor(fold$train_class), scale = FALSE,
                   type = "C-classification", kernel = "radial")
        predicted <- as.numeric(predict(fit, fold$test))
      } else {
        # Random forests with LV[i] trees
        fit <- randomForest(x = fold$train, y = as.factor(fold$train_class),
                            xtest = fold$test, ntree = LV[i])
        predicted <- as.numeric(fit$test$predicted)
      }
      # NOTE(review): as.numeric() on a factor yields level codes, so this
      # comparison presumably relies on Class being coded 1..k - confirm.
      miss[j] <- predicted - Class[j]
    }
    # Overall success rate of classification for LV i (NA differences are
    # not counted as hits, matching the previous which()-based count)
    overall[i] <- 100 * sum(miss == 0, na.rm = TRUE) / n_obs
  }

  cat("\nThe %success classified was:\n")
  show(overall)

  if (MTHD == 1) {
    cat("\nThe suggested optimum LV(s) are: \n")
    OptLVs <- which(overall == max(overall))   # all positions with the best rate
    show(OptLVs)
    return(list(OptLVs = OptLVs[1], Overall = overall))
  } else if (MTHD == 3) {
    cat("\nThe suggested optimum TREE(s) are: \n")
    OptLVs <- LV[which(overall == max(overall))]   # tree counts with the best rate
    show(OptLVs)
    return(list(OptLVs = OptLVs[1], Overall = overall))
  }

  # SVM: there is no LV to optimise, so only the success rate is returned
  overall
}   # End function

你对代码做过性能分析（profiling）吗？

嗨 @Roland，我没有做过性能分析，但我用 proc.time 函数测得单次运行耗时超过 24 秒。对于 150 次迭代，这将超过 1 小时。

对于你所使用的方法做自助法（bootstrap）来说，1 小时听起来并不算糟。如果你想让代码更高效，就需要先知道慢的是哪一部分。因此，请先做性能分析。

@Roland，是的，1 小时听起来还不错，但我还必须针对不同的缩放方法和数据集重复运行。请问我该如何进行性能分析？谢谢。

看一看 ?Rprof，另外还有几个软件包扩展了这一功能。

              total.time total.pct self.time self.pct
"DoLOOCVa2"        17.90    100.00      0.04     0.22
"pls.lda"           9.62     53.74      0.02     0.11
"pretreat"          8.20     45.81      0.02     0.11
"source"            6.02     33.63      1.24     6.93
"lda"               3.98     22.23      0.02     0.11
"pretreat1"         3.96     22.12      0.04     0.22
"lda.formula"       3.96     22.12      0.00     0.00
"FUN"               3.48     19.44      1.62     9.05