Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/76.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 与我自己使用类库的代码相比,KNN重复cv方法返回的结果不合理_R_Machine Learning_R Caret_Knn - Fatal编程技术网

R 与我自己使用 class 包编写的代码相比,caret 中 KNN 的 repeatedcv 方法返回的结果不合理

R 与我自己使用类库的代码相比,KNN重复cv方法返回的结果不合理,r,machine-learning,r-caret,knn,R,Machine Learning,R Caret,Knn,最近,我正在用乳腺癌数据集做KNN,这对于机器学习者来说是非常有名的 我在插入符号库中使用带有kKnn方法选项的训练函数来寻找最佳k值,并使用“repeatedcv”方法 我决定随着重复次数的增加找到最佳的k值。然而,该函数产生了不同的最优k值 我的源代码在这里 accuracy_data<-vector() accuracy_data[1:10]<-0 current_op<-0 count_same<-0 str(knnFit) for (i

最近,我正在用乳腺癌数据集做KNN,这对于机器学习者来说是非常有名的

我使用 caret 包中的 train 函数(method = "knn"),并采用 "repeatedcv" 重采样方法来寻找最佳 k 值

我决定随着重复次数的增加找到最佳的k值。然而,该函数产生了不同的最优k值

我的源代码在这里

  # Repeatedly run caret's cross-validated kNN tuning on a reshuffled copy of
  # `training`, accumulating the per-candidate accuracy across repetitions.
  # Stop early once the best candidate has stayed the same for three
  # consecutive repetitions (or after 50 repetitions).
  #
  # Reads from the enclosing environment: `training` (data frame) and
  # `col_label_name` (name of the outcome column).
  # Leaves behind: `accuracy_data` (summed accuracy per candidate),
  # `current_op` (index of the current best candidate), `count_same`,
  # `knnFit` (the last fitted caret model).
  n_candidates <- 20  # must equal `tuneLength` passed to train() below
  accuracy_data <- numeric(n_candidates)
  current_op <- 0
  count_same <- 0

  # The original script inspected `knnFit` here unconditionally, which fails
  # with "object not found" in a fresh session. Only inspect a previous fit
  # if one actually exists.
  if (exists("knnFit")) {
    str(knnFit)
  }

  # Neither the model formula nor the resampling spec depends on the
  # repetition, so build them once.
  formula <- as.formula(paste(col_label_name, ' ~ .'))
  ctrl <- trainControl(method = "repeatedcv", repeats = 1)

  for (i in 1:50) {
    cat('\n current repitation is', i)

    # Shuffle the rows reproducibly for this repetition.
    set.seed(i * 10)
    training_now <- training[sample(nrow(training)), ]

    # Separate seed so the fold assignment inside train() is reproducible too.
    set.seed(i * 100)
    knnFit <- train(
      formula,
      data = training_now,
      method = "knn",
      trControl = ctrl,
      preProcess = c("center", "scale"),
      tuneLength = n_candidates
    )

    # Fail loudly instead of silently recycling if caret returns a different
    # number of candidates than expected.
    stopifnot(length(knnFit$results$Accuracy) == n_candidates)
    accuracy_data <- accuracy_data + knnFit$results$Accuracy

    best <- which.max(accuracy_data)

    # Report the k value of the current best candidate, read from caret's own
    # tuning grid rather than reconstructed arithmetically.
    cat('\n', knnFit$results$k[best], '\n')

    if (current_op == best) {
      count_same <- count_same + 1
    } else {
      # The best candidate changed: restart the stability counter so that the
      # stop rule requires consecutive agreement.
      count_same <- 0
      current_op <- best
    }

    if (count_same == 3) {
      cat('\n', i, 'time repitition is enough \n')
      break
    }
  }
accuracy_data
  # Hand-rolled repeated k-fold cross-validation for kNN.
  #
  # For each repetition the training rows (and their labels) are reshuffled,
  # then every odd k in 1, 3, ..., 39 is scored by its mean fold accuracy.
  # Scores are summed across repetitions in `accuracy_data`; the loop stops
  # once the best candidate index has been unchanged for three consecutive
  # repetitions (or after 100 repetitions).
  #
  # Reads from the enclosing environment: `training`, `train_label`,
  # `fold_n` (number of folds) and `point` (fold boundary row indices).
  # NOTE(review): the features are passed to knn() as-is here, whereas the
  # caret-based run applies center/scale preprocessing and starts its grid at
  # a different k; presumably that contributes to the differing results.
  n_candidates <- 20
  # One accumulator slot per candidate k. The original allocated only 10
  # slots, leaving slots 11-20 as NA forever so those k values could never be
  # selected by which.max().
  accuracy_data <- numeric(n_candidates)
  current_op <- 0
  count_same <- 0

  print('Finding best parameter k by using 10-fold cross-validation method. please wait....')
  for (k in 1:100) {
    # Shuffle rows and labels with the same permutation so they stay aligned.
    random_rows <- sample(nrow(training))
    training <- training[random_rows, ]
    train_label <- train_label[random_rows]
    print(paste('The number of repeatation:', k))

    for (j in seq_len(n_candidates)) { ## the number k that will be swept
      kvalue <- 2 * j - 1
      acc <- 0
      for (i in seq_len(fold_n)) { ## accumulate accuracy over folds
        # NOTE(review): this range includes both endpoints, so if `point`
        # holds consecutive fold boundaries, adjacent folds share one row.
        # Confirm how `point` is constructed.
        held_out <- point[i]:point[i + 1]

        training_now <- training[-held_out, ]
        train_label_now <- train_label[-held_out]
        validation_set <- training[held_out, ]
        validation_label <- train_label[held_out]

        validation_pred <- knn(
          train = training_now,
          test = validation_set,
          cl = train_label_now,
          k = kvalue
        )

        # Fraction of held-out rows predicted correctly.
        accuracy <- mean(validation_label == validation_pred)
        acc <- accuracy + acc
      }
      cat('\n Accuracy:', acc / fold_n, 'when k=', kvalue)
      accuracy_data[j] <- accuracy_data[j] + acc / fold_n
    }

    best <- which.max(accuracy_data)

    if (current_op == best) {
      count_same <- count_same + 1
    } else {
      # Best candidate changed: restart the stability counter.
      count_same <- 0
      current_op <- best
    }

    if (count_same == 3) {
      cat('\n', k, 'time repitition is enough \n')
      break
    }

    cat('\n maximum row:', best, '\n')
  }