获取错误“;k=0必须至少为1“;当我在kNN算法(R)中给k一个值时
当我写下这行代码时:获取错误“;k=0必须至少为1“;当我在kNN算法(R)中给k一个值时,r,knn,R,Knn,当我写下这行代码时: > m1 <- knn( train = trainSetNorm[,c(1:41)], test = testSetNorm[,c(1:41)], cl = trainSetNorm[,c(42)], k = 703) 我使用KDD Cup 99校正作为校正的10%数据集。使用10%,并将训练数据和校正后的数据用作测试数据。下面是确切的代码: setwd("C:/Users/admin/Desktop/BIGLOU") 加载数据集 testSet &l
> m1 <- knn( train = trainSetNorm[,c(1:41)], test = testSetNorm[,c(1:41)],
cl = trainSetNorm[,c(42)], k = 703)
我使用KDD Cup 99校正作为校正的10%数据集。使用10%,并将训练数据和校正后的数据用作测试数据。下面是确切的代码:
setwd("C:/Users/admin/Desktop/BIGLOU")
加载数据集
testSet <- read.delim('corrected', sep = ',', header = FALSE)
colnames(testSet) <- c("duration", "protocol_type", "service", "flag",
"src_bytes", "dst_bytes", "land",
"wrong_fragment", "urgent", "hot", "num_failed_logins",
"logged_in", "num_compromised",
"root_shell", "su_attempted", "num_root",
"num_file_creations", "num_shells",
"num_access_files", "num_outbound_cmds",
"is_host_login","is_guest_login", "count", "srv_count",
"serror_rate", "srv_serror_rate", "rerror_rate",
"srv_rerror_rate", "same_srv_rate", "diff_srv_rate",
"srv_diff_host_rate", "dst_host_count",
"dst_host_srv_count", "dst_host_same_srv_rate",
"dst_host_diff_srv_rate", "dst_host_same_src_port_rate",
"dst_host_srv_diff_host_rate",
"dst_host_serror_rate", "dst_host_srv_serror_rate",
"dst_host_rerror_rate", "dst_host_srv_rerror_rate",
"connection_type")
trainSet <- read.delim('kddcup.data_10_percent_corrected', sep = ',', header = FALSE)
colnames(trainSet) <- c("duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes", "land",
"wrong_fragment", "urgent", "hot", "num_failed_logins",
"logged_in", "num_compromised",
"root_shell", "su_attempted", "num_root",
"num_file_creations", "num_shells",
"num_access_files", "num_outbound_cmds",
"is_host_login","is_guest_login", "count", "srv_count",
"serror_rate", "srv_serror_rate", "rerror_rate",
"srv_rerror_rate", "same_srv_rate", "diff_srv_rate",
"srv_diff_host_rate", "dst_host_count",
"dst_host_srv_count", "dst_host_same_srv_rate",
"dst_host_diff_srv_rate", "dst_host_same_src_port_rate",
"dst_host_srv_diff_host_rate",
"dst_host_serror_rate", "dst_host_srv_serror_rate",
"dst_host_rerror_rate", "dst_host_srv_rerror_rate",
"connection_type")
testSet您可以共享一个吗?knn有多个实现。你用的是哪个软件包?我用的是“类”软件包中的knn算法
testSet <- read.delim('corrected', sep = ',', header = FALSE)
colnames(testSet) <- c("duration", "protocol_type", "service", "flag",
"src_bytes", "dst_bytes", "land",
"wrong_fragment", "urgent", "hot", "num_failed_logins",
"logged_in", "num_compromised",
"root_shell", "su_attempted", "num_root",
"num_file_creations", "num_shells",
"num_access_files", "num_outbound_cmds",
"is_host_login","is_guest_login", "count", "srv_count",
"serror_rate", "srv_serror_rate", "rerror_rate",
"srv_rerror_rate", "same_srv_rate", "diff_srv_rate",
"srv_diff_host_rate", "dst_host_count",
"dst_host_srv_count", "dst_host_same_srv_rate",
"dst_host_diff_srv_rate", "dst_host_same_src_port_rate",
"dst_host_srv_diff_host_rate",
"dst_host_serror_rate", "dst_host_srv_serror_rate",
"dst_host_rerror_rate", "dst_host_srv_rerror_rate",
"connection_type")
trainSet <- read.delim('kddcup.data_10_percent_corrected', sep = ',', header = FALSE)
colnames(trainSet) <- c("duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes", "land",
"wrong_fragment", "urgent", "hot", "num_failed_logins",
"logged_in", "num_compromised",
"root_shell", "su_attempted", "num_root",
"num_file_creations", "num_shells",
"num_access_files", "num_outbound_cmds",
"is_host_login","is_guest_login", "count", "srv_count",
"serror_rate", "srv_serror_rate", "rerror_rate",
"srv_rerror_rate", "same_srv_rate", "diff_srv_rate",
"srv_diff_host_rate", "dst_host_count",
"dst_host_srv_count", "dst_host_same_srv_rate",
"dst_host_diff_srv_rate", "dst_host_same_src_port_rate",
"dst_host_srv_diff_host_rate",
"dst_host_serror_rate", "dst_host_srv_serror_rate",
"dst_host_rerror_rate", "dst_host_srv_rerror_rate",
"connection_type")
testSet[,c(2)] <- as.integer(testSet[,c(2)])
testSet[,c(3)] <- as.integer(testSet[,c(3)])
testSet[,c(4)] <- as.integer(testSet[,c(4)])
testSet[,c(7)] <- as.integer(testSet[,c(7)])
testSet[,c(12)] <- as.integer(testSet[,c(12)])
testSet[,c(21)] <- as.integer(testSet[,c(21)])
testSet[,c(22)] <- as.integer(testSet[,c(22)])
trainSet[,c(2)] <- as.integer(trainSet[,c(2)])
trainSet[,c(3)] <- as.integer(trainSet[,c(3)])
trainSet[,c(4)] <- as.integer(trainSet[,c(4)])
trainSet[,c(7)] <- as.integer(trainSet[,c(7)])
trainSet[,c(12)] <- as.integer(trainSet[,c(12)])
trainSet[,c(21)] <- as.integer(trainSet[,c(21)])
trainSet[,c(22)] <- as.integer(trainSet[,c(22)])
set.seed(60223)
rand <-runif(nrow(testSet))
testSet <- testSet[order(rand),]
set.seed(12558)
rand <-runif(nrow(trainSet))
trainSet <- trainSet[order(rand),]
normalize <- function(x) {return( abs((x - min(x))/(max(x) - min(x))))}
testSetNorm <- as.data.frame(lapply(testSet[,c(1:41)],normalize))
trainSetNorm <- as.data.frame(lapply(trainSet[,c(1:41)],normalize))
testSetNorm <-cbind(testSetNorm, testSet[,c(42)])
colnames(testSet)[42] <- "connection_type"
trainSetNorm <-cbind(trainSetNorm, trainSet[,c(42)])
colnames(trainSet)[42] <- "connection_type"
trainSetNorm <- trainSetNorm[complete.cases(trainSetNorm),]
testSetNorm <- testSetNorm[complete.cases(testSetNorm),]
require(class)
m1 <- knn( train = trainSetNorm[,c(1:41)], test = testSetNorm[,c(1:41)], cl = trainSetNorm[,c(42)], k = 703)