使用随机森林查找R中MNIST数据集的误分类数字_R_Classification_Random Forest_Mnist

使用随机森林查找R中MNIST数据集的误分类数字

使用随机森林查找R中MNIST数据集的误分类数字,r,classification,random-forest,mnist,R,Classification,Random Forest,Mnist,我想找出随机森林分类器在R中对MNIST数据集预测错误的数字，特别是每个数字（0-9）各找一个示例。下面是我的尝试。但是，代码在执行 y + 1 时总是报错“二进制运算符的非数值参数”，我不确定该如何修复。我认为下面的代码可以获取预测最差的数字；另外，能否修改此方法，以便为每个数字各取一个示例？ # The below code is copied directly from https://gist.github.com/daviddalpiaz/ae62ae5ccd0bada4b9acd6db

我想找出随机森林分类器在R中对MNIST数据集预测错误的数字，特别是每个数字（0-9）各找一个示例。下面是我的尝试。但是，代码在执行 y + 1 时总是报错：

错误：二进制运算符的非数值参数（non-numeric argument to binary operator）

我不确定该如何修复。我认为下面的代码可以获取预测最差的数字；另外，能否修改此方法，以便为每个数字各取一个示例？

# The below code is copied directly from https://gist.github.com/daviddalpiaz/ae62ae5ccd0bada4b9acd6dbc9008706
# and https://gist.github.com/brendano/39760

# Fetch the four MNIST archives (training/test images and labels) from
# http://yann.lecun.com/exdb/mnist/ into the working directory.
download.file(
  url = "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
  destfile = "train-images-idx3-ubyte.gz"
)
download.file(
  url = "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
  destfile = "train-labels-idx1-ubyte.gz"
)
download.file(
  url = "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
  destfile = "t10k-images-idx3-ubyte.gz"
)
download.file(
  url = "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
  destfile = "t10k-labels-idx1-ubyte.gz"
)

# Decompress each archive, in the same order it was downloaded.
invisible(lapply(
  c(
    "train-images-idx3-ubyte.gz",
    "train-labels-idx1-ubyte.gz",
    "t10k-images-idx3-ubyte.gz",
    "t10k-labels-idx1-ubyte.gz"
  ),
  R.utils::gunzip
))

# Read an uncompressed IDX3 (MNIST-style) image file into a data frame.
#
# Args:
#   filename: path to the IDX3-ubyte file.
#
# Returns:
#   A data.frame with one row per image and (rows * cols) integer columns
#   holding the unsigned byte values in the order they are stored in the file.
#
# Raises:
#   An error if the file holds fewer pixel bytes than its header announces.
load_image_file <- function(filename) {
  f <- file(filename, "rb")
  # Release the connection even when a read fails part-way through.
  on.exit(close(f), add = TRUE)

  # Header: four big-endian 32-bit integers. The first one is read only to
  # advance past it; the next three are image count, rows and columns.
  readBin(f, "integer", n = 1, size = 4, endian = "big")
  n_images <- readBin(f, "integer", n = 1, size = 4, endian = "big")
  n_rows <- readBin(f, "integer", n = 1, size = 4, endian = "big")
  n_cols <- readBin(f, "integer", n = 1, size = 4, endian = "big")

  n_values <- n_images * n_rows * n_cols
  x <- readBin(f, "integer", n = n_values, size = 1, signed = FALSE)

  # matrix() would silently recycle a short vector, so reject truncated input.
  if (length(x) != n_values) {
    stop(
      "'", filename, "' is truncated: expected ", n_values,
      " pixel values but read ", length(x),
      call. = FALSE
    )
  }

  data.frame(matrix(x, ncol = n_rows * n_cols, byrow = TRUE))
}

# Read an uncompressed IDX1 (MNIST-style) label file.
#
# Args:
#   filename: path to the IDX1-ubyte file.
#
# Returns:
#   An integer vector with one unsigned byte value per label, in file order.
#
# Raises:
#   An error if the file holds fewer labels than its header announces.
load_label_file <- function(filename) {
  f <- file(filename, "rb")
  # Release the connection even when a read fails part-way through.
  on.exit(close(f), add = TRUE)

  # Header: two big-endian 32-bit integers. The first one is read only to
  # advance past it; the second is the number of labels that follow.
  readBin(f, "integer", n = 1, size = 4, endian = "big")
  n_labels <- readBin(f, "integer", n = 1, size = 4, endian = "big")

  y <- readBin(f, "integer", n = n_labels, size = 1, signed = FALSE)

  # A short read means the file is incomplete; fail instead of returning
  # fewer labels than there are images.
  if (length(y) != n_labels) {
    stop(
      "'", filename, "' is truncated: expected ", n_labels,
      " labels but read ", length(y),
      call. = FALSE
    )
  }

  y
}

# Read the pixel data for the training and test splits.
train <- load_image_file("train-images-idx3-ubyte")
test <- load_image_file("t10k-images-idx3-ubyte")

# Attach the digit labels to each split as a factor column named `y`.
train[["y"]] <- as.factor(load_label_file("train-labels-idx1-ubyte"))
test[["y"]] <- as.factor(load_label_file("t10k-labels-idx1-ubyte"))

library(randomForest)
# Fit the forest on the first 1000 training rows only, then print the
# `confusion` component of the fitted model.
model.rf <- randomForest(y ~ ., data = train[seq_len(1000), ])
model.rf[["confusion"]]

# Predict the test set, then print the share of correct predictions and the
# predicted-versus-actual cross-tabulation.
predict.rf <- predict(model.rf, test)
mean(predict.rf == test[["y"]])
table(predicted = predict.rf, actual = test[["y"]])

# Plot up to 7 x 7 randomly chosen misclassified test digits.
# In each panel the true label is printed in red (bottom left) and the
# predicted label in blue (bottom right).
grid_side <- 7
misclassified <- which(predict.rf != test$y)
# Index into the vector rather than calling sample(misclassified, ...):
# sample() treats a single number x as 1:x, and it fails when asked for more
# items than are available.
iset <- misclassified[sample.int(
  length(misclassified),
  min(grid_side^2, length(misclassified))
)]

# Only the pixel columns form the image. The label column `y` is a factor;
# letting it into the matrix makes it non-numeric and breaks the arithmetic.
pixel_cols <- setdiff(names(test), "y")
# The images are square, so the side length follows from the pixel count
# (28 for the 784 MNIST pixel columns).
img_side <- sqrt(length(pixel_cols))

old_par <- par(mar = c(0, 0, 0, 0), mfrow = c(grid_side, grid_side))
for (j in iset) {
  # Pixels are stored row by row, so fill the matrix by row.
  img <- matrix(
    as.numeric(unlist(test[j, pixel_cols])),
    img_side, img_side,
    byrow = TRUE
  )
  # Pixel bytes are 0-255 but rasterImage() needs values in [0, 1];
  # invert so the ink is dark on a light background.
  img <- 1 - img / 255

  plot(0, 0, xlab = "", ylab = "", axes = FALSE)
  rasterImage(img, -1, -1, 1, 1)
  box()
  text(-0.8, -0.7, as.character(test$y[j]), cex = 3, col = "red")
  text(0.8, -0.7, as.character(predict.rf[j]), cex = 3, col = "blue")
}
# Put the graphics parameters back so later plots are not squeezed into
# the 7 x 7 grid with zero margins.
par(old_par)

#以下代码直接复制自 https://gist.github.com/daviddalpiaz/ae62ae5ccd0bada4b9acd6dbc9008706
#以及 https://gist.github.com/brendano/39760
#从 http://yann.lecun.com/exdb/mnist/ 下载数据
download.file("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
              "train-images-idx3-ubyte.gz")
download.file("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
              "train-labels-idx1-ubyte.gz")
download.file("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
              "t10k-images-idx3-ubyte.gz")
download.file("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
              "t10k-labels-idx1-ubyte.gz")
#解压文件
R.utils::gunzip("train-images-idx3-ubyte.gz")
R.utils::gunzip("train-labels-idx1-ubyte.gz")
R.utils::gunzip("t10k-images-idx3-ubyte.gz")
R.utils::gunzip("t10k-labels-idx1-ubyte.gz")
#加载图像文件
load_image_file = function(filename) {
  ret = list()
  f = file(filename, 'rb')
  readBin(f, 'integer', n = 1, size = 4, endian = 'big')
  n    = readBin(f, 'integer', n = 1, size = 4, endian = 'big')
  nrow = readBin(f, 'integer', n = 1, size = 4, endian = 'big')
  ncol = readBin(f, 'integer', n = 1, size = 4, endian = 'big')
  x = readBin(f, 'integer', n = n * nrow * ncol, size = 1, signed = FALSE)
  close(f)
  data.frame(matrix(x, ncol = nrow * ncol, byrow = TRUE))
}
#加载标签文件
load_label_file = function(filename) {
  f = file(filename, 'rb')
  readBin(f, 'integer', n = 1, size = 4, endian = 'big')
  n = readBin(f, 'integer', n = 1, size = 4, endian = 'big')
  y = readBin(f, 'integer', n = n, size = 1, signed = FALSE)
  close(f)
  y
}
#加载图像
train = load_image_file("train-images-idx3-ubyte")
test  = load_image_file("t10k-images-idx3-ubyte")
#加载标签
train$y = as.factor(load_label_file("train-labels-idx1-ubyte"))
test$y  = as.factor(load_label_file("t10k-labels-idx1-ubyte"))
library(randomForest)
#训练数据子集的测试分类
model.rf