Python 使用scipy.optimize对OCR进行逻辑回归
我试图教自己不同的机器学习技术,所以我想我应该开始研究光学字符识别。我在八度音阶中实现了这一点,但我想利用python,因为我认为它会更快 当我运行代码“cost_fcn()”和“gradient_descent()”时,它们自己可以正常工作,但当我尝试使用优化器训练θ时,它会出错。我在下面附上了回溯,下面是我的代码 我正在使用pjreddie在以下位置重新编写的mnist培训集: 我真的很感谢你在这方面的帮助。我相信这个错误与我如何向“opt.minimize()”提供参数有关,但我没有成功地解决这个问题Python 使用scipy.optimize对OCR进行逻辑回归,python,optimization,scipy,ocr,logistic-regression,Python,Optimization,Scipy,Ocr,Logistic Regression,我试图教自己不同的机器学习技术,所以我想我应该开始研究光学字符识别。我在八度音阶中实现了这一点,但我想利用python,因为我认为它会更快 当我运行代码“cost_fcn()”和“gradient_descent()”时,它们自己可以正常工作,但当我尝试使用优化器训练θ时,它会出错。我在下面附上了回溯,下面是我的代码 我正在使用pjreddie在以下位置重新编写的mnist培训集: 我真的很感谢你在这方面的帮助。我相信这个错误与我如何向“opt.minimize()”提供参数有关,但我没有成功地
Traceback (most recent call last):
File "./test_code.py", line 115, in <module>
optimization = opt.minimize(cost_fcn, initial_theta, args = (X_t,y_t,lambda_t), method = 'BFGS', jac = gradient_descent)
File "/usr/lib/python3/dist-packages/scipy/optimize/_minimize.py", line 441, in minimize
return _minimize_bfgs(fun, x0, args, jac, callback, **options)
File "/usr/lib/python3/dist-packages/scipy/optimize/optimize.py", line 847, in _minimize_bfgs
gfk = myfprime(x0)
File "/usr/lib/python3/dist-packages/scipy/optimize/optimize.py", line 289, in function_wrapper
return function(*(wrapper_args + args))
File "./test_code.py", line 87, in gradient_descent
grad[0] = (1/m)*np.matmul(X[:,0].conj().transpose(),(np.subtract(h,y)))
ValueError: could not broadcast input array from shape (5) into shape (1)
import csv
from PIL import Image
import sys
import numpy as np
import math
import cmath
import scipy.optimize as opt
size = 28 # images are 28x28 pixels
test_images = [] # 10000 images
test_images_labels = [] # 10000 labels
train_images = [] # 60000 images
train_images_labels = [] # 60000 labels
# open and read train images file
with open('mnist_train.csv','r') as f:
mnist_train = csv.reader(f,delimiter=',')
for image in mnist_train:
train_images.append(list(map(int,image[1:])))
train_images_labels.append(image[0])
print("Image training set loaded... ")
# open and read test images file
with open('mnist_test.csv','r') as f:
mnist_test = csv.reader(f,delimiter=',')
for image in mnist_test:
test_images.append(image[1:])
test_images_labels.append(image[0])
print("Image test set loaded... ")
##########################################
# Multi-Class Logistic Classifier
#
#
# Define the activation function
def activation_fcn(z):
g = np.divide(1.0, (1.0 + np.exp(-z)))
return g
# Define the cost function
def cost_fcn(theta, X, y, lambda_term):
m = len(y) # number of examples
J = 0 # initialize cost term
h = activation_fcn(np.matmul(X,theta))
theta_sq = np.power(theta[1:],2)
abs_log = np.log(np.abs(np.subtract(1,h)))
J_unreg_nonsum = np.multiply(y, np.log(h)) + np.multiply((np.subtract(1,y)), abs_log)
J_unreg = (-1/m)*J_unreg_nonsum.sum(axis=0)
sum_theta_sq = theta_sq.sum(axis=0)
reg_term = (lambda_term/(2*m))*sum_theta_sq
J = J_unreg + reg_term
return J
def gradient_descent(theta, X, y, lambda_term):
m = len(y)
grad_size = X.shape
grad = np.zeros((grad_size[1],1), float)
h = activation_fcn(np.matmul(X,theta))
grad[0] = (1/m)*np.matmul(X[:,0].conj().transpose(),(np.subtract(h,y)))
grad[1:] = (1/m)*np.matmul(X[:,1:].conj().transpose(),(np.subtract(h,y))) + (lambda_term/m)*theta[1:,:]
return grad.flatten()
###############################################################################################################
### test using the numpy matrix object
## initializations
theta_t = np.matrix('-2; -1; 1; 2')
X_t = np.matrix('1.0 0.1 0.6 1.1; 1.0 0.2 0.7 1.2; 1.0 0.3 0.8 1.3; 1.0 0.4 0.9 1.4; 1.0 0.5 1.0 1.5')
y_t = np.matrix('1; 0; 1; 0; 1')
lambda_t = 3
## test regularized cost func (IT WORKS!)
test_cost = cost_fcn(theta_t, X_t, y_t, lambda_t)
print("The cost is: %d" % test_cost)
## test gradient descent (IT WORKS!)
test_grad = gradient_descent(theta_t, X_t, y_t, lambda_t)
print("The gradient is: ")
print(test_grad)
print("made it to optimizations")
## optimization of theta
m,n = X_t.shape
initial_theta = np.zeros(n, float)
optimization = opt.minimize(cost_fcn, initial_theta, args = (X_t,y_t,lambda_t), method = 'BFGS', jac = gradient_descent)
print(optimization)
###############################################################################################################