Python: logistic regression for OCR using scipy.optimize


I am trying to teach myself different machine learning techniques, so I thought I would start with optical character recognition. I already have this working in Octave, but I wanted to redo it in Python because I think it will be faster.

The functions cost_fcn() and gradient_descent() work fine when I run them on their own, but when I try to train theta with the optimizer, it errors out. I have attached the traceback below, followed by my code.

I am using the MNIST training set as reformatted into CSV by pjreddie, available here:

I would really appreciate any help with this. I believe the error has to do with how I am supplying the arguments to opt.minimize(), but I have not managed to work it out.

Traceback (most recent call last):
  File "./test_code.py", line 115, in <module>
    optimization = opt.minimize(cost_fcn, initial_theta, args = (X_t,y_t,lambda_t), method = 'BFGS', jac = gradient_descent)
  File "/usr/lib/python3/dist-packages/scipy/optimize/_minimize.py", line 441, in minimize
    return _minimize_bfgs(fun, x0, args, jac, callback, **options)
  File "/usr/lib/python3/dist-packages/scipy/optimize/optimize.py", line 847, in _minimize_bfgs
    gfk = myfprime(x0)
  File "/usr/lib/python3/dist-packages/scipy/optimize/optimize.py", line 289, in function_wrapper
    return function(*(wrapper_args + args))
  File "./test_code.py", line 87, in gradient_descent
    grad[0] = (1/m)*np.matmul(X[:,0].conj().transpose(),(np.subtract(h,y)))
ValueError: could not broadcast input array from shape (5) into shape (1)
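
From the traceback, the very first gradient evaluation already fails. As far as I can tell from the docs, opt.minimize() flattens x0 and passes theta to both the cost and the jacobian as a plain 1-D ndarray of shape (n,). My suspicion (the snippet below is just me poking at shapes, not part of the program) is that mixing that 1-D theta with my np.matrix fixtures makes h - y broadcast out to a 5x5 matrix instead of staying a length-5 vector:

import numpy as np

y = np.matrix('1; 0; 1; 0; 1')   # shape (5, 1), like my y_t fixture below
h = np.ones(5) * 0.5             # roughly the shape h takes with a 1-D theta
print(np.subtract(h, y).shape)   # prints (5, 5): broadcast, not elementwise
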
import csv
from PIL import Image
import sys
import numpy as np
import math
import cmath
import scipy.optimize as opt

size = 28           # images are 28x28 pixels

test_images = []            # 10000 images
test_images_labels = []     # 10000 labels

train_images = []           # 60000 images
train_images_labels = []    # 60000 labels

# open and read train images file
with open('mnist_train.csv','r') as f:
    mnist_train = csv.reader(f,delimiter=',')
    for image in mnist_train:
        train_images.append(list(map(int,image[1:])))
        train_images_labels.append(image[0])

print("Image training set loaded... ")

# open and read test images file
with open('mnist_test.csv','r') as f:
    mnist_test = csv.reader(f,delimiter=',')
    for image in mnist_test:
        test_images.append(list(map(int,image[1:])))    # convert pixel values to int, same as the training set
        test_images_labels.append(image[0])

print("Image test set loaded... ")

##########################################
# Multi-Class Logistic Classifier
##########################################


# Define the activation function
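# g(z) = 1 / (1 + exp(-z)) squashes any real z into the interval (0, 1)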
def activation_fcn(z):
    g = np.divide(1.0, (1.0 + np.exp(-z)))
    return g

# Define the cost function
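# J(theta) = -(1/m) * sum( y*log(h) + (1-y)*log(1-h) )
#            + (lambda/(2m)) * sum(theta[1:]^2)   (bias theta[0] is not regularized)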
def cost_fcn(theta, X, y, lambda_term): 
    m = len(y)      # number of examples
    J = 0           # initialize cost term

    h = activation_fcn(np.matmul(X,theta))
    theta_sq = np.power(theta[1:],2)
    abs_log = np.log(np.abs(np.subtract(1,h)))

    J_unreg_nonsum = np.multiply(y, np.log(h)) + np.multiply((np.subtract(1,y)), abs_log)
    J_unreg = (-1/m)*J_unreg_nonsum.sum(axis=0)

    sum_theta_sq = theta_sq.sum(axis=0)
    reg_term = (lambda_term/(2*m))*sum_theta_sq
    J = J_unreg + reg_term

    return J

def gradient_descent(theta, X, y, lambda_term):
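    # grad_j = (1/m) * X[:, j]^T (h - y), plus (lambda/m) * theta_j for j >= 1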
    m = len(y)
    grad_size = X.shape
    grad = np.zeros((grad_size[1],1), float)

    h = activation_fcn(np.matmul(X,theta))  
    grad[0] = (1/m)*np.matmul(X[:,0].conj().transpose(),(np.subtract(h,y)))
    grad[1:] = (1/m)*np.matmul(X[:,1:].conj().transpose(),(np.subtract(h,y))) + (lambda_term/m)*theta[1:,:]

    return grad.flatten()
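
One workaround I have been experimenting with (just a sketch, not part of the program yet) is a pair of thin wrappers that reshape the 1-D theta that minimize() passes back into the (n, 1) column my functions expect:

def cost_1d(theta, X, y, lambda_term):
    # minimize()/BFGS hand theta in as shape (n,); cost_fcn wants a column
    theta_col = np.asarray(theta, dtype=float).reshape(-1, 1)
    return float(cost_fcn(theta_col, X, y, lambda_term))

def gradient_1d(theta, X, y, lambda_term):
    # gradient_descent already returns a flat array, so only the input needs reshaping
    theta_col = np.asarray(theta, dtype=float).reshape(-1, 1)
    return gradient_descent(theta_col, X, y, lambda_term)

With these, the call below would become opt.minimize(cost_1d, initial_theta, args = (X_t,y_t,lambda_t), method = 'BFGS', jac = gradient_1d), but I have not confirmed this is the intended way to use the API.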


###############################################################################################################
### test using the numpy matrix object 

## initializations
theta_t = np.matrix('-2; -1; 1; 2')
X_t = np.matrix('1.0 0.1 0.6 1.1; 1.0 0.2 0.7 1.2; 1.0 0.3 0.8 1.3; 1.0 0.4 0.9 1.4; 1.0 0.5 1.0 1.5')
y_t = np.matrix('1; 0; 1; 0; 1')
lambda_t = 3

## test regularized cost func (IT WORKS!)
test_cost = cost_fcn(theta_t, X_t, y_t, lambda_t)
print("The cost is: %d" % test_cost)

## test gradient descent (IT WORKS!)
test_grad = gradient_descent(theta_t, X_t, y_t, lambda_t)
print("The gradient is: ")
print(test_grad)

print("made it to optimizations")
## optimization of theta
m,n = X_t.shape
initial_theta = np.zeros(n, float)
optimization = opt.minimize(cost_fcn, initial_theta, args = (X_t,y_t,lambda_t), method = 'BFGS', jac = gradient_descent)

print(optimization)
###############################################################################################################
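
For completeness, here is a separate plain-ndarray version of the same test that I have been sketching on the side (the names here are mine, not from the program above). Everything stays 1-D where possible and np.matrix is avoided entirely, which seems to be the shape convention the optimizer itself uses; I am not sure whether this is the idiomatic fix:

import numpy as np
import scipy.optimize as opt

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def cost_1d_arrays(theta, X, y, lam):
    # theta: (n,), X: (m, n), y: (m,) -- all plain ndarrays
    m = len(y)
    h = sigmoid(np.matmul(X, theta))                     # shape (m,)
    J = (-1.0 / m) * (np.matmul(y, np.log(h))
                      + np.matmul(1 - y, np.log(1 - h)))
    return J + (lam / (2 * m)) * np.sum(theta[1:] ** 2)  # skip the bias term

def grad_1d_arrays(theta, X, y, lam):
    m = len(y)
    h = sigmoid(np.matmul(X, theta))
    grad = (1.0 / m) * np.matmul(X.T, h - y)             # shape (n,)
    grad[1:] += (lam / m) * theta[1:]                    # regularize j >= 1 only
    return grad                                          # 1-D, as BFGS expects

X_a = np.array([[1.0, 0.1, 0.6, 1.1],
                [1.0, 0.2, 0.7, 1.2],
                [1.0, 0.3, 0.8, 1.3],
                [1.0, 0.4, 0.9, 1.4],
                [1.0, 0.5, 1.0, 1.5]])
y_a = np.array([1.0, 0.0, 1.0, 0.0, 1.0])                # (5,), not (5, 1)

res = opt.minimize(cost_1d_arrays, np.zeros(X_a.shape[1]),
                   args=(X_a, y_a, 3), method='BFGS', jac=grad_1d_arrays)
print(res.x, res.fun)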