Regularized Logistic Regression in Python


This code implements regularized logistic regression, and it works fine up to the point where I use fmin_bfgs, that is, until the last line of the code. It was originally written in Octave, so before using fmin_bfgs I tested each function on some values, and all of the outputs were correct. The problem is that when I try to minimize the cost function, I get the following message:

 line 53, in cost_function_reg
    thetaR = theta[1:, 0]
IndexError: too many indices for array

The code:

The data I used:

0.051267,0.69956,1
-0.092742,0.68494,1
-0.21371,0.69225,1
-0.375,0.50219,1
-0.51325,0.46564,1
-0.52477,0.2098,1
-0.39804,0.034357,1
-0.30588,-0.19225,1
0.016705,-0.40424,1
0.13191,-0.51389,1
0.38537,-0.56506,1
0.52938,-0.5212,1
0.63882,-0.24342,1
0.73675,-0.18494,1
0.54666,0.48757,1
0.322,0.5826,1
0.16647,0.53874,1
-0.046659,0.81652,1
-0.17339,0.69956,1
-0.47869,0.63377,1
-0.60541,0.59722,1
-0.62846,0.33406,1
-0.59389,0.005117,1
-0.42108,-0.27266,1
-0.11578,-0.39693,1
0.20104,-0.60161,1
0.46601,-0.53582,1
0.67339,-0.53582,1
-0.13882,0.54605,1
-0.29435,0.77997,1
-0.26555,0.96272,1
-0.16187,0.8019,1
-0.17339,0.64839,1
-0.28283,0.47295,1
-0.36348,0.31213,1
-0.30012,0.027047,1
-0.23675,-0.21418,1
-0.06394,-0.18494,1
0.062788,-0.16301,1
0.22984,-0.41155,1
0.2932,-0.2288,1
0.48329,-0.18494,1
0.64459,-0.14108,1
0.46025,0.012427,1
0.6273,0.15863,1
0.57546,0.26827,1
0.72523,0.44371,1
0.22408,0.52412,1
0.44297,0.67032,1
0.322,0.69225,1
0.13767,0.57529,1
-0.0063364,0.39985,1
-0.092742,0.55336,1
-0.20795,0.35599,1
-0.20795,0.17325,1
-0.43836,0.21711,1
-0.21947,-0.016813,1
-0.13882,-0.27266,1
0.18376,0.93348,0
0.22408,0.77997,0
0.29896,0.61915,0
0.50634,0.75804,0
0.61578,0.7288,0
0.60426,0.59722,0
0.76555,0.50219,0
0.92684,0.3633,0
0.82316,0.27558,0
0.96141,0.085526,0
0.93836,0.012427,0
0.86348,-0.082602,0
0.89804,-0.20687,0
0.85196,-0.36769,0
0.82892,-0.5212,0
0.79435,-0.55775,0
0.59274,-0.7405,0
0.51786,-0.5943,0
0.46601,-0.41886,0
0.35081,-0.57968,0
0.28744,-0.76974,0
0.085829,-0.75512,0
0.14919,-0.57968,0
-0.13306,-0.4481,0
-0.40956,-0.41155,0
-0.39228,-0.25804,0
-0.74366,-0.25804,0
-0.69758,0.041667,0
-0.75518,0.2902,0
-0.69758,0.68494,0
-0.4038,0.70687,0
-0.38076,0.91886,0
-0.50749,0.90424,0
-0.54781,0.70687,0
0.10311,0.77997,0
0.057028,0.91886,0
-0.10426,0.99196,0
-0.081221,1.1089,0
0.28744,1.087,0
0.39689,0.82383,0
0.63882,0.88962,0
0.82316,0.66301,0
0.67339,0.64108,0
1.0709,0.10015,0
-0.046659,-0.57968,0
-0.23675,-0.63816,0
-0.15035,-0.36769,0
-0.49021,-0.3019,0
-0.46717,-0.13377,0
-0.28859,-0.060673,0
-0.61118,-0.067982,0
-0.66302,-0.21418,0
-0.59965,-0.41886,0
-0.72638,-0.082602,0
-0.83007,0.31213,0
-0.72062,0.53874,0
-0.59389,0.49488,0
-0.48445,0.99927,0
-0.0063364,0.99927,0
0.63265,-0.030612,0
I'm still learning Python, so any suggestions are welcome. Thank you for your attention, and I apologize for any problems; this is my first time asking a question here.

You should take a look at this


You should try printing the theta array to see what values it actually contains and what its dimensions are. The kind of error you're getting suggests it doesn't have that many dimensions.
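
To make the diagnosis concrete: fmin_bfgs flattens the starting point and then calls the cost function with a 1-D theta, so a two-axis index such as theta[1:, 0] fails. A minimal sketch of the failure and the usual fix (the names here are illustrative):

import numpy as np

theta = np.zeros(3)            # what fmin_bfgs actually passes in: shape (3,)
# theta[1:, 0]                 # raises IndexError: too many indices for array

theta = theta.reshape(-1, 1)   # restore the column-vector shape (3, 1)
thetaR = theta[1:, 0]          # now valid: every weight except the intercept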

Your logistic regression scores above 80% accuracy! Not shabby. Nicely done. I only needed to make a few Pythonic edits.

I would break it up in Python (compute the cost and the gradient as separate functions). In Matlab/Octave you can return the pair from a single function.
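
If you do want to keep the Octave style of returning both values at once, scipy.optimize.minimize supports that too: pass jac=True and have the objective return a (cost, gradient) tuple. A rough sketch, assuming theta and y are flat 1-D arrays; the cost_and_grad name is mine:

import numpy as np
import scipy.optimize as op

def cost_and_grad(theta, X, y, l):
    # returns (cost, gradient) in one call, Octave-style;
    # assumes theta has shape (n,) and y has shape (m,)
    m = X.shape[0]
    h = 1 / (1 + np.exp(-X.dot(theta)))  # sigmoid hypothesis, shape (m,)
    theta_r = np.r_[0, theta[1:]]        # intercept zeroed out of the penalty
    cost = (-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) / m \
           + (l / (2 * m)) * theta_r.dot(theta_r)
    grad = X.T.dot(h - y) / m + (l / m) * theta_r
    return cost, grad

# jac=True tells minimize that the objective returns both value and gradient:
# res = op.minimize(cost_and_grad, x0, args=(X, y, 1.0), jac=True, method='TNC')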

I took Andrew Ng's Stanford Machine Learning course on Coursera and did a similar assignment.
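
For reference, these are the standard regularized cost and gradient that the script computes, with the intercept \theta_0 excluded from the penalty:

J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log h_\theta(x^{(i)}) - (1-y^{(i)})\log\left(1-h_\theta(x^{(i)})\right)\right] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2

\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (j \ge 1)

where h_\theta(x) = \frac{1}{1 + e^{-\theta^T x}} is the sigmoid hypothesis, and the j = 0 partial derivative has no regularization term.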

import scipy.optimize as op
import numpy as np
from pylab import scatter, show, legend, xlabel, ylabel
from numpy import where
from sklearn.preprocessing import PolynomialFeatures


def sigmoid(z):
    return 1/(1 + np.exp(-z))


def compute_gradient(theta, X, y, l):
    # scipy passes theta in as a flat 1-D array, so restore the column shapes first
    m, n = X.shape
    theta = theta.reshape((n, 1))
    theta_r = theta[1:n, :]  # every weight except the intercept
    y = y.reshape((m, 1))
    h = sigmoid(X.dot(theta))
    non_regularized_gradient = ((np.sum(((h-y)*X), axis=0))/m).reshape(n, 1)
    # prepend a 0 so the intercept term is not regularized
    reg = np.insert((l/m)*theta_r, 0, 0, axis=0)
    grad = non_regularized_gradient + reg
    return grad.flatten()  # scipy expects the gradient back as a flat array


def compute_cost(theta, X, y, l):
    m, n = X.shape
    # reshape theta and y into column vectors before computing h, so that h
    # and y are both (m, 1); with a flat (m,) h against an (m, 1) y, the
    # elementwise products would broadcast to (m, m) and inflate the cost
    theta = theta.reshape((n, 1))
    theta_r = theta[1:n, :]
    y = y.reshape((m, 1))
    h = sigmoid(X.dot(theta))
    cost = np.sum(np.multiply(-y, np.log(h)) - np.multiply((1-y), np.log(1-h)))/m
    reg = (l/(2*m)) * np.sum(np.square(theta_r))  # intercept excluded from the penalty
    J = cost + reg
    return J


def make_predictions(theta, X):
    # round the sigmoid output: probability >= 0.5 becomes class 1, otherwise 0
    m, n = X.shape
    return np.round(sigmoid(X.dot(theta.reshape(n, 1))))


# the data file has no header row, so nothing needs to be skipped
data = np.loadtxt("ex2data2.txt", delimiter=",")
nr, nc = data.shape
X = data[:, 0:nc - 1]   # the two test measurements
y = data[:, [nc - 1]]   # labels as an (m, 1) column
pos = where(y == 1)[0]  # row indices of the positive examples
neg = where(y == 0)[0]
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Equipment Test 1')
ylabel('Equipment Test 2')
legend(['Nominal', 'Adverse'])
show()
storeX = X  # keep the raw two-column features around
# map the two features onto all polynomial terms up to degree 6
poly = PolynomialFeatures(6)
X = poly.fit_transform(X)
m, n = X.shape
initial_theta = np.zeros((n, 1))  # minimize() flattens this to 1-D internally
l = 1  # regularization strength (lambda)

print("Optimizing...")
Result = op.minimize(fun=compute_cost, x0=initial_theta, args=(X, y, l), method='TNC', jac=compute_gradient)
optimal_theta = Result.x
print(Result.x.shape)
print("optimal theta value")
print(optimal_theta)
p = make_predictions(optimal_theta, X)
score = np.mean(np.double(p == y))
print("Score:")
print(score)
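
As an aside, since the question mentions testing each function by hand: scipy.optimize.check_grad compares an analytic gradient against a finite-difference estimate derived from the cost, which is a quick way to confirm that compute_cost and compute_gradient agree with each other. A sketch using the variables defined above:

from scipy.optimize import check_grad

# returns the norm of the difference between compute_gradient and a
# finite-difference approximation of compute_cost's gradient; a value
# near zero (e.g. below 1e-4) means the two functions are consistent
err = check_grad(compute_cost, compute_gradient, initial_theta.flatten(), X, y, l)
print("gradient check error:", err)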
Here is the output from PyCharm Community Edition, with Python version 3.7.3:


Thank you so much! I'm taking this course and your code helped me a lot! It runs perfectly in Python 3.8.2.

Glad it helped. Would you consider accepting the answer and upvoting? It would be much appreciated; I put 30 to 45 minutes of work into it.

Of course, I was just looking for the button. Could you explain why you used TNC as the method in op.minimize, rather than BFGS as Andrew uses in Octave?
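
For what it's worth, BFGS is also accepted by op.minimize, so trying it is a one-line change. A sketch reusing the objects defined in the script above:

# BFGS works as a drop-in alternative to TNC for this convex problem
Result = op.minimize(fun=compute_cost, x0=initial_theta, args=(X, y, l), method='BFGS', jac=compute_gradient)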