Python中的梯度下降数值优化
我试图解决以下数值优化问题:找到向量 x,使代价函数 0.5*||Bx − v||₂² 最小化,其中 B 是矩阵,v 是向量。我实现了两个梯度下降算法:一个手动调整步长,另一个用公式 (2.5) 自动计算步长。代价函数的梯度是 Bᵀ(Bx − v)。此外,我将我的实现与 numpy.linalg 中的 solve(A, b) 函数进行比较——注意优化问题的解也是线性系统 Ax = b 的解,其中 A = BᵀB,b = Bᵀv。到目前为止我得到的结果很差:误差大、运行时间长。我不知道是我的实现有错误,还是这些算法在我设置的计算实验中本来就表现如此。在计算实验中,我先生成随机"解"向量 x 和矩阵 B,然后据此计算 A 和 b。任何反馈都将不胜感激。这是我的代码:
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import time
def residue(x, B, v):
    """Return the relative squared residual ||B x - v||_2^2 / ||v||_2^2."""
    r = np.dot(B, x) - v
    return (LA.norm(r, 2) ** 2) / (LA.norm(v, 2) ** 2)
def gradGD(x, B, v):
    """Gradient of the cost 0.5 * ||B x - v||_2^2, i.e. B^T (B x - v)."""
    residual = np.dot(B, x) - v
    return np.dot(B.T, residual)
def gradientDescent(B, v, alpha, tol, x0):
    """Minimise 0.5 * ||B x - v||_2^2 by fixed-step gradient descent.

    Parameters
    ----------
    B : (m, n) ndarray
    v : (m, 1) ndarray
    alpha : float
        Fixed step size; must satisfy alpha < 2 / lambda_max(B^T B) to converge.
    tol : float
        Stop when the relative squared residual ||B x - v||^2 / ||v||^2 < tol.
    x0 : (n, 1) ndarray
        Starting point.

    Returns
    -------
    (n, 1) ndarray
        Approximate minimiser.
    """
    x = x0
    # Hoisted: the denominator of the stopping test is loop-invariant.
    v_norm_sq = LA.norm(v, 2) ** 2
    while True:
        # Compute the residual once and reuse it for both the stopping test
        # and the gradient B^T r.  The previous version did the B @ x product
        # twice per iteration and also built unused normal-equation matrices
        # A = B^T B and b = B^T v on every call.
        r = np.dot(B, x) - v
        res = (LA.norm(r, 2) ** 2) / v_norm_sq
        print('Residue ', res)
        if res < tol:
            break
        x = x - alpha * np.dot(B.T, r)
    return x
# Gradient descent with automatic (Barzilai-Borwein) step size
def gradientDescentBB(B, v, tol, x0):
    """Minimise 0.5 * ||B x - v||_2^2 by gradient descent with BB step sizes.

    Parameters
    ----------
    B : (m, n) ndarray
    v : (m, 1) ndarray
    tol : float
        Stop when the relative squared residual ||B x - v||^2 / ||v||^2 < tol.
    x0 : (n, 1) ndarray
        Starting point.

    Returns
    -------
    (n, 1) ndarray
        Approximate minimiser.
    """
    x = x0
    # Bug fix: the previous iterate must start at x0.  The old code used
    # np.zeros((N, 1)) -- reading the *global* N and silently assuming
    # x0 == 0 -- and never updated xpre on the first step, so the first
    # BB secant pair s = x_k - x_{k-1} was wrong for nonzero x0.
    xpre = x0
    grad = None  # no previous gradient yet (replaces the old `flag` variable)
    # Hoisted: the denominator of the stopping test is loop-invariant.
    v_norm_sq = LA.norm(v, 2) ** 2
    while True:
        # Residual computed once, shared by the stopping test and gradient.
        r = np.dot(B, x) - v
        res = (LA.norm(r, 2) ** 2) / v_norm_sq
        if res < tol:
            break
        if grad is None:
            # First iteration: no secant information, take a tiny fixed step.
            grad = np.dot(B.T, r)
            xpre = x
            x = x - 1e-06 * grad
            continue
        gradpre = grad
        grad = np.dot(B.T, r)
        y = grad - gradpre
        s = x - xpre
        # BB1 step size: <s, y> / ||y||^2
        alpha = np.dot(s.T, y) / (LA.norm(y, 2) ** 2)
        xpre = x
        x = x - alpha * grad
    return x
# Solves the optimization problem directly via the normal equations A x = b
def solver(B, v):
    """Solve min_x 0.5*||B x - v||^2 via A x = b with A = B^T B, b = B^T v."""
    gram = np.dot(B.T, B)
    rhs = np.dot(B.T, v)
    return np.linalg.solve(gram, rhs)
# Main routine: compare gradient descent against the direct normal-equations
# solve over repeated random trials, recording time and error for each.
N = 1000
epsilon = 1.0e-6
a = 1 / N - epsilon
n_trials = 20  # renamed from `iter`, which shadowed the builtin
mytime_iter = []
time2_iter = []
myeabs_iter = []
myerel_iter = []
myepercent_iter = []
cgseabs_iter = []
cgserel_iter = []
cgsepercent_iter = []
# Running the experiment many times
for i in range(n_trials):
    print('Iteration: ', i)
    # NOTE(review): every entry of B concentrates near 1 (a ~ 1e-3), so B is
    # close to the rank-one all-ones matrix and B^T B is extremely
    # ill-conditioned -- a likely cause of the slow gradient-descent
    # convergence observed in this experiment.
    B = a * np.random.randn(N, N) + np.ones((N, N))
    x0 = np.random.randn(N, 1)  # ground-truth solution of the optimisation problem
    v = np.dot(B, x0)
    # perf_counter is the appropriate monotonic clock for measuring intervals
    mystart = time.perf_counter()
    # x = gradientDescent(B, v, alpha=1999100e-09, tol=1e-05, x0=np.zeros((N, 1)))  # Gradient Descent: Method 1
    x = gradientDescentBB(B, v, tol=1e-05, x0=np.zeros((N, 1)))  # Gradient Descent: Method 2
    myend = time.perf_counter()
    mytime = myend - mystart
    start2 = time.perf_counter()
    xalt = solver(B, v)  # Solution of the optimization problem by solving A*x = b
    end2 = time.perf_counter()
    time2 = end2 - start2  # bug fix: was `start2 - end2`, which is always negative
    myeabs = LA.norm(x - x0, 2)
    myerel = myeabs / LA.norm(x0, 2)
    myepercent = myerel * 100
    cgseabs = LA.norm(xalt - x0, 2)
    cgserel = cgseabs / LA.norm(x0, 2)
    cgsepercent = cgserel * 100
    mytime_iter.append(mytime)
    time2_iter.append(time2)
    myeabs_iter.append(myeabs)
    myerel_iter.append(myerel)
    myepercent_iter.append(myepercent)
    cgseabs_iter.append(cgseabs)
    cgserel_iter.append(cgserel)
    cgsepercent_iter.append(cgsepercent)
# Figure 1: wall-clock time of each trial (scatter, one dot per trial).
plt.figure(1)
plt.plot(mytime_iter, 'bo', label="GD")
plt.plot(time2_iter, 'ro', label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Time (s)")
# Figures 2-4: error curves, all sharing the same GD-vs-solve() layout.
error_figures = (
    (2, myeabs_iter, cgseabs_iter, "# Iteration", "Absolute error"),
    (3, myerel_iter, cgserel_iter, "# Iteration", "Relative error"),
    (4, myepercent_iter, cgsepercent_iter, None, "Relative error (%)"),
)
for fignum, gd_series, solve_series, xlab, ylab in error_figures:
    plt.figure(fignum)
    plt.plot(gd_series, "-b", label="GD")
    plt.plot(solve_series, "-r", label="solve()")
    plt.legend(loc="upper right")
    if xlab is not None:
        plt.xlabel(xlab)
    plt.ylabel(ylab)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import time

def residue(x, B, v):
    aux = np.dot(B, x) - v
    aux = pow(LA.norm(aux, 2), 2)
    aux = aux / pow(LA.norm(v, 2), 2)
    return aux

def gradGD(x, B, v):
    aux = np.dot(B, x) - v
    return np.dot(B.T, aux)

def gradientDescent(B, v, alpha, tol, x0):
    A = np.dot(B.T, B)
    b = np.dot(B.T, v)
    x = x0
    while True:
        res = residue(x, B, v)
        print('Residue ', res)
        if res < tol:
            break
您所说的"大错误"是什么意思?能提供更多信息吗?——我指的是相对误差。如果 xs 是算法的输出,x 是真实解,那么相对误差计算为 范数(xs − x, 2) / 范数(x, 2)。我得到的相对误差 > 0.7(即超过 70%)。