Python中的梯度下降数值优化

Python中的梯度下降数值优化,python,mathematical-optimization,numerical-methods,Python,Mathematical Optimization,Numerical Methods,我试图解决下一个数值优化问题:找到向量x,使代价函数最小化0.5*范数(Bx-v,2)^2,其中B是矩阵,v是向量。我已经实现了两个梯度下降算法。其中一个我手动调整步长,另一个我用公式(2.5)自动计算。代价函数的梯度是B^T(B*x-v) 此外,我将我的实现与numpy.linalg中的solve(A,B)函数进行比较,注意到优化问题的解决方案是线性系统A*x=B的解决方案,其中A=B^T*B,B=B^T*v。到目前为止,我得到的结果很差:错误大,运行时间长。我不知道我的实现中是否有错误,或者这些算法是如何在我设置的计算实验中工作的。

我试图解决下一个数值优化问题:找到向量x,使代价函数最小化0.5*范数(Bx-v,2)^2,其中B是矩阵,v是向量。我已经实现了两个梯度下降算法。其中一个我手动调整步长,另一个我用公式(2.5)自动计算。代价函数的梯度是B^T(B*x-v)

此外,我将我的实现与numpy.linalg中的solve(A,B)函数进行比较,注意到优化问题的解决方案是线性系统A*x=B的解决方案,其中A=B^T*B,B=B^T*v。到目前为止,我得到的结果很差:错误大,运行时间长。我不知道我的实现中是否有错误,或者这些算法是如何在我设置的计算实验中工作的

在计算实验中,我生成随机“解”向量x和矩阵B,然后相应地计算A和B

任何反馈都将不胜感激

这是我的代码:

import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import time

def residue(x, B, v):
    """Return the squared residual ||B x - v||_2^2 normalized by ||v||_2^2."""
    r = np.dot(B, x) - v
    numerator = LA.norm(r, 2) ** 2
    denominator = LA.norm(v, 2) ** 2
    return numerator / denominator

def gradGD(x, B, v):
    """Gradient of the cost 0.5*||B x - v||_2^2 with respect to x: B^T (B x - v)."""
    return B.T.dot(np.dot(B, x) - v)

def gradientDescent(B, v, alpha, tol, x0):
    """Minimize 0.5*||B x - v||_2^2 by fixed-step gradient descent.

    Parameters
    ----------
    B : (m, n) ndarray -- system matrix.
    v : (m, 1) ndarray -- target vector.
    alpha : float -- fixed step size; must be small enough
        (roughly < 2 / largest eigenvalue of B^T B) or the iteration diverges.
    tol : float -- stop when the relative squared residual drops below tol.
    x0 : (n, 1) ndarray -- starting point.

    Returns
    -------
    (n, 1) ndarray -- approximate minimizer.
    """
    # NOTE(review): the original also formed A = B^T B and b = B^T v here but
    # never used either -- removed as dead code (the gradient is computed
    # directly by gradGD).
    x = x0
    while True:
        res = residue(x, B, v)
        # print('Residue ', res)  # uncomment to trace convergence (matches gradientDescentBB)

        if res < tol:
            break

        x = x - alpha * gradGD(x, B, v)

    return x

# Gradient descent with automatic Barzilai-Borwein step size
def gradientDescentBB(B, v, tol, x0):
    """Minimize 0.5*||B x - v||_2^2 using Barzilai-Borwein step sizes.

    The first iteration takes a tiny fixed step (no previous gradient exists
    yet); every later iteration uses the BB step
    alpha = (s^T y) / ||y||^2 with s = x_k - x_{k-1}, y = g_k - g_{k-1}.

    Parameters
    ----------
    B : (m, n) ndarray -- system matrix.
    v : (m, 1) ndarray -- target vector.
    tol : float -- stop when the relative squared residual drops below tol.
    x0 : (n, 1) ndarray -- starting point.

    Returns
    -------
    (n, 1) ndarray -- approximate minimizer.
    """
    x = x0
    # BUG FIX: was np.zeros((N, 1)) using the module-level global N, which
    # breaks for any other problem size; sized from x0 instead.
    xpre = np.zeros_like(x0)

    first_step = True
    while True:
        res = residue(x, B, v)
        # print('Residue ', res)

        if res < tol:
            break

        if first_step:
            grad = gradGD(x, B, v)
            # BUG FIX: the previous iterate must be recorded before moving;
            # the original left xpre = 0, so the first BB step used
            # s = x - 0, which is only correct when x0 happens to be zero.
            xpre = x
            x = x - (1e-06) * grad
            first_step = False
            continue

        gradpre = grad
        grad = gradGD(x, B, v)
        y = grad - gradpre
        s = x - xpre
        alpha = np.dot(s.T, y) / pow(LA.norm(y, 2), 2)
        xpre = x
        x = x - alpha * grad

    return x

# Direct solution of the optimization problem via the normal equations A x = b
def solver(B, v):
    """Solve (B^T B) x = B^T v, the normal equations of min 0.5*||B x - v||^2."""
    gram = B.T.dot(B)
    rhs = B.T.dot(v)
    return np.linalg.solve(gram, rhs)

# Main routine: run the experiment n_trials times on random well-conditioned
# systems and compare gradient descent against the direct normal-equation solve.
N = 1000
epsilon = 1.0e-6
a = 1/N - epsilon          # noise scale keeps B^T B well conditioned
n_trials = 20              # renamed from `iter`, which shadowed the builtin

mytime_iter = []
time2_iter = []

myeabs_iter = []
myerel_iter = []
myepercent_iter = []

cgseabs_iter = []
cgserel_iter = []
cgsepercent_iter = []

# Running the experiment many times
for i in range(n_trials):
    print('Iteration: ', i)
    B = a * np.random.randn(N, N) + np.ones((N, N))
    x0 = np.random.randn(N, 1)  # Real solution of the optimization problem
    v = np.dot(B, x0)           # consistent system: exact solution is x0

    mystart = time.time()
    # x = gradientDescent(B, v, alpha=1999100e-09, tol=1e-05, x0=np.zeros((N, 1))) # Gradient Descent: Method 1
    x = gradientDescentBB(B, v, tol=1e-05, x0=np.zeros((N, 1))) # Gradient Descent: Method 2
    myend = time.time()
    mytime = myend - mystart

    start2 = time.time()
    xalt = solver(B, v)  # Solution of the optimization problem by solving A*x = b
    end2 = time.time()
    # BUG FIX: was start2 - end2, which always produced a negative elapsed
    # time and made the timing plot meaningless.
    time2 = end2 - start2

    myeabs = LA.norm(x - x0, 2)
    myerel = myeabs / LA.norm(x0, 2)
    myepercent = myerel * 100

    cgseabs = LA.norm(xalt - x0, 2)
    cgserel = cgseabs / LA.norm(x0, 2)
    cgsepercent = cgserel * 100

    mytime_iter.append(mytime)
    time2_iter.append(time2)
    myeabs_iter.append(myeabs)
    myerel_iter.append(myerel)
    myepercent_iter.append(myepercent)

    cgseabs_iter.append(cgseabs)
    cgserel_iter.append(cgserel)
    cgsepercent_iter.append(cgsepercent)

plt.figure(1)
plt.plot(mytime_iter, 'bo', label="GD")
plt.plot(time2_iter, 'ro', label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Time (s)")

plt.figure(2)
plt.plot(myeabs_iter, "-b", label="GD")
plt.plot(cgseabs_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Absolute error")

plt.figure(3)
plt.plot(myerel_iter, "-b", label="GD")
plt.plot(cgserel_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Relative error")

plt.figure(4)
plt.plot(myepercent_iter, "-b", label="GD")
plt.plot(cgsepercent_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.ylabel("Relative error (%)")

plt.show()
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import time

def residue(x, B, v):
    aux = np.dot(B, x) - v
    aux = pow(LA.norm(aux, 2), 2)
    aux = aux / pow(LA.norm(v, 2), 2)
    return aux

def gradGD(x, B, v):
    aux = np.dot(B, x) - v
    return np.dot(B.T, aux)

def gradientDescent(B, v, alpha, tol, x0):
    A = np.dot(B.T, B)
    b = np.dot(B.T, v)
    x = x0
    while True:
        res = residue(x, B, v)
        print('Residue ', res)
        if (res < tol):
            break
评论:您所说的“大错误”是什么?你能提供更多的信息吗?——回复:我指的是相对误差。如果xs是算法的输出,x是真实解,那么我将相对误差计算为范数(xs-x,2)/范数(x,2)。我得到的相对误差>0.7(百分比为70%)。