线性回归的梯度下降算法不选择y截距参数我遵循Andrew Ng Coursera的机器学习教程，尝试在Python中实现梯度下降算法。我对y-intercept参数有问题，因为它看起来不是最佳值。这是我的密码： # IMPORTS import pandas as pd import seaborn as sns import matplotlib.pyplot as plt %matplotlib inline # Acquiring Data # Source: https://github.com/mattnedrich/GradientDescentExample data = pd.read_csv('data.csv') def cost_function(a, b, x_values, y_values): ''' Calculates the square mean error for a given dataset with (x,y) pairs and the model y' = a + bx a: y-intercept for the model b: slope of the curve x_values, y_values: points (x,y) of the dataset ''' data_len = len(x_values) total_error = sum([((a + b * x_values[i]) - y_values[i])**2 for i in range(data_len)]) return total_error / (2 * float(data_len)) def a_gradient(a, b, x_values, y_values): ''' Partial derivative of the cost_function with respect to 'a' a, b: values for 'a' and 'b' x_values, y_values: points (x,y) of the dataset ''' data_len = len(x_values) a_gradient = sum([((a + b * x_values[i]) - y_values[i]) for i in range(data_len)]) return a_gradient / float(data_len) def b_gradient(a, b, x_values, y_values): ''' Partial derivative of the cost_function with respect to 'b' a, b: values for 'a' and 'b' x_values, y_values: points (x,y) of the dataset ''' data_len = len(x_values) b_gradient = sum([(((a + b * x_values[i]) - y_values[i]) * x_values[i]) for i in range(data_len)]) return b_gradient / float(data_len) def gradient_descent_step(a_current, b_current, x_values, y_values, alpha): ''' Give a step in direction of the minimum of the cost_function using the 'a' and 'b' gradiants. Return new values for 'a' and 'b'. 
a_current, b_current: the current values for 'a' and 'b' x_values, y_values: points (x,y) of the dataset ''' new_a = a_current - alpha * a_gradient(a_current, b_current, x_values, y_values) new_b = b_current - alpha * b_gradient(a_current, b_current, x_values, y_values) return (new_a, new_b) def run_gradient_descent(a, b, x_values, y_values, alpha, precision, plot=False, verbose=False): ''' Runs the gradient_descent_step function and updates (a,b) until the value of the cost function varies less than 'precision'. a, b: initial values for the point a and b in the cost_function x_values, y_values: points (x,y) of the dataset alpha: learning rate for the algorithm precision: value for the algorithm to stop calculation ''' iterations = 0 delta_cost = cost_function(a, b, x_values, y_values) error_list = [delta_cost] iteration_list = [0] # The loop runs until the delta_cost reaches the precision defined # When the variation in cost_function is small it means that the # the function is near its minimum and the parameters 'a' and 'b' # are a good guess for modeling the dataset. while delta_cost > precision: iterations += 1 iteration_list.append(iterations) # Calculates the initial error with current a,b values prev_cost = cost_function(a, b, x_values, y_values) # Calculates new values for a and b a, b = gradient_descent_step(a, b, x_values, y_values, alpha) # Updates the value of the error actual_cost = cost_function(a, b, x_values, y_values) error_list.append(actual_cost) # Calculates the difference between previous and actual error values. 
delta_cost = prev_cost - actual_cost # Plot the error in each iteration to see how it decreases # and some information about our final results if plot: plt.plot(iteration_list, error_list, '-') plt.title('Error Minimization') plt.xlabel('Iteration',fontsize=12) plt.ylabel('Error',fontsize=12) plt.show() if verbose: print('Iterations = ' + str(iterations)) print('Cost Function Value = '+ str(cost_function(a, b, x_values, y_values))) print('a = ' + str(a) + ' and b = ' + str(b)) return (actual_cost, a, b)_Python_Algorithm_Data Science_Gradient Descent

python/
线性回归的梯度下降算法不选择y截距参数我遵循Andrew Ng Coursera的机器学习教程，尝试在Python中实现梯度下降算法。我对y-intercept参数有问题，因为它看起来不是最佳值。这是我的密码： # IMPORTS import pandas as pd import seaborn as sns import matplotlib.pyplot as plt %matplotlib inline # Acquiring Data # Source: https://github.com/mattnedrich/GradientDescentExample data = pd.read_csv('data.csv') def cost_function(a, b, x_values, y_values): ''' Calculates the square mean error for a given dataset with (x,y) pairs and the model y' = a + bx a: y-intercept for the model b: slope of the curve x_values, y_values: points (x,y) of the dataset ''' data_len = len(x_values) total_error = sum([((a + b * x_values[i]) - y_values[i])**2 for i in range(data_len)]) return total_error / (2 * float(data_len)) def a_gradient(a, b, x_values, y_values): ''' Partial derivative of the cost_function with respect to 'a' a, b: values for 'a' and 'b' x_values, y_values: points (x,y) of the dataset ''' data_len = len(x_values) a_gradient = sum([((a + b * x_values[i]) - y_values[i]) for i in range(data_len)]) return a_gradient / float(data_len) def b_gradient(a, b, x_values, y_values): ''' Partial derivative of the cost_function with respect to 'b' a, b: values for 'a' and 'b' x_values, y_values: points (x,y) of the dataset ''' data_len = len(x_values) b_gradient = sum([(((a + b * x_values[i]) - y_values[i]) * x_values[i]) for i in range(data_len)]) return b_gradient / float(data_len) def gradient_descent_step(a_current, b_current, x_values, y_values, alpha): ''' Give a step in direction of the minimum of the cost_function using the 'a' and 'b' gradiants. Return new values for 'a' and 'b'. 
a_current, b_current: the current values for 'a' and 'b' x_values, y_values: points (x,y) of the dataset ''' new_a = a_current - alpha * a_gradient(a_current, b_current, x_values, y_values) new_b = b_current - alpha * b_gradient(a_current, b_current, x_values, y_values) return (new_a, new_b) def run_gradient_descent(a, b, x_values, y_values, alpha, precision, plot=False, verbose=False): ''' Runs the gradient_descent_step function and updates (a,b) until the value of the cost function varies less than 'precision'. a, b: initial values for the point a and b in the cost_function x_values, y_values: points (x,y) of the dataset alpha: learning rate for the algorithm precision: value for the algorithm to stop calculation ''' iterations = 0 delta_cost = cost_function(a, b, x_values, y_values) error_list = [delta_cost] iteration_list = [0] # The loop runs until the delta_cost reaches the precision defined # When the variation in cost_function is small it means that the # the function is near its minimum and the parameters 'a' and 'b' # are a good guess for modeling the dataset. while delta_cost > precision: iterations += 1 iteration_list.append(iterations) # Calculates the initial error with current a,b values prev_cost = cost_function(a, b, x_values, y_values) # Calculates new values for a and b a, b = gradient_descent_step(a, b, x_values, y_values, alpha) # Updates the value of the error actual_cost = cost_function(a, b, x_values, y_values) error_list.append(actual_cost) # Calculates the difference between previous and actual error values. 
delta_cost = prev_cost - actual_cost # Plot the error in each iteration to see how it decreases # and some information about our final results if plot: plt.plot(iteration_list, error_list, '-') plt.title('Error Minimization') plt.xlabel('Iteration',fontsize=12) plt.ylabel('Error',fontsize=12) plt.show() if verbose: print('Iterations = ' + str(iterations)) print('Cost Function Value = '+ str(cost_function(a, b, x_values, y_values))) print('a = ' + str(a) + ' and b = ' + str(b)) return (actual_cost, a, b)

线性回归的梯度下降算法不选择y截距参数

我遵循Andrew Ng Coursera的机器学习教程，尝试在Python中实现梯度下降算法。我对y截距（y-intercept）参数有问题，因为它看起来不是最佳值。这是我的代码：

# IMPORTS
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Acquiring Data
# Source: https://github.com/mattnedrich/GradientDescentExample
data = pd.read_csv('data.csv')

def cost_function(a, b, x_values, y_values):
    '''
    Calculates the square mean error for a given dataset
    with (x,y) pairs and the model y' = a + bx

    a: y-intercept for the model
    b: slope of the curve
    x_values, y_values: points (x,y) of the dataset
    '''
    data_len = len(x_values)
    total_error = sum([((a + b * x_values[i]) - y_values[i])**2 for i in range(data_len)])
    return total_error / (2 * float(data_len))

def a_gradient(a, b, x_values, y_values):
    '''
    Partial derivative of the cost_function with respect to 'a'

    a, b: values for 'a' and 'b'
    x_values, y_values: points (x,y) of the dataset
    '''
    data_len = len(x_values)
    a_gradient = sum([((a + b * x_values[i]) - y_values[i]) for i in range(data_len)])
    return a_gradient / float(data_len)

def b_gradient(a, b, x_values, y_values):
    '''
    Partial derivative of the cost_function with respect to 'b'

    a, b: values for 'a' and 'b'
    x_values, y_values: points (x,y) of the dataset
    '''
    data_len = len(x_values)
    b_gradient = sum([(((a + b * x_values[i]) - y_values[i]) * x_values[i]) for i in range(data_len)])
    return b_gradient / float(data_len)

def gradient_descent_step(a_current, b_current, x_values, y_values, alpha):
    '''
    Give a step in direction of the minimum of the cost_function using
    the 'a' and 'b' gradients. Return new values for 'a' and 'b'.

    a_current, b_current: the current values for 'a' and 'b'
    x_values, y_values: points (x,y) of the dataset
    '''
    new_a = a_current - alpha * a_gradient(a_current, b_current, x_values, y_values)
    new_b = b_current - alpha * b_gradient(a_current, b_current, x_values, y_values)
    return (new_a, new_b)

def run_gradient_descent(a, b, x_values, y_values, alpha, precision, plot=False, verbose=False):
    '''
    Runs the gradient_descent_step function and updates (a,b) until
    the value of the cost function varies less than 'precision'.

    a, b: initial values for the point a and b in the cost_function
    x_values, y_values: points (x,y) of the dataset
    alpha: learning rate for the algorithm
    precision: value for the algorithm to stop calculation
    '''
    iterations = 0
    delta_cost = cost_function(a, b, x_values, y_values)

    error_list = [delta_cost]
    iteration_list = [0]

    # The loop runs until the delta_cost reaches the precision defined
    # When the variation in cost_function is small it means that
    # the function is near its minimum and the parameters 'a' and 'b'
    # are a good guess for modeling the dataset.
    while delta_cost > precision:
        iterations += 1
        iteration_list.append(iterations)

        # Calculates the initial error with current a,b values
        prev_cost = cost_function(a, b, x_values, y_values)

        # Calculates new values for a and b
        a, b = gradient_descent_step(a, b, x_values, y_values, alpha)

        # Updates the value of the error
        actual_cost = cost_function(a, b, x_values, y_values)
        error_list.append(actual_cost)

        # Calculates the difference between previous and actual error values.
        delta_cost = prev_cost - actual_cost

    # Plot the error in each iteration to see how it decreases
    # and some information about our final results
    if plot:
        plt.plot(iteration_list, error_list, '-')
        plt.title('Error Minimization')
        plt.xlabel('Iteration',fontsize=12)
        plt.ylabel('Error',fontsize=12)
        plt.show()
    if verbose:
        print('Iterations = ' + str(iterations))
        print('Cost Function Value = '+ str(cost_function(a, b, x_values, y_values)))
        print('a = ' + str(a) + ' and b = ' + str(b))

    return (actual_cost, a, b)

python algorithm

线性回归的梯度下降算法不选择y截距参数我遵循Andrew Ng Coursera的机器学习教程，尝试在Python中实现梯度下降算法。我对y-intercept参数有问题，因为它看起来不是最佳值。这是我的密码： # IMPORTS import pandas as pd import seaborn as sns import matplotlib.pyplot as plt %matplotlib inline # Acquiring Data # Source: https://github.com/mattnedrich/GradientDescentExample data = pd.read_csv('data.csv') def cost_function(a, b, x_values, y_values): ''' Calculates the square mean error for a given dataset with (x,y) pairs and the model y' = a + bx a: y-intercept for the model b: slope of the curve x_values, y_values: points (x,y) of the dataset ''' data_len = len(x_values) total_error = sum([((a + b * x_values[i]) - y_values[i])**2 for i in range(data_len)]) return total_error / (2 * float(data_len)) def a_gradient(a, b, x_values, y_values): ''' Partial derivative of the cost_function with respect to 'a' a, b: values for 'a' and 'b' x_values, y_values: points (x,y) of the dataset ''' data_len = len(x_values) a_gradient = sum([((a + b * x_values[i]) - y_values[i]) for i in range(data_len)]) return a_gradient / float(data_len) def b_gradient(a, b, x_values, y_values): ''' Partial derivative of the cost_function with respect to 'b' a, b: values for 'a' and 'b' x_values, y_values: points (x,y) of the dataset ''' data_len = len(x_values) b_gradient = sum([(((a + b * x_values[i]) - y_values[i]) * x_values[i]) for i in range(data_len)]) return b_gradient / float(data_len) def gradient_descent_step(a_current, b_current, x_values, y_values, alpha): ''' Give a step in direction of the minimum of the cost_function using the 'a' and 'b' gradiants. Return new values for 'a' and 'b'. 
a_current, b_current: the current values for 'a' and 'b' x_values, y_values: points (x,y) of the dataset ''' new_a = a_current - alpha * a_gradient(a_current, b_current, x_values, y_values) new_b = b_current - alpha * b_gradient(a_current, b_current, x_values, y_values) return (new_a, new_b) def run_gradient_descent(a, b, x_values, y_values, alpha, precision, plot=False, verbose=False): ''' Runs the gradient_descent_step function and updates (a,b) until the value of the cost function varies less than 'precision'. a, b: initial values for the point a and b in the cost_function x_values, y_values: points (x,y) of the dataset alpha: learning rate for the algorithm precision: value for the algorithm to stop calculation ''' iterations = 0 delta_cost = cost_function(a, b, x_values, y_values) error_list = [delta_cost] iteration_list = [0] # The loop runs until the delta_cost reaches the precision defined # When the variation in cost_function is small it means that the # the function is near its minimum and the parameters 'a' and 'b' # are a good guess for modeling the dataset. while delta_cost > precision: iterations += 1 iteration_list.append(iterations) # Calculates the initial error with current a,b values prev_cost = cost_function(a, b, x_values, y_values) # Calculates new values for a and b a, b = gradient_descent_step(a, b, x_values, y_values, alpha) # Updates the value of the error actual_cost = cost_function(a, b, x_values, y_values) error_list.append(actual_cost) # Calculates the difference between previous and actual error values. 
delta_cost = prev_cost - actual_cost # Plot the error in each iteration to see how it decreases # and some information about our final results if plot: plt.plot(iteration_list, error_list, '-') plt.title('Error Minimization') plt.xlabel('Iteration',fontsize=12) plt.ylabel('Error',fontsize=12) plt.show() if verbose: print('Iterations = ' + str(iterations)) print('Cost Function Value = '+ str(cost_function(a, b, x_values, y_values))) print('a = ' + str(a) + ' and b = ' + str(b)) return (actual_cost, a, b),python,algorithm,data-science,gradient-descent,Python,Algorithm,Data Science,Gradient Descent,当我使用以下命令运行算法时： run_gradient_descent(0, 0, data['x'], data['y'], 0.0001, 0.01) 我得到（a=0.0496688656535，b=1.47825808018）但“a”的最佳值约为7.9（尝试了另一个用于线性回归的资源）此外，如果我更改参数“a”的初始猜测，算法只需尝试调整参数“b” 例如，如果我设置a=200，b=0 run_gradient_descent(200, 0, data['x'], data['y'],

当我使用以下命令运行算法时：

# Start from a=0, b=0 with learning rate alpha=0.0001 and stopping threshold precision=0.01.
run_gradient_descent(0, 0, data['x'], data['y'], 0.0001, 0.01)

我得到（a=0.0496688656535，b=1.47825808018）

但“a”的最佳值约为7.9（尝试了另一个用于线性回归的资源）

此外，如果我更改参数“a”的初始猜测，算法似乎只会调整参数“b”，而“a”几乎不变。

例如，如果我设置a=200，b=0

# Same learning rate and stopping threshold, but starting from a=200, b=0.
run_gradient_descent(200, 0, data['x'], data['y'], 0.0001, 0.01)

我得到（a=199.933763331和b=2.44824996193）

我没有发现代码有任何错误，我意识到问题在于参数 a 的初始猜测。请参见我自己的答案，其中我定义了一个辅助函数，用来得到参数 a 初始猜测值的搜索范围。

梯度下降并不保证找到全局最优值。您找到全局最优值的机会取决于您的起始值。为了得到参数的真实值，首先我解决了保证全局最小值的最小二乘问题

# Load the CSV without treating its first row as a header, so the columns are
# numbered 0 and 1. `header=None` is the supported spelling; current pandas
# rejects the negative value `header=-1` used previously.
data = pd.read_csv('data.csv', header=None)

x, y = data[0], data[1]

from scipy.stats import linregress

# Ordinary least-squares fit, used as the reference for the gradient-descent result.
linregress(x, y)

这导致了以下统计数据：

LinregressResult(slope=1.32243102275536, intercept=7.9910209822703848, rvalue=0.77372849988782377, pvalue=3.855655536990139e-21, stderr=0.109377979589804)

因此 b = 1.32243102275536，a = 7.9910209822703848。有鉴于此，我用您的代码，以随机的起始值 a 和 b 多次求解了这个问题：

import numpy as np

# Draw both starting values independently and uniformly from [0, 10).
a = np.random.rand() * 10
b = np.random.rand() * 10

print("Initial values of parameters: ")

print("a=%f\tb=%f" % (a, b))

# verbose=True is required for the iteration count, final cost and fitted
# parameters to be printed by run_gradient_descent.
run_gradient_descent(a, b, x, y, 1e-4, 1e-2, verbose=True)

以下是我得到的解决方案：

Initial values of parameters: 
a=6.100305  b=2.606448

Iterations = 21
Cost Function Value = 55.2093808263
a = 6.07601889437 and b = 1.36310312751

因此，无法接近最小值的原因似乎是初始参数值的选择。您自己也可以验证这一点：如果把最小二乘法得到的 a 和 b 作为初始值放入梯度下降算法，它将只迭代一次，并且参数保持不变。

不知何故，在某个点上，条件 delta_cost > precision 变为 False，循环就停在那里，认为这是一个局部最优值。如果减小 precision 的取值（即收紧停止阈值），并且运行足够长的时间，则可能会找到全局最优值。

我的梯度下降实现的完整代码可以在我的Github存储库中找到：

考虑到@relay所说的梯度下降算法不能保证找到全局极小值，我试图提出一个辅助函数，把对参数 a 的初始猜测限制在一个特定的搜索范围内，如下所示：

def search_range(x, y, plot=False):
    '''
    Given a dataset with points (x, y), estimates an integer interval in
    which to search for the initial value of the y-intercept 'a'.

    The first and last quarter of the points (in the order given) are
    inspected. Two lines are drawn towards the midpoint of the lowest and
    highest point of the last quarter: L1 starts at the lowest point of
    the first quarter, L2 at the highest point of the first quarter.
    The y-intercepts of L1 and L2, truncated to int, are returned.

    x, y: coordinates of the dataset; any equally long iterables
          (list, tuple, pandas Series regardless of its index, ...)
    plot: when True, draws the dataset together with L1 and L2 using the
          module-level 'plt'

    Returns the tuple (y_L1, y_L2). No ordering between the two values is
    guaranteed.

    Raises ValueError for empty input or when x and y differ in length,
    and ZeroDivisionError when a first-quarter point has the same
    x-coordinate as the last-quarter midpoint (plain Python numbers).
    '''
    # Materialise the inputs so that positional access works for any
    # iterable, including a pandas Series with a non-default index.
    xs, ys = list(x), list(y)
    data_length = len(xs)
    if data_length == 0:
        raise ValueError('search_range() needs at least one data point')
    if len(ys) != data_length:
        raise ValueError('x and y must have the same length')

    points = list(zip(xs, ys))

    # Size of a quarter of the dataset. It is clamped to 1 so that datasets
    # with fewer than four points still use their first and last point.
    quarter = max(data_length // 4, 1)

    def by_y(point):
        return point[1]

    # Lowest and highest point (by y) in the first quarter; on ties the
    # earliest point wins.
    first_quarter = points[:quarter]
    min_Q1 = min(first_quarter, key=by_y)
    max_Q1 = max(first_quarter, key=by_y)

    # Same for the last quarter. It is scanned from the end so that on ties
    # the point closest to the end wins. The slice covers exactly 'quarter'
    # points, the same number as the first quarter.
    last_quarter = list(reversed(points[-quarter:]))
    min_Q4 = min(last_quarter, key=by_y)
    max_Q4 = max(last_quarter, key=by_y)

    mean_Q4 = (((min_Q4[0] + max_Q4[0]) / 2), ((min_Q4[1] + max_Q4[1]) / 2))

    # L1 passes through min_Q1 and mean_Q4, L2 through max_Q1 and mean_Q4.
    # Slopes follow m = Delta(y) / Delta(x).
    slope_L1 = (min_Q1[1] - mean_Q4[1]) / (min_Q1[0] - mean_Q4[0])
    slope_L2 = (max_Q1[1] - mean_Q4[1]) / (max_Q1[0] - mean_Q4[0])

    # y-intercepts from y = m * x + b. They are truncated to int because the
    # caller uses them as bounds of a range() iteration.
    y_L1 = int(min_Q1[1] - min_Q1[0] * slope_L1)
    y_L2 = int(max_Q1[1] - max_Q1[0] * slope_L2)

    # Plotting L1 and L2 over the points that were passed in
    if plot:
        L1 = [(y_L1 + slope_L1 * x_i) for x_i in xs]
        L2 = [(y_L2 + slope_L2 * x_i) for x_i in xs]

        plt.plot(xs, ys, '.')
        plt.plot(xs, L1, '-', color='r')
        plt.plot(xs, L2, '-', color='r')
        plt.title('Scatterplot of Sample Data')
        plt.xlabel('x',fontsize=12)
        plt.ylabel('y',fontsize=12)
        plt.show()

    return y_L1, y_L2

运行代码

run_search_gradient_descent(data['x'], data['y'], 0.0001, 0.001, verbose=True)

我有：

Cost Function = 55.1294483959
a = 8.0259599606 and b = 1.3209768383

为了进行比较，使用scipy.stats返回的线性回归

a=7.99102098227和b=1.32243102276

我正在考虑实现一些东西来测试 a 和 b 的一些初始值。根据我的记忆，这个例子中的 cost_function 只有一个全局极小值，所以不可能陷入另一个局部极小值。我认为代码中有错误，但我还看不出来。谢谢。我改变了答案，请看上面。你的解决方案有效吗？我在这里试过，也遇到了同样的问题。我认为在导数之前加上这个常数不会改变解。谢谢。我建议你检查什么是导数，试着自己取误差函数的导数。这些不仅仅是一些随机常数。有错误的导数会改变你的结果。不要这么生气。我知道什么是导数，也知道2是从哪里来的。在写代码之前，我用纸和笔推导了导数。如果你看到我的成本函数，你会注意到它被除以2，这抵消了a和b的导数中的2。我尝试了你的解决方案，因为也许我遗漏了一些东西，但效果是一样的。优化过程中没有行为变化，y截距也没有适当变化以收敛到正确的值。你试过代码了吗？对你有用吗？请在这里发布你的代码和结果。Tks！我在处理和你一样的问题，而且……我被困在同一步上。我用我的输入尝试了你的程序：令人惊讶的是，我的程序和你的程序有相同的输出。除了代码相关的问题，我们可能忘记了一些东西。如果你有办法让我联系你，我们可能会一起解决这个问题。我没有找到一种方法在这里直接向你发送消息。我也不愿意在这里透露我的电子邮件 :D 你有什么建议吗？谢谢

def run_search_gradient_descent(x_values, y_values, alpha, precision, verbose=False):
    '''
    Runs run_gradient_descent once for every integer initial value of 'a'
    between the two bounds returned by search_range (both bounds included),
    always starting with b = 0, and keeps the run with the lowest cost.

    x_values, y_values: points (x,y) of the dataset
    alpha: learning rate for the algorithm
    precision: value for the algorithm to stop calculation
    verbose: when True, prints the best cost and the matching (a, b)

    Returns the tuple (cost, a, b) of the best run. If no run produced a
    cost below infinity, (inf, 0, 0) is returned.
    '''
    from math import inf

    a1, a2 = search_range(x_values, y_values)

    # search_range does not promise that a1 < a2, and range() is empty when
    # start >= stop. Normalise the bounds and include the upper one so that
    # at least one initial guess is always tried.
    lower, upper = min(a1, a2), max(a1, a2)

    best_cost, best_a, best_b = inf, 0, 0

    for a in range(lower, upper + 1):

        cost, linear_coef, slope = run_gradient_descent(a, 0, x_values, y_values, alpha, precision)

        # Keep the cost and the parameters (a, b) of the best run so far
        if cost < best_cost:
            best_cost, best_a, best_b = cost, linear_coef, slope

    if verbose:
        print('Cost Function = ' + str(best_cost))
        print('a = ' + str(best_a) + ' and b = ' + str(best_b))

    return (best_cost, best_a, best_b)

[algorithm]相关文章推荐

随机文章推荐