Gradient error when computing with NumPy - PyTorch

Tags: numpy, graph, neural-network, pytorch

I am learning to use PyTorch (0.4.0) to compute gradients automatically, but I do not fully understand how to use backward() and grad. The exercise asks me to compute the derivative both with PyTorch and analytically, returning auto_grad and user_grad respectively. In my code I call f.backward() and read w.grad to get df/dw, but the two results do not match, so either I am misusing automatic differentiation or my hand-derived gradient is wrong. Below are the graph I am working from and the code I am trying to run:
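For a scalar case, the pattern I am using looks roughly like this (a toy example for illustration only, not my actual code):

import torch

w = torch.tensor(1.0, requires_grad=True)  # leaf tensor that will receive the gradient
f = (3 * w) ** 2                           # f(w) = 9 w^2, so df/dw = 18 w
f.backward()                               # autograd fills w.grad with df/dw
print(w.grad)                              # tensor(18.) at w = 1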

Test:


I think you are computing the gradient the wrong way. Try this:

import numpy as np
import torch
from torch.autograd import Variable
import torch.nn.functional as F

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def graph2(W_np, x_np, b_np):
    # Wrap the NumPy inputs as float tensors; only W's gradient is read back
    W = Variable(torch.Tensor(W_np), requires_grad=True)
    x = torch.tensor(x_np, requires_grad=True).type(torch.FloatTensor)
    b = torch.tensor(b_np, requires_grad=True).type(torch.FloatTensor)
    # Forward pass: f = sum(sigmoid(W x + b))
    u = torch.matmul(W, x) + b
    g = F.sigmoid(u)
    f = torch.sum(g)
    # Analytical gradient: df/dW = sigmoid'(W x + b) x^T, an outer product via (n,1) * (1,n) broadcasting
    user_grad = (sigmoid(np.matmul(W_np, x_np) + b_np)*(1 - sigmoid(np.matmul(W_np, x_np) + b_np)))*x_np.T
    # Autograd gradient: backward() populates W.grad with df/dW
    f.backward(retain_graph=True)
    auto_grad = W.grad
    print("auto_grad", auto_grad)
    print("user_grad", user_grad)
    return f, auto_grad, user_grad




# Check auto_grad against user_grad on random problems of varying size
iterations = 1000
sizes = np.random.randint(2,10, size=(iterations))
for i in range(iterations):
    size = sizes[i]
    print("i, size", i, size)
    W_np = np.random.rand(size, size)
    x_np = np.random.rand(size, 1)
    b_np = np.random.rand(size, 1)
    f, auto_grad, user_grad = graph2(W_np, x_np, b_np)
    manual_f = np.sum(sigmoid(np.matmul(W_np, x_np) + b_np))
    assert np.isclose(f.data.numpy(), manual_f, atol=1e-4), "f not correct"
    assert np.allclose(auto_grad.numpy(), user_grad), "Gradient not correct"
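For reference, the analytical formula that user_grad implements can be derived directly from the forward pass used above (a sketch, assuming f sums the sigmoid outputs exactly as in the code):

$$f = \sum_i \sigma(u_i), \qquad u = Wx + b$$

$$\frac{\partial f}{\partial W_{ij}} = \sigma'(u_i)\,x_j = \sigma(u_i)\bigl(1 - \sigma(u_i)\bigr)\,x_j$$

In matrix form this is the outer product \(\sigma(Wx+b) \odot (1 - \sigma(Wx+b))\, x^{T}\), which is what the NumPy expression computes by broadcasting the (n, 1) column against the (1, n) row x_np.T, and it is what W.grad should match.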