Model behaves differently after saving and loading (Python, PyTorch)

I want to use torch.save() to save a trained model for inference. However, with either load_state_dict() or torch.load() I do not get the saved model back: the loss computed with the loaded model is different from the loss computed with the saved model.

Relevant libraries:

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F

Model:

class nn_block(nn.Module):

    def __init__(self, feats_dim):

        super(nn_block, self).__init__()

        self.linear = nn.Linear(feats_dim, feats_dim)
        self.bn = nn.BatchNorm1d(feats_dim)
        self.softplus1 = nn.Softplus()
        self.softplus2 = nn.Softplus()

    def forward(self, rep_mat):

        transformed_mat = self.linear(rep_mat)
        transformed_mat = self.bn(transformed_mat)
        transformed_mat = self.softplus1(transformed_mat)
        transformed_mat = self.softplus2(transformed_mat + rep_mat)

        return transformed_mat


class test_nn(nn.Module):

    def __init__(self, in_feats, feats_dim, num_conv, num_classes):

        super(test_nn, self).__init__()

        self.linear1 = nn.Linear(in_feats, feats_dim)
        self.convs = [nn_block(feats_dim) for _ in range(num_conv)]
        self.linear2 = nn.Linear(feats_dim, num_classes)
        self.softmax = nn.Softmax()

    def forward(self, rep_mat):

        h = self.linear1(rep_mat)
        for conv_func in self.convs:
            h = conv_func(h)
        h = self.linear2(h)
        h = self.softmax(h)

        return h

Train, save, and reload the model:

# fake a classification task
num_classes = 2; input_dim = 8
one = np.random.multivariate_normal(np.zeros(input_dim),np.eye(input_dim),20)
two = np.random.multivariate_normal(np.ones(input_dim),np.eye(input_dim),20)
inputs = np.concatenate([one, two], axis=0)
labels = np.concatenate([np.zeros(20), np.ones(20)])

inputs = Variable(torch.Tensor(inputs))
labels = torch.LongTensor(labels)

# build a model
net = test_nn(input_dim, 5, 2, num_classes)
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
net.train()

losses = []
best_score = 1e10
for epoch in range(25):

    preds = net(inputs)
    loss = F.cross_entropy(preds, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    state_dict = {'state_dict': net.state_dict()}

    if loss.item()-best_score<-1e-4: 
        # save only parameters 
        torch.save(state_dict, 'model_params.torch')
        # save the whole model 
        torch.save(net, 'whole_model.torch')
    best_score = np.min([best_score, loss.item()])

    losses.append(loss.item())

net_params = test_nn(input_dim, 5, 2, num_classes)
net_params.load_state_dict(torch.load('model_params.torch')['state_dict'])
net_params.eval()
preds_params = net_params(inputs)
loss_params = F.cross_entropy(preds_params, labels)
print('reloaded params %.4f %.4f' % (loss_params.item(), np.min(losses)))

net_whole = torch.load('whole_model.torch')
net_whole.eval()
preds_whole = net_whole(inputs)
loss_whole = F.cross_entropy(preds_whole, labels)
print('reloaded whole %.4f %.4f' % (loss_whole.item(), np.min(losses)))

The state dict contains every parameter (nn.Parameter) and every buffer that has been registered on the module and all of its submodules (buffers are similar to parameters, but they should not be trained/optimised). Everything else is not included in that state dict.
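As a quick illustration (not part of the original post), BatchNorm1d, which the model above uses, carries both parameters and registered buffers, and both kinds end up in its state dict:

import torch.nn as nn

bn = nn.BatchNorm1d(5)
# learnable parameters: the affine scale and shift
print([name for name, _ in bn.named_parameters()])  # ['weight', 'bias']
# registered buffers: running statistics such as 'running_mean' and 'running_var'
print([name for name, _ in bn.named_buffers()])
# state_dict() holds the parameters and the buffers, nothing else
print(list(bn.state_dict().keys()))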

Your test_nn module keeps convs in a plain Python list, so those blocks are not included in the state dict:

self.convs = [nn_block(feats_dim) for _ in range(num_conv)]
Not only are they missing from the state dict, they are also not visible to net.parameters(), which means they are never trained/optimised at all.
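A minimal sketch of that difference (the class names here are made up, not from the post): a module that keeps its sub-blocks in a plain list exposes no parameters, while the nn.ModuleList variant registers everything:

import torch.nn as nn

class WithPlainList(nn.Module):
    def __init__(self):
        super(WithPlainList, self).__init__()
        self.blocks = [nn.Linear(4, 4) for _ in range(2)]  # not registered

class WithModuleList(nn.Module):
    def __init__(self):
        super(WithModuleList, self).__init__()
        self.blocks = nn.ModuleList([nn.Linear(4, 4) for _ in range(2)])  # registered

print(len(list(WithPlainList().parameters())))    # 0 -> the optimiser never sees them
print(len(list(WithModuleList().parameters())))   # 4 (weight and bias of each Linear)
print(list(WithPlainList().state_dict().keys()))  # []
print(list(WithModuleList().state_dict().keys())) # ['blocks.0.weight', 'blocks.0.bias', ...]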

To register the modules from the list, you can wrap it in nn.ModuleList, a module that acts like a list while correctly registering the modules it contains:

self.convs = nn.ModuleList([nn_block(feats_dim) for _ in range(num_conv)])
With that change, both models produce the same result.

Since the convs modules are called sequentially in the for-loop (the output of one module is the input of the next), you may also consider using nn.Sequential, which you can call directly instead of writing the loop yourself. It is used a lot and simply makes things a little easier; for example, if you later want to replace the sequence of modules with a single module, you do not need to change anything in the forward method. A sketch of that variant follows below.
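For illustration only (this is not the poster's code), test_nn could be rewritten with nn.Sequential roughly as follows, reusing the nn_block class defined above; the name test_nn_seq is made up:

import torch.nn as nn

class test_nn_seq(nn.Module):

    def __init__(self, in_feats, feats_dim, num_conv, num_classes):
        super(test_nn_seq, self).__init__()
        self.linear1 = nn.Linear(in_feats, feats_dim)
        # nn.Sequential registers the blocks and calls them in order
        self.convs = nn.Sequential(*[nn_block(feats_dim) for _ in range(num_conv)])
        self.linear2 = nn.Linear(feats_dim, num_classes)
        self.softmax = nn.Softmax(dim=1)  # explicit dim avoids the implicit-dim warning

    def forward(self, rep_mat):
        h = self.linear1(rep_mat)
        h = self.convs(h)  # a single call replaces the for-loop
        h = self.linear2(h)
        return self.softmax(h)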

Not only are the two losses different from each other, they also differ from the loss computed by the best model that was saved in the first place.

During training, you compute the loss for the current input (batch) and then optimise the parameters based on that input. This means the parameters now differ from the ones that were used to compute the loss. Because you save the model after that update, it will also have a different loss (the one that would occur in the next iteration).

preds = net(inputs)
# loss of the current (pre-update) model
loss = F.cross_entropy(preds, labels)

optimizer.zero_grad()
loss.backward()
# the parameters are updated based on that loss
optimizer.step()

# state of the model *after* the update
state_dict = {'state_dict': net.state_dict()}

# the comparison uses the loss from before the update,
# but the model that gets saved is the one from after the update
if loss.item() - best_score < -1e-4:
    torch.save(state_dict, 'model_params.torch')
    torch.save(net, 'whole_model.torch')

Thanks, nn.ModuleList solved my problem. But I am still not sure why the loss computed with the best model differs from the loss computed with the reloaded model. As you can see in my code, the same inputs are used for training and testing, and in every training epoch all of the data is used for the optimisation step.

The loss you use as the best-score metric is computed before the model is updated; the state you save is different because of that update, so you would have to recompute the loss with the new state. I added some comments to the code above to clarify that.
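One way to make the saved score match the saved weights (a sketch, not part of the original exchange; it reuses net, inputs, labels and best_score from the training loop above, and post_update_loss is a made-up name) is to re-evaluate the loss after optimizer.step() and use that value for both the comparison and the checkpoint:

# inside the training loop, right after optimizer.step()
with torch.no_grad():
    net.eval()  # eval mode, so BatchNorm uses its running statistics, as it will after reloading
    post_update_loss = F.cross_entropy(net(inputs), labels).item()
    net.train()

if post_update_loss - best_score < -1e-4:
    torch.save({'state_dict': net.state_dict()}, 'model_params.torch')
    torch.save(net, 'whole_model.torch')
best_score = min(best_score, post_update_loss)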