Machine learning: List not populated correctly unless using PyTorch clone()


I am trying to add the final weights of each trained model to a list with the following code:

%reset -f

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data_utils
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame
import torchvision.datasets as dset
import os
import torch.nn.functional as F
import time
import random
import pickle
from sklearn.metrics import confusion_matrix
import pandas as pd
import sklearn


trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])

root = './data'
if not os.path.exists(root):
    os.mkdir(root)
train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)

batch_size = 64

train_loader = torch.utils.data.DataLoader(
                 dataset=train_set,
                 batch_size=batch_size,
                 shuffle=True)
test_loader = torch.utils.data.DataLoader(
                dataset=test_set,
                batch_size=batch_size,
                shuffle=True)

class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 500)
        self.fc2 = nn.Linear(500, 256)
        self.fc3 = nn.Linear(256, 2)
    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

num_epochs = 2
random_sample_size = 200

values_0_or_1 = [t for t in train_set if (int(t[1]) == 0 or int(t[1]) == 1)]
values_0_or_1_testset = [t for t in test_set if (int(t[1]) == 0 or int(t[1]) == 1)]

print(len(values_0_or_1))
print(len(values_0_or_1_testset))

train_loader_subset = torch.utils.data.DataLoader(
                 dataset=values_0_or_1,
                 batch_size=batch_size,
                 shuffle=True)

test_loader_subset = torch.utils.data.DataLoader(
                 dataset=values_0_or_1_testset,
                 batch_size=batch_size,
                 shuffle=False)

train_loader = train_loader_subset

# Hyper-parameters 
input_size = 100
hidden_size = 100
num_classes = 2
# learning_rate = 0.00001
learning_rate = .0001
# Device configuration
device = 'cpu'
print_progress_every_n_epochs = 1

model = NeuralNet().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

N = len(train_loader)
# Train the model
total_step = len(train_loader)

most_recent_prediction = []
test_actual_predicted_dict = {}

rm = random.sample(list(values_0_or_1), random_sample_size)
train_loader_subset = data_utils.DataLoader(rm, batch_size=4)

weights_without_clone = []
weights_with_clone = []

for i in range(0 , 2) : 
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader_subset):  
            # Move tensors to the configured device
            images = images.reshape(-1, 2).to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if (epoch) % print_progress_every_n_epochs == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))


    print('model fc2 weights ' , model.fc2.weight.data)
    weights_without_clone.append(model.fc2.weight.data)
    weights_with_clone.append(model.fc2.weight.data.clone())
Model output:

12665
2115
Epoch [1/2], Step [50/198], Loss: 0.0968
Epoch [2/2], Step [50/198], Loss: 0.0082
model fc2 weights  tensor([[-3.9507e-02, -4.0454e-02,  3.5576e-03,  ...,  6.2181e-03,
          4.1372e-02, -6.2960e-03],
        [ 1.8778e-02,  2.7049e-02, -3.5624e-02,  ...,  2.6797e-02,
          2.2041e-03, -4.2284e-02],
        [ 1.9571e-02, -3.2545e-02,  2.6618e-02,  ..., -1.6139e-02,
          4.1192e-02, -2.3458e-02],
        ...,
        [-4.6123e-03,  2.6943e-02,  3.9979e-02,  ..., -3.3848e-02,
          3.6096e-02,  2.4211e-02],
        [-1.4698e-02,  9.7528e-04, -2.5244e-03,  ..., -3.3145e-02,
          1.0888e-02,  3.1091e-02],
        [-1.7451e-02, -2.1646e-02,  2.5885e-02,  ...,  4.0453e-02,
         -6.5324e-03, -3.5410e-02]])
Epoch [1/2], Step [50/198], Loss: 0.0025
Epoch [2/2], Step [50/198], Loss: 0.0013
model fc2 weights  tensor(1.00000e-02 *
       [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
        [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
        [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
        ...,
        [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
        [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
        [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
Printing the values of the weights without clone:

print(weights_without_clone[0])
print(weights_without_clone[1])
print(weights_with_clone[0])
print(weights_with_clone[1])
Output:

tensor(1.00000e-02 *
       [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
        [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
        [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
        ...,
        [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
        [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
        [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
tensor(1.00000e-02 *
       [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
        [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
        [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
        ...,
        [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
        [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
        [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
tensor([[-3.9507e-02, -4.0454e-02,  3.5576e-03,  ...,  6.2181e-03,
          4.1372e-02, -6.2960e-03],
        [ 1.8778e-02,  2.7049e-02, -3.5624e-02,  ...,  2.6797e-02,
          2.2041e-03, -4.2284e-02],
        [ 1.9571e-02, -3.2545e-02,  2.6618e-02,  ..., -1.6139e-02,
          4.1192e-02, -2.3458e-02],
        ...,
        [-4.6123e-03,  2.6943e-02,  3.9979e-02,  ..., -3.3848e-02,
          3.6096e-02,  2.4211e-02],
        [-1.4698e-02,  9.7528e-04, -2.5244e-03,  ..., -3.3145e-02,
          1.0888e-02,  3.1091e-02],
        [-1.7451e-02, -2.1646e-02,  2.5885e-02,  ...,  4.0453e-02,
         -6.5324e-03, -3.5410e-02]])
tensor(1.00000e-02 *
       [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
        [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
        [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
        ...,
        [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
        [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
        [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
Printing the values of the weights with clone:

print(weights_without_clone[0])
print(weights_without_clone[1])
print(weights_with_clone[0])
print(weights_with_clone[1])
Output:

tensor(1.00000e-02 *
       [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
        [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
        [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
        ...,
        [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
        [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
        [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
tensor(1.00000e-02 *
       [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
        [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
        [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
        ...,
        [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
        [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
        [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
tensor([[-3.9507e-02, -4.0454e-02,  3.5576e-03,  ...,  6.2181e-03,
          4.1372e-02, -6.2960e-03],
        [ 1.8778e-02,  2.7049e-02, -3.5624e-02,  ...,  2.6797e-02,
          2.2041e-03, -4.2284e-02],
        [ 1.9571e-02, -3.2545e-02,  2.6618e-02,  ..., -1.6139e-02,
          4.1192e-02, -2.3458e-02],
        ...,
        [-4.6123e-03,  2.6943e-02,  3.9979e-02,  ..., -3.3848e-02,
          3.6096e-02,  2.4211e-02],
        [-1.4698e-02,  9.7528e-04, -2.5244e-03,  ..., -3.3145e-02,
          1.0888e-02,  3.1091e-02],
        [-1.7451e-02, -2.1646e-02,  2.5885e-02,  ...,  4.0453e-02,
         -6.5324e-03, -3.5410e-02]])
tensor(1.00000e-02 *
       [[-3.9891, -4.0454,  0.3558,  ...,  0.7168,  4.1902, -0.6253],
        [ 1.8766,  2.7049, -3.5632,  ...,  2.6785,  0.2192, -4.2297],
        [ 2.1426, -3.2545,  2.6621,  ..., -1.6285,  4.1196, -2.2653],
        ...,
        [-0.4930,  2.6943,  3.9971,  ..., -3.2940,  3.6641,  2.4248],
        [-1.5160,  0.0975, -0.2524,  ..., -3.1938,  1.1753,  3.1065],
        [-1.8116, -2.1646,  2.5883,  ...,  4.1355, -0.5921, -3.5416]])
Why is 1.00000e-02 * prepended to the final weight values of the second model?

Why does appending the final weights of each iteration require clone(), while omitting clone() appends the same weights for every iteration?

weights_without_clone.append(model.fc2.weight.data)
weights_with_clone.append(model.fc2.weight.data.clone())
First, I will reproduce your case. I will use a very simple model:

Code:

import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(42)

# Some dummy data:
X = torch.randn(100, 5, requires_grad=True, dtype=torch.float)
Y = torch.randn(100, 5, requires_grad=True, dtype=torch.float)

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(5, 5, bias=False)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(5, 5, bias=False)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

def train(model, x, y, loss_fn, optimizer, n_epochs=1000, print_loss=True):
    weights = []
    for i in range(n_epochs):
        y_hat = model(x)
        loss = loss_fn(y_hat, y)
        optimizer.zero_grad()
        loss.backward()
        if print_loss:
            print(f'| {i+1} | Loss: {loss.item():.4f}')
        optimizer.step()
        print('W:\n', model.fc2.weight.data)
        weights.append(model.fc2.weight.data)
    return weights

torch.manual_seed(42)
model = Model()
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
n_epochs = 2

weights = train(model=model,
                x=X,
                y=Y,
                loss_fn=loss_fn,
                optimizer=optimizer,
                n_epochs=n_epochs,
                print_loss=True)
Output:

| 1 | Loss: 1.0285
W:
 tensor([[-0.2052, -0.1257, -0.2684,  0.0425, -0.4413],
        [ 0.4034, -0.3797,  0.3448,  0.0741, -0.1450],
        [ 0.2759,  0.0695,  0.3608,  0.0487, -0.1411],
        [ 0.1201, -0.1213,  0.1881,  0.3990,  0.2583],
        [-0.1956,  0.2581,  0.0798,  0.2270, -0.2725]])
| 2 | Loss: 1.0279
W:
 tensor([[-0.2041, -0.1251, -0.2679,  0.0428, -0.4410],
        [ 0.4030, -0.3795,  0.3444,  0.0738, -0.1447],
        [ 0.2755,  0.0693,  0.3603,  0.0484, -0.1411],
        [ 0.1200, -0.1213,  0.1879,  0.3987,  0.2580],
        [-0.1958,  0.2580,  0.0796,  0.2269, -0.2725]])
OK, that works fine. Now let's take a look at the weights:

Code:

print(*weights, sep='\n')
Output:

tensor([[-0.2041, -0.1251, -0.2679,  0.0428, -0.4410],
        [ 0.4030, -0.3795,  0.3444,  0.0738, -0.1447],
        [ 0.2755,  0.0693,  0.3603,  0.0484, -0.1411],
        [ 0.1200, -0.1213,  0.1879,  0.3987,  0.2580],
        [-0.1958,  0.2580,  0.0796,  0.2269, -0.2725]])
tensor([[-0.2041, -0.1251, -0.2679,  0.0428, -0.4410],
        [ 0.4030, -0.3795,  0.3444,  0.0738, -0.1447],
        [ 0.2755,  0.0693,  0.3603,  0.0484, -0.1411],
        [ 0.1200, -0.1213,  0.1879,  0.3987,  0.2580],
        [-0.1958,  0.2580,  0.0796,  0.2269, -0.2725]])
Well, that is not what we want, but it is actually the expected behavior. If you look again, you will see that the values in the list correspond to the weight values from the second epoch. That means we were not appending new tensors; we were appending references to the same underlying weight storage, which is why we end up with the same final result.

In other words, with a plain append you get the same values because the gradients still propagate into the original weight tensor. Each appended "weight tensor" points to the very same tensor inside the model, and that tensor keeps changing during backprop.
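
To see the aliasing directly, here is a minimal standalone sketch (the names w, snapshots_plain and snapshots_cloned are made up for illustration; data_ptr() is only used to show that both plain entries share one storage):

import torch

w = torch.zeros(2, 2)              # stands in for model.fc2.weight.data
snapshots_plain = []
snapshots_cloned = []

for step in range(2):
    w += 1                              # in-place update, like an optimizer step
    snapshots_plain.append(w)           # appends a reference to the same storage
    snapshots_cloned.append(w.clone())  # appends an independent copy

print([t.data_ptr() for t in snapshots_plain])  # two identical pointers
print(snapshots_plain)   # the final values, printed twice
print(snapshots_cloned)  # one snapshot per step, as intended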

That is why you need to use clone to create a new tensor. However, it is recommended to use tensor.clone().detach() instead, because clone is recorded in the computation graph. That means that if you backpropagate through the cloned tensor, the gradients flowing into the clone will also be propagated to the original tensor.
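
A small standalone sketch of that gradient behavior (a toy tensor, not your model; the names x, y and z are only for illustration):

import torch

x = torch.ones(3, requires_grad=True)

# clone() stays in the graph: backprop through the clone reaches x
y = x.clone()
y.sum().backward()
print(x.grad)           # tensor([1., 1., 1.])

x.grad = None

# clone().detach() is cut out of the graph: nothing flows back to x
z = x.clone().detach()
print(z.requires_grad)  # False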

So, if you want to append the weights safely, use:

weights.append(model.fc2.weight.data.clone().detach())
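
If you want to snapshot all of the model's parameters rather than a single layer, one common alternative (a sketch, not part of the code above) is to deep-copy the state dict:

import copy

# Snapshot every parameter of the model at this point in training
weights.append(copy.deepcopy(model.state_dict()))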