Deep learning PyTorch不会使初始权重随机
我创建了一个神经网络,它接收两张 14x14 像素的灰度图像,每张图像描绘一个数字(来自 MNIST 数据库);如果第一个数字小于或等于第二个数字,则返回 1,否则返回 0。代码可以运行,但每次运行时初始权重都相同,而它们本应是随机的。(标签:deep-learning, neural-network, pytorch, windows-10)我尝试在 Net 类中使用下面这行代码强制随机初始化权重,但没有效果:
torch.nn.init.normal_(self.layer1.weight, mean=0.0, std=0.01)
以下是“main.py”文件的代码:
下面是“dlc_practical_prologue.py”的代码:
同样在Linux上,我总是得到相同的初始权重
请帮帮我,好吗?
如果我说错了请纠正我,但每次运行时,应该只有第一层的权重相同。问题在于,当您导入 dlc_practical_prologue.py 时,其中包含以下内容:
if args.seed >= 0:
torch.manual_seed(args.seed)
只要种子大于等于 0(默认值为 0),这段代码就会执行。
这应该只会使第一层在每次运行时以相同的权重初始化,请检查情况是否如此。解决方案是从“dlc_practical_prologue.py”中删除上面这两行(即 `if args.seed >= 0:` 和 `torch.manual_seed(args.seed)`)。
我无法重现这种行为。无论是否使用 nn.init 那一行,每次实例化新的 Net() 时,我都会得到不同的初始权重。
在 Windows 10 上运行时,我总是得到相同的初始权重;在 Linux 上,我也总是得到相同的权重。但我刚刚意识到,Linux 上的初始权重与 Windows 10 上的不同。
如果我在 Net 类之前写入 print(torch.rand(1)),打印出的值总是相同的,这意味着随机种子始终相同。很奇怪。
不幸的是,每次运行程序时,每一层的权重都是相同的。我通过在 main 的开头添加 torch.manual_seed(time.time()) 解决了这个问题,但在我必须完成的作业中,不允许使用 time 库。
那么,即使您从 dlc_practical_prologue.py 中删除了 `if args.seed >= 0: torch.manual_seed(args.seed)`,并且没有加入任何其他设置种子的操作,每次运行的权重仍然相同吗?
有趣的是,我删除了 `if args.seed >= 0: torch.manual_seed(args.seed)` 之后,现在一切正常。谢谢!
import os; os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import torch
from torchvision import datasets
import argparse
import os
import urllib
######################################################################
# Command-line options of the prologue. NOTE: the options are parsed while the
# module is being executed (i.e. at import time), so `args` reflects sys.argv
# of whichever script imports this file.
parser = argparse.ArgumentParser(
    description='DLC prologue file for practical sessions.',
)

# Data-set size switches, read by load_data().
parser.add_argument(
    '--full',
    default=False,
    action='store_true',
    help='Use the full set, can take ages (default False)',
)
parser.add_argument(
    '--tiny',
    default=False,
    action='store_true',
    help='Use a very small set for quick checks (default False)',
)

# RNG seed; the default of 0 means seeding happens unless a negative value
# is passed explicitly.
parser.add_argument(
    '--seed',
    default=0,
    type=int,
    help='Random seed (default 0, < 0 is no seeding)',
)

parser.add_argument(
    '--cifar',
    default=False,
    action='store_true',
    help='Use the CIFAR data-set and not MNIST (default False)',
)
parser.add_argument(
    '--data_dir',
    default=None,
    type=str,
    help='Where are the PyTorch data located (default $PYTORCH_DATA_DIR or \'./data\')',
)

# Timur's fix
# NOTE(review): presumably accepts the "-f <file>" argument that a Jupyter
# kernel is started with, so that parse_args() does not reject it -- confirm.
parser.add_argument(
    '-f', '--file',
    help='quick hack for jupyter',
)

args = parser.parse_args()

# Seed PyTorch's global RNG unless seeding was disabled with a negative value.
# With the default seed every run starts from the same RNG state.
if args.seed >= 0:
    torch.manual_seed(args.seed)
######################################################################
# The data
def convert_to_one_hot_labels(input, target):
    """Return a one-hot encoding of the class indices in ``target``.

    Args:
        input: tensor used only as a template; the result is allocated with
            ``input.new_zeros``.
        target: integer tensor of class indices; it is viewed as a column
            (``view(-1, 1)``) to serve as the scatter index.

    Returns:
        A tensor with ``target.size(0)`` rows and ``target.max() + 1``
        columns, holding 1.0 at column ``target[i]`` of row ``i`` and 0
        everywhere else.
    """
    # The width is derived from the largest label present in `target`.
    nb_classes = target.max() + 1
    one_hot = input.new_zeros(target.size(0), nb_classes)
    # scatter_ works in place and returns the tensor it modified.
    return one_hot.scatter_(1, target.view(-1, 1), 1.0)
def load_data(cifar = None, one_hot_labels = False, normalize = False, flatten = True):
    """Load MNIST or CIFAR-10 as float tensors, optionally reduced and normalized.

    The data root is ``args.data_dir`` when given, otherwise the
    ``PYTORCH_DATA_DIR`` environment variable, otherwise ``'./data'``. The
    torchvision data-sets are created with ``download = True``.

    Args:
        cifar: when truthy (or when ``--cifar`` was passed on the command
            line) CIFAR-10 is used instead of MNIST.
        one_hot_labels: when true, both target tensors are converted with
            ``convert_to_one_hot_labels``.
        normalize: when true, the mean and standard deviation of the training
            input are subtracted from / divided out of both inputs, in place.
        flatten: when true, every sample is reshaped to a single dimension.

    Returns:
        The tuple ``(train_input, train_target, test_input, test_target)``.
        Unless ``--full`` was passed, only the first 1000 train / 1000 test
        samples are kept (500 / 100 with ``--tiny``).

    Raises:
        ValueError: if both ``--full`` and ``--tiny`` were passed.
    """
    root = args.data_dir
    if root is None:
        root = os.environ.get('PYTORCH_DATA_DIR', './data')

    if args.cifar or cifar:
        print('* Using CIFAR')
        cifar_root = root + '/cifar10/'
        train_set = datasets.CIFAR10(cifar_root, train = True, download = True)
        test_set = datasets.CIFAR10(cifar_root, train = False, download = True)

        # permute(0, 3, 1, 2) moves the last dimension to position 1; it is
        # the same reordering as transpose(3, 1) followed by transpose(2, 3).
        train_input = torch.from_numpy(train_set.data).permute(0, 3, 1, 2).float()
        train_target = torch.tensor(train_set.targets, dtype = torch.int64)

        test_input = torch.from_numpy(test_set.data).float().permute(0, 3, 1, 2)
        test_target = torch.tensor(test_set.targets, dtype = torch.int64)
    else:
        print('* Using MNIST')

        # Disabled fallback kept for reference: fetch the MNIST archives from
        # an alternative location before building the data-sets.
        #
        # import torchvision
        # raw_folder = data_dir + '/mnist/raw/'
        # resources = [
        #     ("https://fleuret.org/dlc/data/train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
        #     ("https://fleuret.org/dlc/data/train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
        #     ("https://fleuret.org/dlc/data/t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
        #     ("https://fleuret.org/dlc/data/t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c")
        # ]
        # os.makedirs(raw_folder, exist_ok=True)
        # for url, md5 in resources:
        #     filename = url.rpartition('/')[2]
        #     torchvision.datasets.utils.download_and_extract_archive(url, download_root=raw_folder, filename=filename, md5=md5)

        mnist_root = root + '/mnist/'
        train_set = datasets.MNIST(mnist_root, train = True, download = True)
        test_set = datasets.MNIST(mnist_root, train = False, download = True)

        train_input = train_set.data.view(-1, 1, 28, 28).float()
        train_target = train_set.targets
        test_input = test_set.data.view(-1, 1, 28, 28).float()
        test_target = test_set.targets

    if flatten:
        # One row per sample; clone() first so the result does not share
        # storage with the tensor it was built from.
        train_input = train_input.clone().reshape(train_input.size(0), -1)
        test_input = test_input.clone().reshape(test_input.size(0), -1)

    if args.full and args.tiny:
        raise ValueError('Cannot have both --full and --tiny')

    if not args.full:
        if args.tiny:
            print('** Reduce the data-set to the tiny setup')
            nb_train, nb_test = 500, 100
        else:
            print('** Reduce the data-set (use --full for the full thing)')
            nb_train, nb_test = 1000, 1000

        # Keep only the leading samples of each tensor.
        train_input = train_input.narrow(0, 0, nb_train)
        train_target = train_target.narrow(0, 0, nb_train)
        test_input = test_input.narrow(0, 0, nb_test)
        test_target = test_target.narrow(0, 0, nb_test)

    print('** Use {:d} train and {:d} test samples'.format(train_input.size(0), test_input.size(0)))

    if one_hot_labels:
        train_target = convert_to_one_hot_labels(train_input, train_target)
        test_target = convert_to_one_hot_labels(test_input, test_target)

    if normalize:
        # The statistics come from the training input only and are applied
        # to both inputs, in place.
        mu, std = train_input.mean(), train_input.std()
        for samples in (train_input, test_input):
            samples.sub_(mu).div_(std)

    return train_input, train_target, test_input, test_target
######################################################################
def mnist_to_pairs(nb, input, target):
    """Build ``nb`` random pairs of down-sampled images with comparison labels.

    Args:
        nb: number of pairs to build. ``2 * nb`` distinct samples are drawn,
            so ``input`` must hold at least that many (the ``view`` below
            fails otherwise).
        input: image tensor, indexed by sample along dimension 0 and accepted
            by ``avg_pool2d`` (``generate_pair_sets`` passes
            ``(N, 1, 28, 28)`` tensors).
        target: tensor of class labels, indexed like ``input``.

    Returns:
        A tuple ``(input, target, classes)`` where ``input`` holds the two
        pooled images of each pair concatenated along dimension 1,
        ``classes`` is the ``(nb, 2)`` tensor of the labels of both images,
        and ``target`` is 1 (as int64) where the first label is lower than or
        equal to the second one, 0 otherwise.
    """
    # Call the public functional API. The previous `torch.functional.F`
    # spelling only resolved because torch/functional.py happens to import
    # torch.nn.functional under the name F.
    pooled = torch.nn.functional.avg_pool2d(input, kernel_size = 2)

    # Draw 2 * nb distinct sample indices and arrange them as nb pairs.
    pair_idx = torch.randperm(pooled.size(0))[:2 * nb].view(nb, 2)

    pairs = torch.cat((pooled[pair_idx[:, 0]], pooled[pair_idx[:, 1]]), 1)
    classes = target[pair_idx]
    labels = (classes[:, 0] <= classes[:, 1]).long()
    return pairs, labels, classes
######################################################################
def generate_pair_sets(nb):
    """Build ``nb`` MNIST image pairs for training and ``nb`` for testing.

    The data root is ``args.data_dir`` when given, otherwise the
    ``PYTORCH_DATA_DIR`` environment variable, otherwise ``'./data'``. The
    MNIST data-sets are created with ``download = True``.

    Args:
        nb: number of pairs to draw from each of the train and test sets.

    Returns:
        The 6-tuple ``(train_input, train_target, train_classes,
        test_input, test_target, test_classes)``, i.e. the result of
        ``mnist_to_pairs`` on the train set followed by its result on the
        test set.
    """
    root = args.data_dir
    if root is None:
        root = os.environ.get('PYTORCH_DATA_DIR', './data')
    mnist_root = root + '/mnist/'

    # Both data-sets are created before any pair is drawn.
    train_set = datasets.MNIST(mnist_root, train = True, download = True)
    test_set = datasets.MNIST(mnist_root, train = False, download = True)

    pair_sets = []
    for subset in (train_set, test_set):
        images = subset.data.view(-1, 1, 28, 28).float()
        pair_sets.extend(mnist_to_pairs(nb, images, subset.targets))
    return tuple(pair_sets)
######################################################################
# NOTE(review): KMP_DUPLICATE_LIB_OK presumably works around the abort raised
# when several OpenMP runtimes get loaded in one process -- confirm it is
# still needed.
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Seed PyTorch's global RNG once. The statement used to appear twice in a row
# here; seeding again with the same value leaves the RNG in the same state,
# so a single call is enough. A negative --seed disables seeding.
if args.seed >= 0:
    torch.manual_seed(args.seed)