Deep learning Pytorch Optimizer不';t更新参数
我制作了我的定制模型AlexNetQIL(Alexnet带有QIL层) “QIL”表示量化学习间隔Deep learning Pytorch Optimizer不';t更新参数,deep-learning,pytorch,backpropagation,quantization,Deep Learning,Pytorch,Backpropagation,Quantization,我制作了我的定制模型AlexNetQIL(Alexnet带有QIL层) “QIL”表示量化学习间隔 我训练了我的模型,损失值并没有减少,我发现我的模型中的参数并没有更新,因为我添加了QIL层 我附上了我的代码AlexNetQil和Kil 请有人告诉我我的密码有什么问题 AlexNetQIL import torch import torch.nn as nn from qil import * class AlexNetQIL(nn.Module): #def __init__(se
import torch
import torch.nn as nn
from qil import *
class AlexNetQIL(nn.Module):
#def __init__(self, num_classes=1000): for imagenet
def __init__(self, num_classes=10): # for cifar-10
super(AlexNetQIL, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
self.bn1 = nn.BatchNorm2d(64)
self.relu2 = nn.ReLU(inplace=True)
self.maxpool1 = nn.MaxPool2d(kernel_size=2)
self.qil2 = Qil()
self.conv2 = nn.Conv2d(64, 192, kernel_size=3, padding=1)
self.bn2 = nn.BatchNorm2d(192)
self.relu2 = nn.ReLU(inplace=True)
self.maxpool2 = nn.MaxPool2d(kernel_size=2)
self.qil3 = Qil()
self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
self.bn3 = nn.BatchNorm2d(384)
self.relu3 = nn.ReLU(inplace=True)
self.qil4 = Qil()
self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
self.bn4 = nn.BatchNorm2d(256)
self.relu4 = nn.ReLU(inplace=True)
self.qil5 = Qil()
self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
self.bn5 = nn.BatchNorm2d(256)
self.relu5 = nn.ReLU(inplace=True)
self.maxpool5 = nn.MaxPool2d(kernel_size=2)
self.classifier = nn.Sequential(
nn.Linear(256 * 2 * 2, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, num_classes),
)
def forward(self,x,inference = False):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu2(x)
x = self.maxpool1(x)
x,self.conv2.weight = self.qil2(x,self.conv2.weight,inference ) # if I remove this line, No problem
x = self.conv2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.maxpool2(x)
x,self.conv3.weight = self.qil3(x,self.conv3.weight,inference ) # if I remove this line, No problem
x = self.conv3(x)
x = self.bn3(x)
x = self.relu3(x)
x,self.conv4.weight = self.qil4(x,self.conv4.weight,inference ) # if I remove this line, No problem
x = self.conv4(x)
x = self.bn4(x)
x = self.relu4(x)
x,self.conv5.weight = self.qil5(x,self.conv5.weight,inference ) # if I remove this line, No problem
x = self.conv5(x)
x = self.bn5(x)
x = self.relu5(x)
x = self.maxpool5(x)
x = x.view(x.size(0),256 * 2 * 2)
x = self.classifier(x)
return x
QIL
import torch
import torch.nn as nn
from qil import *
class AlexNetQIL(nn.Module):
#def __init__(self, num_classes=1000): for imagenet
def __init__(self, num_classes=10): # for cifar-10
super(AlexNetQIL, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
self.bn1 = nn.BatchNorm2d(64)
self.relu2 = nn.ReLU(inplace=True)
self.maxpool1 = nn.MaxPool2d(kernel_size=2)
self.qil2 = Qil()
self.conv2 = nn.Conv2d(64, 192, kernel_size=3, padding=1)
self.bn2 = nn.BatchNorm2d(192)
self.relu2 = nn.ReLU(inplace=True)
self.maxpool2 = nn.MaxPool2d(kernel_size=2)
self.qil3 = Qil()
self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
self.bn3 = nn.BatchNorm2d(384)
self.relu3 = nn.ReLU(inplace=True)
self.qil4 = Qil()
self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
self.bn4 = nn.BatchNorm2d(256)
self.relu4 = nn.ReLU(inplace=True)
self.qil5 = Qil()
self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
self.bn5 = nn.BatchNorm2d(256)
self.relu5 = nn.ReLU(inplace=True)
self.maxpool5 = nn.MaxPool2d(kernel_size=2)
self.classifier = nn.Sequential(
nn.Linear(256 * 2 * 2, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, num_classes),
)
def forward(self,x,inference = False):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu2(x)
x = self.maxpool1(x)
x,self.conv2.weight = self.qil2(x,self.conv2.weight,inference ) # if I remove this line, No problem
x = self.conv2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.maxpool2(x)
x,self.conv3.weight = self.qil3(x,self.conv3.weight,inference ) # if I remove this line, No problem
x = self.conv3(x)
x = self.bn3(x)
x = self.relu3(x)
x,self.conv4.weight = self.qil4(x,self.conv4.weight,inference ) # if I remove this line, No problem
x = self.conv4(x)
x = self.bn4(x)
x = self.relu4(x)
x,self.conv5.weight = self.qil5(x,self.conv5.weight,inference ) # if I remove this line, No problem
x = self.conv5(x)
x = self.bn5(x)
x = self.relu5(x)
x = self.maxpool5(x)
x = x.view(x.size(0),256 * 2 * 2)
x = self.classifier(x)
return x
前进
- 通过两个步骤量化权重和输入激活
- 变压器(参数)->离散化器(参数)
导入火炬
导入torch.nn作为nn
将numpy作为np导入
导入副本
#Qil(量化区间学习)
类别Qil(nn.模块):
离散化水平=32
定义初始化(自):
超级(齐尔,自我)。\uuuu初始
self.cw=nn.Parameter(torch.rand(1))#我必须训练这个间隔参数
self.dw=nn.Parameter(torch.rand(1))#我必须训练这个间隔参数
self.cx=nn.Parameter(torch.rand(1))#我必须训练这个间隔参数
self.dx=nn.Parameter(torch.rand(1))#我必须训练这个间隔参数
self.gamma=nn.Parameter(火炬张量(1.0))#我必须训练这个变压器参数
self.a=齐尔离散化水平
def forward(自身、x、权重、推断=False):
如果不是推断:
重量=自身变压器重量(重量)
重量=自离散化器(重量)
x=自变压器激活(x)
x=自离散化器(x)
返回火炬nn.参数(x),火炬nn.参数(重量)
def变压器重量(自身重量):
设备=重量。设备
aw,bw=(0.5/self.dw),(-0.5*self.cw/self.dw+0.5)
重量=火炬,其中(abs(重量)self.cw+self.dw,
weights.sign(),weights)
重量=火炬,其中((abs(重量)>=self.cw-self.dw)和(abs(重量)self.cx+self.dx,
火炬张量(1.0).至(装置),x)
x=torch.where((abs(x)>=self.cx-self.dx)&(abs(x)你是怎么训练的?你是说loss.backward吗?你确定.round
是可微的吗?是的,如果我删除使用.round的离散化函数,它会工作……但是我需要使用.round函数来量化我的参数。