Deep learning: PyTorch optimizer doesn't update parameters

Tags: deep-learning, pytorch, backpropagation, quantization

I made a custom model, AlexNetQIL (AlexNet with QIL layers). "QIL" stands for Quantization Interval Learning.

  • I trained my model, but the loss didn't decrease, and I found that the parameters in my model were not being updated after I added the QIL layers.

  • I've attached my code for AlexNetQIL and Qil below. Could someone please tell me what's wrong with my code?
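For context, this is one minimal way to check whether an optimizer is actually updating a model's parameters (a sketch with illustrative stand-ins for the model, data, and optimizer; none of it is code from the original post):

    import torch
    import torch.nn as nn

    # Illustrative stand-ins; substitute the real model, data, and optimizer.
    model = nn.Linear(8, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    criterion = nn.CrossEntropyLoss()
    inputs, targets = torch.randn(4, 8), torch.randint(0, 2, (4,))

    # Snapshot every parameter, run one training step, then compare.
    before = {name: p.clone() for name, p in model.named_parameters()}
    loss = criterion(model(inputs), targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    for name, p in model.named_parameters():
        print(name, 'updated:', not torch.equal(before[name], p),
              '| grad is None:', p.grad is None)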

  • AlexNetQIL

    import torch
    import torch.nn as nn
    from qil import *
    
    class AlexNetQIL(nn.Module):
    
        #def __init__(self, num_classes=1000): for imagenet
        def __init__(self, num_classes=10): # for cifar-10
            super(AlexNetQIL, self).__init__()
    
            self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
            self.bn1 = nn.BatchNorm2d(64)
            self.relu2 = nn.ReLU(inplace=True)
            self.maxpool1 = nn.MaxPool2d(kernel_size=2)
    
            self.qil2 = Qil()
            self.conv2 = nn.Conv2d(64, 192, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm2d(192)
            self.relu2 = nn.ReLU(inplace=True)
            self.maxpool2 = nn.MaxPool2d(kernel_size=2)
    
            self.qil3 = Qil()
            self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
            self.bn3 = nn.BatchNorm2d(384)
            self.relu3 = nn.ReLU(inplace=True)
    
            self.qil4 = Qil()
            self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
            self.bn4 = nn.BatchNorm2d(256)
            self.relu4 = nn.ReLU(inplace=True)
    
            self.qil5 = Qil()
            self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
            self.bn5 = nn.BatchNorm2d(256)
            self.relu5 = nn.ReLU(inplace=True)
            self.maxpool5 = nn.MaxPool2d(kernel_size=2)
    
            self.classifier = nn.Sequential(
                nn.Linear(256 * 2 * 2, 4096),
                nn.ReLU(inplace=True),
                nn.Linear(4096, 4096),
                nn.ReLU(inplace=True),
                nn.Linear(4096, num_classes),
            )
        def forward(self,x,inference = False):
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu2(x)
            x = self.maxpool1(x)
    
            x,self.conv2.weight = self.qil2(x,self.conv2.weight,inference ) # if I remove this line, No problem 
            x = self.conv2(x)
            x = self.bn2(x)
            x = self.relu2(x)
            x = self.maxpool2(x)
    
            x,self.conv3.weight = self.qil3(x,self.conv3.weight,inference ) # if I remove this line, No problem 
            x = self.conv3(x)
            x = self.bn3(x)
            x = self.relu3(x)
    
            x,self.conv4.weight = self.qil4(x,self.conv4.weight,inference ) # if I remove this line, No problem 
            x = self.conv4(x)
            x = self.bn4(x)
            x = self.relu4(x)
    
            x,self.conv5.weight = self.qil5(x,self.conv5.weight,inference ) # if I remove this line, No problem 
            x = self.conv5(x)
            x = self.bn5(x)
            x = self.relu5(x)
            x = self.maxpool5(x)
            x = x.view(x.size(0),256 * 2 * 2)
            x = self.classifier(x)
            return x
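One thing to note about the lines marked `# if I remove this line, No problem`: each of them rebinds `self.convN.weight` to a brand-new `nn.Parameter` returned by `Qil.forward`. An optimizer only holds references to the Parameter objects it was given at construction, so after the first forward pass `optimizer.step()` updates stale tensors the model no longer uses; and because an `nn.Parameter` is always a graph leaf, wrapping a computed tensor in one also severs its autograd history. A minimal sketch of the rebinding effect (illustrative, not code from the original post):

    import torch
    import torch.nn as nn

    conv = nn.Conv2d(3, 8, kernel_size=3)
    opt = torch.optim.SGD(conv.parameters(), lr=0.1)
    tracked = opt.param_groups[0]['params'][0]  # the Parameter the optimizer will step

    print(tracked is conv.weight)  # True: same object

    # Rebinding .weight, as the forward pass above does on every call,
    # swaps in a new Parameter the optimizer has never seen:
    conv.weight = nn.Parameter(conv.weight.detach().clone())
    print(tracked is conv.weight)  # False: step() now updates a stale tensor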
    
    
  • Qil

    
    
    Forward

    • quantize the weights and the input activations in two steps
    • transformer(params) -> discretizer(params)

    import torch
    import torch.nn as nn
    import numpy as np
    import copy

    # Qil (Quantization Interval Learning)
    class Qil(nn.Module):

        discretization_level = 32

        def __init__(self):
            super(Qil, self).__init__()
            self.cw = nn.Parameter(torch.rand(1))  # I have to train this interval parameter
            self.dw = nn.Parameter(torch.rand(1))  # I have to train this interval parameter
            self.cx = nn.Parameter(torch.rand(1))  # I have to train this interval parameter
            self.dx = nn.Parameter(torch.rand(1))  # I have to train this interval parameter
            self.gamma = nn.Parameter(torch.tensor(1.0))  # I have to train this transformer parameter
            self.a = Qil.discretization_level

        def forward(self, x, weights, inference=False):
            if not inference:
                weights = self.transformer_weights(weights)
                weights = self.discretizer(weights)
            x = self.transformer_activation(x)
            x = self.discretizer(x)
            return torch.nn.Parameter(x), torch.nn.Parameter(weights)

        def transformer_weights(self, weights):
            device = weights.device
            aw, bw = (0.5 / self.dw), (-0.5 * self.cw / self.dw + 0.5)
            weights = torch.where(abs(weights) < self.cw - self.dw,
                                  torch.tensor(0.0).to(device), weights)
            weights = torch.where(abs(weights) > self.cw + self.dw,
                                  weights.sign(), weights)
            weights = torch.where((abs(weights) >= self.cw - self.dw) & (abs(weights) <= self.cw + self.dw),
                                  (aw * abs(weights) + bw) ** self.gamma * weights.sign(), weights)
            return weights

        def transformer_activation(self, x):
            device = x.device
            ax, bx = (0.5 / self.dx), (-0.5 * self.cx / self.dx + 0.5)
            x = torch.where(abs(x) < self.cx - self.dx,
                            torch.tensor(0.0).to(device), x)
            x = torch.where(abs(x) > self.cx + self.dx,
                            torch.tensor(1.0).to(device), x)
            x = torch.where((abs(x) >= self.cx - self.dx) & (abs(x) <= self.cx + self.dx),
                            ax * abs(x) + bx, x)
            return x

        def discretizer(self, tensor):
            q_D = pow(2, self.a)
            tensor = torch.round(tensor * q_D) / q_D
            return tensor

Comments

  • How did you train it? Do you mean loss.backward()? Are you sure .round is differentiable?
  • Yes, if I remove the discretizer function that uses .round, it works… but I need the .round function to quantize my parameters.
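The comment thread points at the crux: torch.round has zero gradient almost everywhere, so the discretizer blocks gradients from reaching cw, dw, cx, dx, gamma and everything upstream of it. A common workaround is a straight-through estimator that rounds in the forward pass but lets the gradient pass through unchanged. A hedged sketch of that standard technique, not something proposed in the original thread (RoundSTE and discretizer_ste are illustrative names):

    import torch

    class RoundSTE(torch.autograd.Function):
        # Straight-through estimator: round on the way forward, but treat
        # round() as the identity on the way backward.
        @staticmethod
        def forward(ctx, x):
            return torch.round(x)

        @staticmethod
        def backward(ctx, grad_output):
            return grad_output  # pass the gradient through unchanged

    def discretizer_ste(tensor, a):
        # Same quantization as Qil.discretizer above, but differentiable
        # in the straight-through sense.
        q_D = pow(2, a)
        return RoundSTE.apply(tensor * q_D) / q_D

    # Quick check: gradients now reach the input instead of vanishing.
    t = torch.rand(5, requires_grad=True)
    discretizer_ste(t, 4).sum().backward()
    print(t.grad)  # a tensor of ones, not zeros as with plain torch.round

Swapping such an estimator into Qil.discretizer would let the interval parameters receive gradients, though whether that alone makes the loss decrease is not settled in the thread.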