Python: How to compute the accuracy of a keypoint-detection CNN model in PyTorch?


Can someone help me?

import numpy as np
import torch

# assumes net, criterion, optimizer, device, train_loader and test_loader
# are already defined elsewhere
def train_net(n_epochs):
    valid_loss_min = np.Inf    
    history = {'train_loss': [], 'valid_loss': [], 'epoch': []}

    for epoch in range(n_epochs):  
        train_loss = 0.0
        valid_loss = 0.0  
        net.train()
        running_loss = 0.0
        for batch_i, data in enumerate(train_loader):
            images = data['image']
            key_pts = data['keypoints']
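            # flatten each sample's (num_keypoints, 2) array into one vector to match the network's output shape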
            key_pts = key_pts.view(key_pts.size(0), -1)
            key_pts = key_pts.type(torch.FloatTensor).to(device)
            images = images.type(torch.FloatTensor).to(device)
            output_pts = net(images)
            loss = criterion(output_pts, key_pts)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
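            # accumulate the batch loss weighted by batch size, so dividing by the dataset length below gives a per-sample average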
            train_loss += loss.item()*images.data.size(0)      
        net.eval() 

        with torch.no_grad():
            for batch_i, data in enumerate(test_loader):
                images = data['image']
                key_pts = data['keypoints']
                key_pts = key_pts.view(key_pts.size(0), -1)
                key_pts = key_pts.type(torch.FloatTensor).to(device)
                images = images.type(torch.FloatTensor).to(device)
                output_pts = net(images)
                loss = criterion(output_pts, key_pts)          
                valid_loss += loss.item()*images.data.size(0) 
        train_loss = train_loss/len(train_loader.dataset)
        valid_loss = valid_loss/len(test_loader.dataset) 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch + 1, train_loss, valid_loss))

        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))    
            torch.save(net,f'X:\\xxxx\\xxx\\xxx\\epoch{epoch + 1}_loss{valid_loss}.pth')
            valid_loss_min = valid_loss
        history['epoch'].append(epoch + 1)
        history['train_loss'].append(train_loss)
        history['valid_loss'].append(valid_loss)
    print('Finished Training')
    return history

Above is the training code for reference!

Accuracy could be based on the Euclidean distance. True keypoint: (x, y). Predicted keypoint: (x', y'). Distance d = sqrt((x' - x)^2 + (y' - y)^2). From this you need to derive a percentage: if d == 0, the accuracy for that keypoint is 100%. But what should count as 0%? I would say the distance from the true keypoint to the farthest corner of the image, which is the farthest the prediction can possibly be from the keypoint; call that distance R. The accuracy of your point is then 1 - d/R. Do this for every keypoint and take the average.
I just came up with this, so it may have some flaws, but I think you can use it to check whether it is a suitable solution for you.
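
A minimal sketch of this idea, assuming the predicted and true keypoints come as (N, 2) arrays of (x, y) pixel coordinates and the image dimensions are known (all names here are illustrative, not taken from the question's code):

import numpy as np

def keypoint_accuracy(pred_pts, true_pts, img_w, img_h):
    """Mean per-keypoint accuracy in [0, 1]: 1 - d/R, where d is the
    Euclidean error and R is the distance from the true keypoint to
    the farthest image corner (the worst possible error)."""
    pred_pts = np.asarray(pred_pts, dtype=float)    # (N, 2) predicted (x, y)
    true_pts = np.asarray(true_pts, dtype=float)    # (N, 2) ground-truth (x, y)
    d = np.linalg.norm(pred_pts - true_pts, axis=1)
    corners = np.array([[0, 0], [img_w, 0], [0, img_h], [img_w, img_h]], dtype=float)
    # distance from each true keypoint to its farthest image corner
    R = np.linalg.norm(true_pts[:, None, :] - corners[None, :, :], axis=2).max(axis=1)
    # clip in case a prediction falls outside the image
    return float(np.clip(1.0 - d / R, 0.0, 1.0).mean())

You could call this per batch inside the with torch.no_grad() block, after reshaping the flattened network output back to (N, 2), and average it over the dataset the same way valid_loss is accumulated.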

Funny, I was working on exactly this a few minutes ago! As you may realize, simply computing the Euclidean distance between two sets of keypoints does not generalize well to cases where you need to compare bodies of different shapes and sizes. For that reason I would suggest the Object Keypoint Similarity (OKS) score, which measures the distance between body joints normalized by the scale of the person. OKS is defined as follows:
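OKS = sum_i [ exp(-d_i^2 / (2 * s^2 * k_i^2)) * δ(v_i > 0) ] / sum_i δ(v_i > 0)

where d_i is the Euclidean distance between the detected and ground-truth positions of keypoint i, v_i is the ground-truth visibility flag, s is the object scale (the square root of the labeled object area), and k_i is a per-keypoint constant controlling the falloff (the sigmas in the code below).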

Quick hint: you should provide more specific information about what your data looks like (especially the keypoints, which are essentially the output of your CNN), and specify exactly what your model outputs.

Here (function computeOks, line 313) is the Facebook Research implementation:
def computeOks(self, imgId, catId):
    p = self.params
    # dimension here should be Nxm
    gts = self._gts[imgId, catId]
    dts = self._dts[imgId, catId]
    inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
    dts = [dts[i] for i in inds]
    if len(dts) > p.maxDets[-1]:
        dts = dts[0:p.maxDets[-1]]
    # if len(gts) == 0 and len(dts) == 0:
    if len(gts) == 0 or len(dts) == 0:
        return []
    ious = np.zeros((len(dts), len(gts)))
    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0
    vars = (sigmas * 2)**2
    k = len(sigmas)
    # compute oks between each detection and ground truth object
    for j, gt in enumerate(gts):
        # create bounds for ignore regions(double the gt bbox)
        g = np.array(gt['keypoints'])
        xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
        k1 = np.count_nonzero(vg > 0)
        bb = gt['bbox']
        x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
        y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
        for i, dt in enumerate(dts):
            d = np.array(dt['keypoints'])
            xd = d[0::3]; yd = d[1::3]
            if k1>0:
                # measure the per-keypoint distance if keypoints visible
                dx = xd - xg
                dy = yd - yg
            else:
                # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
                z = np.zeros((k))
                dx = np.max((z, x0-xd), axis=0) + np.max((z, xd-x1), axis=0)
                dy = np.max((z, y0-yd), axis=0) + np.max((z, yd-y1), axis=0)
            e = (dx**2 + dy**2) / vars / (gt['area'] + np.spacing(1)) / 2
            if k1 > 0:
                e=e[vg > 0]
            ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
    return ious
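
This implementation expects COCO-style annotation dicts (keypoints flattened as x, y, visibility triplets, plus a bbox and area per object) and returns a matrix of OKS scores between every detection and every ground truth. If your data is not in COCO format, the core of the metric reduces to a few lines. Below is a minimal sketch under stated assumptions: pred and gt are (K, 2) arrays of (x, y) coordinates for a single person, vis is a (K,) boolean mask of visible ground-truth keypoints, and area is the labeled object area; all of these names are illustrative, not part of the code above.

import numpy as np

def oks(pred, gt, vis, area, sigmas):
    """Object Keypoint Similarity for one detection / ground-truth pair."""
    pred = np.asarray(pred, dtype=float)            # (K, 2) predicted (x, y)
    gt = np.asarray(gt, dtype=float)                # (K, 2) ground-truth (x, y)
    d2 = np.sum((pred - gt) ** 2, axis=1)           # squared distance per keypoint
    var = (2 * np.asarray(sigmas)) ** 2             # per-keypoint variance, as in computeOks
    e = d2 / var / (area + np.spacing(1)) / 2       # normalized squared error
    e = e[np.asarray(vis)]                          # only visible keypoints count
    return float(np.exp(-e).mean()) if e.size else 0.0

An OKS of 1 means a perfect match and values near 0 mean the prediction is far off at the object's scale; averaging it over the validation set (or thresholding it, as COCO's average precision does) gives a scale-aware accuracy number you can log alongside valid_loss.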