PyTorch IndexError: index 39092 is out of bounds for axis 0 with size 39092; I'm trying to train a multi-label classifier

Tags: pytorch, multilabel-classification

I think the problem is caused by the way I load the data from the csv file, but I don't know how to fix it.

Here is a small part of my train csv file (I used 15 labels, from column 1 to the last):
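(The snippet itself was posted as an image and is not reproduced here. Judging from the loading code below, which takes image paths from column 0 and labels from column 2 onward, the layout is presumably something like this hypothetical illustration; the column names are made up:)

image_path,category,label_1,label_2,...,label_15    <- hypothetical header row
img/0001.jpg,shirt,0,1,...,0
img/0002.jpg,dress,1,0,...,1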

The error looks like this:
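IndexError: index 39092 is out of bounds for axis 0 with size 39092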

Code ==>

import csv
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torch.nn as nn                        # used below for the classifier head and BCELoss
import torch.optim as optim                  # used below for the Adam optimizer
from torchvision import transforms, models  # used below for the data transforms and resnet50

class FashionData(torch.utils.data.Dataset):
    def __init__(self, csv_file, mode='train', transform=None):
        self.mode=mode
        #label(img1) = [0, 0, 0, 1], label(img3) = [1, 0, 1, 0], ...),
        self.transform = transform


        self.data_info = pd.read_csv(csv_file, header=None)
        #print(self.data_info)
        
        
        # First column contains the image paths
        self.image_arr = np.asarray(self.data_info.iloc[1:, 0])
        if mode !='test':
            self.label_arr = np.asarray(self.data_info.iloc[1:, 2:])  # label columns, from index 2 to the end
            self.label_arr=self.label_arr.astype('float32')

        
        # Calculate len
        self.data_len = len(self.data_info.index)
    def __getitem__(self, index):
        # Get image name from the pandas df
        single_image_name = self.image_arr[index]
        # Open image
        img_as_img = Image.open(single_image_name)
        
        if self.transform is not None:
            img_as_img = self.transform(img_as_img)
        if self.mode=='test':
            return img_as_img

        # Get label(class) of the image based on the cropped pandas column
        single_image_label = self.label_arr[index]
        #single_image_label = torch.from_numpy(self.label_arr[index]).float()
        #img = torch.from_numpy(img).float().to(device)
        #label = torch.tensor(int(self.labels[index]))

        return (img_as_img, single_image_label)

    def __len__(self):
        return self.data_len

transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
transforms_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
dataset_train = FashionData('./deep_fashion/train.csv', mode='train', transform=transforms_train)
dataset_val = FashionData('./deep_fashion/val.csv', mode='val', transform=transforms_test)
dataset_test = FashionData('./deep_fashion/test.csv', mode='test', transform=transforms_test)

from torch.utils.data import DataLoader

train_loader = DataLoader(dataset_train, batch_size=128, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=128, shuffle=False)
test_loader = DataLoader(dataset_test, batch_size=128, shuffle=False)

model=models.resnet50(pretrained=True)
for params in model.parameters():
  params.requires_grad=False
model.fc=nn.Sequential(
    nn.Linear(2048,15),
    nn.Sigmoid()
    )

device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model=model.to(device)
print(model)

criterion=nn.BCELoss()
#criterion=nn.BCEWithLogitsLoss()
optimizer=optim.Adam(model.parameters(),lr=0.001)
criterion=criterion.to(device)

def train(train_loader,model,criterion,optimizer):
  model.train()
  loss_list=[]
  total_count=0
  acc_count=0
  
  for x,y in train_loader:
    
    x=x.to(device)
    y=y.to(device)

    optimizer.zero_grad()
    output=model(x)
    
    loss=criterion(output,y)
    loss.backward()
    optimizer.step()
    
    #_,predicted=torch.max(output,1)
    predicted=(output>0.5).float()
    total_count+=y.size(0)
    acc_count+=(predicted==y).sum().item()
    
    loss_list.append(loss.item())
  acc=acc_count/total_count
  loss=sum(loss_list)/len(loss_list)
  return acc, loss

def val(valid_loader,model,criterion):
  model.eval()
  loss_list=[]
  total_count=0
  acc_count=0
  with torch.no_grad():
    for x,y in valid_loader:
      x=x.to(device)
      
      y=y.to(device)
      
      output=model(x)
      loss=criterion(output,y)
      #_,predicted=torch.max(output,1)
      predicted=(output>0.5).float()
    
      total_count+=y.size(0)
      acc_count+=(predicted==y).sum().item()
      loss_list.append(loss.item())

  acc=acc_count/total_count
  loss=sum(loss_list)/len(loss_list)
  return acc, loss

train_acc_list = []
train_loss_list = []
val_acc_list = []
val_loss_list = []

for epoch in range(10):
    train_acc, train_loss = train(train_loader, model, criterion, optimizer)
    val_acc, val_loss=val(val_loader, model, criterion)
    train_acc_list.append(train_acc)
    train_loss_list.append(train_loss)
    val_acc_list.append(val_acc)
    val_loss_list.append(val_loss)
    print('epoch',epoch)     
    print('Train Acc: {:.6f} Train Loss: {:.6f}'.format(train_acc, train_loss))
    print('  Val Acc: {:.6f}   Val Loss: {:.6f}'.format(val_acc, val_loss))

Did I do something wrong in the data-loading part, or is the problem somewhere else?

Answer:

Your __len__ is larger than your actual data by 1, because you loaded the df with header=None, so the header row is counted as a data row. Just change the last line of __init__ to self.data_len = len(self.image_arr). This will fix your problem with the smallest change.
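A minimal sketch of that fix (only the length computation in __init__ changes):

# last line of FashionData.__init__
# before: self.data_len = len(self.data_info.index)  # counts the header row too, so it is one too large
self.data_len = len(self.image_arr)                   # matches the rows __getitem__ actually indexes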

(Alternatively, pass header=0 when loading the df; in that case you must change iloc[1:, ...] to iloc[:, ...], since you no longer need to skip the first row.)
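For reference, a sketch of __init__ under that alternative (assuming the first row of the CSV really is a header):

self.data_info = pd.read_csv(csv_file, header=0)        # first row becomes the column names
self.image_arr = np.asarray(self.data_info.iloc[:, 0])  # no header row left to skip
if mode != 'test':
    self.label_arr = np.asarray(self.data_info.iloc[:, 2:]).astype('float32')
self.data_len = len(self.image_arr)                     # stays consistent with image_arr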