如何在Dataloader类之外的pytorch中创建数据预处理管道? — Pytorch / Conv Neural Network / Pipeline / Torch / Pytorch Dataloader

如何在Dataloader类之外的pytorch中创建数据预处理管道?

如何在Dataloader类之外的pytorch中创建数据预处理管道?,pytorch,conv-neural-network,pipeline,torch,pytorch-dataloader,Pytorch,Conv Neural Network,Pipeline,Torch,Pytorch Dataloader,我正在尝试建立一个包含40个特征的数据模型,这些特征必须分为10类。我是PyTorch的新手,这是我的第一个项目 我得到一个自定义数据集类(,我不允许更改该类),如下所示: class MyData(Dataset): def _init_(self, mode): with open(mode+'.pkl', 'rb') as handle: data = pickle.load(handle) self.X = data

我正在尝试建立一个包含40个特征的数据模型,这些特征必须分为10类。我是PyTorch的新手,这是我的第一个项目

我得到一个自定义数据集类(,我不允许更改该类),如下所示:

class MyData(Dataset):
    """Dataset loading features ``x`` and labels ``y`` from ``<mode>.pkl``.

    Bug fix: the original defined ``_init_``/``_len_``/``_getitem_`` with
    single underscores (markdown stripped the doubles), so Python never
    invoked them as the Dataset protocol methods and the class was
    unusable.  Renamed to the proper dunder names.
    """

    def __init__(self, mode):
        # ``mode`` is the pickle file stem, e.g. "train" -> train.pkl.
        with open(mode + '.pkl', 'rb') as handle:
            data = pickle.load(handle)
            self.X = data['x'].astype('float')
            self.y = data['y'].astype('long')

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # DataLoader samplers may hand us a tensor of indices; convert
        # to plain Python so numpy/list indexing below works.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        return (self.X[idx], self.y[idx])
def train(epoch):
  """Run one training epoch over ``train_loader``.

  Relies on module-level globals: model, optimizer, criterion,
  train_loader, log_interval, train_losses, train_counter, save_model.
  """
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    optimizer.zero_grad()
    output = model(data.double())
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % log_interval == 0:
      train_losses.append(loss.item())
      # Bug fix: the counter hard-coded 32, but the loader is built with
      # batch_size=300 -- use the loader's actual batch size.
      train_counter.append(
        (batch_idx * train_loader.batch_size)
        + ((epoch - 1) * len(train_loader.dataset)))
      # Bug fix: save_model was outside this ``if`` and therefore ran on
      # every single batch; checkpoint once per log interval instead.
      save_model(model)

def test():
  """Evaluate the model on ``val_loader`` and record the average loss.

  Uses module-level globals: model, val_loader, criterion, test_losses.
  """
  model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in val_loader:
      output = model(data.double())
      test_loss += criterion(output, target).item()
      # Index of the max class score per sample.
      pred = output.data.max(1, keepdim=True)[1]
      # Idiom fix: ``.item()`` keeps ``correct`` a plain int instead of
      # silently accumulating a 0-d tensor.
      correct += pred.eq(target.data.view_as(pred)).sum().item()
  # NOTE(review): this divides a sum of per-batch *mean* losses by the
  # dataset size (MNIST-tutorial convention); confirm that is the metric
  # actually wanted.
  test_loss /= len(val_loader.dataset)
  test_losses.append(test_loss)
  
# Evaluate once before training to get a baseline, then alternate one
# training epoch with one validation pass.
test()
for epoch in range(1, n_epochs + 1):
  train(epoch)
  test()
class MyModel(nn.Module):
    """1-D CNN classifier mapping 40 input features to 10 class probabilities.

    ``forward`` adds a dummy length-1 sequence axis, so each Conv1d runs on
    sequences of length 1 and every ``MaxPool1d(kernel_size=1)`` is a no-op.
    The whole stack is kept in double precision to match the double inputs.

    NOTE(review): the final Softmax double-normalizes if the training
    criterion is CrossEntropyLoss (which applies log-softmax itself) --
    confirm which loss is in use.
    """

    def __init__(self):
        super(MyModel, self).__init__()

        self.net_stack=nn.Sequential(
            nn.Conv1d(in_channels=40, out_channels=256, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(256, affine=True),
            nn.Conv1d(in_channels=256, out_channels=128, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(128, affine=True),
            nn.Conv1d(in_channels=128, out_channels=64, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(64, affine=True),
            nn.Conv1d(in_channels=64, out_channels=32, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(32, affine=True),
            nn.Flatten(),
            nn.Linear(32, 10),
            nn.Softmax(dim=1)).double()

    def forward(self, x):
        # (N, 40) -> (N, 40, 1): give Conv1d a length-1 sequence axis.
        x = x.double()
        # Bug fix: removed the print(result.size()) debug call -- the
        # autograder rejects submissions that print.
        return self.net_stack(x[:, :, None]).double()
我对数据做了一些预处理,比如归一化,然后训练并保存模型。由于不允许我更改dataset类,我在dataset类之外进行了更改,然后使用了
DataLoader
方法。预处理如下:

# Load the raw training split and pull out the full feature/label arrays.
train_data=MyData("train")
features, labels = train_data[:]

# Min-max scale each feature column to [0, 1] via sklearn's MinMaxScaler.
df = pd.DataFrame(features)

x = df.values
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)

input_array = x_scaled
output_array = labels

# Wrap the scaled features/labels as tensors in a TensorDataset so the
# stock DataLoader can batch them; labels must be LongTensor for
# classification losses.
inputs = torch.Tensor(input_array)
targets = torch.Tensor(output_array).type(torch.LongTensor)
dataset = TensorDataset(inputs, targets)
# NOTE(review): the hard-coded 3300/300 split assumes exactly 3600
# samples -- random_split raises otherwise; confirm the dataset size.
train_ds, val_ds = random_split(dataset, [3300, 300])


# Training hyper-parameters and loss/counter bookkeeping lists used by
# train()/test().
batch_size = 300
n_epochs = 200
log_interval = 10
train_losses = []
train_counter = []
test_losses = []

train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]
在此之后，我按如下方式定义了训练和测试函数（并删除了打印语句，因为如果保留打印，自动评分器将无法为我的作业评分）：

class MyData(Dataset):
    """Dataset loading features ``x`` and labels ``y`` from ``<mode>.pkl``.

    Bug fix: the original defined ``_init_``/``_len_``/``_getitem_`` with
    single underscores (markdown stripped the doubles), so Python never
    invoked them as the Dataset protocol methods and the class was
    unusable.  Renamed to the proper dunder names.
    """

    def __init__(self, mode):
        # ``mode`` is the pickle file stem, e.g. "train" -> train.pkl.
        with open(mode + '.pkl', 'rb') as handle:
            data = pickle.load(handle)
            self.X = data['x'].astype('float')
            self.y = data['y'].astype('long')

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # DataLoader samplers may hand us a tensor of indices; convert
        # to plain Python so numpy/list indexing below works.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        return (self.X[idx], self.y[idx])
def train(epoch):
  """Run one training epoch over ``train_loader``.

  Relies on module-level globals: model, optimizer, criterion,
  train_loader, log_interval, train_losses, train_counter, save_model.
  """
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    optimizer.zero_grad()
    output = model(data.double())
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % log_interval == 0:
      train_losses.append(loss.item())
      # Bug fix: the counter hard-coded 32, but the loader is built with
      # batch_size=300 -- use the loader's actual batch size.
      train_counter.append(
        (batch_idx * train_loader.batch_size)
        + ((epoch - 1) * len(train_loader.dataset)))
      # Bug fix: save_model was outside this ``if`` and therefore ran on
      # every single batch; checkpoint once per log interval instead.
      save_model(model)

def test():
  """Evaluate the model on ``val_loader`` and record the average loss.

  Uses module-level globals: model, val_loader, criterion, test_losses.
  """
  model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in val_loader:
      output = model(data.double())
      test_loss += criterion(output, target).item()
      # Index of the max class score per sample.
      pred = output.data.max(1, keepdim=True)[1]
      # Idiom fix: ``.item()`` keeps ``correct`` a plain int instead of
      # silently accumulating a 0-d tensor.
      correct += pred.eq(target.data.view_as(pred)).sum().item()
  # NOTE(review): this divides a sum of per-batch *mean* losses by the
  # dataset size (MNIST-tutorial convention); confirm that is the metric
  # actually wanted.
  test_loss /= len(val_loader.dataset)
  test_losses.append(test_loss)
  
# Evaluate once before training to get a baseline, then alternate one
# training epoch with one validation pass.
test()
for epoch in range(1, n_epochs + 1):
  train(epoch)
  test()
class MyModel(nn.Module):
    """1-D CNN classifier mapping 40 input features to 10 class probabilities.

    ``forward`` adds a dummy length-1 sequence axis, so each Conv1d runs on
    sequences of length 1 and every ``MaxPool1d(kernel_size=1)`` is a no-op.
    The whole stack is kept in double precision to match the double inputs.

    NOTE(review): the final Softmax double-normalizes if the training
    criterion is CrossEntropyLoss (which applies log-softmax itself) --
    confirm which loss is in use.
    """

    def __init__(self):
        super(MyModel, self).__init__()

        self.net_stack=nn.Sequential(
            nn.Conv1d(in_channels=40, out_channels=256, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(256, affine=True),
            nn.Conv1d(in_channels=256, out_channels=128, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(128, affine=True),
            nn.Conv1d(in_channels=128, out_channels=64, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(64, affine=True),
            nn.Conv1d(in_channels=64, out_channels=32, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(32, affine=True),
            nn.Flatten(),
            nn.Linear(32, 10),
            nn.Softmax(dim=1)).double()

    def forward(self, x):
        # (N, 40) -> (N, 40, 1): give Conv1d a length-1 sequence axis.
        x = x.double()
        # Bug fix: removed the print(result.size()) debug call -- the
        # autograder rejects submissions that print.
        return self.net_stack(x[:, :, None]).double()
即使这样做了，自动评分器仍然无法为我的代码评分。我认为主要是因为我在向模型输入数据的方式上犯了错误，但我无法准确定位问题所在以及如何纠正。由于我是 PyTorch 新手，我查阅了如何做预处理，但找到的方法都涉及 Dataset 类，因此我不确定该如何继续。

我的模型如下:

class MyData(Dataset):
    """Dataset loading features ``x`` and labels ``y`` from ``<mode>.pkl``.

    Bug fix: the original defined ``_init_``/``_len_``/``_getitem_`` with
    single underscores (markdown stripped the doubles), so Python never
    invoked them as the Dataset protocol methods and the class was
    unusable.  Renamed to the proper dunder names.
    """

    def __init__(self, mode):
        # ``mode`` is the pickle file stem, e.g. "train" -> train.pkl.
        with open(mode + '.pkl', 'rb') as handle:
            data = pickle.load(handle)
            self.X = data['x'].astype('float')
            self.y = data['y'].astype('long')

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # DataLoader samplers may hand us a tensor of indices; convert
        # to plain Python so numpy/list indexing below works.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        return (self.X[idx], self.y[idx])
def train(epoch):
  """Run one training epoch over ``train_loader``.

  Relies on module-level globals: model, optimizer, criterion,
  train_loader, log_interval, train_losses, train_counter, save_model.
  """
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    optimizer.zero_grad()
    output = model(data.double())
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % log_interval == 0:
      train_losses.append(loss.item())
      # Bug fix: the counter hard-coded 32, but the loader is built with
      # batch_size=300 -- use the loader's actual batch size.
      train_counter.append(
        (batch_idx * train_loader.batch_size)
        + ((epoch - 1) * len(train_loader.dataset)))
      # Bug fix: save_model was outside this ``if`` and therefore ran on
      # every single batch; checkpoint once per log interval instead.
      save_model(model)

def test():
  """Evaluate the model on ``val_loader`` and record the average loss.

  Uses module-level globals: model, val_loader, criterion, test_losses.
  """
  model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in val_loader:
      output = model(data.double())
      test_loss += criterion(output, target).item()
      # Index of the max class score per sample.
      pred = output.data.max(1, keepdim=True)[1]
      # Idiom fix: ``.item()`` keeps ``correct`` a plain int instead of
      # silently accumulating a 0-d tensor.
      correct += pred.eq(target.data.view_as(pred)).sum().item()
  # NOTE(review): this divides a sum of per-batch *mean* losses by the
  # dataset size (MNIST-tutorial convention); confirm that is the metric
  # actually wanted.
  test_loss /= len(val_loader.dataset)
  test_losses.append(test_loss)
  
# Evaluate once before training to get a baseline, then alternate one
# training epoch with one validation pass.
test()
for epoch in range(1, n_epochs + 1):
  train(epoch)
  test()
class MyModel(nn.Module):
    """1-D CNN classifier mapping 40 input features to 10 class probabilities.

    ``forward`` adds a dummy length-1 sequence axis, so each Conv1d runs on
    sequences of length 1 and every ``MaxPool1d(kernel_size=1)`` is a no-op.
    The whole stack is kept in double precision to match the double inputs.

    NOTE(review): the final Softmax double-normalizes if the training
    criterion is CrossEntropyLoss (which applies log-softmax itself) --
    confirm which loss is in use.
    """

    def __init__(self):
        super(MyModel, self).__init__()

        self.net_stack=nn.Sequential(
            nn.Conv1d(in_channels=40, out_channels=256, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(256, affine=True),
            nn.Conv1d(in_channels=256, out_channels=128, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(128, affine=True),
            nn.Conv1d(in_channels=128, out_channels=64, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(64, affine=True),
            nn.Conv1d(in_channels=64, out_channels=32, kernel_size=1, stride=2), #applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(32, affine=True),
            nn.Flatten(),
            nn.Linear(32, 10),
            nn.Softmax(dim=1)).double()

    def forward(self, x):
        # (N, 40) -> (N, 40, 1): give Conv1d a length-1 sequence axis.
        x = x.double()
        # Bug fix: removed the print(result.size()) debug call -- the
        # autograder rejects submissions that print.
        return self.net_stack(x[:, :, None]).double()
我得到的一个指示是他们写了:

# Please make sure we can load your model with:
# model = MyModel()
# This means you must give default values to all parameters you may wish to set, such as output size.

你可以试着在训练循环中做到这一点

for batch_idx, (data, target) in enumerate(train_loader):
    # 你可以在这里对输入做一些变换
    data = transform(data)
    data = data.to('cuda')  # 移到 GPU——我注意到您在训练循环中没有这样做
    # 前向传播
    output = model(data)