Python 3.x ValueError: Target size (torch.Size([8])) must be the same as input size (torch.Size([8, 2]))

Tags: python-3.x, pytorch, bert-language-model, huggingface-transformers, bilstm

I am trying to implement sentiment analysis (positive or negative labels) with BERT, and I want to add a BiLSTM layer to see whether I can improve the accuracy of the pre-trained HuggingFace model. I have the following code and a couple of questions:

import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig
from torch import cuda
import re
import torch.nn as nn

device = 'cuda' if cuda.is_available() else 'cpu'
MAX_LEN = 200
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4
EPOCHS = 1
LEARNING_RATE = 1e-05 #5e-5, 3e-5 or 2e-5
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class CustomDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.comment_text = dataframe.review
        self.targets = self.data.sentiment
        self.max_len = max_len

    def __len__(self):
        return len(self.comment_text)

    def __getitem__(self, index):
        comment_text = str(self.comment_text[index])
        comment_text = " ".join(comment_text.split())

        inputs = self.tokenizer.encode_plus(comment_text, None, add_special_tokens=True,
                                            max_length=self.max_len, pad_to_max_length=True,
                                            return_token_type_ids=True)
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }

train_size = 0.8
train_dataset=df.sample(frac=train_size,random_state=200)
test_dataset=df.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)

print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))

training_set = CustomDataset(train_dataset, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_dataset, tokenizer, MAX_LEN)
train_params = {'batch_size': TRAIN_BATCH_SIZE,'shuffle': True,'num_workers': 0}
test_params = {'batch_size': VALID_BATCH_SIZE,'shuffle': True,'num_workers': 0}
training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)


class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased', return_dict=False, num_labels=2)
        self.lstm = nn.LSTM(768, 256, batch_first=True, bidirectional=True)
        self.linear = nn.Linear(256 * 2, 2)

    def forward(self, ids, mask, token_type_ids):
        sequence_output, pooled_output = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids)
        lstm_output, (h, c) = self.lstm(sequence_output)  ## extract the 1st token's embeddings
        hidden = torch.cat((lstm_output[:, -1, :256], lstm_output[:, 0, 256:]), dim=-1)
        linear_output = self.linear(lstm_output[:, -1].view(-1, 256 * 2))

        return linear_output

model = BERTClass()
model.to(device)
print(model)
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

def train(epoch):
    model.train()
    for _, data in enumerate(training_loader, 0):
        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
        targets = data['targets'].to(device, dtype=torch.float)
        outputs = model(ids, mask, token_type_ids)
        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

for epoch in range(EPOCHS):
    train(epoch)
So with the code above I am getting the error:

Target size (torch.Size([8])) must be the same as input size (torch.Size([8, 2])).

I looked online and tried using

targets = targets.unsqueeze(2)

but then I got another error saying that unsqueeze expects a dimension in the range [-2, 1]. I also tried changing the loss function to

def loss_fn(outputs, targets):
    return torch.nn.BCELoss()(outputs, targets)

But I still get the same error. Can anyone tell me whether there is a solution for this, or what I can do to make it work? Thank you very much.
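For reference, here is a minimal sketch (my own illustration, not the asker's code) of why the shapes clash and two common ways to reconcile them, assuming the two-unit linear head above: BCEWithLogitsLoss requires outputs and targets of the same shape, so with (8, 2) logits and (8,) targets one would either switch to CrossEntropyLoss with integer class labels, or shrink the head to a single output and keep BCEWithLogitsLoss.

import torch
import torch.nn as nn

batch_size = 8

# What the model above produces: raw logits of shape (batch, 2)
outputs_2d = torch.randn(batch_size, 2)
# What the DataLoader yields: one float label per example, shape (batch,)
targets_1d = torch.randint(0, 2, (batch_size,)).float()

# Option 1: treat it as a 2-class problem.
# CrossEntropyLoss expects (batch, num_classes) logits and (batch,) integer labels.
loss_ce = nn.CrossEntropyLoss()(outputs_2d, targets_1d.long())

# Option 2: keep BCEWithLogitsLoss and have the model emit a single logit
# (e.g. a hypothetical nn.Linear(256 * 2, 1) head), so both tensors are (batch, 1).
outputs_1d = torch.randn(batch_size, 1)   # stand-in for a 1-unit head
loss_bce = nn.BCEWithLogitsLoss()(outputs_1d, targets_1d.unsqueeze(1))

print(loss_ce.item(), loss_bce.item())

Either option removes the size mismatch without touching the rest of the training loop.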

Comments:

Which line gives this error?

The call to loss_fn, i.e. loss = loss_fn(outputs, targets), which then calls return torch.nn.BCEWithLogitsLoss()(outputs, targets). I have also tried BCELoss, but I get the same error. Since this is binary classification (either 1 - positive or 0 - negative), is it better to use BertForSequenceClassification, or can I use BertModel as well? I tried BertForSequenceClassification, but I had some issues in the forward part. Could you please advise?

Since your output dimension is 2 rather than 1 in that case, what is the error that appeared in forward() with BertForSequenceClassification?
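Regarding the BertForSequenceClassification question in the comments, a minimal sketch of how that class is commonly called for a two-label setup; the dummy tensors and the choice to pass labels directly are illustrative assumptions, not code from the question:

import torch
from transformers import BertForSequenceClassification

# Two-label classification head on top of the pre-trained encoder.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Dummy batch shaped like the output of CustomDataset / DataLoader above.
ids = torch.randint(0, 30000, (8, 200))
mask = torch.ones_like(ids)
token_type_ids = torch.zeros_like(ids)
targets = torch.randint(0, 2, (8,))          # integer class labels, shape (batch,)

# When labels are passed, the model computes the cross-entropy loss internally,
# so the (8, 2) logits vs. (8,) targets mismatch never arises.
outputs = model(ids, attention_mask=mask, token_type_ids=token_type_ids, labels=targets)
print(outputs.loss, outputs.logits.shape)    # logits: torch.Size([8, 2])

Because the loss is computed inside the model from integer class labels, the custom loss_fn (and the shape mismatch it triggers) is not needed in this path.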