Machine learning 将预测测试标签添加到PyTorch LSTM中的原始测试数据帧

Machine learning 将预测测试标签添加到PyTorch LSTM中的原始测试数据帧,machine-learning,pytorch,lstm,recurrent-neural-network,Machine Learning,Pytorch,Lstm,Recurrent Neural Network,我已经在 PyTorch 中对文本数据运行了一个 LSTM 模型。我的原始数据帧(测试集和训练集)都包含 3 列:TEXT、ICD_25000 和 HADM_ID。下面是我输入模型的测试数据帧的最后几行:


    TEXT    ICD_25000   HADM_ID
23995   s brother diabetes deceased brother mastoid ca...   0   115229.0
23996   x to be used with insulin pen three times a da...   0   170587.0
23997   lung biopsy in borderline diabetes diagnosed y...   0   174893.0
23998   have nv she has been unable to eat today past ...   0   108008.0
23999   one brother had a stroke at age several member...   0   151301.0


# Pair each dataset attribute name with the field object that processes it.
# TEXT and LABEL are defined outside this snippet (presumably torchtext
# Field/LabelField instances -- TODO confirm). Only two entries are listed
# here, so the HADM_ID column shown in the table is not mapped to a field.
fields = [('TEXT', TEXT), ('ICD_25000', LABEL)]

# Both splits are loaded with identical CSV options; only the path differs.
# The train file is read first, then the test file.
train_df, test_df = (
    data.TabularDataset(
        path=csv_path,
        format='csv',
        skip_header=True,
        fields=fields,
    )
    for csv_path in ("window_train_with_HADM.csv", "window_test_with_HADM.csv")
)

# The vocabulary is built from the training split only, capped at
# VOCABULARY_SIZE entries.
TEXT.build_vocab(train_df, max_size=VOCABULARY_SIZE)

# NOTE(review): LABEL.vocab is read below, but no LABEL.build_vocab(...) call
# is visible in this snippet -- confirm it is built elsewhere.
print(f'Vocabulary size: {len(TEXT.vocab)}')
print(f'Number of classes: {len(LABEL.vocab)}')

def compute_binary_accuracy(model, data_loader, device):
    """Return the binary classification accuracy of *model* as a percentage.

    Every batch yielded by *data_loader* must expose two attributes:
    ``TEXT``, a ``(text, text_lengths)`` pair, and ``ICD_25000``, the 0/1
    targets. The model output is passed through a sigmoid and thresholded
    at 0.5 to obtain the predicted labels.

    Args:
        model: Callable invoked as ``model(text, text_lengths)``; it returns
            one logit per example.
        data_loader: Iterable of batches as described above.
        device: Unused; kept so existing callers keep working.

    Returns:
        A 0-dim float tensor holding the accuracy in percent. When
        *data_loader* yields no examples, ``torch.tensor(0.0)`` is returned
        instead of failing.

    Raises:
        RuntimeError: If a batch's predictions and targets do not contain
            the same number of elements.
    """
    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for batch_data in data_loader:
            text, text_lengths = batch_data.TEXT
            # The lengths are handed to the model as a CPU tensor.
            logits = model(text, text_lengths.cpu())
            # Flatten both sides so that logits shaped (batch, 1) are not
            # silently broadcast against targets shaped (batch,), which
            # would compare every prediction with every target.
            predicted_labels = (torch.sigmoid(logits) > 0.5).reshape(-1).long()
            targets = batch_data.ICD_25000.reshape(-1).long()
            num_examples += targets.numel()
            correct_pred += (predicted_labels == targets).sum()
    if num_examples == 0:
        # Nothing was evaluated: correct_pred is still the int 0, which has
        # no .float(), and the division below would be by zero.
        return torch.tensor(0.0)
    return correct_pred.float() / num_examples * 100




import pandas as pd
# Small toy dataset: 'Review' holds 0/1 labels, 'Text' the review text and
# 'ID' a per-row identifier (presumably 1 = positive review -- confirm).
d = {'Review': [1,0,0,0,1,1,1,0,1], 'Text': ['This movies rocks', 'I hate this movie', "what a bad movie",'This movie was not good','Amazing movie!', 'This was a good film', 'I enjoyed watching this movie','Not the best','Super interesting movie'], 'ID':[1,2,3,4,5,6,7,8,9]}
df = pd.DataFrame(data=d)

# Make training and testing data: sample 80% of the rows for training
# (random_state is the seed, so the split is reproducible) and keep the
# remaining rows for testing.
train_df = df.sample(frac=0.8, random_state=1234)
test_df = df.drop(train_df.index).copy()

# Write both splits to CSV without the index column. The original called
# `train.to_csv` / `test.to_csv`, but no such names are defined here; the
# frames are called train_df / test_df.
train_df.to_csv('window_train_with_IDs.csv', index=False)
test_df.to_csv('window_test_with_IDs.csv', index=False)

def compute_binary_accuracy(model, data_loader, device):
    """Return the binary classification accuracy of *model* as a percentage.

    Every batch yielded by *data_loader* must expose two attributes:
    ``TEXT``, a ``(text, text_lengths)`` pair, and ``ICD_25000``, the 0/1
    targets. The model output is passed through a sigmoid and thresholded
    at 0.5 to obtain the predicted labels.

    Args:
        model: Callable invoked as ``model(text, text_lengths)``; it returns
            one logit per example.
        data_loader: Iterable of batches as described above.
        device: Unused; kept so existing callers keep working.

    Returns:
        A 0-dim float tensor holding the accuracy in percent. When
        *data_loader* yields no examples, ``torch.tensor(0.0)`` is returned
        instead of failing.

    Raises:
        RuntimeError: If a batch's predictions and targets do not contain
            the same number of elements.
    """
    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for batch_data in data_loader:
            text, text_lengths = batch_data.TEXT
            # The lengths are handed to the model as a CPU tensor.
            logits = model(text, text_lengths.cpu())
            # Flatten both sides so that logits shaped (batch, 1) are not
            # silently broadcast against targets shaped (batch,), which
            # would compare every prediction with every target.
            predicted_labels = (torch.sigmoid(logits) > 0.5).reshape(-1).long()
            targets = batch_data.ICD_25000.reshape(-1).long()
            num_examples += targets.numel()
            correct_pred += (predicted_labels == targets).sum()
    if num_examples == 0:
        # Nothing was evaluated: correct_pred is still the int 0, which has
        # no .float(), and the division below would be by zero.
        return torch.tensor(0.0)
    return correct_pred.float() / num_examples * 100

import pandas as pd
# Small toy dataset: 'Review' holds 0/1 labels, 'Text' the review text and
# 'ID' a per-row identifier (presumably 1 = positive review -- confirm).
d = {'Review': [1,0,0,0,1,1,1,0,1], 'Text': ['This movies rocks', 'I hate this movie', "what a bad movie",'This movie was not good','Amazing movie!', 'This was a good film', 'I enjoyed watching this movie','Not the best','Super interesting movie'], 'ID':[1,2,3,4,5,6,7,8,9]}
df = pd.DataFrame(data=d)

# Make training and testing data: sample 80% of the rows for training
# (random_state is the seed, so the split is reproducible) and keep the
# remaining rows for testing.
train_df = df.sample(frac=0.8, random_state=1234)
test_df = df.drop(train_df.index).copy()

# Write both splits to CSV without the index column. The original called
# `train.to_csv` / `test.to_csv`, but no such names are defined here; the
# frames are called train_df / test_df.
train_df.to_csv('window_train_with_IDs.csv', index=False)
test_df.to_csv('window_test_with_IDs.csv', index=False)