Python 如何从 LSTM 的输出中提取预测结果

Python 如何从输出LSTM中提取预测,python,tensorflow,keras,lstm,recurrent-neural-network,Python,Tensorflow,Keras,Lstm,Recurrent Neural Network,我用LSTM建立了一个模型来预测情绪。 该模型的准确率超过80%。 但当我试图预测一个外部值时。model.predict()不能预测。它只是提供了一个空数组的感觉 模型如下 import numpy as np import pandas as pd import warnings warnings.filterwarnings("ignore") from keras.utils import to_categorical from keras.preprocessing.text imp

我用 LSTM 建立了一个预测情绪的模型，该模型的准确率超过 80%。但当我尝试对训练数据之外的输入进行预测时，model.predict() 并没有直接给出情绪标签，而只是返回了一个数组。

模型如下

import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings("ignore")
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense,Embedding,LSTM
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report


# Load Data
# The same CSV is read twice: `df` is the labelled data used for training and
# `test` is the data that gets scored once the model is trained.
df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/analysis.csv')
test = pd.read_csv('/content/drive/My Drive/Colab Notebooks/analysis.csv')
# None means "never truncate"; the old -1 sentinel is deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)

seed = 101
np.random.seed(seed)

X = df['ride_review']
temp = test['ride_review']
y = to_categorical(df['sentiment'])
# The output layer must be as wide as the one-hot targets. to_categorical
# emits max(label) + 1 columns, which differs from nunique() whenever the
# labels are not exactly 0..k-1.
num_classes = y.shape[1]

# Split Train Test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=seed)
#print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Tokenize Text
# The vocabulary is learned from the training split only; the same fitted
# tokenizer then encodes the validation split and the data to be scored.
max_features = 15000
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(X_train)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)
temp = tokenizer.texts_to_sequences(temp)

# Pad / truncate every sequence to the same length.
max_words = 50
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)
temp = sequence.pad_sequences(temp, maxlen=max_words)
#print(X_train.shape,X_test.shape)


# Training hyper-parameters
batch_size = 128
epochs = 7
def get_model(max_features, embed_dim, embedding_matrix):
    """Build and compile the Embedding -> LSTM -> softmax classifier.

    Reads the module-level names ``seed``, ``X_train`` (for the input
    length) and ``num_classes`` (for the output width).

    Args:
        max_features: Vocabulary size, i.e. the number of embedding rows.
        embed_dim: Width of each word vector.
        embedding_matrix: Initial weights for the embedding layer;
            presumably shaped (max_features, embed_dim) -- confirm against
            the caller.

    Returns:
        The compiled ``Sequential`` model.
    """
    np.random.seed(seed)
    K.clear_session()
    model = Sequential()
    model.add(Embedding(max_features, embed_dim, input_length=X_train.shape[1],
                        weights=[embedding_matrix]))#,trainable=False
    model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
    return model

def get_coefs(word, *arr):
    """Pair a token with its vector components as a float32 array.

    Args:
        word: The token.
        *arr: The vector components (numbers or numeric strings).

    Returns:
        A ``(word, vector)`` tuple where ``vector`` is a float32 ndarray.
    """
    vector = np.asarray(arr, dtype=np.float32)
    return (word, vector)

def get_embed_mat(EMBEDDING_FILE, max_features=20000):
    """Build an embedding matrix for the fitted tokenizer's vocabulary.

    Reads the module-level ``tokenizer`` (for ``word_index``) and
    ``embed_dim`` (matrix width), and parses each line with ``get_coefs``.

    Args:
        EMBEDDING_FILE: Path to a UTF-8 text file with one entry per line:
            a token followed by its space-separated vector components.
        max_features: Upper bound on the number of rows in the matrix.

    Returns:
        A ``(num_rows, embedding_matrix)`` tuple. Rows whose word appears
        in the file hold that word's vector; all other rows keep a random
        normal initialisation using the mean/std of the loaded vectors.
    """
    # word vectors; the context manager closes the file once it is parsed
    with open(EMBEDDING_FILE, encoding='utf8') as emb_file:
        embeddings_index = dict(
            get_coefs(*line.rstrip().rsplit(' ')) for line in emb_file)
    print('Found %s word vectors.' % len(embeddings_index))

    # embedding matrix
    word_index = tokenizer.word_index
    num_words = min(max_features, len(word_index) + 1)
    # np.stack needs a real sequence; a dict view is rejected by current NumPy
    all_embs = np.stack(list(embeddings_index.values()))  # for random init
    embedding_matrix = np.random.normal(all_embs.mean(), all_embs.std(),
                                        (num_words, embed_dim))
    for word, i in word_index.items():
        # indices at or beyond the cap have no row in the matrix
        if i >= max_features:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    max_features = embedding_matrix.shape[0]

    return max_features, embedding_matrix

# embedding matrix
EMBEDDING_FILE = '/content/drive/My Drive/Colab Notebooks/Unwanted/glove.twitter.27B.200d.txt'
# Must equal the width of the vectors stored in EMBEDDING_FILE (the "200d"
# GloVe file); a smaller value makes the row assignment in get_embed_mat fail.
embed_dim = 200 #word vector dim
max_features, embedding_matrix = get_embed_mat(EMBEDDING_FILE)

# train the model
model = get_model(max_features, embed_dim, embedding_matrix)
model.fit(X_train, y_train, validation_data=(X_test, y_test),epochs=epochs, batch_size=batch_size, verbose=2)

# Score the padded sequences in `temp` and store the predicted class index
# next to the original rows.
sub = pd.read_csv('/content/drive/My Drive/Colab Notebooks/analysis.csv')
# argmax over the softmax output is the class index; it is what
# predict_classes returned and also works where that method no longer exists.
class_probs = model.predict(temp, batch_size=batch_size, verbose=0)
sub['Prediction Sentiment '] = np.argmax(class_probs, axis=1)
sub.to_csv("/content/drive/My Drive/Colab Notebooks/predict.csv", index=False)
这是我用来预测外部值的代码:

# Asker's attempt at scoring text that is not part of the training CSV.
# NOTE(review): `a` looks like the words of a single review -- confirm.
a=['completed', 'running', 'new', 'york', 'marathon', 'requested', 'pool', 'ride', 'back', 'hotel']
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# NOTE(review): a brand-new Tokenizer is fitted on `a` here, so the word
# indices are not taken from the `tokenizer` that was fitted on the training
# text; the model's embedding rows were built for that other vocabulary.
tk = Tokenizer()
tk.fit_on_texts(a)
# `a` is a list of ten strings, so this yields ten sequences (one per string).
index_list = tk.texts_to_sequences(a)
# Same maxlen (50) as the padding applied to the training sequences.
a = pad_sequences(index_list, maxlen=50)
# The model ends in a softmax layer, so each row of the result holds the
# class probabilities for one input sequence.
sentiment = model.predict(a)
print(sentiment)
结果如下:

[[0.9001644  0.09983556]
 [0.8839435  0.11605652]
 [0.9005757  0.09942431]
 [0.9305595  0.06944045]
 [0.85847026 0.14152978]
 [0.8978375  0.10216247]
 [0.93535316 0.06464689]
 [0.9622155  0.03778455]
 [0.7891844  0.2108156 ]
 [0.9265106  0.07348941]]

这些数值代表什么？我怎样才能得到情绪的预测结果？

您需要使用 `dict` 来保存实际标签与类别索引之间的对应关系：

# Map every distinct raw label to a consecutive integer index, in order of
# first appearance in the column.
act_labels = {val: ind for ind, val in enumerate(df['sentiment'].unique())}
# Every value in the column is a key of act_labels, so map() is a total
# lookup that yields the integer column directly; replace() with a dict
# depends on pandas' deprecated silent downcasting to get there.
df['sentiment'] = df['sentiment'].map(act_labels)
y = to_categorical(df['sentiment'])
要获得预测,您需要执行以下操作:

# Invert the label -> index mapping so a predicted index can be turned back
# into its original label.
rev_act_labels = {val: key for key, val in act_labels.items()}

# Option 1: `sentiment` holds the class probabilities from model.predict();
# the predicted class is the argmax of each row.
predictions = np.argmax(sentiment, axis=1)
act_predictions = [rev_act_labels[val] for val in predictions]
print(act_predictions)

# Option 2: let the model return the class indices itself.
# NOTE(review): predict_classes is not available in newer Keras/TensorFlow
# releases; use option 1 there.
sentiment = model.predict_classes(a)
# show the inputs and predicted outputs
for sample, class_index in zip(a, sentiment):
    print("X=%s, Predicted=%s" % (sample, rev_act_labels[class_index]))
如果 `model.predict_classes` 可用，则可以尝试以下操作:

# Invert the label -> index mapping so a predicted index can be turned back
# into its original label.
rev_act_labels = {val: key for key, val in act_labels.items()}

# Option 1: `sentiment` holds the class probabilities from model.predict();
# the predicted class is the argmax of each row.
predictions = np.argmax(sentiment, axis=1)
act_predictions = [rev_act_labels[val] for val in predictions]
print(act_predictions)

# Option 2: let the model return the class indices itself.
# NOTE(review): predict_classes is not available in newer Keras/TensorFlow
# releases; use option 1 there.
sentiment = model.predict_classes(a)
# show the inputs and predicted outputs
for sample, class_index in zip(a, sentiment):
    print("X=%s, Predicted=%s" % (sample, rev_act_labels[class_index]))

您需要使用 `dict` 来保存实际标签与类别索引之间的对应关系：

# Map every distinct raw label to a consecutive integer index, in order of
# first appearance in the column.
act_labels = {val: ind for ind, val in enumerate(df['sentiment'].unique())}
# Every value in the column is a key of act_labels, so map() is a total
# lookup that yields the integer column directly; replace() with a dict
# depends on pandas' deprecated silent downcasting to get there.
df['sentiment'] = df['sentiment'].map(act_labels)
y = to_categorical(df['sentiment'])
要获得预测,您需要执行以下操作:

# Invert the label -> index mapping so a predicted index can be turned back
# into its original label.
rev_act_labels = {val: key for key, val in act_labels.items()}

# Option 1: `sentiment` holds the class probabilities from model.predict();
# the predicted class is the argmax of each row.
predictions = np.argmax(sentiment, axis=1)
act_predictions = [rev_act_labels[val] for val in predictions]
print(act_predictions)

# Option 2: let the model return the class indices itself.
# NOTE(review): predict_classes is not available in newer Keras/TensorFlow
# releases; use option 1 there.
sentiment = model.predict_classes(a)
# show the inputs and predicted outputs
for sample, class_index in zip(a, sentiment):
    print("X=%s, Predicted=%s" % (sample, rev_act_labels[class_index]))
如果 `model.predict_classes` 可用，则可以尝试以下操作:

# Invert the label -> index mapping so a predicted index can be turned back
# into its original label.
rev_act_labels = {val: key for key, val in act_labels.items()}

# Option 1: `sentiment` holds the class probabilities from model.predict();
# the predicted class is the argmax of each row.
predictions = np.argmax(sentiment, axis=1)
act_predictions = [rev_act_labels[val] for val in predictions]
print(act_predictions)

# Option 2: let the model return the class indices itself.
# NOTE(review): predict_classes is not available in newer Keras/TensorFlow
# releases; use option 1 there.
sentiment = model.predict_classes(a)
# show the inputs and predicted outputs
for sample, class_index in zip(a, sentiment):
    print("X=%s, Predicted=%s" % (sample, rev_act_labels[class_index]))

这是模型的输出。对于 model.predict_classes：X=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]，Predicted=0；对于 np.argmax：array([0,0,0,0,0,0,0,0,0,0])
`np.argmax(sentiment, axis=1)` 会根据概率数组 [0.9479169 0.05208309] [0.94631827 0.05368178] … 给出预测的类别索引。您是否希望得到实际的情绪标签，例如积极或消极？
是的。我需要得到实际的情绪，例如积极或消极。
我已更新解决方案，如果您遇到任何问题，请告诉我。
这是模型的输出。对于 model.predict_classes：X=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]，Predicted=0；对于 np.argmax：array([0,0,0,0,0,0,0,0,0,0,0,0])
`np.argmax(sentiment, axis=1)` 会根据概率数组 [[0.9479169 0.05208309] [0.94631827 0.05368178] … 给出预测的类别索引。您是否想得到实际的情绪标签，例如积极或消极？
是的。我需要得到实际的情绪，例如积极或消极。
我已更新解决方案，如果您遇到任何问题，请告诉我。