A question about the performance of simple Keras and TensorFlow code

Tags: tensorflow, keras, lstm, recurrent-neural-network

I wrote a simple sine-function predictor with an LSTM in both Keras and TensorFlow, but found that the Keras code performs much worse: it takes about 5 minutes to run, while the TensorFlow code trains the model in only about 20 seconds. In addition, the Keras model's predictions are not as accurate as the TensorFlow model's. Can someone help me find the difference between the two pieces of code?

I hacked the code together intending to train both models with the same hyperparameters, but the results were not what I expected. I searched a lot of material online but could not find the reason.

Keras code:

import numpy as np
import os
import sys
import time
from tqdm._tqdm_notebook import tqdm_notebook
import pickle
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from keras import optimizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

stime = time.time()
BATCH_SIZE = 20
TIME_STEPS = 10
LN = 410
DIFF = 2
OUTPUT_PATH = '/Users/xiachang/Documents/RNN/test_outputs'
SCALER_COL_IDX = 0

params = {
    "batch_size": BATCH_SIZE,  # 20<16<10, 25 was a bust
    "epochs": 500,
    "lr": 0.00010000,
    "time_steps": TIME_STEPS
}

TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01
HIDDEN_UNITS = 20

# data = np.array([[i * (DIFF)] for i in range(LN)])
#
# min_max_scaler = MinMaxScaler()
# data = min_max_scaler.fit_transform(data)


def generate_data(seq):
    X = []
    y = []
    for i in range(len(seq) - TIME_STEPS):
        X.append([[e] for e in seq[i: i + TIME_STEPS]])
        y.append([seq[i + TIME_STEPS]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)


test_start = (TRAINING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
    0, test_start, TRAINING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
    test_start, test_end, TESTING_EXAMPLES + TIME_STEPS, dtype=np.float32)))

x_val, x_test = np.split(test_X, 2)
y_val, y_test = np.split(test_y, 2)


def print_time(text, stime):
    seconds = (time.time()-stime)
    print(text, seconds//60,"minutes : ",np.round(seconds%60),"seconds")


def create_model():
    lstm_model = Sequential()
    lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(LSTM(HIDDEN_UNITS))
    lstm_model.add(Dense(1, activation=None))
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
    return lstm_model


model = create_model()

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=30, min_delta=0.0001)

mcp = ModelCheckpoint(os.path.join(OUTPUT_PATH,
                      "best_model.h5"), monitor='val_loss', verbose=1,
                      save_best_only=True, save_weights_only=False, mode='min', period=1)

# Not used here. But leaving it here as a reminder for future
r_lr_plat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=30,
                              verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)

csv_logger = CSVLogger(os.path.join(OUTPUT_PATH, 'training_log_' + time.ctime().replace(" ","_") + '.log'), append=True)

history = model.fit(train_X, train_y, epochs=params["epochs"], verbose=2, batch_size=BATCH_SIZE,
                    shuffle=False, validation_data=(x_val, y_val), callbacks=[es, mcp, csv_logger])

print("saving model...")
pickle.dump(model, open("test_outputs/lstm_model", "wb"))

# Visualize the training data
from matplotlib import pyplot as plt
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
#plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'train_vis_BS_'+str(BATCH_SIZE)+"_"+time.ctime()+'.png'))

# load the saved best model from above
saved_model = load_model(os.path.join(OUTPUT_PATH, 'best_model.h5')) # , "lstm_best_7-3-19_12AM",
print(saved_model)

y_pred = saved_model.predict(x_test, batch_size=BATCH_SIZE)
y_pred = y_pred.flatten()
y_test_t = y_test
error = mean_squared_error(y_test_t, y_pred)
print("Error is", error, y_pred.shape, y_test_t.shape)
print(y_pred[0:15])
print(y_test_t[0:15])
y_pred_org = y_pred
y_test_t_org = y_test_t
print(y_pred_org[0:15])
print(y_test_t_org[0:15])

# Visualize the prediction
from matplotlib import pyplot as plt
plt.figure()
plt.plot(y_pred_org)
plt.plot(y_test_t_org)
plt.title('Prediction vs Real Value')
plt.ylabel('Y')
plt.xlabel('X')
plt.legend(['Prediction', 'Real'], loc='upper left')
# plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'pred_vs_real_BS'+str(BATCH_SIZE)+"_"+time.ctime()+'.png'))
print_time("program completed ", stime)

You should probably try using CuDNNLSTM instead of LSTM. They are CUDA-accelerated, fast LSTM implementations.

See here:
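For illustration only, here is a minimal sketch of what that swap could look like in the question's create_model, assuming Keras 2.x on the TensorFlow backend and a CUDA-capable GPU (CuDNNLSTM only runs on a GPU and does not accept custom activations or recurrent dropout):

from keras.models import Sequential
from keras.layers import Dense, CuDNNLSTM
from keras import optimizers

TIME_STEPS = 10
HIDDEN_UNITS = 20

def create_model_cudnn():
    # Same 3-layer stack as in the question, but with cuDNN-backed cells.
    lstm_model = Sequential()
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS, return_sequences=True,
                             input_shape=(TIME_STEPS, 1)))
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS))
    lstm_model.add(Dense(1, activation=None))
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
    return lstm_model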


  • Your model structures are different: the first one has 3 LSTM layers, the other has 2 (a 2-layer Keras model that mirrors the TensorFlow setup is sketched right after this list).
  • The TensorFlow data API is highly optimized; it prepares the dataset without wasting any resources.
  • Note: you can speed up TensorFlow training even further with the parallelization inside the dynamic_rnn cell (see the second sketch below).
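On the first point, a minimal sketch (my own, not part of the answer) of a Keras model that mirrors the TensorFlow code's structure, i.e. NUM_LAYERS = 2 stacked LSTM layers with HIDDEN_SIZE = 30 units and a linear read-out, instead of 3 layers of 20 units:

from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras import optimizers

def create_matching_model(time_steps=10):
    # Two LSTM layers of 30 units, like MultiRNNCell([LSTMCell(30), LSTMCell(30)]).
    model = Sequential()
    model.add(LSTM(30, return_sequences=True, input_shape=(time_steps, 1)))
    model.add(LSTM(30))  # keeps only the final time step, like outputs[:, -1, :]
    model.add(Dense(1, activation=None))  # linear read-out, like fully_connected(..., activation_fn=None)
    model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
    return model

This only aligns the architectures; the input pipelines still differ (model.fit on in-memory NumPy arrays versus a tf.data pipeline), which is what the second bullet points at.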

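On the last point, tf.nn.dynamic_rnn exposes a parallel_iterations argument (default 32) that controls how many iterations of its internal while-loop may run in parallel, trading memory for speed. A hedged sketch for TensorFlow 1.x, matching the code further below; the value 64 is only illustrative and worth profiling:

import tensorflow as tf

HIDDEN_SIZE = 30
TIMESTEPS = 10

# Input placeholder shaped like the question's batches: (batch, time, features).
X = tf.placeholder(tf.float32, [None, TIMESTEPS, 1])
cell = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(HIDDEN_SIZE) for _ in range(2)])

# Larger parallel_iterations uses more memory but can reduce wall-clock time.
outputs, state = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32,
                                   parallel_iterations=64)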

The LSTM model used to have only two layers, and it still took much longer than the TensorFlow code. Apart from the number of layers, are the two pieces of code otherwise identical? I'm a bit confused about why the performance difference is so large. For reference, the TensorFlow code:
    import numpy as np
    import tensorflow as tf
    import matplotlib.pyplot as plt
    
    
    NUM_EPOCH = 1000
    HIDDEN_SIZE = 30
    NUM_LAYERS = 2
    TIMESTEPS = 10
    TRAINING_STEPS = 10000
    BATCH_SIZE = 20
    TRAINING_EXAMPLES = 10000
    TESTING_EXAMPLES = 1000
    SAMPLE_GAP = 0.01
    
    
    def generate_data(seq):
        X = []
        y = []
        for i in range(len(seq) - TIMESTEPS):
            X.append([seq[i: i + TIMESTEPS]])
            y.append([seq[i + TIMESTEPS]])
        return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
    
    
    def lstm_model(X, y, is_training):
        cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.LSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)])
        outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
        output = outputs[:, -1, :]
        predictions = tf.contrib.layers.fully_connected(output, 1, activation_fn=None)
        if not is_training:
            return predictions, None, None
        loss = tf.losses.mean_squared_error(labels=y, predictions=predictions)
        train_op = tf.contrib.layers.optimize_loss(
            loss, tf.train.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
        return predictions, loss, train_op
    
    
    def train(sess, train_X, train_Y):
        ds = tf.data.Dataset.from_tensor_slices((train_X, train_Y))
        ds = ds.repeat().shuffle(1000).batch(BATCH_SIZE)
        X, y = ds.make_one_shot_iterator().get_next()
        losses = np.array([])
    
        with tf.variable_scope('model'):
            predictions, loss, train_op = lstm_model(X, y, True)
        sess.run(tf.global_variables_initializer())
        for i in range(TRAINING_STEPS):
            _, l = sess.run([train_op, loss])
            losses = np.append(losses, l)
            if i % NUM_EPOCH == 0:
                print('train step: ' + str(i) + ', loss: ' + str(l))
    
        plt.figure()
        plt.plot(losses, label='loss')
        plt.legend()
        # plt.show()
        plt.savefig('./test_outputs/loss.png')
    
    
    def run_eval(sess, test_X, test_y):
        ds = tf.data.Dataset.from_tensor_slices((test_X, test_y))
        ds = ds.batch(1)
        X, y = ds.make_one_shot_iterator().get_next()
        with tf.variable_scope('model', reuse=True):
            prediction, _, _ = lstm_model(X, [0, 0], False)
        predictions = []
        labels = []
        for i in range(int(TESTING_EXAMPLES / 2)):
            p, l = sess.run([prediction, y])
            predictions.append(p)
            labels.append(l)
    
        predictions = np.array(predictions).squeeze()
        labels = np.array(labels).squeeze()
        rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
        print('Root Mean Square Error is: %f' % rmse)
    
        plt.figure()
        print(predictions[:15])
        print(labels[:15])
        plt.plot(predictions, label='predictions')
        plt.plot(labels, label='real_val')
        plt.legend()
        # plt.show()
        plt.savefig('./test_outputs/test.png')
    
    
    test_start = (TRAINING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
    test_end = test_start + (TESTING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
    train_X, train_y = generate_data(np.sin(np.linspace(
        0, test_start, TRAINING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
    test_X, test_y = generate_data(np.sin(np.linspace(
        test_start, test_end, TESTING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
    
    x_val, test_X = np.split(test_X, 2)
    y_val, test_y = np.split(test_y, 2)
    
    with tf.Session() as sess:
        train(sess, train_X, train_y)
        run_eval(sess, test_X, test_y)