Python: Differences between Keras and PyTorch code with the same architecture


I am building an autoencoder model and found two code snippets, one in Keras and one in PyTorch, with the same model architecture. But when I run them, the training time differs greatly even though they use the same architecture. Can you explain why there is such a big difference in time and performance?

PyTorch code

import copy

import numpy as np
import torch
import torch.nn as nn

# the snippets below assume a `device` is defined
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class Encoder(nn.Module):

    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(Encoder, self).__init__()

        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim

        self.rnn1 = nn.LSTM(
            input_size=n_features,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        
        self.rnn2 = nn.LSTM(
            input_size=self.hidden_dim,
            hidden_size=embedding_dim,
            num_layers=1,
            batch_first=True
        )

    def forward(self, x):
        x = x.reshape((1, self.seq_len, self.n_features))  # batch dimension of 1: one sequence at a time

        x, (_, _) = self.rnn1(x)
        x, (hidden_n, _) = self.rnn2(x)

        return hidden_n.reshape((self.n_features, self.embedding_dim))  # final hidden state of rnn2 is the embedding
    
class Decoder(nn.Module):

    def __init__(self, seq_len, input_dim=64, n_features=1):
        super(Decoder, self).__init__()

        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim, self.n_features = 2 * input_dim, n_features

        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )

        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )

        self.output_layer = nn.Linear(self.hidden_dim, n_features)

    def forward(self, x):
        x = x.repeat(self.seq_len, self.n_features)  # tile the embedding across all time steps to form the decoder input
        x = x.reshape((self.n_features, self.seq_len, self.input_dim))

        x, (hidden_n, cell_n) = self.rnn1(x)
        x, (hidden_n, cell_n) = self.rnn2(x)
        x = x.reshape((self.seq_len, self.hidden_dim))

        return self.output_layer(x)

class RecurrentAutoencoder(nn.Module):

    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(RecurrentAutoencoder, self).__init__()

        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)

        return x
    
    
model = RecurrentAutoencoder(seq_len, n_features, 128)  # seq_len and n_features come from the training data


import time
time_dict = {}
def train_model(model, train_dataset, val_dataset, n_epochs):
    
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.L1Loss(reduction='sum').to(device)
    history = dict(train=[], val=[])

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 10000.0
    time_dict[0] = time.time()
    for epoch in range(1, n_epochs + 1):

        model = model.train()

        train_losses = []
        for seq_true in train_dataset:
            optimizer.zero_grad()

            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)

            loss = criterion(seq_pred, seq_true)

            loss.backward()
            optimizer.step()

            train_losses.append(loss.item())

        val_losses = []
        model = model.eval()
        with torch.no_grad():
            for seq_true in val_dataset:

                seq_true = seq_true.to(device)
                seq_pred = model(seq_true)

                loss = criterion(seq_pred, seq_true)
                val_losses.append(loss.item())

        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)

        history['train'].append(train_loss)
        history['val'].append(val_loss)

        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())

        print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')
        stop = time.time()
        time_dict[epoch] = stop

    model.load_state_dict(best_model_wts)
    
    return model.eval(), history

model, history = train_model(
  model,
  train_dataset, 
  val_dataset, 
  n_epochs=10)
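
The loop above only records raw timestamps in time_dict. To turn them into per-epoch durations, so the PyTorch and Keras timings can be compared directly, a small sketch over the time_dict populated above:

# Convert the raw timestamps in time_dict into per-epoch durations (seconds).
# time_dict[0] is the start time; time_dict[i] is the end of epoch i.
epoch_times = {
    epoch: time_dict[epoch] - time_dict[epoch - 1]
    for epoch in range(1, len(time_dict))
}
for epoch, seconds in epoch_times.items():
    print(f'Epoch {epoch}: {seconds:.2f} s')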

Summary of the Keras model (code shown further below), from print(model.summary()):

Layer (type)                         Output Shape        Param #
lstm_6 (LSTM)                        (None, 1, 256)       266240
lstm_7 (LSTM)                        (None, 1, 128)       197120
lstm_8 (LSTM)                        (None, 1, 256)       394240
time_distributed (TimeDistributed)   (None, 1, 3)            771


The architectures of the two models look the same, but their performance and training time differ.
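
One quick way to check that the two networks really are the same size is to compare trainable parameter counts. A minimal sketch for the PyTorch model built above, to set against the "Param #" column of the Keras summary:

# Total number of trainable parameters in the PyTorch autoencoder.
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'PyTorch trainable parameters: {total_params}')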

Please add some details: the overall performance (time and results) for each framework, and the versions of the packages you are using.
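
For the package versions the comment asks about, a quick way to report them (assuming both frameworks are installed in the same environment):

import numpy as np
import tensorflow as tf
import torch

# Versions of the packages involved in the comparison.
print('numpy     :', np.__version__)
print('tensorflow:', tf.__version__)
print('torch     :', torch.__version__)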

Keras code

from tensorflow import keras
from tensorflow.keras.callbacks import TensorBoard

model = keras.Sequential()
model.add(keras.layers.LSTM(
    units=256,
    input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(keras.layers.LSTM(units=128, return_sequences=True))

model.add(keras.layers.LSTM(units=256, return_sequences=True))
model.add(keras.layers.TimeDistributed(keras.layers.Dense(units=X_train.shape[2])))
model.compile(loss='mae', optimizer='adam')
nb_epoch = 10

# the second compile call replaces the first, so the model actually trains with MSE
model.compile(optimizer='adam',
              loss='mean_squared_error')



tensorboard = TensorBoard(log_dir='/tmp/logs',
                          histogram_freq=0,
                          write_graph=True,       #to visualize
                          write_images=True)

history = model.fit(X_train, X_train,
                    epochs=nb_epoch,
                    shuffle=True,
                    validation_data=(X_validate, X_validate),
                    verbose=1,
                    callbacks=[tensorboard]).history
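
One structural difference worth flagging for the timing comparison: Keras' fit trains in mini-batches (batch_size defaults to 32), whereas the PyTorch loop above takes one optimizer step per individual sequence. A minimal sketch, assuming you want the Keras run to mirror that per-sequence regime (batch_size=1 is the assumption here):

# Mirror the per-sequence updates of the PyTorch loop; fit otherwise defaults to batch_size=32.
history = model.fit(X_train, X_train,
                    epochs=nb_epoch,
                    batch_size=1,
                    shuffle=True,
                    validation_data=(X_validate, X_validate),
                    verbose=1,
                    callbacks=[tensorboard]).history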