Python: How to develop a deep sparse autoencoder cost function in TensorFlow?


I developed a deep sparse autoencoder cost function using TensorFlow, and I downloaded the autoencoder structure from the following link:

In the simple autoencoder I have the following cost function:

loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
I added sparsity to the autoencoder using the following mathematical functions:
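Written out (in the notation of the code below), the cost I am trying to implement is:

J = \frac{1}{2 N_v} \sum_{i=1}^{N_v} \lVert x^{(i)} - \hat{x}^{(i)} \rVert^2
    + \frac{\lambda}{2} \left( \lVert W_{enc} \rVert_2^2 + \lVert W_{dec} \rVert_2^2 \right)
    + \beta \sum_{j} \mathrm{KL}\!\left( \rho \,\|\, \hat{\rho}_j \right)

\mathrm{KL}\!\left( \rho \,\|\, \hat{\rho}_j \right) = \rho \log \frac{\rho}{\hat{\rho}_j} + (1 - \rho) \log \frac{1 - \rho}{1 - \hat{\rho}_j}

where \hat{\rho}_j is the mean activation of code unit j over the batch, \rho is sparsity_param, \lambda is lambda_, and \beta is beta.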

I implemented these mathematical functions with the following code:

learning_rate = 0.01
training_epochs = 1000
batch_size = 256
display_step = 1
examples_to_show = 10
lambda_ = 3e-3
beta = 3
Nv = batch_size

def KL_divergence(x1, y1):
    return x1 * tf.log(x1 / y1) + (1 - x1) * tf.log((1 - x1) / (1 - y1))

# Weight-decay (L2) terms over the encoder and decoder weights
W1 = sum(tf.reduce_sum(tf.abs(var)**2) for var in tf.trainable_variables()
         if 'encoder_' in var.name)
W2 = sum(tf.reduce_sum(tf.abs(var)**2) for var in tf.trainable_variables()
         if 'decoder_' in var.name)
## Sparsity
rho_hat = (1+tf.reduce_mean(encoder(X),axis=0))/2
rho = np.tile(sparsity_param, n_output)

cost = (tf.reduce_sum(tf.pow(y_true - y_pred, 2)) / (2 * Nv)
        + (lambda_ / 2) * (W1 + W2)
        + beta * tf.reduce_sum(KL_divergence(rho, rho_hat)))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
The paper whose mathematical functions I used is: Visualization of Driving Behavior Based on Hidden Feature Extraction by Using Deep Learning.


Thanks, everyone.

Hello, I developed the final version of the deep sparse autoencoder with the following Python code:

It also works:

from __future__ import division, print_function, absolute_import

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

def next_batch(num, data, labels):
    '''
    Return a total of `num` random samples and labels.
    '''
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]

    return np.asarray(data_shuffle), np.asarray(labels_shuffle)

# Parameters
learning_rate = 0.01
training_epochs = 1000
batch_size = 256
display_step = 1
examples_to_show = 10
lambda_ = 3e-3
beta = 3


# Network Parameters
n_input = 60       # number of input features
n_hidden_1 = 30    # 1st layer num features
n_hidden_2 = 10    # 2nd layer num features
n_output = 3       # bottleneck (code) layer num features
sparsity_param = 0.05

# tf Graph input
X = tf.placeholder("float", [None, n_input])

weights = {
    # variables are named explicitly so the L2 terms below can select them by name
    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1]), name='encoder_h1'),
    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]), name='encoder_h2'),
    'encoder_h3': tf.Variable(tf.random_normal([n_hidden_2, n_output]), name='encoder_h3'),
    'decoder_h1': tf.Variable(tf.random_normal([n_output, n_hidden_2]), name='decoder_h1'),
    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1]), name='decoder_h2'),
    'decoder_h3': tf.Variable(tf.random_normal([n_hidden_1, n_input]), name='decoder_h3'),
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'encoder_b3': tf.Variable(tf.random_normal([n_output])),
    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_2])),
    'decoder_b2': tf.Variable(tf.random_normal([n_hidden_1])),
    'decoder_b3': tf.Variable(tf.random_normal([n_input])),

}

# Building the encoder
def encoder(x):
    # Encoder hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    # Encoder hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    # Encoder hidden layer with sigmoid activation #3 (the code layer)
    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['encoder_h3']),
                                   biases['encoder_b3']))
    return layer_3

# Building the decoder
def decoder(x):
    # Decoder hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    # Decoder hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    # Decoder hidden layer with sigmoid activation #3 (the reconstruction)
    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['decoder_h3']),
                                   biases['decoder_b3']))
    return layer_3


def KL_divergence(x1, y1):
    # KL divergence between two Bernoulli distributions with means x1 and y1
    return x1 * tf.log(x1 / y1) + (1 - x1) * tf.log((1 - x1) / (1 - y1))

# Construct model
Nv = batch_size
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
# Weight-decay (L2) terms, collected from the named encoder/decoder weight variables
W1 = sum(tf.reduce_sum(tf.abs(var)**2) for var in tf.trainable_variables() if 'encoder_' in var.name)
W2 = sum(tf.reduce_sum(tf.abs(var)**2) for var in tf.trainable_variables() if 'decoder_' in var.name)

# Prediction
y_pred = decoder_op
# Targets (Labels) are the input data.
y_true = X



## Sparsity
# mean activation of each code unit over the batch (reuse encoder_op instead of
# calling encoder(X) again, which would add a second forward pass to the graph)
rho_hat = tf.reduce_mean(encoder_op, axis=0)
#rho_hat = (1 + tf.reduce_mean(encoder_op, axis=0)) / 2
rho = np.tile(sparsity_param, n_output)

# Define loss and optimizer, minimize the squared error
size = tf.shape(tf.pow(y_true - y_pred, 2))

cost = tf.reduce_sum(tf.pow(y_true - y_pred, 2))/(2*Nv) + (lambda_/2)*(W1+W2) + beta * tf.reduce_sum(KL_divergence(rho,rho_hat))
#(lambda_/2)*(tf.reduce_sum(W1**2) + tf.reduce_sum(W1**2))


optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph (`data` here is my own dataset: a NumPy array whose first
# 60 columns are the autoencoder inputs)
with tf.Session() as sess:
    sess.run(init)
    total_batch = int(len(data) / batch_size)
    # Training cycle
    for epoch in range(training_epochs):
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = next_batch(batch_size,data[:,0:60], data[:,60:] )
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1),
                  "cost=", "{:.9f}".format(c))


    print("Optimization Finished!")

    tr, label = next_batch(200000,data[:,0:60], data[:,60:])

    encode_decode = sess.run(
        encoder_op, feed_dict={X: tr})
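
Note that KL_divergence takes tf.log of rho_hat, so if any code unit's mean activation saturates at exactly 0 or 1 the cost becomes NaN. A minimal safeguard sketch (the eps value below is an arbitrary choice of mine, not part of the paper's cost) is to clip rho_hat before the KL term:

# keep rho_hat strictly inside (0, 1) so the log terms stay finite
eps = 1e-8
rho_hat_clipped = tf.clip_by_value(rho_hat, eps, 1.0 - eps)
sparsity_term = beta * tf.reduce_sum(KL_divergence(rho, rho_hat_clipped))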

Below is the code for a 3-layer sparse autoencoder implemented in TensorFlow 2.1. In this example, the input and output are 1D arrays of length 496. I would like to thank Dr. Zhiwei Lin of Ulster University for providing the initial implementation on GitHub.

I wrapped it in a class where each layer is now an instance variable, which makes it easier to get a separate output from each layer. You will notice that I only use the first layer's output for the sparsity constraint. This architecture is similar to the one used in the paper:

My implementation is bare-bones and the training is simple, so both can be improved. To train the model: model = my_model(), then for i in range(1000): model.network_learn(X, Y) (see the snippet after the class below).

import tensorflow as tf

class my_model:
    def __init__(self):
        xavier = tf.keras.initializers.GlorotUniform()
        self.l1 = tf.keras.layers.Dense(496, kernel_initializer=xavier, activation=tf.nn.sigmoid, input_shape=(496,))
        self.l2 = tf.keras.layers.Dense(496, kernel_initializer=xavier, activation=tf.nn.sigmoid)
        self.l3 = tf.keras.layers.Dense(496, kernel_initializer=xavier, activation=tf.nn.sigmoid)
        self.train_op = tf.keras.optimizers.SGD(learning_rate=0.01)
        self.rho = 0.05      # target sparsity
        self.alpha = 0.001   # weight-decay coefficient
        self.beta = 4        # sparsity penalty weight

    def kl_divergence(self, rho, rho_hat):
        return rho * tf.math.log(rho) - rho * tf.math.log(rho_hat) + (1 - rho) * tf.math.log(1 - rho) - (1 - rho) * tf.math.log(1 - rho_hat)

    def run(self, X):
        out1 = self.l1(X)
        out2 = self.l2(out1)
        out3 = self.l3(out2)
        return out3

    def get_loss(self, X, Y):
        # sparsity constraint is applied to the first layer's activations only
        rho_hat = tf.reduce_mean(self.l1(X), axis=0)
        kl = self.kl_divergence(self.rho, rho_hat)

        out1 = self.l1(X)
        out2 = self.l2(out1)
        X_prime = self.l3(out2)
        diff = X - X_prime

        W1 = self.l1.variables[0]
        W2 = self.l2.variables[0]
        W3 = self.l3.variables[0]
        cost = 0.5 * tf.reduce_mean(tf.reduce_sum(diff**2, axis=1)) \
            + 0.5 * self.alpha * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3)) \
            + self.beta * tf.reduce_sum(kl)
        return cost

    def get_grad(self, X, Y):
        with tf.GradientTape() as tape:
            tape.watch(self.l1.variables)
            tape.watch(self.l2.variables)
            tape.watch(self.l3.variables)
            L = self.get_loss(X, Y)
            g = tape.gradient(L, [self.l1.variables[0], self.l1.variables[1],
                                  self.l2.variables[0], self.l2.variables[1],
                                  self.l3.variables[0], self.l3.variables[1]])
        return g

    def network_learn(self, X, Y):
        g = self.get_grad(X, Y)
        self.train_op.apply_gradients(zip(g, [self.l1.variables[0], self.l1.variables[1],
                                              self.l2.variables[0], self.l2.variables[1],
                                              self.l3.variables[0], self.l3.variables[1]]))
Here is how to train such a network:
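A minimal training sketch (the random array below is only a stand-in for real 496-dimensional samples; since this is an autoencoder, Y is just X again):

import numpy as np

X = np.random.rand(256, 496).astype(np.float32)  # stand-in batch of 496-dim samples
Y = X                                            # autoencoder targets are the inputs

model = my_model()
for i in range(1000):
    model.network_learn(X, Y)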