TensorFlow: cosine difference objective function constant during training


The example below is a simplified version of what I am working on. I am trying to get a neural network to minimise a cosine distance. The reason I implement my own cosine-difference loss function rather than using TensorFlow's built-in one is that, in the full version of my project, the built-in one does not do exactly what I need (although in this simplified version the two are equivalent).

I feed two orthogonal vectors (A and B) into the network. I am trying to reduce the cosine distance between A and B. The network does this by minimising a loss function which also includes a component that preserves the length of vector B under the transformation. Ultimately, the output should be a vector with the same direction as vector A and the same length as vector B.
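For reference, the two loss helpers used later (cosine_distance_simple and maintain_length) are not shown in the code below; the following is only a sketch of what they might look like based on the description above, not the actual implementations:

import tensorflow as tf

def cosine_distance_simple(a, b):
    # 1 - cosine similarity between corresponding rows of a and b,
    # averaged over the batch (hypothetical implementation)
    dot = tf.reduce_sum(a * b, axis=1)
    norms = tf.norm(a, axis=1) * tf.norm(b, axis=1) + 1e-8
    return tf.reduce_mean(1.0 - dot / norms)

def maintain_length(b, b_transformed):
    # penalise any change in the Euclidean length of vector B
    # under the transformation (hypothetical implementation)
    return tf.reduce_mean(tf.square(tf.norm(b, axis=1) - tf.norm(b_transformed, axis=1)))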

The problem I am having is that the network output, vector_B_transformed, never changes, and the loss function I have built stays constant throughout training. I have tried initialising the weights in different ways, but that did not help. I never use ReLU on the final layer of the fully connected network, and I have tried ReLU activations on the hidden layers, but that did not seem to make any difference.
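When the loss is completely flat, one quick sanity check is whether the loss tensor is even connected to the trainable variables through gradients. A self-contained sketch (made-up names, not code from this post):

import tensorflow as tf

tf.reset_default_graph()
x = tf.placeholder(tf.float32, [None, 2], name='x')
w = tf.get_variable('w', shape=[2, 2],
                    initializer=tf.truncated_normal_initializer(stddev=0.01))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))

# tf.gradients returns None for any variable the loss does not depend on;
# such a variable can never be updated by the optimizer.
grads = tf.gradients(loss, tf.trainable_variables())
for var, g in zip(tf.trainable_variables(), grads):
    print(var.name, 'reachable from loss:', g is not None)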

I crudely append the results to a list and print them to the terminal. The number of epochs is reduced to 200 here, but the same problem occurs when it is increased.

I would really appreciate any help, as I am genuinely stuck.

import math
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.python.framework import ops

# from utils import *

##### New Helper Functions

# weight and bias wrappers
def weight_variable(name, shape):
    """
    Create a weight variable with appropriate initialization
    :param name: weight name
    :param shape: weight shape
    :return: initialized weight variable
    """
    initer = tf.truncated_normal_initializer(stddev=0.01)
    return tf.get_variable('W_' + name,
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)

def bias_variable(name, shape):
    """
    Create a bias variable with appropriate initialization
    :param name: bias variable name
    :param shape: bias variable shape
    :return: initialized bias variable
    """
    initial = tf.constant(0., shape=shape, dtype=tf.float32)
    return tf.get_variable('b_' + name,
                           dtype=tf.float32,
                           initializer=initial)


def fc_layer(x, num_units, name, use_relu=True):
    """
    Create a fully-connected layer
    :param x: input from previous layer
    :param num_units: number of hidden units in the fully-connected layer
    :param name: layer name
    :param use_relu: boolean to add ReLU non-linearity (or not)
    :return: The output array
    """
    in_dim = x.get_shape()[1]
    W = weight_variable(name, shape=[in_dim, num_units])
    b = bias_variable(name, [num_units])
    layer = tf.matmul(x, W)
    layer += b
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer
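One alternative to the stddev=0.01 truncated normal above is a Glorot (Xavier) initialiser; a sketch of such a variant of weight_variable (hypothetical, not the code actually used here):

def weight_variable_glorot(name, shape):
    """
    Same wrapper as weight_variable, but with a Glorot-uniform initialiser,
    which scales the initial weights to the layer's fan-in and fan-out.
    (hypothetical variant, not from the original post)
    """
    initer = tf.glorot_uniform_initializer()
    return tf.get_variable('W_' + name,
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)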

OK, I found the problem, and in the end it was a simple mistake:

My optimizer was not in my update step. Because sess.run only executes the ops it is asked to fetch, fetching just self.vector_B_ and self.loss evaluates the loss without ever running the Adam update:

            vector_B_transformed, _ = self.sess.run(
                [self.vector_B_, self.loss],
                feed_dict={self.vector_A: vecA, 
                           self.vector_B: vecB,  
                           self.lr: lr})

My code still does not work exactly as expected, but it is at least trying to optimise something now, so I can make progress from here.

Since writing this post I have tried a few things:

* I dug a little deeper and found that none of the weights or biases were being updated.
* I tried tanh instead of ReLU.
* Instead of always feeding in the same vector, I tried initialising 10,000 vectors on a circle of radius 1 and picking one at random each epoch. That is arguably a harder task than learning how to transform a single input vector, but the weights and biases still stayed constant.
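A quick, self-contained way to confirm whether a single optimisation step changes any trainable variable at all (a sketch with made-up names, not code from this post):

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
x = tf.placeholder(tf.float32, [None, 2])
w = tf.get_variable('w', shape=[2, 2],
                    initializer=tf.truncated_normal_initializer(stddev=0.01))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {x: np.array([[0.0, 2.0]], dtype=np.float32)}
    before = sess.run(tf.trainable_variables())   # variable values before the step
    sess.run(train_op, feed_dict=feed)            # one optimisation step
    after = sess.run(tf.trainable_variables())    # variable values after the step
    for var, b, a in zip(tf.trainable_variables(), before, after):
        print(var.name, 'updated:', not np.allclose(b, a))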
from __future__ import division
import tensorflow as tf

# generator network without residual block
def generator(vector, reuse=False, name="generator"):

    with tf.variable_scope(name):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        else:
            assert tf.get_variable_scope().reuse is False

        output_dimension = vector.shape[1]

        e1 = fc_layer(vector, 2, name='g_e1', use_relu=False)
        e2 = fc_layer(e1, 4, name='g_e2', use_relu=False)    
        e3 = fc_layer(e2, 8, name='g_e3', use_relu=False)
        e4 = fc_layer(e3, 16, name='g_e4', use_relu=False)
        e5 = fc_layer(e4, 16, name='g_e5', use_relu=False)
        e6 = fc_layer(e5, 8, name='g_e6', use_relu=False)
        e7 = fc_layer(e6, 4, name='g_e7', use_relu=False)
        e8 = fc_layer(e7, output_dimension, name='g_e8', use_relu=False)


        return e8
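Note that every fc_layer call above uses use_relu=False, so e1 through e8 compose into a single affine map. A variant with nonlinear hidden layers (tanh here, one of the activations mentioned above) and a linear output layer might look like this sketch (hypothetical, not the code actually used):

def generator_tanh(vector, reuse=False, name="generator"):
    # same layer sizes as above, but with tanh on the hidden layers
    # and a purely linear output layer (hypothetical variant)
    with tf.variable_scope(name):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        output_dimension = vector.shape[1]

        h = vector
        for i, units in enumerate([2, 4, 8, 16, 16, 8, 4]):
            h = tf.nn.tanh(fc_layer(h, units, name='g_e%d' % (i + 1), use_relu=False))
        return fc_layer(h, output_dimension, name='g_e8', use_relu=False)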
from __future__ import division
import os
import time
from glob import glob
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt  # needed for the quiver plot in train()
from collections import namedtuple
from sklearn.model_selection import train_test_split

# from module import *
# from utils import *

class cosine_diff_test(object):
    def __init__(self, sess, args):
        # initialise tensorflow session
        self.sess = sess

        # data, test, train splits
        self.data_A = args.vA
        self.data_B = args.vB

        self.generator = generator

        # when an instance of cosine_diff_test is created, _build_model is called automatically
        self._build_model()


    def _build_model(self):

        #### INPUTS TO NETWORKS
        # placeholder for vectors
        self.vector_A = tf.placeholder(tf.float32,
                                     [None, 2],
                                     name='vector_A')
        self.vector_B = tf.placeholder(tf.float32,
                                    [None, 2],
                                    name='vector_B')

        # FCNN to determine vector move required
        self.vector_B_ = self.generator(self.vector_B, False, name="generatorB")

        # minimise cosine distance between A and transformed B while keeping B's length the same

        self.loss = cosine_distance_simple(self.vector_A, self.vector_B_) \
                        + maintain_length(self.vector_B, self.vector_B_)
        '''

        self.loss = abs_criterion(self.vector_A, self.vector_A_) \
                + abs_criterion(self.vector_B, self.vector_B_)
        '''

        # trainable variables
        t_vars = tf.trainable_variables()

        # training variables for generator
        self.g_vars = [var for var in t_vars if 'generator' in var.name]




    def train(self, args):
        # placeholder for learning rate
        self.lr = tf.placeholder(tf.float32, None, name='learning_rate')

        # define optimizer
        self.optim = tf.train.AdamOptimizer(self.lr, beta1=args.beta1).minimize(self.loss, var_list=self.g_vars)



        # initialise global variables and run session
        init_op = tf.global_variables_initializer()

        self.sess.run(init_op)

        lr = args.lr

        # Import Data
        vecA = self.data_A.copy()
        vecB = self.data_B.copy()

        results_loss = []
        results_vector_B_transformed = []

        # iterate over the number of epochs defined
        for epoch in range(args.epoch):

            # Update 
            vector_B_transformed, _ = self.sess.run(
                [self.vector_B_, self.loss],
                feed_dict={self.vector_A: vecA, 
                           self.vector_B: vecB,  
                           self.lr: lr})

            results_loss.append(_)
            results_vector_B_transformed.append(vector_B_transformed)

        print(results_loss)
        print(results_vector_B_transformed)



        origin = args.orig
        print('plotting ...')
        plt.xlim((-0.5,1.5));
        plt.ylim((-0.5,2.5));
        plt.quiver(*origin, vecA, vector_B_transformed, 
                   color=['r','b'],angles='xy', scale_units='xy', scale=1);


class Args():
    A_vec = np.array([1, 0]).reshape(1,-1)
    B_vec = np.array([0, 2]).reshape(1,-1)
    ori = np.array([0, 0]).reshape(1,-1)


    epoch = 200
    lr = 0.0002
    vA = A_vec
    vB = B_vec
    beta1 = 0.5
    orig = ori

args = Args()
# TRAIN
tf.reset_default_graph()

tfconfig = tf.ConfigProto(allow_soft_placement=True)
tfconfig.gpu_options.allow_growth = True
with tf.Session(config=tfconfig) as sess:
    model = cosine_diff_test(sess, args)
    model.train(args) 

With self.optim included in the list of fetches, the Adam step actually runs on every epoch, and the corrected update becomes:

            # Update 
            vector_B_transformed, _, loss = self.sess.run(
                [self.vector_B_, self.optim, self.loss],
                feed_dict={self.vector_A: vecA, 
                           self.vector_B: vecB,  
                           self.lr: lr})