Memory leak in TensorFlow-based batch matrix factorization (Python)


Suppose I have a rate matrix R that I want to factor into matrices U and V using TensorFlow.

Without batching this is a simple problem that can be solved with the following code:

import numpy as np
import tensorflow as tf

# define variables (R, R_dim_1, R_dim_2, output_dim, learning_rate and
# no_epochs are assumed to be defined; the pre-1.0 TensorFlow API is used)
u = tf.Variable(np.random.rand(R_dim_1, output_dim), dtype=tf.float32, name='u')
v = tf.Variable(np.random.rand(output_dim, R_dim_2), dtype=tf.float32, name='v')

# predict rate by multiplication 
predicted_R = tf.matmul(tf.cast(u, tf.float32), tf.cast(v, tf.float32))

#cost function and train step
cost = tf.reduce_sum(tf.reduce_sum(tf.abs(tf.sub(predicted_R, R))))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    init = tf.initialize_all_variables()
    sess.run(init)
    for i in range(no_epochs):
        _, this_cost = sess.run([train_step, cost])
        print 'cost: ', this_cost
I decided to tackle this with batched updates. My solution was to feed in the indices of U and V that I want to use for predicting the rate matrix R, and to update only those selected entries. Here is my code (please read the comments if it takes too long to follow):

But I got a memory leak at u = tf.assign(u, tmp_u) and v = tf.assign(v, tmp_v). I applied it, but got nowhere.
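The batched code this refers to did not survive in this copy of the question, so the following is only a sketch of the leaking pattern described above. It assumes u, v, cost and train_step are built as in the snippet above, that cur_u/cur_v are the batch slices of U and V, and that idx1/idx2 are placeholders selecting the batch. The key mistake is calling tf.assign inside the loop:

# Hypothetical reconstruction -- not the asker's exact code.
tmp_u = initial_u
tmp_v = initial_v
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for i in range(no_epochs):
        rand_idx1 = np.random.randint(0, R_dim_1, batch_size1)
        rand_idx2 = np.random.randint(0, R_dim_2, batch_size2)
        _, this_cost, tmp_cur_u, tmp_cur_v = sess.run(
            [train_step, cost, cur_u, cur_v],
            feed_dict={idx1: rand_idx1, idx2: rand_idx2})
        # write the updated batch back into full numpy copies of U and V
        tmp_u = np.array(tmp_u)
        tmp_u[rand_idx1] = tmp_cur_u
        tmp_v = np.array(tmp_v)
        tmp_v[:, rand_idx2] = tmp_cur_v
        # LEAK: tf.assign builds a brand-new assign op on every pass,
        # so the default graph (and memory) grows with each iteration
        u = tf.assign(u, tmp_u)
        v = tf.assign(v, tmp_v)
        sess.run([u, v])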
There was also another solution that applies updates only to a subset of U and V, but it ran into many other errors, so please stick to the question of how to solve my memory leak.
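For what it's worth, the subset-update approach mentioned above can be expressed with tf.scatter_update, which overwrites only the selected rows of a variable in place; defining the op once, outside the loop, keeps the graph fixed. This is a sketch, not the attempt from the question (new_rows is a hypothetical placeholder name):

# Sketch of in-place row updates for u; idx1 selects the batch rows.
new_rows = tf.placeholder(tf.float32, shape=[batch_size1, output_dim],
                          name='new_rows')
scatter_u = tf.scatter_update(u, idx1, new_rows)  # built ONCE

# inside the training loop:
# sess.run(scatter_u, feed_dict={idx1: rand_idx1, new_rows: tmp_cur_u})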

Sorry for the long question, and thank you for reading.

I solved this by feeding the updated values of U and V in as placeholders and then assigning U and V to those passed-in parameters, so that the graph stays the same across iterations. The code follows:

import numpy as np
import tensorflow as tf

# define variables
u = tf.Variable(np.random.rand(R_dim_1, output_dim), dtype=tf.float32, name='u')
v = tf.Variable(np.random.rand(output_dim, R_dim_2), dtype=tf.float32, name='v')
idx1 = tf.placeholder(tf.int32, shape=batch_size1, name='idx1')
idx2 = tf.placeholder(tf.int32, shape=batch_size2, name='idx2')

#define new place holder for changed values of U and V
last_u = tf.placeholder(tf.float32, shape=[R_dim_1, output_dim], name='last_u')
last_v = tf.placeholder(tf.float32, shape=[output_dim, R_dim_2], name='last_v')

#set U and V to updated ones
change_u = tf.assign(u, last_u)
change_v = tf.assign(v, last_v)

# get current U and current V by slicing U and V
cur_u = tf.Variable(tf.gather(u, idx1), dtype=tf.float32, name='cur_u')
cur_v = tf.transpose(v)
cur_v = tf.gather(cur_v, idx2)
cur_v = tf.Variable(tf.transpose(cur_v), dtype=tf.float32, name='cur_v')

# predict rate by multiplication 
predicted_R = tf.matmul(tf.cast(cur_u, tf.float32), tf.cast(cur_v, tf.float32))

# get needed rate from rate matrix by slicing it
cur_rate = tf.gather(R, idx1)
cur_rate = tf.transpose(cur_rate)
cur_rate = tf.gather(cur_rate, idx2)
cur_rate = tf.transpose(cur_rate)

#cost function and train step
cost = tf.reduce_sum(tf.reduce_sum(tf.abs(tf.sub(predicted_R, cur_rate))))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    tmp_u = initial_u
    tmp_v = initial_v

    # initialize variables  
    init_new_vars_op = tf.initialize_variables([v, u])
    sess.run(init_new_vars_op, feed_dict={last_u: tmp_u, last_v: tmp_v})

    init = tf.initialize_all_variables()
    rand_idx = np.sort(np.random.randint(0, R_dim_1, batch_size1))
    rand_idx2 = np.sort(np.random.randint(0, R_dim_2, batch_size2))
    sess.run(init, feed_dict={idx1: rand_idx, idx2: rand_idx2})

    for i in range(no_epochs):
        with tf.Graph().as_default():
            rand_idx1 = np.random.randint(0, R_dim_1, batch_size1)
            rand_idx2 = np.random.randint(0, R_dim_2, batch_size2)
            _, this_cost, tmp_u, tmp_v, tmp_cur_u, tmp_cur_v, _, _ = \
                sess.run([train_step, cost, u, v, cur_u, cur_v, change_u, change_v],
                         feed_dict={idx1: rand_idx1, idx2: rand_idx2, last_u: tmp_u, last_v: tmp_v})
            print this_cost

            # compute the new values of U and V on the host, but don't assign
            # them here (change_u/change_v do that on the next run call)
            tmp_u = np.array(tmp_u)
            tmp_u[rand_idx1] = tmp_cur_u

            tmp_v = np.array(tmp_v)
            tmp_v[:, rand_idx2] = tmp_cur_v

A new assign op gets created in every loop iteration, similar to what happens in the linked question. I understand that this is my problem, but I can't just "build the graph at the beginning and only execute it in the training loop", as you said.

Modifying the graph between run calls increases memory usage and is quite slow. Every time the graph is modified it has to be encoded and copied in full, so something like for i ...: sess.run(a.assign_add(...)) has quadratic complexity. To work around this I wrote a wrapper called "imperative".

Your code has too little documentation, though; could you tell me what I should do?
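A quick way to check for this kind of leak (an illustrative aside, not from the original exchange) is to finalize the graph once everything is built; any op accidentally created inside the loop then raises a RuntimeError instead of silently growing the graph:

# after building all variables, train_step and the placeholder/assign ops
sess.graph.finalize()  # lock the graph; adding ops now raises RuntimeError

# optional: watch the op count to confirm the graph is no longer growing
print len(sess.graph.get_operations())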