Python tensorflow中的内存问题
我试图用张量流建立一个高斯RBM模型。但是程序会占用太多内存 高斯分布Python tensorflow中的内存问题,python,tensorflow,Python,Tensorflow,我试图用张量流建立一个高斯RBM模型。但是程序会占用太多内存 高斯分布 import tensorflow as tf import math import input_data import numpy as np def sample_prob(probs): return tf.nn.relu( tf.sign( probs - tf.random_uniform(tf.shape(probs)))) class RBM(object):
import tensorflow as tf
import math
import input_data
import numpy as np
def sample_prob(probs):
return tf.nn.relu(
tf.sign(
probs - tf.random_uniform(tf.shape(probs))))
class RBM(object):
""" represents a sigmoidal rbm """
def __init__(self, name, input_size, output_size, gaussian_std_val=0.1):
with tf.name_scope("rbm_" + name):
self.weights = tf.Variable(
tf.truncated_normal([input_size, output_size],
stddev=1.0 / math.sqrt(float(input_size))), name="weights")
self.v_bias = tf.Variable(tf.zeros([input_size]), name="v_bias")
self.h_bias = tf.Variable(tf.zeros([output_size]), name="h_bias")
self.input = tf.placeholder("float", shape=[None, 784])
#Gaussian
def_a = 1/(np.sqrt(2)*gaussian_std_val)
def_a = tf.constant(def_a, dtype=tf.float32)
self.a = tf.Variable( tf.ones(shape=[input_size]) * def_a,
name="a")
def propup(self, visible):
""" P(h|v) """
return tf.nn.sigmoid(tf.matmul(visible, self.weights) + self.h_bias)
def propdown(self, hidden):
""" P(v|h) """
# return tf.nn.sigmoid(tf.matmul(hidden, tf.transpose(self.weights)) + self.v_bias)
return ( tf.matmul(hidden, tf.transpose(self.weights)) + self.v_bias ) / (2 * (self.a * self.a))
def sample_h_given_v(self, v_sample):
""" Generate a sample from the hidden layer """
return sample_prob(self.propup(v_sample))
def sample_v_given_h(self, h_sample):
""" Generate a sample from the visible layer """
return self.sample_gaussian(self.propdown(h_sample))
def gibbs_hvh(self, h0_sample):
""" A gibbs step starting from the hidden layer """
v_sample = self.sample_v_given_h(h0_sample)
h_sample = self.sample_h_given_v(v_sample)
return [v_sample, h_sample]
def gibbs_vhv(self, v0_sample):
""" A gibbs step starting from the visible layer """
h_sample = self.sample_h_given_v(v0_sample)
v_sample = self.sample_v_given_h(h_sample)
return [h_sample, v_sample]
def sample_gaussian(self, mean_field):
return tf.random_normal(shape=tf.shape(mean_field),
mean=mean_field,
stddev=1.0 / (np.sqrt(2) * self.a))
def cd1(self, learning_rate=0.1):
" One step of contrastive divergence, with Rao-Blackwellization "
h_start = self.sample_h_given_v(self.input)
v_end = self.sample_v_given_h(h_start)
h_end = self.sample_h_given_v(v_end)
w_positive_grad = tf.matmul(tf.transpose(self.input), h_start)
w_negative_grad = tf.matmul(tf.transpose(v_end), h_end)
update_w = self.weights + (learning_rate * (w_positive_grad - w_negative_grad) / tf.to_float(tf.shape(self.input)[0]))
update_vb = self.v_bias + (learning_rate * tf.reduce_mean(self.input - v_end, 0))
update_hb = self.h_bias + (learning_rate * tf.reduce_mean(h_start - h_end, 0))
return [update_w, update_vb, update_hb]
def cal_err(self):
err = self.input - self.gibbs_vhv(self.input)[1]
return tf.reduce_mean(err * err)
test_mnist.py
import tensorflow as tf
import input_data
from gaussian_RBM import RBM
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
rbm_modle = RBM(name="gaussian_rbm", input_size=784, output_size=1000)
sess = tf.Session()
init_op = tf.initialize_all_variables()
sess.run(init_op)
for i in range(100):
print "step: %s"%i
for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
rbm_modle.weights, rbm_modle.v_bias, rbm_modle.h_bias = \
sess.run(rbm_modle.cd1(), feed_dict={rbm_modle.input : trX[start : end]})
if start % 1280 == 0:
print sess.run(rbm_modle.cal_err(), feed_dict={rbm_modle.input : teX})
输出是
运行test_mnist.py提取mnist_数据/train-images-idx3-ubyte.gz
提取MNIST_数据/train-labels-idx1-ubyte.gz提取
MNIST_数据/t10k-images-idx3-ubyte.gz提取
MNIST_数据/t10k-labels-idx1-ubyte.gz I
tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:900]成功
从SysFS读取的NUMA节点具有负值(-1),但必须存在
至少一个NUMA节点,因此返回NUMA节点零I
tensorflow/core/common_runtime/gpu/gpu_init.cc:102]找到设备0
属性:名称:GeForce GTX 560主要:2次要:1
memoryClockRate(GHz)1.62 pciBusID 0000:01:00.0总内存:
1018.69MiB可用内存:916.73MiB I tensorflow/core/common_runtime/gpu/gpu_init.cc:126]DMA:0 I
tensorflow/core/common_runtime/gpu/gpu_init.cc:136]0:yi
tensorflow/core/common_runtime/gpu/gpu_device.cc:684]忽略gpu
设备(设备:0,名称:GeForce GTX 560,pci总线id:0000:01:00.0)
具有Cuda计算能力2.1。所需的最小Cuda能力
是3.5。步骤:0
0.0911714
0.0781856
0.0773076
0.0770751
0.0776582
0.0764748
0.0755164
0.0741131
0.0726497
0.0712237
0.0701839
0.0686315
0.0664856
0.0658309
0.0646239
0.0626652
0.0616178
0.0610061
0.0598332
0.0588843
0.0587477
0.0572056
0.0561556
0.0554848死亡
有没有办法监控内存?
有人能帮我吗?你可以用命令监视GPU内存 看起来您的GPU不支持运行tensorflow所需的CUDA的更高版本。你可以查一下 从您的输出来看,tensorflow足够聪明,不使用GPU,因此您的型号/批次大小对于RAM来说太大,或者内存泄漏 尝试运行log\u device\u placement=True运行会话,以查看tensorflow在一步一步地执行什么操作,同时运行“top”以监视内存
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
答案似乎是正确的,(计算能力不足,无法运行最新版本的CUDA/Tensorflow
然而,最低要求似乎是“计算能力=3.0”,因为我的GTX_770M能够运行Tensorflow 1.0/CUDA 8.0(见下文)
和/或尝试从源重新编译tensorflow,并在生成过程中包括2.0目标(默认情况下建议为3.5-5.5)
祝你今天愉快
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 375.51 Driver Version: 375.51 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 GeForce GTX 770M Off | 0000:01:00.0 N/A | N/A |
|100% 48C P0 N/A / N/A | 2819MiB / 3017MiB | N/A Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 Not Supported |
+-----------------------------------------------------------------------------+
训练循环可能有问题,导致计算机内存不足 对于循环的每个迭代,您都调用:
sess.run(rbm_modle.cd1(), feed_dict={rbm_modle.input : trX[start : end]})
在这个rbm_modle.cd1()
函数中,您正在创建几个新的操作,例如tf.matmul()
,因此每次调用rbm_modle.cd1()
时,您都将创建新的操作,这将导致每次迭代后使用的内存增加
您应该在循环之前定义所有操作,然后在运行操作期间使用
sess.run()
,而不创建新操作。在使用
tf.get_default_graph().finalize()
每次尝试添加新节点时,TensorFlow都会引发异常。谢谢您的帮助。我正在尝试仅使用cpu TensorFlow运行此代码。但我的程序仍将在中途终止。