Python TensorFlow在看似中等大小的张量上内存不足_Python_Tensorflow_Memory Management_Batch Normalization

Python TensorFlow在看似中等大小的张量上内存不足

python tensorflow memory-management

Python TensorFlow在看似中等大小的张量上内存不足,python,tensorflow,memory-management,batch-normalization,Python,Tensorflow,Memory Management,Batch Normalization,我试图使用TensorFlow在python中编写一个GAN，但是，在GPU上运行时，我遇到了内存不足的问题，尽管我的代码中似乎没有任何东西需要特别大的内存。正在使用所有默认参数运行程序。它应该读取少量图像，通过鉴别器网络运行它们，使用生成器网络基于噪声生成“图像”，通过同一鉴别器运行结果，并训练两个网络我的python代码，testtf.py： import tensorflow as tf import numpy as np import PIL, os, argparse, sys,

我正尝试用 TensorFlow 在 Python 中编写一个 GAN，但在 GPU 上运行时遇到了内存不足（OOM）的问题，尽管我的代码中似乎没有任何需要特别大内存的地方。程序使用全部默认参数运行。它应该读取少量图像，将它们送入判别器网络；再用生成器网络根据噪声生成“图像”，把结果送入同一个判别器，并训练这两个网络。

我的 Python 代码（testtf.py）：

import tensorflow as tf
import numpy as np
import PIL, os, argparse, sys, random

def lrelu(x):
    """Leaky ReLU: element-wise maximum of ``x`` and ``0.02 * x``."""
    return tf.maximum(x, tf.multiply(x, .02))


def bce(x, z):
    """Element-wise binary cross-entropy of prediction ``z`` against target ``x``.

    A 1e-12 offset is added inside each ``tf.log`` call.
    """
    target_term = x * tf.log(z + 1e-12)
    complement_term = (1. - x) * tf.log(1. - z + 1e-12)
    return -(target_term + complement_term)

# Command-line options; each one carries a default value.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-src',
    default=os.path.dirname(os.path.abspath(__file__)),
)  # Path of training data
parser.add_argument('-size', default='128x128')  # Size of images
parser.add_argument('-mode', default='RGB')  # Color space
parser.add_argument('-depf', type=int, default=16)  # Starting filters
parser.add_argument('-depi', type=int, default=16)  # Amount to increase filters
parser.add_argument('-batch', type=int, default=10)  # Batch size
parser.add_argument('-rate', type=float, default=12e-5)  # Learning rate
parser.add_argument('-epoch', type=int, default=1000)  # Epochs to train
args = parser.parse_args()

# Use forward slashes and guarantee a trailing slash on the source directory.
args.src = args.src.replace('\\', '/')
if not args.src.endswith('/'):
    args.src = args.src + '/'
# A size string without exactly one 'x' separator is replaced by 64x64.
if args.size.count('x') != 1:
    args.size = '64x64'
args.mode = args.mode.upper()

# List of training data files: directory entries ending in .jpg or .png (case-insensitive)
tr_data = [
    entry for entry in os.listdir(args.src)
    if entry.lower().endswith(('.jpg', '.png'))
]

# Get image dimensions and channels
ind = args.size.index('x')
i_width = int(args.size[:ind])
i_height = int(args.size[ind + 1:])
if args.mode in ('RGBA', 'HSVA'):
    i_chan = 4  # colour plus alpha
elif args.mode in ('BW', 'G'):
    i_chan = 1  # single channel
else:
    i_chan = 3

# Additional parameters: halve both sides until one of them falls below 8,
# adding args.depi to the filter count for every halving.
fil_fin = args.depf
wid_fin = i_width
hi_fin = i_height
while wid_fin >= 8 and hi_fin >= 8:
    wid_fin, hi_fin = wid_fin / 2, hi_fin / 2
    fil_fin = fil_fin + args.depi

def rgb2hsv(r,g,b): #Helper function to convert rgb to hsv
    """Convert one RGB pixel to HSV with every component on a 0-255 scale.

    Args:
        r, g, b: Red, green and blue components in the range 0-255.

    Returns:
        A new list ``[h, s, v]`` of floats. Hue is the six-sector hue
        rescaled by 255/6; saturation and value are also expressed on the
        0-255 scale so that all three components share the range of the
        RGB input.
    """
    lo = min(r, g, b)
    hi = max(r, g, b)
    spread = hi - lo
    if spread == 0:
        # Grey pixel (black included): every hue formula would divide by
        # zero, so report hue 0 and saturation 0.
        return [0.0, 0.0, float(hi)]
    # The ratios below are scale-invariant, so the raw 0-255 components can
    # be used directly without normalising first.
    if hi == r:
        hue = (g - b) / spread
    elif hi == g:
        hue = (b - r) / spread + 2
    else:
        hue = (r - g) / spread + 4
    if hue < 0:
        hue += 6  # the red sector can dip to -1 at most, so one wrap suffices
    return [hue * 255 / 6, spread / hi * 255, float(hi)]

def load_img(path): #Loads image as data
    """Load one training image as nested lists of channel values divided by 255.

    Args:
        path: File name relative to ``args.src``.

    Returns:
        ``i_height`` rows of ``i_width`` pixels. Each pixel is a list of
        floats whose length depends on ``args.mode``: 4 for RGBA/HSVA,
        1 for BW/G, and 3 otherwise.
    """
    # ``import PIL`` on its own does not guarantee the Image submodule is loaded.
    from PIL import Image

    mode = args.mode
    # Close the source file as soon as the resized RGBA copy exists.
    with Image.open(args.src + path) as source:
        im = source.resize((i_width, i_height)).convert(mode='RGBA')
    pix = im.load()
    ret = []
    for y in range(i_height):
        row = []
        for x in range(i_width):
            r, g, b, a = pix[x, y]
            if mode == 'RGBA':
                data = [r, g, b, a]
            elif mode == 'HSV':
                data = rgb2hsv(r, g, b)
            elif mode == 'HSVA':
                # Alpha has to come from the source pixel; the HSV list only
                # holds three entries.
                data = list(rgb2hsv(r, g, b)) + [a]
            elif mode == 'BW':
                # One channel, 255 or 0, so it normalises to exactly 1.0 or 0.0.
                data = [255 if max(r, g, b) > (255 / 2) else 0]
            elif mode == 'G':
                data = [max(r, g, b)]
            else:
                # RGB, and any other mode: the script-level channel count
                # falls back to 3 for those.
                data = [r, g, b]
            row.append([k / 255. for k in data])
        ret.append(row)
    return ret

# Decode every listed training file up front, in listing order.
tr_img_data = list(map(load_img, tr_data))

#===========================TENSORFLOW CODE STARTS HERE=======================

def disc(inp, reuse=None): #Discriminator for GAN
    """Build the discriminator under the 'disc' variable scope.

    Args:
        inp: Tensor that is reshaped to [-1, i_width, i_height, i_chan].
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        Output of the final sigmoid-activated dense layer.
    """
    width, height = i_width, i_height
    filters = args.depf
    with tf.variable_scope('disc', reuse=reuse):
        net = tf.reshape(inp, shape=[-1, i_width, i_height, i_chan])
        # One stride-2 convolution per halving of the tracked size, with the
        # filter count growing by args.depi after each.
        while width >= 8 and height >= 8:
            width, height = width / 2, height / 2
            net = tf.layers.conv2d(
                net,
                filters=filters,
                kernel_size=8,
                strides=2,
                padding='same',
                activation=lrelu,
            )
            filters += args.depi
        filters -= args.depi  # undo the increment made after the last convolution
        net = tf.contrib.layers.flatten(net)
        dense_units = int(width * height * filters)
        net = tf.layers.dense(net, units=dense_units, activation=lrelu)
        return tf.layers.dense(net, units=dense_units, activation=tf.nn.sigmoid)

def gen(inp, trng=True): #Generator for GAN
    """Build the generator under the 'gen' variable scope.

    Args:
        inp: Noise tensor fed to the first dense layer.
        trng: Passed as ``is_training`` to every batch-norm layer.

    Returns:
        Output of the final stride-1 transpose convolution, which has
        ``i_chan`` filters and a sigmoid activation.
    """
    cw, ch = i_width, i_height
    f = args.depf
    # Integer halving keeps cw/ch/f as ints, so the dense width below and the
    # reshape target always describe the same number of elements. With true
    # division they disagreed whenever a side was not a power of two
    # (int(cw*ch*f) != int(cw)*int(ch)*int(f)).
    while min(cw, ch) >= 8: #Get some useful information
        cw //= 2
        ch //= 2
        f += args.depi
    with tf.variable_scope('gen', reuse=None):
        x = tf.layers.dense(inp, units=cw * ch * f, activation=lrelu)
        x = tf.contrib.layers.batch_norm(x, decay=.99, is_training=trng)
        x = tf.reshape(x, shape=[-1, cw, ch, f])
        # Walk the filter count back down to args.depf, one stride-2
        # transpose convolution per step.
        # NOTE(review): the final spatial size presumably matches
        # i_width x i_height only for power-of-two sizes - confirm.
        while f > args.depf:
            f -= args.depi
            x = tf.layers.conv2d_transpose(x, filters=f, kernel_size=8, strides=2, padding='same', activation=lrelu) #Transpose convolve to larger layer of fewer filters
            x = tf.contrib.layers.batch_norm(x, decay=.99, is_training=trng)
        x = tf.layers.conv2d_transpose(x, filters=i_chan, kernel_size=8, strides=1, padding='same', activation=tf.nn.sigmoid)
        return x

# Graph inputs; the leading None leaves the batch dimension unspecified.
# NOTE(review): load_img iterates rows over i_height and columns over i_width,
# while this shape lists i_width first - the two agree for square sizes only;
# confirm the intended axis order.
img_in = tf.placeholder(tf.float32, shape=[None,i_width,i_height,i_chan]) #Placeholder for image data
noise_in = tf.placeholder(tf.float32, shape=[None,int(wid_fin*hi_fin*fil_fin)]) #Placeholder for noise

# Call order matters: the first disc() call is made with the default
# reuse=None, and the second call passes reuse=True for the same 'disc' scope.
d_real = disc(img_in) #Results of discriminator on training data
g = gen(noise_in) #Generated data
d_fake = disc(g,reuse=True) #Results of discriminator on generated data

# Losses to minimize.
# The constant targets are built with tf.ones_like / tf.zeros_like instead of
# the NumPy versions: NumPy cannot see the run-time shape of a symbolic tensor,
# whereas the TF ops yield graph tensors shaped like the discriminator output.
d_real_loss = bce(tf.ones_like(d_real), d_real)  # real images should score 1
d_fake_loss = bce(tf.zeros_like(d_fake), d_fake)  # generated images should score 0
g_loss = tf.reduce_mean(bce(tf.ones_like(d_fake), d_fake))  # generator wants its fakes scored 1
d_loss = tf.reduce_mean(.5 * (d_real_loss + d_fake_loss))

# Split the trainable variables between the two networks by name prefix.
d_vars = [
    v for v in tf.trainable_variables()
    if v.name.startswith("disc")
]
g_vars = [
    v for v in tf.trainable_variables()
    if v.name.startswith("gen")
]

# L2 regularisation term (scale 1e-6) over each network's variables.
d_reg = tf.contrib.layers.apply_regularization(
    tf.contrib.layers.l2_regularizer(1e-6),
    d_vars,
)
g_reg = tf.contrib.layers.apply_regularization(
    tf.contrib.layers.l2_regularizer(1e-6),
    g_vars,
)

# Both training ops are created under a control dependency on the ops in the
# UPDATE_OPS collection.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    opt_g = tf.train.RMSPropOptimizer(args.rate).minimize(
        g_reg + g_loss,
        var_list=g_vars,
    )
    opt_d = tf.train.RMSPropOptimizer(args.rate).minimize(
        d_reg + d_loss,
        var_list=d_vars,
    )

sess = tf.Session()
# Initialise all variables, with report_tensor_allocations_upon_oom enabled
# for this run.
sess.run(
    tf.global_variables_initializer(),
    options=tf.RunOptions(report_tensor_allocations_upon_oom=True),
)
losss = 0  # replaced by [loss_g, loss_d] once step() has run
def step():
    """Run one training iteration on a randomly drawn batch.

    Evaluates the generator and discriminator mean losses on the batch,
    stores them in the module-level ``losss`` as ``[loss_g, loss_d]``, and
    then runs the training op of each network unless its loss is already
    well below the other network's.
    """
    global losss

    feed_imgs = [random.choice(tr_img_data) for _ in range(args.batch)] #Get image data to use
    feed_noise = np.random.uniform(0.,1.,[args.batch,int(wid_fin*hi_fin*fil_fin)]) #Generate noise
    feed = {img_in:feed_imgs, noise_in:feed_noise}

    # Only the two scalar means are used below, so the per-sample loss
    # tensors are no longer fetched and copied back on every step.
    loss_g, loss_d = sess.run([g_loss, d_loss], feed_dict=feed) #Run TF
    losss = [loss_g, loss_d]

    # Skip a network for this step when its loss is far below the other's.
    train_g = not (loss_g*1.5 < loss_d)
    train_d = not (loss_d*2 < loss_g)
    if train_g: #Train
        sess.run(opt_g, feed_dict=feed)
    if train_d:
        sess.run(opt_d, feed_dict=feed)

def train(ep):
    """Call ``step()`` ``ep`` times.

    While fewer than 30 iterations remain, the stored ``losss`` value is
    printed before each step.
    """
    for done in range(ep):
        remaining = ep - done
        if remaining < 30:
            print(losss)
        step()

train(args.epoch)

是否有额外的内存占用隐藏在某处？是某些不良做法导致占用了过多内存，还是说这对于一块只有约 154MB 可用显存的 GPU 来说本来就太多了？

我猜他指的是 GB。@JHBonarius

@modesitt “Limit: 161529856” 即 154MB ;) 也许是一块 256MB 的显卡，Nvidia GeForce 7600 之类的。

说得好，@JHBonarius。

2018-07-16 14:28:43.636085: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1392] Found device 0 with properties: 
name: GeForce GTX 750 Ti major: 5 minor: 0 memoryClockRate(GHz): 1.15
pciBusID: 0000:01:00.0
totalMemory: 2.00GiB freeMemory: 428.24MiB
2018-07-16 14:28:43.642553: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1471] Adding visible gpu devices: 0
2018-07-16 14:28:44.811132: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-07-16 14:28:44.811711: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:958]      0 
(Many similar lines omitted to stay under character limit)
2018-07-16 14:28:58.283837: I T:\src\github\tensorflow\tensorflow\core\common_runtime\bfc_allocator.cc:674] 1 Chunks of size 13892608 totalling 13.25MiB
2018-07-16 14:28:58.284323: I T:\src\github\tensorflow\tensorflow\core\common_runtime\bfc_allocator.cc:678] Sum Total of in-use chunks: 137.94MiB
2018-07-16 14:28:58.284793: I T:\src\github\tensorflow\tensorflow\core\common_runtime\bfc_allocator.cc:680] Stats: 
Limit:                   161529856
InUse:                   144637952
MaxInUse:                161529856
NumAllocs:                     289
MaxAllocSize:             14613504

2018-07-16 14:28:58.285723: W T:\src\github\tensorflow\tensorflow\core\common_runtime\bfc_allocator.cc:279] *********************************************************************************_******xxx_________
2018-07-16 14:28:58.286415: W T:\src\github\tensorflow\tensorflow\core\framework\op_kernel.cc:1318] OP_REQUIRES failed at conv_grad_input_ops.cc:676 : Resource exhausted: OOM when allocating tensor with shape[16,16,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
Traceback (most recent call last):
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1322, in _do_call
    return fn(*args)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1307, in _run_fn
    options, feed_dict, fetch_list, target_list, run_metadata)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1409, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[16,16,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
     [[Node: gen/conv2d_transpose_4/conv2d_transpose = Conv2DBackpropInput[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 2, 2], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](gen/conv2d_transpose_4/conv2d_transpose-0-VecPermuteNHWCToNCHW-LayoutOptimizer, gen/conv2d_transpose_4/kernel/read, gen/BatchNorm_4/FusedBatchNorm)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

     [[Node: Mean/_5 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_516_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "testtf.py", line 168, in <module>
    train(args.epoch)
  File "testtf.py", line 166, in train
    step()
  File "testtf.py", line 150, in step
    loss_dreal, loss_dfake, loss_g, loss_d = sess.run([d_real_loss, d_fake_loss, g_loss, d_loss], feed_dict={img_in:feed_imgs, noise_in:feed_noise}) #Run TF
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 900, in run
    run_metadata_ptr)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1135, in _run
    feed_dict_tensor, options, run_metadata)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1316, in _do_run
    run_metadata)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1335, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[16,16,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
     [[Node: gen/conv2d_transpose_4/conv2d_transpose = Conv2DBackpropInput[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 2, 2], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](gen/conv2d_transpose_4/conv2d_transpose-0-VecPermuteNHWCToNCHW-LayoutOptimizer, gen/conv2d_transpose_4/kernel/read, gen/BatchNorm_4/FusedBatchNorm)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

     [[Node: Mean/_5 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_516_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'gen/conv2d_transpose_4/conv2d_transpose', defined at:
  File "testtf.py", line 120, in <module>
    g = gen(noise_in) #Generated data
  File "testtf.py", line 111, in gen
    x = tf.layers.conv2d_transpose(x,filters=f,kernel_size=8,strides=2,padding='same',activation=lrelu) #Transpose convolve to larger layer of fewer filters
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\convolutional.py", line 1272, in conv2d_transpose
    return layer.apply(inputs)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 774, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\layers\convolutional.py", line 777, in call
    data_format=conv_utils.convert_data_format(self.data_format, ndim=4))
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 1254, in conv2d_transpose
    name=name)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 1340, in conv2d_backprop_input
    dilations=dilations, name=name)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 3414, in create_op
    op_def=op_def)
  File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1740, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[16,16,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
     [[Node: gen/conv2d_transpose_4/conv2d_transpose = Conv2DBackpropInput[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 2, 2], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](gen/conv2d_transpose_4/conv2d_transpose-0-VecPermuteNHWCToNCHW-LayoutOptimizer, gen/conv2d_transpose_4/kernel/read, gen/BatchNorm_4/FusedBatchNorm)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

     [[Node: Mean/_5 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_516_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.