TensorFlow network too large for GPU memory


I have a fairly large network, and my GPU is running out of memory. It isn't a bug in any of the code; the network itself is simply too large to fit into memory. I have even tried the suggested GPU configuration options.

For example, I tried the following
gpu_options

gpu_options = tf.GPUOptions()
config = tf.ConfigProto(gpu_options=gpu_options)
config.gpu_options.allow_growth = True  # allocate GPU memory on demand rather than all up front
# config.optimizer_options.opt_level = 2
# config.graph_options.enable_recv_scheduling = True
# config.graph_options.build_cost_model = 1
config.gpu_options.per_process_gpu_memory_fraction = 0.1  # cap this process at 10% of GPU memory
But I still run out of memory. GitHub user @girving told me that TensorFlow doesn't handle running out of memory (which makes no sense to me; why wouldn't they implement that?).

However, he also claimed that workarounds exist. I can't find anyone who has actually had to implement one. Can anyone point me in the right direction? Can I implement queuing?
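For what it's worth, here is a minimal sketch of what queuing looks like in this era of the TensorFlow API. Note the caveat: a queue only streams input batches from host memory into the graph; it does not shrink the memory the network's parameters themselves occupy. The capacity and shapes below are illustrative, taken from the network parameters further down.

import tensorflow as tf

# Stream one 5x396x396 volume at a time through a bounded queue instead of
# holding everything in a feed_dict. capacity=4 is an arbitrary choice.
queue = tf.FIFOQueue(capacity=4, dtypes=[tf.float32], shapes=[[5, 396, 396]])
volume_in = tf.placeholder(tf.float32, [5, 396, 396])
enqueue_op = queue.enqueue(volume_in)
x_batch = tf.expand_dims(queue.dequeue(), 0)  # a batch of 1: [1, 5, 396, 396]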

Here is some code for reference... The program breaks at
sess.run(init)
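One workaround along those lines (my note, not something confirmed in the thread) is to pin the variables to host memory, so the initializer does not have to materialize every parameter on the GPU; ops that consume them can still be placed on the GPU, at the cost of host-to-device copies. A minimal sketch:

import tensorflow as tf

# Sketch: keep a large weight matrix in host RAM instead of GPU memory.
with tf.device('/cpu:0'):
    w = tf.Variable(tf.random_normal([128, 792]))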


Can you compute how much memory your parameters and activations need? If the parameters are too big to fit, there isn't much you can do; if it's the activations, there may be tricks to reduce memory usage. @YaroslavBulatov I did the math on the number of weights in the LSTM (at 4 bytes per weight) and it came out to roughly 5 GB, so it's safe to say the network itself is too large. It turns out I don't actually need a network this big, so I'm just going to scale it down. @Kendall Weihe How did you scale it down?
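For concreteness, a back-of-envelope sketch of that calculation for the 1980 per-step LSTM output projections defined in the code below (sizes taken from the network parameters; the roughly-3x factor for Adam's two slot variables is my assumption, not from the thread):

# Parameter-memory estimate for the per-step output projections
# (float32 = 4 bytes each; sizes match the Network Parameters below).
n_hidden, n_input_x, n_input_y, n_input_z, n_classes = 128, 396, 396, 5, 2
n_output = n_input_x * n_classes        # 792
n_steps = n_input_y * n_input_z         # 1980

params = n_steps * (n_hidden * n_output + n_output)  # ~202M parameters
gb = params * 4 / float(2 ** 30)                     # ~0.75 GB raw
# Adam keeps two slot variables (m and v) per parameter, roughly tripling it.
print "%.0fM params, ~%.1f GB raw, ~%.1f GB with Adam" % (params / 1e6, gb, 3 * gb)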
#Kendall Weihe
#This is a CNN that handles 3D data
#Adjust network parameters below, also adjust data directory

import tensorflow as tf
import pdb
import numpy as np
from numpy import genfromtxt
from PIL import Image
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell
# from tensorflow.tensorflow.scroll import scroll_data  # unused here; not an importable TensorFlow module

# Parameters
learning_rate = 0.001
training_iters = 1000000
batch_size = 1
display_step = 1

# Network Parameters
n_images = 100
n_input_x = 396 # Input image x-dimension
n_input_y = 396 # Input image y-dimension
n_input_z = 5
n_hidden = 128
n_classes = 2 # Binary classification -- on a surface or not
n_output = n_input_x * n_classes

dropout = 0.75 # Dropout, probability to keep units

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input_z, n_input_x, n_input_y])
y = tf.placeholder(tf.float32, [None, n_input_z, n_input_x, n_input_y, n_classes], name="ground_truth")
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)

def input_data():
    data = np.empty((n_images, n_input_x, n_input_y))
    temp = []
    for i in range(n_images):
        filename = "/home/volcart/Documents/Data/input_crops/cropped00" + str(i) + ".tif"
        im = Image.open(path)
        imarray = np.array(im)
        temp.append(imarray)

    for i in range(n_images):
        for j in range(n_input_x):
            for k in range(n_input_y):
                data[i][j][k] = temp[i][j][k]

    return data

# Create some wrappers for simplicity
def conv3d(x, W, b, strides=1):
    # Conv3D wrapper, with bias and ReLU activation
    x = tf.nn.conv3d(x, W, strides=[1, strides, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool3d(x, k=2):
    # MaxPool3D wrapper
    return tf.nn.max_pool3d(x, ksize=[1, k, k, k, 1], strides=[1, k, k, k, 1],
                          padding='SAME')

def deconv3d(prev_layer, w, b, output_shape, strides):
    # Deconv layer
    deconv = tf.nn.conv3d_transpose(prev_layer, w, output_shape=output_shape, strides=strides, padding="VALID")
    deconv = tf.nn.bias_add(deconv, b)
    deconv = tf.nn.relu(deconv)
    return deconv

# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    x = tf.reshape(x, shape=[-1, n_input_z, n_input_x, n_input_y, 1])

    with tf.name_scope("conv1") as scope:
    # Convolution Layer
        conv1 = conv3d(x, weights['wc1'], biases['bc1'])
        # Max Pooling (down-sampling)
        #conv1 = tf.nn.local_response_normalization(conv1)
        conv1 = maxpool3d(conv1, k=2)

    # Convolution Layer
    with tf.name_scope("conv2") as scope:
        conv2 = conv3d(conv1, weights['wc2'], biases['bc2'])
        # Max Pooling (down-sampling)
        # conv2 = tf.nn.local_response_normalization(conv2)
        conv2 = maxpool3d(conv2, k=2)

    # Convolution Layer
    with tf.name_scope("conv3") as scope:
        conv3 = conv3d(conv2, weights['wc3'], biases['bc3'])
        # Max Pooling (down-sampling)
        # conv3 = tf.nn.local_response_normalization(conv3)
        conv3 = maxpool3d(conv3, k=2)

    # pdb.set_trace()

    temp_batch_size = tf.shape(x)[0] #batch_size shape
    with tf.name_scope("deconv1") as scope:
        output_shape = [temp_batch_size, 2, n_input_x / 4, n_input_y / 4, 16]
        strides = [1,2,2,2,1]
        #conv4 = deconv3d(conv3, weights['wdc1'], biases['bdc1'], output_shape, strides)
        # conv4 = tf.nn.local_response_normalization(conv4)
        conv4 = tf.nn.conv3d_transpose(conv3, weights['wdc1'], output_shape=output_shape, strides=strides, padding="SAME")
        conv4 = tf.nn.bias_add(conv4, biases['bdc1'])
        conv4 = tf.nn.relu(conv4)

    with tf.name_scope("deconv2") as scope:
        output_shape = [temp_batch_size, 3, n_input_x / 2, n_input_y / 2, 8]
        strides = [1,1,2,2,1]
        conv5 = deconv3d(conv4, weights['wdc2'], biases['bdc2'], output_shape, strides)
        # conv5 = tf.nn.local_response_normalization(conv5)

    with tf.name_scope("deconv3") as scope:
        output_shape = [temp_batch_size, n_input_z, n_input_x, n_input_y, 1]
        # no ReLU this time, since this is the output layer
        conv6 = tf.nn.conv3d_transpose(conv5, weights['wdc3'], output_shape=output_shape, strides=[1,1,2,2,1], padding="VALID")
        conv6 = tf.nn.bias_add(conv6, biases['bdc3'])
        conv6 = tf.nn.dropout(conv6, dropout)
        # conv6 = tf.nn.relu(conv6)


    # pdb.set_trace()

    x = tf.reshape(conv6, [-1, n_input_x])
    x = tf.split(0, n_input_y * n_input_z, x)

    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, activation=tf.nn.relu)
    # lstm_cell = rnn_cell.MultiRNNCell([lstm_cell] * n_hidden, state_is_tuple=True)
    lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=0.75)
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)


    output = []
    for i in xrange(n_input_y * n_input_z):
        output.append(tf.matmul(outputs[i], lstm_weights[i]) + lstm_biases[i])

    return output

weights = {
    # 2x2x2 conv, 1 input channel, 8 outputs
    'wc1' : tf.Variable(tf.random_normal([2, 2, 2, 1, 8])),
    # 2x2x2 conv, 8 input channels, 16 outputs
    'wc2' : tf.Variable(tf.random_normal([2, 2, 2, 8, 16])),
    # 2x2x2 conv, 16 input channels, 32 outputs
    'wc3' : tf.Variable(tf.random_normal([2, 2, 2, 16, 32])),

    'wdc1' : tf.Variable(tf.random_normal([2, 2, 2, 16, 32])),

    'wdc2' : tf.Variable(tf.random_normal([2, 2, 2, 8, 16])),

    'wdc3' : tf.Variable(tf.random_normal([3, 2, 2, 1, 8])),
}

biases = {
    'bc1': tf.Variable(tf.random_normal([8])),
    'bc2': tf.Variable(tf.random_normal([16])),
    'bc3': tf.Variable(tf.random_normal([32])),
    'bdc1': tf.Variable(tf.random_normal([16])),
    'bdc2': tf.Variable(tf.random_normal([8])),
    'bdc3': tf.Variable(tf.random_normal([1])),
}

lstm_weights = {}
lstm_biases = {}

for i in xrange(n_input_y * n_input_z):
    lstm_weights[i] = tf.Variable(tf.random_normal([n_hidden, n_output]))
    lstm_biases[i] = tf.Variable(tf.random_normal([n_output]))

# Construct model
with tf.name_scope("net") as scope:
    print "Building network..."
    pred = conv_net(x, weights, biases, keep_prob)
    print "Network built!"

    # pdb.set_trace()
    pred = tf.transpose(tf.pack(pred),[1,0,2])
    pred = tf.reshape(pred, [-1, n_input_z, n_input_x, n_input_y, n_classes])
    # Reshape for cost function
    temp_pred = tf.reshape(pred, [-1, n_classes])
    temp_y = tf.reshape(y, [-1, n_classes])

with tf.name_scope("loss") as scope:
    # cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, y))
    cost = (tf.nn.sigmoid_cross_entropy_with_logits(temp_pred, temp_y))

with tf.name_scope("opt") as scope:
    print "Initializing optimizer..."
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    print "optimizer initialized!"

# pdb.set_trace()

# Evaluate model
with tf.name_scope("acc") as scope:
    # accuracy is the difference between prediction and ground truth matrices
    correct_pred = tf.equal(0,tf.cast(tf.sub(tf.nn.sigmoid(temp_pred),temp_y), tf.int32))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

with tf.name_scope("prediction-node") as scope:
    prediction_node = tf.nn.sigmoid(temp_pred)

# Initializing the variables
with tf.name_scope("initialize-and-config") as scope:
    print "Initializing variables & configuring..."
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()
    gpu_options = tf.GPUOptions()
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allow_growth = True
    # config.optimizer_options.opt_level = 2
    # config.graph_options.enable_recv_scheduling = True
    # config.graph_options.build_cost_model = 1
    config.gpu_options.per_process_gpu_memory_fraction = 0.1
    print "Variables and configurations initialized!"

# Launch the graph
with tf.Session(config=config) as sess:
    print "Initializing session..."
    sess.run(init)
    print "Session initialized!"

    print "Restoring session..."
    saver.restore(sess, "/home/volcart/Documents/3D-CNN-2D-LSTM-reg-model/model.ckpt")
    print "Session restored!"

    tf.get_default_graph().finalize()
    # Import data
    print "Importing data..."
    data = input_data()
    print "Data imported!"

    # Keep training until reach max iterations
    for i in range(n_images):

        print "Prediction image number -- " + str(i)

        temp = []
        for j in range(n_input_z):
            temp.append(data[j,:,:])  # note: always reads slices 0..n_input_z-1, regardless of i

        temp = np.asarray(temp)
        temp = temp.reshape((1, n_input_z, n_input_x, n_input_y))
        prediction = sess.run(prediction_node, feed_dict={x: temp, keep_prob: 1.0})

        prediction = prediction.reshape((n_input_x, n_input_y, n_classes))

        temp_arr1 = np.empty((n_input_x, n_input_y))
        for p in xrange(n_input_x):  # fresh loop variables so the outer image index i is not clobbered
            for q in xrange(n_input_y):
                temp_arr1[p][q] = prediction[p][q][0]  # class-0 output

        csv_file = "/home/volcart/Documents/3D-CNN-2D-LSTM-pred/3D-CNN-2D-LSTM-step-" + str(i) + ".csv"
        np.savetxt(csv_file, temp_arr1, delimiter=",")