Python TensorFlow 2.0梯度带非类型错误_Python_Tensorflow_Neural Network

Python TensorFlow 2.0梯度带非类型错误

python tensorflow neural-network

Python TensorFlow 2.0梯度带非类型错误,python,tensorflow,neural-network,Python,Tensorflow,Neural Network,我正在尝试使用TensorFlow 2.0进行MNIST分类我的神经网络结构如下所示： # Load and prepare the MNIST dataset- mnist = tf.keras.datasets.mnist # type(mnist) # module (X_train, y_train), (X_test, y_test) = mnist.load_data() # type(X_train), type(y_train), type(X_test), type(y

我正在尝试使用TensorFlow 2.0进行MNIST分类

我的神经网络结构如下所示：

# Load and prepare the MNIST dataset-
mnist = tf.keras.datasets.mnist

# type(mnist)
# module

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# type(X_train), type(y_train), type(X_test), type(y_test)
# (numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)


# Normalize and convert samples from integers to floating-point numbers-
X_train, X_test = X_train / 255.0, X_test / 255.0

X_train = tf.cast(X_train, dtype=tf.float32)
X_test = tf.cast(X_test, dtype=tf.float32)
y_train = tf.cast(y_train, dtype=tf.float32)
y_test = tf.cast(y_test, dtype=tf.float32)

print("\nShapes of training and testing sets are:")
print("X_train.shape = {0}, y_train.shape = {1}, X_test.shape = {2} & y_test.shape = {3}\n".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
# Shapes of training and testing sets are:
# X_train.shape = (60000, 28, 28), y_train.shape = (60000,), X_test.shape = (10000, 28, 28) & y_test.shape = (10000,)


# Reshape training and testing sets-
X_train = tf.reshape(X_train, shape=(X_train.shape[0], 784))
X_test = tf.reshape(X_test, shape=(X_test.shape[0], 784))

print("\nDimensions of training and testing sets AFTER reshaping are:")
print("X_train.shape = {0} and X_test.shape = {1}\n".format(X_train.shape, X_test.shape))
# Dimensions of training and testing sets AFTER reshaping are:
# X_train.shape = (60000, 784) and X_test.shape = (10000, 784)


def relu(x):
    '''
    Function to calculate ReLU for
    given 'x'
    '''
    # return np.maximum(x, 0)
    return tf.cast(tf.math.maximum(x, 0), dtype = tf.float32)


def relu_derivative(x):
    '''
    Function to calculate derivative
    of ReLU
    '''
    # return np.where(x <= 0, 0, 1)
    # return tf.where(x <=0, 0, 1)
    return tf.cast(tf.where(x <=0, 0, 1), dtype=tf.float32)


def softmax_stable(z):
    '''
    Function to compute softmax activation function.
    Numerically stable
    '''
    # First cast 'z' to floating type-
    z = tf.cast(z, dtype = tf.float32)

    # Get largest element in 'z'-
    largest = tf.math.reduce_max(z)

    # Raise each value to exp('z - largest')-
    z_exp = tf.math.exp(z - largest)

    # Compute softmax activation values-
    s = z_exp / tf.math.reduce_sum(z_exp)

    return s


def initialize_parameters():
    W1 = tf.Variable(tf.random.uniform(shape=(784, 512), minval=0, maxval=1))
    b1 = tf.Variable(tf.random.uniform(shape = (1, 512), minval = 0, maxval=1))
    W2 = tf.Variable(tf.random.uniform(shape = (512, 10), minval=0, maxval=1))
    b2 = tf.Variable(tf.random.uniform(shape = (1, 10), minval=0, maxval=1))

    return {'W1': W1, 'W2': W2,
        'b1': b1, 'b2': b2}


def forward_propagation(parameters, X, Y):
    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']

    Z1 = tf.matmul(X_train, W1) + b1    # (6000, 512)
    A1 = relu(Z1)                       # (6000, 512)

    Z2 = tf.matmul(A1, W2) + b2         # (6000, 10)
    # A2 = softmax(Z2)                  # (6000, 10)
    # OR-
    A2 = tf.nn.softmax(Z2)          # (6000, 10)

    return A2


def cost(parameters, X, Y):
    y_pred_temp = forward_propagation(parameters, X, Y)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    return loss_fn(y_true = Y, y_pred = y_pred_temp)



def train_model(parameters, X, Y, learning_rate):

    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']

    with tf.GradientTape(persistent = True) as t:
        current_loss = cost(parameters, X_train, y_train)

    dW2, dW1, db2, db1 = t.gradient(current_loss, [W2, W1, b2, b1])

    W2 = W2 - (learning_rate * dW2)
    W1 = W1 - (learning_rate * dW1)
    b2 = b2 - (learning_rate * db2)
    b1 = b1 - (learning_rate * db1)

    updated_params = {'W1': W1, 'W2': W2,
        'b1': b1, 'b2': b2}

    return updated_params

params = initialize_parameters()

updated_params, cost_val = train_model(params, X_train, y_train, 0.01)

for epoch in range(100):
    updated_params, cost_val = train_model(updated_params, X_train, y_train, 0.01)

输入层有784个神经元（28*28）

隐藏层有512个神经元

输出层有10个神经元

隐藏层使用ReLU激活函数，输出层有10个神经元

我的代码如下：

# Load and prepare the MNIST dataset-
mnist = tf.keras.datasets.mnist

# type(mnist)
# module

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# type(X_train), type(y_train), type(X_test), type(y_test)
# (numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)


# Normalize and convert samples from integers to floating-point numbers-
X_train, X_test = X_train / 255.0, X_test / 255.0

X_train = tf.cast(X_train, dtype=tf.float32)
X_test = tf.cast(X_test, dtype=tf.float32)
y_train = tf.cast(y_train, dtype=tf.float32)
y_test = tf.cast(y_test, dtype=tf.float32)

print("\nShapes of training and testing sets are:")
print("X_train.shape = {0}, y_train.shape = {1}, X_test.shape = {2} & y_test.shape = {3}\n".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
# Shapes of training and testing sets are:
# X_train.shape = (60000, 28, 28), y_train.shape = (60000,), X_test.shape = (10000, 28, 28) & y_test.shape = (10000,)


# Reshape training and testing sets-
X_train = tf.reshape(X_train, shape=(X_train.shape[0], 784))
X_test = tf.reshape(X_test, shape=(X_test.shape[0], 784))

print("\nDimensions of training and testing sets AFTER reshaping are:")
print("X_train.shape = {0} and X_test.shape = {1}\n".format(X_train.shape, X_test.shape))
# Dimensions of training and testing sets AFTER reshaping are:
# X_train.shape = (60000, 784) and X_test.shape = (10000, 784)


def relu(x):
    '''
    Function to calculate ReLU for
    given 'x'
    '''
    # return np.maximum(x, 0)
    return tf.cast(tf.math.maximum(x, 0), dtype = tf.float32)


def relu_derivative(x):
    '''
    Function to calculate derivative
    of ReLU
    '''
    # return np.where(x <= 0, 0, 1)
    # return tf.where(x <=0, 0, 1)
    return tf.cast(tf.where(x <=0, 0, 1), dtype=tf.float32)


def softmax_stable(z):
    '''
    Function to compute softmax activation function.
    Numerically stable
    '''
    # First cast 'z' to floating type-
    z = tf.cast(z, dtype = tf.float32)

    # Get largest element in 'z'-
    largest = tf.math.reduce_max(z)

    # Raise each value to exp('z - largest')-
    z_exp = tf.math.exp(z - largest)

    # Compute softmax activation values-
    s = z_exp / tf.math.reduce_sum(z_exp)

    return s


def initialize_parameters():
    W1 = tf.Variable(tf.random.uniform(shape=(784, 512), minval=0, maxval=1))
    b1 = tf.Variable(tf.random.uniform(shape = (1, 512), minval = 0, maxval=1))
    W2 = tf.Variable(tf.random.uniform(shape = (512, 10), minval=0, maxval=1))
    b2 = tf.Variable(tf.random.uniform(shape = (1, 10), minval=0, maxval=1))

    return {'W1': W1, 'W2': W2,
        'b1': b1, 'b2': b2}


def forward_propagation(parameters, X, Y):
    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']

    Z1 = tf.matmul(X_train, W1) + b1    # (6000, 512)
    A1 = relu(Z1)                       # (6000, 512)

    Z2 = tf.matmul(A1, W2) + b2         # (6000, 10)
    # A2 = softmax(Z2)                  # (6000, 10)
    # OR-
    A2 = tf.nn.softmax(Z2)          # (6000, 10)

    return A2


def cost(parameters, X, Y):
    y_pred_temp = forward_propagation(parameters, X, Y)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    return loss_fn(y_true = Y, y_pred = y_pred_temp)



def train_model(parameters, X, Y, learning_rate):

    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']

    with tf.GradientTape(persistent = True) as t:
        current_loss = cost(parameters, X_train, y_train)

    dW2, dW1, db2, db1 = t.gradient(current_loss, [W2, W1, b2, b1])

    W2 = W2 - (learning_rate * dW2)
    W1 = W1 - (learning_rate * dW1)
    b2 = b2 - (learning_rate * db2)
    b1 = b1 - (learning_rate * db1)

    updated_params = {'W1': W1, 'W2': W2,
        'b1': b1, 'b2': b2}

    return updated_params

params = initialize_parameters()

updated_params, cost_val = train_model(params, X_train, y_train, 0.01)

for epoch in range(100):
    updated_params, cost_val = train_model(updated_params, X_train, y_train, 0.01)

对“train_model（）”的后续调用返回“dW2”、“dW1”、“db2”、“db1” 作为“非类型”

怎么了

谢谢

问题在于

初始化参数（）

函数。您不是在创建

tf.Variables

，而是在创建

tf.Tensors

。如果需要对其进行导数，则需要将参数设置为

tf.Variables


def initialize_parameters():
    W1 = tf.Variable(tf.random.uniform(shape=(784, 512), minval=0, maxval=1))
    b1 = tf.Variable(tf.random.uniform(shape = (1, 512), minval = 0, maxval=1))
    W2 = tf.Variable(tf.random.uniform(shape = (512, 10), minval=0, maxval=1))
    b2 = tf.Variable(tf.random.uniform(shape = (1, 10), minval=0, maxval=1))

    return {'W1': W1, 'W2': W2,
        'b1': b1, 'b2': b2}

即使更改为

tf.Variable

，第二次调用“train_model（）”也会使“dW2”、“dW1”、“db2”和“db1”成为非类型。出什么事了？我没有同样的错误。它在我这边运行良好（测试了10个时代）。你能重新设置运行时间并重新运行，看看错误是否仍然存在。我在“for”循环中犯了一个错误。在for循环中，放入以下-更新的参数，cost\u val=train\u model（更新的参数，X\u train，y\u train，0.01），这将给出错误。为什么？

W2=W2-（学习率*dW2）\W1=W1-（学习率*dW1）.

是罪魁祸首。这些导致

W2，W1，…

成为张量。因此，在第一次迭代之后，它们不再是

tf.变量

。因此，不要手动执行此操作，只需使用您选择的优化器中的

apply_gradients

方法即可