Python: Neural network in TensorFlow


I have generated a balanced dataset of 4000 examples, 2000 for the negative class and 2000 for the positive class. I then built a neural network with one hidden layer of 3 neurons using a ReLU activation, and an output layer with a sigmoid. The cost function is a standard cross-entropy, and I chose Adam as the optimizer. Using minibatches of 15 examples and training for 1000 epochs, I get a final accuracy of 96.37%, so I assume the model does well on the test set. But when I try to display the decision boundary, this is what I get:
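For reference, the "standard cross-entropy" here is the binary cross-entropy on the sigmoid output, which (as far as I understand) is also what tf.losses.log_loss computes in the script below:

$$J = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log\hat{y}^{(i)} + \big(1-y^{(i)}\big)\log\big(1-\hat{y}^{(i)}\big)\Big]$$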

I can't figure out whether the problem is a bug in the code or whether the model simply needs more training. The script I am using for this:

# implement a neural network that finds a decision boundary under a
# constraint on the second hidden layer with tensorflow

import numpy as np
from sklearn.utils import shuffle
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tf_utils import random_mini_batches
import matplotlib.pyplot as plt

def generate_dataset():

    np.random.seed(2)

    # positive class samples
    d1_x = np.random.normal(5, 10, 1000)
    d1_y = np.random.normal(5, 2, 1000)
    d2_x = np.random.normal(40, 20, 1000)
    d2_y = np.random.normal(2, 1, 1000)

    # negative class samples
    d3_x = np.random.normal(60, 5, 2000)
    d3_y = np.random.normal(10, 1, 2000)

    plt.scatter(d1_x, d1_y, color='b')
    plt.scatter(d2_x, d2_y, color='b')
    plt.scatter(d3_x, d3_y, color='r')

    Y = np.zeros((4000, 1))

    d_x = np.concatenate([d1_x, d2_x, d3_x])
    d_y = np.concatenate([d1_y, d2_y, d3_y])

    d_x = d_x.reshape(d_x.shape[0], 1)
    d_y = d_y.reshape(d_y.shape[0], 1)

    X = np.concatenate([d_x, d_y], axis=1)
    Y[2000:] = 1

    return X, Y
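# Note: X ends up with shape (4000, 2) (column 0 = x coordinate, column 1 = y
# coordinate) and Y has shape (4000, 1), with the last 2000 rows labelled 1.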


# define a tensorflow model with one hidden layer of 3 units and a scalar output
# (the commented-out lines below add further hidden layers)

costs = []
print_cost = True

learning_rate = .0009
minibatch_size = 15
num_epochs = 1000

XX, YY = generate_dataset()
XX, YY = shuffle(XX, YY)
X_norm = normalize(XX)
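# sklearn's normalize() uses axis=1 by default, so every sample (row) is rescaled to unit L2 norm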
X_train, X_test, y_train, y_test = train_test_split(X_norm, YY, test_size=0.2, random_state=42)

X_train = np.transpose(X_train)
y_train = np.transpose(y_train)
X_test = np.transpose(X_test)
y_test = np.transpose(y_test)

# define train and test sets

m = XX.shape[1]  # input dimension
n = YY.shape[1]  # output dimension

X = tf.placeholder(tf.float32, shape = [m, None], name = 'X')
y = tf.placeholder(tf.float32, shape = [n, None], name = 'y')

# model parameters
n1 = 3  # output dimension of the first hidden layer
#n2 = 4  # output dimension of the second hidden layer
#n3 = 2

W1 = tf.get_variable("W1", [n1, m], initializer=tf.contrib.layers.xavier_initializer(seed=1))
b1 = tf.get_variable("b1", [n1 ,1], initializer=tf.zeros_initializer)

#W2 = tf.get_variable("W2", [n2, n1], initializer=tf.contrib.layers.xavier_initializer(seed=1))
#b2 = tf.get_variable("b2", [n2, 1], initializer=tf.zeros_initializer)

#W3 = tf.get_variable("W3", [n3, n2], initializer=tf.contrib.layers.xavier_initializer(seed=1))
#b3 = tf.get_variable("b3", [n3, 1], initializer=tf.zeros_initializer)

W4 = tf.get_variable("W4", [n, n1], initializer=tf.contrib.layers.xavier_initializer(seed=1))
b4 = tf.get_variable("b4", [n, 1], initializer=tf.zeros_initializer)
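# with m = 2 inputs, n1 = 3 hidden units and n = 1 output, the active network is 2-3-1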

# forward propagation
z1 = tf.add(tf.matmul(W1, X), b1)
a1 = tf.nn.relu(z1)

#z2 = tf.add(tf.matmul(W2, a1), b2)
#a2 = tf.nn.relu(z2)

#z3 = tf.add(tf.matmul(W3, a2), b3)
#a3 = tf.nn.relu(z3)

z4 = tf.add(tf.matmul(W4, a1), b4)
pred = tf.nn.sigmoid(z4)
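# pred holds the sigmoid probability of the positive class, shape (1, batch_size)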

# cost function
# the loss is computed on the probability estimate given by the model (the
# sigmoid output), not on the net input z
cost = tf.reduce_mean(tf.losses.log_loss(labels=y, predictions=pred))

# ADAM optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# metrics
correct_prediction = tf.less_equal(tf.abs(pred - y), 0.5)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
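# |pred - y| <= 0.5 is equivalent to thresholding the probability at 0.5,
# so `accuracy` is the usual fraction of correctly classified examples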

init = tf.global_variables_initializer()

with tf.Session() as sess:
    seed = 1
    sess.run(init)

    for epoch in range(num_epochs):
        epoch_cost = 0
        seed += 1
        num_minibatches = int(X_train.shape[0] / minibatch_size)
        minibatches = random_mini_batches(X_train, y_train, minibatch_size, seed)

        for minibatch in minibatches:
            (minibatch_X, minibatch_Y) = minibatch
            _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, y: minibatch_Y})
            epoch_cost += minibatch_cost / minibatch_size

        # Print the cost every epoch
        if print_cost == True and epoch % 100 == 0:
            print("Cost after epoch %i: %f" % (epoch, epoch_cost))
        if print_cost == True and epoch % minibatch_size == 0:
            costs.append(epoch_cost)

    #plt.plot(costs)
    #plt.show()
    cp, val_accuracy = sess.run([correct_prediction, accuracy], feed_dict={X: X_test, y: y_test})

    # plot the cost
    # plt.plot(np.squeeze(costs))
    # plt.ylabel('cost')
    # plt.xlabel('iterations (per fives)')
    # plt.title("Learning rate =" + str(learning_rate))
    # plt.show()

    cmap = plt.get_cmap('Paired')

    # Define region of interest by data limits
    xmin, xmax = min(XX[:, 0]) - 1, max(XX[:, 0]) + 1
    ymin, ymax = min(XX[:, 1]) - 1, max(XX[:, 1]) + 1
    steps = 100
    x_span = np.linspace(xmin, xmax, steps)
    y_span = np.linspace(ymin, ymax, steps)

    xx, yy = np.meshgrid(x_span, y_span)
    A = np.concatenate([[xx.ravel()], [yy.ravel()]], axis=0)
    A = normalize(A, axis=0)
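    # A has shape (2, steps * steps); normalize(A, axis=0) rescales each grid
    # point (each column) to unit L2 norm, the same per-sample scaling that
    # normalize(XX) applied to the training data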

    # Make predictions across region of interest
    predictions = sess.run(pred, feed_dict={X: A})

    # Plot decision boundary in region of interest
    z = predictions.reshape(xx.shape)
    plt.contourf(xx, yy, z, cmap=cmap, alpha=.5)

    plt.show()

    # Get predicted labels on training data and plot
    #train_labels = model.predict(X)
    #ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap, lw=0)
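The last two commented-out lines were meant to overlay the labelled training points on the plot. A minimal sketch of how that could look (my addition, not part of the original script; it only reuses xx, yy, z, XX, YY and cmap, so it runs after the session in the raw, unnormalized coordinates of the contour axes):

# sketch: redraw the decision regions and colour the raw points by their true label
plt.contourf(xx, yy, z, cmap=cmap, alpha=.5)
plt.scatter(XX[:, 0], XX[:, 1], c=YY.ravel(), cmap=cmap, lw=0)
plt.show()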