Python 神经网络在Tensorflow中的应用
我已经生成了一个4000个示例的平衡数据集,其中2000个用于负面类,2000个用于正面类。然后,我建立了一个神经网络,它有一个隐藏层,3个神经元,具有一个ReLU激活函数,一个输出层带有一个S形。成本函数是一个标准的交叉熵函数,我选择Adam作为优化器。使用15个示例的小批量,经过1000个阶段的运行,最终精度达到96.37%,因此我假设模型在测试集上表现良好。但当我想显示决策边界时,我得到的是: 我无法确定问题是代码错误还是模型只需要模式训练。我正在为此使用的脚本:Python 神经网络在Tensorflow中的应用,python,tensorflow,machine-learning,neural-network,Python,Tensorflow,Machine Learning,Neural Network,我已经生成了一个4000个示例的平衡数据集,其中2000个用于负面类,2000个用于正面类。然后,我建立了一个神经网络,它有一个隐藏层,3个神经元,具有一个ReLU激活函数,一个输出层带有一个S形。成本函数是一个标准的交叉熵函数,我选择Adam作为优化器。使用15个示例的小批量,经过1000个阶段的运行,最终精度达到96.37%,因此我假设模型在测试集上表现良好。但当我想显示决策边界时,我得到的是: 我无法确定问题是代码错误还是模型只需要模式训练。我正在为此使用的脚本: # implement
# implement a neural network that finds a decision boundary under a
constraint on the second hidden layer with tensorflow
import numpy as np
from sklearn.utils import shuffle
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tf_utils import random_mini_batches
import matplotlib.pyplot as plt
def generate_dataset():
np.random.seed(2)
# positive class samples
d1_x = np.random.normal(5, 10, 1000)
d1_y = np.random.normal(5, 2, 1000)
d2_x = np.random.normal(40, 20, 1000)
d2_y = np.random.normal(2, 1, 1000)
# negative class samples
d3_x = np.random.normal(60, 5, 2000)
d3_y = np.random.normal(10, 1, 2000)
plt.scatter(d1_x, d1_y, color='b')
plt.scatter(d2_x, d2_y, color='b')
plt.scatter(d3_x, d3_y, color='r')
Y = np.zeros((4000, 1))
d_x = np.concatenate([d1_x, d2_x, d3_x])
d_y = np.concatenate([d1_y, d2_y, d3_y])
d_x = d_x.reshape(d_x.shape[0], 1)
d_y = d_y.reshape(d_y.shape[0], 1)
X = np.concatenate([d_x, d_y], axis=1)
Y[2000:] = 1
return X, Y
# define a tensorflow model 5-3-1 with two hideen layers and the output
being scalar
costs = []
print_cost = True
learning_rate = .0009
minibatch_size = 15
num_epochs = 1000
XX, YY = generate_dataset()
XX, YY = shuffle(XX, YY)
X_norm = normalize(XX)
X_train, X_test, y_train, y_test = train_test_split(X_norm, YY,
test_size=0.2, random_state=42)
X_train = np.transpose(X_train)
y_train = np.transpose(y_train)
X_test = np.transpose(X_test)
y_test = np.transpose(y_test)
# define train and test sets
m = XX.shape[1] # input dimension
n = YY.shape[1] # output dimension
X = tf.placeholder(tf.float32, shape = [m, None], name = 'X')
y = tf.placeholder(tf.float32, shape = [n, None], name = 'y')
# model parameters
n1 = 3 # output dimension of the first hidden layer
#n2 = 4 # output dimension of the second hidden layer
#n3 = 2
W1 = tf.get_variable("W1", [n1, m],
initializer=tf.contrib.layers.xavier_initializer(seed=1))
b1 = tf.get_variable("b1", [n1 ,1], initializer=tf.zeros_initializer)
#W2 = tf.get_variable("W2", [n2, n1],
initializer=tf.contrib.layers.xavier_initializer(seed=1))
#b2 = tf.get_variable("b2", [n2, 1], initializer=tf.zeros_initializer)
#W3 = tf.get_variable("W3", [n3, n2],
initializer=tf.contrib.layers.xavier_initializer(seed=1))
#b3 = tf.get_variable("b3", [n3, 1], initializer=tf.zeros_initializer)
W4 = tf.get_variable("W4", [n, n1],
initializer=tf.contrib.layers.xavier_initializer(seed=1))
b4 = tf.get_variable("b4", [n, 1], initializer=tf.zeros_initializer)
# forward propagation
z1 = tf.add(tf.matmul(W1, X), b1)
a1 = tf.nn.relu(z1)
#z2 = tf.add(tf.matmul(W2, a1), b2)
#a2 = tf.nn.relu(z2)
#z3 = tf.add(tf.matmul(W3, a2), b3)
#a3 = tf.nn.relu(z3)
z4 = tf.add(tf.matmul(W4, a1), b4)
pred = tf.nn.sigmoid(z4)
# cost function
cost = tf.reduce_mean(tf.losses.log_loss(labels=y, predictions=pred)) #
logit is the probability estimate given by the model --> this is what is used inside the formula, not the net input z
# ADAM optimizer
optimizer =
tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# metrics
correct_prediction = tf.less_equal(tf.abs(pred - y), 0.5)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
init = tf.global_variables_initializer()
with tf.Session() as sess:
seed = 1
sess.run(init)
for epoch in range(num_epochs):
epoch_cost = 0
seed += 1
num_minibatches = int(X_train.shape[0] / minibatch_size)
minibatches = random_mini_batches(X_train, y_train, minibatch_size, seed)
for minibatch in minibatches:
(minibatch_X, minibatch_Y) = minibatch
_, minibatch_cost = sess.run([optimizer, cost], feed_dict={X:minibatch_X, y:minibatch_Y})
epoch_cost += minibatch_cost / minibatch_size
# Print the cost every epoch
if print_cost == True and epoch % 100 == 0:
print("Cost after epoch %i: %f" % (epoch, epoch_cost))
if print_cost == True and epoch % minibatch_size == 0:
costs.append(epoch_cost)
#plt.plot(costs)
#plt.show()
cp, val_accuracy = sess.run([correct_prediction, accuracy], feed_dict={X: X_test, y: y_test})
# plot the cost
# plt.plot(np.squeeze(costs))
# plt.ylabel('cost'), feed_dict={X: X_test, y: y_test})
# plt.xlabel('iterations (per fives)')
# plt.title("Learning rate =" + str(learning_rate))
# plt.show()
cmap = plt.get_cmap('Paired')
# Define region of interest by data limits
xmin, xmax = min(XX[:, 0]) - 1, max(XX[:, 0]) + 1
ymin, ymax = min(XX[:, 1]) - 1, max(XX[:, 1]) + 1
steps = 100
x_span = np.linspace(xmin, xmax, steps)
y_span = np.linspace(ymin, ymax, steps)
xx, yy = np.meshgrid(x_span, y_span)
A = np.concatenate([[xx.ravel()], [yy.ravel()]], axis=0)
A = normalize(A, axis=0)
# Make predictions across region of interest
predictions = sess.run(pred, feed_dict={X: A})
# Plot decision boundary in region of interest
z = predictions.reshape(xx.shape)
plt.contourf(xx, yy, z, cmap=cmap, alpha=.5)
plt.show()
# Get predicted labels on training data and plot
#train_labels = model.predict(X)
#ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap, lw=0)