Neural network 优化自编码二层人工神经网络
我最近开始学习神经网络,并决定编写自己的简单2层神经网络,并使用MNIST数据集对其进行基准测试。我曾尝试使用批量SGD编程,其中批量大小由用户提供。我的代码如下:Neural network 优化自编码二层人工神经网络,neural-network,backpropagation,Neural Network,Backpropagation,我最近开始学习神经网络,并决定编写自己的简单2层神经网络,并使用MNIST数据集对其进行基准测试。我曾尝试使用批量SGD编程,其中批量大小由用户提供。我的代码如下: class NeuralNetwork: def __init__(self, inodes, hnodes, outnodes, activation_func, learning_rate): self.inodes = inodes self.hnodes = hnodes
class NeuralNetwork:
def __init__(self, inodes, hnodes, outnodes, activation_func, learning_rate):
self.inodes = inodes
self.hnodes = hnodes
self.onodes = outnodes
self.activation_function = activation_func
self.lr = learning_rate
self.wih = np.random.randn(self.hnodes, self.inodes) / pow(self.inodes, 0.5)
self.who = np.random.randn(self.onodes, self.hnodes) / pow(self.hnodes, 0.5)
def train(self, training_data, target_labels, batch=1, l2_penalty=0, verbose=False):
batch_size = len(training_data) / batch
print "Starting to train........"
for i in range(batch):
train_data_batch = training_data[batch_size*i : batch_size*(i+1)]
label_batch = target_labels[batch_size*i : batch_size*(i+1)]
batch_error = self.train_batch(train_data_batch, label_batch, l2_penalty)
if verbose:
print "Batch : " + str(i+1) + " ; Error : " + str(batch_error)
print "..........Finished!"
def train_batch(self, training_data, target_labels, l2_penalty=0):
train = np.array(training_data, ndmin=2).T
label = np.array(target_labels, ndmin=2).T
inputs = train # IxN
hidden_input = np.dot(self.wih, inputs) # (HxI).(IxN) = HxN
hidden_ouputs = self.activation_function(hidden_input) # (HxN) -> (HxN)
final_input = np.dot(self.who, hidden_ouputs) # (OxH).(HxN) -> OxN
final_outputs = self.activation_function(final_input) # OxN -> OxN
final_outputs = np.exp(final_outputs) # OxN
for f in range(len(final_outputs)):
final_outputs[f] = final_outputs[f] / sum(final_outputs[f])
final_error_wrt_out = label - final_outputs # OxN
hidden_error_wrt_out = np.dot(self.who.T, final_outputs) # HxN
final_in_wrt_out = self.activation_function(final_input, der=True) # OxN
hidden_in_wrt_out = self.activation_function(hidden_input, der=True) # HxN
grad_who = np.dot(final_error_wrt_out * final_in_wrt_out, hidden_ouputs.T) # (OxN).(NxH) -> OxH
grad_wih = np.dot(hidden_error_wrt_out * hidden_in_wrt_out, inputs.T) # (HxN).(NxI) -> HxI
self.who = self.who - self.lr * (grad_who + l2_penalty*(self.who))
self.wih = self.wih - self.lr * (grad_wih + l2_penalty*(self.wih))
return np.sum(final_error_wrt_out * final_error_wrt_out) / (2*len(training_data))
def query(self, inputs):
if len(inputs) != self.inodes:
print "Invalid input size"
return
inputs = np.array(inputs)
hidden_input = np.dot(self.wih, inputs)
hidden_ouputs = self.activation_function(hidden_input)
final_input = np.dot(self.who, hidden_ouputs)
final_outputs = self.activation_function(final_input)
final_outputs = np.exp(final_outputs)
total = sum(final_outputs)
probs = final_outputs / total
return probs
我发现了塔里克·拉希德(Tariq Rashid)的一个类似代码,它给出了大约95%的准确率。另一方面,我的代码只给出了10%
我参考了关于反向传播的各种教程,多次尝试调试代码,但未能提高我的准确性。如果能深入了解这个问题,我将不胜感激
编辑1:
这是马蒂克的回答
我以前在softmax层中使用MSE而不是负对数似然误差,这是我的一个错误。根据回答,我将列车功能更改如下:
def train_batch(self, training_data, target_labels, l2_penalty=0):
train = np.array(training_data, ndmin=2).T
label = np.array(target_labels, ndmin=2).T
inputs = train # IxN
hidden_input = np.dot(self.wih, inputs) # (HxI).(IxN) = HxN
hidden_ouputs = self.activation_function(hidden_input) # (HxN) -> (HxN)
final_input = np.dot(self.who, hidden_ouputs) # (OxH).(HxN) -> OxN
final_outputs = self.activation_function(final_input) # OxN -> OxN
final_outputs = np.exp(final_outputs) # OxN
for f in range(len(final_outputs)):
final_outputs[f] = final_outputs[f] / sum(final_outputs[f])
error = label - final_outputs
final_error_wrt_out = final_outputs - 1 # OxN
hidden_error_wrt_out = np.dot(self.who.T, -np.log(final_outputs)) # (HxO).(OxN) -> HxN
final_in_wrt_out = self.activation_function(final_input, der=True) # OxN
hidden_in_wrt_out = self.activation_function(hidden_input, der=True) # HxN
grad_who = np.dot(final_error_wrt_out * final_in_wrt_out, hidden_ouputs.T) # (OxN).(NxH) -> OxH
grad_wih = np.dot(hidden_error_wrt_out * hidden_in_wrt_out, inputs.T) # (HxN).(NxI) -> HxI
self.who = self.who - self.lr * (grad_who + l2_penalty*(self.who))
self.wih = self.wih - self.lr * (grad_wih + l2_penalty*(self.wih))
return np.sum(final_error_wrt_out * final_error_wrt_out) / (2*len(training_data))
但是,这并没有带来任何性能提升。我不认为您在训练步骤中反向传播了softmax层。 如果我没有弄错的话,我相信softmax的梯度可以简单地计算为:
grad_softmax = final_outputs - 1
@mattdeak这确实是softmax回归,我最终要做的是np.exp(最终输出)/np.sum(最终输出)。该结果在final_outputs=np.exp(final_outputs)之后立即使用“for”循环存储在“probs”变量中。我发现在多行上执行此操作更容易,因为它帮助我更好地调试程序。