Please help me implement backprop in Python
Edit 2: New training set.

Inputs:
[
[0.0, 0.0],
[0.0, 1.0],
[0.0, 2.0],
[0.0, 3.0],
[0.0, 4.0],
[1.0, 0.0],
[1.0, 1.0],
[1.0, 2.0],
[1.0, 3.0],
[1.0, 4.0],
[2.0, 0.0],
[2.0, 1.0],
[2.0, 2.0],
[2.0, 3.0],
[2.0, 4.0],
[3.0, 0.0],
[3.0, 1.0],
[3.0, 2.0],
[3.0, 3.0],
[3.0, 4.0],
[4.0, 0.0],
[4.0, 1.0],
[4.0, 2.0],
[4.0, 3.0],
[4.0, 4.0]
]
Outputs:
[
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[1.0],
[1.0],
[0.0],
[0.0],
[0.0],
[1.0],
[1.0]
]
Edit 1:
I have updated the question with my latest code. I fixed a few minor issues, but after the network has learned I am still getting the same output for every combination of inputs.

The backprop algorithm is explained here:

Yes, this is a homework assignment, to make that clear from the start. I am supposed to implement a simple backpropagation algorithm on a simple neural network. I chose Python as the language for this task, and I chose a neural network with 3 layers: 1 input layer, 1 hidden layer, 1 output layer:
O O
O
O O
Each input neuron receives an integer value, and the output neuron produces either a 1 or a 0.
Here is my entire implementation (it is a bit long). Below it, I pick out the shorter relevant snippets where I think the error might be located:
import math
import random
#------------------------------ class definitions
class Weight:
def __init__(self, fromNeuron, toNeuron):
self.value = random.uniform(-0.5, 0.5)
self.fromNeuron = fromNeuron
self.toNeuron = toNeuron
fromNeuron.outputWeights.append(self)
toNeuron.inputWeights.append(self)
self.delta = 0.0 # delta value; this accumulates and, after each training cycle, is used to adjust the weight value
def calculateDelta(self, network):
self.delta += self.fromNeuron.value * self.toNeuron.error
class Neuron:
def __init__(self):
self.value = 0.0 # the output
self.idealValue = 0.0 # the ideal output
self.error = 0.0 # error between output and ideal output
self.inputWeights = []
self.outputWeights = []
def activate(self, network):
x = 0.0
for weight in self.inputWeights:
x += weight.value * weight.fromNeuron.value
# sigmoid function
if x < -320:
self.value = 0
elif x > 320:
self.value = 1
else:
self.value = 1 / (1 + math.exp(-x))
class Layer:
def __init__(self, neurons):
self.neurons = neurons
def activate(self, network):
for neuron in self.neurons:
neuron.activate(network)
class Network:
def __init__(self, layers, learningRate):
self.layers = layers
self.learningRate = learningRate # the rate at which the network learns
self.weights = []
for hiddenNeuron in self.layers[1].neurons:
for inputNeuron in self.layers[0].neurons:
self.weights.append(Weight(inputNeuron, hiddenNeuron))
for outputNeuron in self.layers[2].neurons:
self.weights.append(Weight(hiddenNeuron, outputNeuron))
def setInputs(self, inputs):
self.layers[0].neurons[0].value = float(inputs[0])
self.layers[0].neurons[1].value = float(inputs[1])
def setExpectedOutputs(self, expectedOutputs):
self.layers[2].neurons[0].idealValue = expectedOutputs[0]
def calculateOutputs(self, expectedOutputs):
self.setExpectedOutputs(expectedOutputs)
self.layers[1].activate(self) # activation function for hidden layer
self.layers[2].activate(self) # activation function for output layer
def calculateOutputErrors(self):
for neuron in self.layers[2].neurons:
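# delta_out = (target - output) * sigmoid'(x); for the sigmoid,
# sigmoid'(x) can be written as output * (1 - output)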
neuron.error = (neuron.idealValue - neuron.value) * neuron.value * (1 - neuron.value)
def calculateHiddenErrors(self):
for neuron in self.layers[1].neurons:
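# backpropagation step: sum the downstream deltas weighted by the
# connecting weights, then scale by the sigmoid derivative below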
error = 0.0
for weight in neuron.outputWeights:
error += weight.toNeuron.error * weight.value
neuron.error = error * neuron.value * (1 - neuron.value)
def calculateDeltas(self):
for weight in self.weights:
weight.calculateDelta(self)
def train(self, inputs, expectedOutputs):
self.setInputs(inputs)
self.calculateOutputs(expectedOutputs)
self.calculateOutputErrors()
self.calculateHiddenErrors()
self.calculateDeltas()
def learn(self):
for weight in self.weights:
weight.value += self.learningRate * weight.delta
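# NOTE: weight.delta is never cleared here, so gradients keep accumulating
# across epochs; the general flow further below adds a separate
# resetDeltas() step for exactly this.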
def calculateSingleOutput(self, inputs):
self.setInputs(inputs)
self.layers[1].activate(self)
self.layers[2].activate(self)
#return round(self.layers[2].neurons[0].value, 0)
return self.layers[2].neurons[0].value
#------------------------------ initialize objects etc
inputLayer = Layer([Neuron() for n in range(2)])
hiddenLayer = Layer([Neuron() for n in range(100)])
outputLayer = Layer([Neuron() for n in range(1)])
learningRate = 0.5
network = Network([inputLayer, hiddenLayer, outputLayer], learningRate)
# just for debugging, the real training set is much larger
trainingInputs = [
[0.0, 0.0],
[1.0, 0.0],
[2.0, 0.0],
[0.0, 1.0],
[1.0, 1.0],
[2.0, 1.0],
[0.0, 2.0],
[1.0, 2.0],
[2.0, 2.0]
]
trainingOutputs = [
[0.0],
[1.0],
[1.0],
[0.0],
[1.0],
[0.0],
[0.0],
[0.0],
[1.0]
]
#------------------------------ let's train
for i in range(500):
for j in range(len(trainingOutputs)):
network.train(trainingInputs[j], trainingOutputs[j])
network.learn()
#------------------------------ let's check
for pattern in trainingInputs:
print network.calculateSingleOutput(pattern)
Here is how I calculate the deltas:
def calculateDelta(self, network):
self.delta += self.getFromNeuron(network).value * self.getToNeuron(network).error
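In other words, each weight accumulates the batch-gradient term input * delta, and learn() later applies it scaled by the learning rate. A minimal hand-worked sketch of one such update (my own illustration with made-up values, not part of the original code):

import math

def sigmoid(x):
    return 1.0 / (1.0 + math.exp(-x))

# One update for a single weight w from input neuron A straight to an
# output neuron B, assuming A.value = 2.0, target = 1.0, w = 0.3 and
# learning rate = 0.5 (all values arbitrary, for illustration only).
a_value = 2.0
w = 0.3
b_value = sigmoid(w * a_value)                        # forward pass: ~0.6457
b_error = (1.0 - b_value) * b_value * (1.0 - b_value) # output delta: ~0.0811
delta = a_value * b_error                             # accumulated gradient: ~0.1621
w += 0.5 * delta                                      # learn(): w is now ~0.381
print(w)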
Here is the general flow of my algorithm:
for i in range(numberOfIterations):
for k,expectedOutput in trainingSet.iteritems():
coordinates = k.split(",")
network.setInputs((float(coordinates[0]), float(coordinates[1])))
network.calculateOutputs([float(expectedOutput)])
network.calculateOutputErrors()
network.calculateHiddenErrors()
network.calculateDeltas()
oldWeights = network.weights
network.adjustWeights()
network.resetDeltas()
print "Iteration ", i
j = 0
for weight in network.weights:
print "Weight W", weight.i, weight.j, ": ", oldWeights[j].value, " ............ Adjusted value : ", weight.value
j += 1
The last two lines of the output are:
0.552785449458 # this should be close to 1
0.552785449458 # this should be close to 0
It actually returns the same output value for every combination of inputs. Am I missing something?

It looks like what you are getting is almost the neuron's initial state (almost exactly self.idealValue). Maybe you should not initialize that neuron before actual data is supplied.
Edit: OK, I dug deeper into the code and simplified it (the simplified version is posted below). Basically, your code has two small bugs (which look like things you simply overlooked), but they definitely stop the network from working:
- In the learning phase, you forgot to set the expectedOutput value on the output layer. Without this the network certainly cannot learn anything and always stays stuck at the initial idealValue. (That is the behavior I saw on my first reading.) This can even be spotted in your description of the training steps (one of the few cases I know of where posting the code actually hid the bug instead of making it obvious). You fixed this in your edit.
- When activating the network in calculateSingleOutput, you forgot to activate the hidden layer. (Both fixes are sketched right below.)
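A minimal sketch of the two fixes, using the method names from the question's code (both are already reflected in the listings in this post):

# Fix 1 (a Network method): copy the expected output into the output layer
# before backprop; otherwise idealValue stays at its initial 0.0 forever.
def setExpectedOutputs(self, expectedOutputs):
    self.layers[2].neurons[0].idealValue = expectedOutputs[0]

# Fix 2 (a Network method): activate the hidden layer too, not only the
# output layer.
def calculateSingleOutput(self, inputs):
    self.setInputs(inputs)
    self.layers[1].activate(self)  # this call was the missing piece
    self.layers[2].activate(self)
    return self.layers[2].neurons[0].value

Here is the simplified version I mentioned: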
import math
"""
A simple backprop neural network. It has 3 layers:
Input layer: 2 neurons
Hidden layer: 2 neurons
Output layer: 1 neuron
"""
class Weight:
"""
Class representing a weight between two neurons
"""
def __init__(self, value, from_neuron, to_neuron):
self.value = value
self.from_neuron = from_neuron
from_neuron.outputWeights.append(self)
self.to_neuron = to_neuron
to_neuron.inputWeights.append(self)
# delta value, this will accumulate and after each training cycle
# will be used to adjust the weight value
self.delta = 0.0
class Neuron:
"""
Class representing a neuron.
"""
def __init__(self):
self.value = 0.0 # the output
self.idealValue = 0.0 # the ideal output
self.error = 0.0 # error between output and ideal output
self.inputWeights = [] # weights that end in the neuron
self.outputWeights = [] # weights that start in the neuron
def activate(self):
"""
Compute the neuron's activation: the sigmoid of the weighted sum
of the values of the neurons feeding into it.
"""
x = 0.0
for weight in self.inputWeights:
x += weight.value * weight.from_neuron.value
# sigmoid function
self.value = 1 / (1 + math.exp(-x))
class Network:
"""
Class representing a whole neural network. Contains layers.
"""
def __init__(self, layers, learningRate, weights):
self.layers = layers
self.learningRate = learningRate # the rate at which the network learns
self.weights = weights
def training(self, entries, expectedOutput):
for i in range(len(entries)):
self.layers[0][i].value = entries[i]
for i in range(len(expectedOutput)):
self.layers[2][i].idealValue = expectedOutput[i]
for layer in self.layers[1:]:
for n in layer:
n.activate()
for n in self.layers[2]:
error = (n.idealValue - n.value) * n.value * (1 - n.value)
n.error = error
for n in self.layers[1]:
error = 0.0
for w in n.outputWeights:
error += w.to_neuron.error * w.value
n.error = error * n.value * (1 - n.value) # scale by the sigmoid derivative, as in the original code
for w in self.weights:
w.delta += w.from_neuron.value * w.to_neuron.error
def updateWeights(self):
for w in self.weights:
w.value += self.learningRate * w.delta
def calculateSingleOutput(self, entries):
"""
Calculate a single output for input values.
This will be used to debug the already learned network after training.
"""
for i in range(len(entries)):
self.layers[0][i].value = entries[i]
# activate the hidden and output layers
for layer in self.layers[1:]:
for n in layer:
n.activate()
print self.layers[2][0].value
#------------------------------ initialize objects etc
neurons = [Neuron() for n in range(5)]
w1 = Weight(-0.79, neurons[0], neurons[2])
w2 = Weight( 0.51, neurons[0], neurons[3])
w3 = Weight( 0.27, neurons[1], neurons[2])
w4 = Weight(-0.48, neurons[1], neurons[3])
w5 = Weight(-0.33, neurons[2], neurons[4])
w6 = Weight( 0.09, neurons[3], neurons[4])
weights = [w1, w2, w3, w4, w5, w6]
inputLayer = [neurons[0], neurons[1]]
hiddenLayer = [neurons[2], neurons[3]]
outputLayer = [neurons[4]]
learningRate = 0.3
network = Network([inputLayer, hiddenLayer, outputLayer], learningRate, weights)
# just for debugging, the real training set is much larger
trainingSet = [([0.0,0.0],[0.0]),
([1.0,0.0],[1.0]),
([2.0,0.0],[1.0]),
([0.0,1.0],[0.0]),
([1.0,1.0],[1.0]),
([2.0,1.0],[0.0]),
([0.0,2.0],[0.0]),
([1.0,2.0],[0.0]),
([2.0,2.0],[1.0])]
#------------------------------ let's train
for i in range(100): # training iterations
for entries, expectedOutput in trainingSet:
network.training(entries, expectedOutput)
network.updateWeights()
#network has learned, let's check
network.calculateSingleOutput((1, 0)) # this should be close to 1
network.calculateSingleOutput((0, 0)) # this should be close to 0
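One more detail worth flagging (my observation, not part of the original answer): in this simplified version w.delta is never reset after updateWeights, so gradient contributions keep accumulating across epochs. The general flow quoted earlier has a resetDeltas() step for this; a sketch of updateWeights with the reset folded in:

def updateWeights(self):
    for w in self.weights:
        w.value += self.learningRate * w.delta
        w.delta = 0.0  # clear the accumulator for the next training pass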
By the way, there is a third problem that I did not correct (though it is easy to fix). If x is too large or too small (> 320 or < -320), math.exp() will raise an exception. This happens if you run many training iterations (say, several thousand). The simplest correction I can see is to check the value of x and, if it is too large or too small, set the neuron's value to 1 or 0 respectively, since those are the limit values. (A sketch of such a clamped sigmoid follows the comments below.)

Comments:
- I think you need to do more of the work on this yourself; this is more code than you can reasonably expect people to debug for you. Add logging.log statements in all the important places so you can trace the edge weights, and run the numbers through a few steps with a calculator to see where they go off. Please also read the following: . For Bayesian filters this is a standard problem with a standard solution, and you appear to have the same standard problem with very, very small floating-point numbers.
- @Katrielex: Yes, of course I will also keep working on this myself.
- @S.Lott: The problem cannot start there, because the OP is already using logarithms for the weights, which is why math.exp is needed. That leads to another problem: when x becomes too small or too large, Python raises an exception, but that is unrelated to the spurious behaviour observed (it is just a plain old bug). Simply running the activation function over all the neurons of self.layers[2] in calculateSingleOutput fixes it. But beyond the bug fix, convergence is not as good as in the first post-edit version, which is surprising; I cannot see which change has that effect.
- OK, I will try it tomorrow. Thank you very much. Yes, I think I overcomplicated it. I just wanted to avoid procedural programming and do everything in OOP, so I got carried away. By the way, try print network.calculateSingleOutput(2.0, 1.0). It will print an incorrect output :)
- @Richard Knop: Do you mean network.calculateSingleOutput([2.0, 1.0])? (The entries parameter expects a single input made of two numbers; your version would give a syntax error.) After 100 learning iterations it gives 0.04, which is not exactly zero as expected, but I would not say it is incorrect; it is still close to zero.
- @Richard Knop: OK, I see it now. It works with my version but not with yours. I suppose that is something EDIT1 changed, since I refactored starting from the initial version. The cause of the problem is (again) not obvious to me; you will have to check the differences yourself.
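A minimal numerically-safe sigmoid along these lines, mirroring the clamping already used in the question's first listing (the +/-320 threshold is the question's conservative choice; in CPython, math.exp only overflows once its argument exceeds roughly 709):

import math

def safe_sigmoid(x):
    # Clamp extreme inputs to the sigmoid's limit values instead of
    # letting math.exp() overflow.
    if x < -320:
        return 0.0
    if x > 320:
        return 1.0
    return 1.0 / (1.0 + math.exp(-x))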