试图用Python编写自己的神经网络
上学期我在斯坦福德上了一门在线机器学习课程,由吴教授讲授。我认为这是非常有益的。为了更好地复习/理解神经网络,我试着用python编写自己的。这是:试图用Python编写自己的神经网络,python,machine-learning,neural-network,Python,Machine Learning,Neural Network,上学期我在斯坦福德上了一门在线机器学习课程,由吴教授讲授。我认为这是非常有益的。为了更好地复习/理解神经网络,我试着用python编写自己的。这是: import numpy class NN: def __init__(self, sl): #sl = number of units (not counting bias unit) in layer l self.sl = sl self.layers = len(sl)
import numpy
class NN:
def __init__(self, sl):
#sl = number of units (not counting bias unit) in layer l
self.sl = sl
self.layers = len(sl)
#Create weights
self.weights = []
for idx in range(1, self.layers):
self.weights.append(numpy.matrix(numpy.random.rand(self.sl[idx-1]+1, self.sl[idx])/5))
self.cost = []
def update(self, input):
if input.shape[1] != self.sl[0]:
raise ValueError, 'The first layer must have a node for every feature'
self.z = []
self.a = []
#Input activations. I'm expecting inputs as numpy matrix (Examples x Featrues)
self.a.append(numpy.hstack((numpy.ones((input.shape[0], 1)), input)))#Set inputs ai + bias unit
#Hidden activations
for weight in self.weights:
self.z.append(self.a[-1]*weight)
self.a.append(numpy.hstack((numpy.ones((self.z[-1].shape[0], 1)), numpy.tanh(self.z[-1])))) #tanh is a fancy sigmoid
#Output activation
self.a[-1] = self.z[-1] #Not logistic regression thus no sigmoid function
del self.z[-1]
def backPropagate(self, targets, lamda):
m = float(targets.shape[0]) #m is number of examples
#Calculate cost
Cost = -1/m*sum(numpy.power(self.a[-1] - targets, 2))
for weight in self.weights:
Cost = Cost + lamda/(2*m)*numpy.power(weight[1:, :], 2).sum()
self.cost.append(abs(float(Cost)))
#Calculate error for each layer
delta = []
delta.append(self.a[-1] - targets)
for idx in range(1, self.layers-1): #No delta for the input layer because it is the input
weight = self.weights[-idx][1:, :] #Ignore bias unit
dsigmoid = numpy.multiply(self.a[-(idx+1)][:,1:], 1-self.a[-(idx+1)][:,1:]) #dsigmoid is a(l).*(1-a(l))
delta.append(numpy.multiply(delta[-1]*weight.T, dsigmoid)) #Ignore Regularization
Delta = []
for idx in range(self.layers-1):
Delta.append(self.a[idx].T*delta[-(idx+1)])
self.weight_gradient = []
for idx in range(len(Delta)):
self.weight_gradient.append(numpy.nan_to_num(1/m*Delta[idx] + numpy.vstack((numpy.zeros((1, self.weights[idx].shape[1])), lamda/m*self.weights[idx][1:, :]))))
def train(self, input, targets, alpha, lamda, iterations = 1000):
#alpha: learning rate
#lamda: regularization term
for i in range(iterations):
self.update(input)
self.backPropagate(targets, lamda)
self.weights = [self.weights[idx] - alpha*self.weight_gradient[idx] for idx in range(len(self.weights))]
def predict(self, input):
self.update(input)
return self.a[-1]
但是它不起作用=(。检查成本与迭代我可以看到成本中的一点,对a的预测是一样的。有人能帮我理解为什么我的神经网络不收敛吗
谢谢,
很抱歉代码太多(也许有人会发现它很有用)
更新:
我没有使用随机数据,而是从UCI机器学习存储库中获得了一些结构化数据。特定的数据集是葡萄牙东北部地区森林火灾的燃烧区域,使用气象和其他数据:我修改了数据,使天数和月份都是数字:
为什么成本在4000左右见底,为什么数据与预测没有任何趋势?您可以在这里看到图表:(对不起,我没有足够的代表添加评论,所以我将继续发布答案。)
是的,这看起来很奇怪。但是,如果在培训后生成一个新的矩阵B:
B = numpy.random.rand(5, 4)/5
Targets = B*X
print n.predict(B)
print B*X
它可以很好地工作(大多数时候-有时它仍然会给出平均值(目标值)作为答案)。
注意:在我的示例中,我从使用100个特性切换到仅使用4个特性
此外,我不认为在数据集中的50个元素上运行5000次迭代会对您有任何好处。您通常应该尽可能多地使用培训数据-在这里,您可以随心所欲地使用,但您使用的示例比您的功能更少
这很有趣,我会再考虑一下:)我用你的网络做了一个更简单的例子——作为输入,我提供了两个数字,并期望它们的总和作为输出。它或多或少工作正常。由于一些原因,神经网络无法对森林火灾数据进行训练 首先,numpy.tanh()sigmoid函数的行为不符合预期。代码应更改为:
self.a.append(numpy.hstack((numpy.ones((self.z[-1].shape[0], 1)),numpy.tanh(self.z[-1])))) #tanh is a fancy sigmoid
matplotlib.pyplot.scatter(n.predict(nfeatures), targets)
致:
Second numpy和matplotlib玩得不好。numpy矩阵似乎是向后绘制的。这可以通过使用matrix.tolist()来解决。代码更改自:
self.a.append(numpy.hstack((numpy.ones((self.z[-1].shape[0], 1)),numpy.tanh(self.z[-1])))) #tanh is a fancy sigmoid
matplotlib.pyplot.scatter(n.predict(nfeatures), targets)
致:
最后,节点的数量应该大约是示例大小的10%。与其使用10个节点,不如使用50个节点
下面发布了工作神经网络代码,其中包含一个新函数autoparam,该函数试图找到最佳学习速率和正则化常数。您可以在此处看到森林火灾成本与迭代以及数据与预测的关系图:
谢谢你的阅读!我希望我的神经网络能帮助人们
import numpy
class NN:
def __init__(self, sl):
#sl = number of units (not counting bias unit) in layer l
self.sl = sl
self.layers = len(sl)
#Create weights
self.weights = []
for idx in range(1, self.layers):
self.weights.append(numpy.matrix(numpy.random.rand(self.sl[idx-1]+1, self.sl[idx]))/5)
self.cost = []
def update(self, input):
if input.shape[1] != self.sl[0]:
raise ValueError, 'The first layer must have a node for every feature'
self.z = []
self.a = []
#Input activations. Expected inputs as numpy matrix (Examples x Featrues)
self.a.append(numpy.hstack((numpy.ones((input.shape[0], 1)), input)))#Set inputs ai + bias unit
#Hidden activations
for weight in self.weights:
self.z.append(self.a[-1]*weight)
self.a.append(numpy.hstack((numpy.ones((self.z[-1].shape[0], 1)), 1/(1+numpy.exp(-self.z[-1]))))) #sigmoid
#Output activation
self.a[-1] = self.z[-1] #Not logistic regression thus no sigmoid function
del self.z[-1]
def backPropagate(self, targets, lamda):
m = float(targets.shape[0]) #m is number of examples
#Calculate cost
Cost = -1/m*sum(numpy.power(self.a[-1] - targets, 2))
for weight in self.weights:
Cost = Cost + lamda/(2*m)*numpy.power(weight[1:, :], 2).sum()
self.cost.append(abs(float(Cost)))
#Calculate error for each layer
delta = []
delta.append(self.a[-1] - targets)
for idx in range(1, self.layers-1): #No delta for the input layer because it is the input
weight = self.weights[-idx][1:, :] #Ignore bias unit
dsigmoid = numpy.multiply(self.a[-(idx+1)][:,1:], 1-self.a[-(idx+1)][:,1:]) #dsigmoid is a(l).*(1-a(l))
delta.append(numpy.multiply(delta[-1]*weight.T, dsigmoid)) #Ignore Regularization
Delta = []
for idx in range(self.layers-1):
Delta.append(self.a[idx].T*delta[-(idx+1)])
self.weight_gradient = []
for idx in range(len(Delta)):
self.weight_gradient.append(numpy.nan_to_num(1/m*Delta[idx] + numpy.vstack((numpy.zeros((1, self.weights[idx].shape[1])), lamda/m*self.weights[idx][1:, :]))))
def train(self, input, targets, alpha, lamda, iterations = 1000):
#alpha: learning rate
#lamda: regularization term
for i in range(iterations):
self.update(input)
self.backPropagate(targets, lamda)
self.weights = [self.weights[idx] - alpha*self.weight_gradient[idx] for idx in range(len(self.weights))]
def autoparam(self, data, alpha = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3], lamda = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]):
#data: numpy matrix with targets in last column
#alpha: learning rate
#lamda: regularization term
#Create training, cross validation, and test sets
while 1:
try:
numpy.seterr(invalid = 'raise')
numpy.random.shuffle(data) #Shuffle data
training_set = data[0:data.shape[0]/10*6, 0:-1]
self.ntraining_set = (training_set-training_set.mean(axis=0))/training_set.std(axis=0)
self.training_tgt = numpy.matrix(data[0:data.shape[0]/10*6, -1]).T
cv_set = data[data.shape[0]/10*6:data.shape[0]/10*8, 0:-1]
self.ncv_set = (cv_set-cv_set.mean(axis=0))/cv_set.std(axis=0)
self.cv_tgt = numpy.matrix(data[data.shape[0]/10*6:data.shape[0]/10*8, -1]).T
test_set = data[data.shape[0]/10*8:, 0:-1]
self.ntest_set = (test_set-test_set.mean(axis=0))/test_set.std(axis=0)
self.test_tgt = numpy.matrix(data[data.shape[0]/10*8:, -1]).T
break
except FloatingPointError:
pass
numpy.seterr(invalid = 'warn')
cost = 999999
for i in alpha:
for j in lamda:
self.__init__(self.sl)
self.train(self.ntraining_set, self.training_tgt, i, j, 2000)
current_cost = 1/float(cv_set.shape[0])*sum(numpy.square(self.predict(self.ncv_set) - self.cv_tgt)).tolist()[0][0]
print current_cost
if current_cost < cost:
cost = current_cost
self.learning_rate = i
self.regularization = j
self.__init__(self.sl)
def predict(self, input):
self.update(input)
return self.a[-1]
我认为你的偏差应该从加权输入中减去(或设置为-1)。从我在你的代码中看到的,神经元添加了所有的输入,包括偏差(设置为+1。我在一些简单的示例上运行了您的代码,它似乎运行正常。您认为它为什么不起作用?在您的示例中,成本在最初的100次迭代中下降得非常快,然后保持不变,这就是我所说的预期行为。感谢您运行我的代码。您能检查一下您的tr吗n.predict(A)的输出是什么?对我来说,无论输入特征如何,所有预测值都是相同的(通常接近平均值(目标值)。例如,目标值=[4,5,6,7]n.predict(特征值)=[5.5,5.5,5.5,5.5]。将熊猫用于时间序列数据!您好。我将迭代次数减少到默认的1000次,并获得了更大的结构化数据集(518个示例)。这些不是统计数据,需要添加和学习偏差。您能解释一下为什么您认为这会有帮助吗?
import numpy
class NN:
def __init__(self, sl):
#sl = number of units (not counting bias unit) in layer l
self.sl = sl
self.layers = len(sl)
#Create weights
self.weights = []
for idx in range(1, self.layers):
self.weights.append(numpy.matrix(numpy.random.rand(self.sl[idx-1]+1, self.sl[idx]))/5)
self.cost = []
def update(self, input):
if input.shape[1] != self.sl[0]:
raise ValueError, 'The first layer must have a node for every feature'
self.z = []
self.a = []
#Input activations. Expected inputs as numpy matrix (Examples x Featrues)
self.a.append(numpy.hstack((numpy.ones((input.shape[0], 1)), input)))#Set inputs ai + bias unit
#Hidden activations
for weight in self.weights:
self.z.append(self.a[-1]*weight)
self.a.append(numpy.hstack((numpy.ones((self.z[-1].shape[0], 1)), 1/(1+numpy.exp(-self.z[-1]))))) #sigmoid
#Output activation
self.a[-1] = self.z[-1] #Not logistic regression thus no sigmoid function
del self.z[-1]
def backPropagate(self, targets, lamda):
m = float(targets.shape[0]) #m is number of examples
#Calculate cost
Cost = -1/m*sum(numpy.power(self.a[-1] - targets, 2))
for weight in self.weights:
Cost = Cost + lamda/(2*m)*numpy.power(weight[1:, :], 2).sum()
self.cost.append(abs(float(Cost)))
#Calculate error for each layer
delta = []
delta.append(self.a[-1] - targets)
for idx in range(1, self.layers-1): #No delta for the input layer because it is the input
weight = self.weights[-idx][1:, :] #Ignore bias unit
dsigmoid = numpy.multiply(self.a[-(idx+1)][:,1:], 1-self.a[-(idx+1)][:,1:]) #dsigmoid is a(l).*(1-a(l))
delta.append(numpy.multiply(delta[-1]*weight.T, dsigmoid)) #Ignore Regularization
Delta = []
for idx in range(self.layers-1):
Delta.append(self.a[idx].T*delta[-(idx+1)])
self.weight_gradient = []
for idx in range(len(Delta)):
self.weight_gradient.append(numpy.nan_to_num(1/m*Delta[idx] + numpy.vstack((numpy.zeros((1, self.weights[idx].shape[1])), lamda/m*self.weights[idx][1:, :]))))
def train(self, input, targets, alpha, lamda, iterations = 1000):
#alpha: learning rate
#lamda: regularization term
for i in range(iterations):
self.update(input)
self.backPropagate(targets, lamda)
self.weights = [self.weights[idx] - alpha*self.weight_gradient[idx] for idx in range(len(self.weights))]
def autoparam(self, data, alpha = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3], lamda = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]):
#data: numpy matrix with targets in last column
#alpha: learning rate
#lamda: regularization term
#Create training, cross validation, and test sets
while 1:
try:
numpy.seterr(invalid = 'raise')
numpy.random.shuffle(data) #Shuffle data
training_set = data[0:data.shape[0]/10*6, 0:-1]
self.ntraining_set = (training_set-training_set.mean(axis=0))/training_set.std(axis=0)
self.training_tgt = numpy.matrix(data[0:data.shape[0]/10*6, -1]).T
cv_set = data[data.shape[0]/10*6:data.shape[0]/10*8, 0:-1]
self.ncv_set = (cv_set-cv_set.mean(axis=0))/cv_set.std(axis=0)
self.cv_tgt = numpy.matrix(data[data.shape[0]/10*6:data.shape[0]/10*8, -1]).T
test_set = data[data.shape[0]/10*8:, 0:-1]
self.ntest_set = (test_set-test_set.mean(axis=0))/test_set.std(axis=0)
self.test_tgt = numpy.matrix(data[data.shape[0]/10*8:, -1]).T
break
except FloatingPointError:
pass
numpy.seterr(invalid = 'warn')
cost = 999999
for i in alpha:
for j in lamda:
self.__init__(self.sl)
self.train(self.ntraining_set, self.training_tgt, i, j, 2000)
current_cost = 1/float(cv_set.shape[0])*sum(numpy.square(self.predict(self.ncv_set) - self.cv_tgt)).tolist()[0][0]
print current_cost
if current_cost < cost:
cost = current_cost
self.learning_rate = i
self.regularization = j
self.__init__(self.sl)
def predict(self, input):
self.update(input)
return self.a[-1]
data = numpy.loadtxt(open('FF-data.csv', 'rb'), delimiter = ',', skiprows = 1)#Load
numpy.random.shuffle(data)
features = data[:,0:11]
nfeatures = (features-features.mean(axis=0))/features.std(axis=0)
targets = numpy.matrix(data[:, 12]).T
n = NN([11, 50, 1])
n.train(nfeatures, targets, 0.07, 0.0, 2000)
import matplotlib.pyplot
matplotlib.pyplot.subplot(221)
matplotlib.pyplot.plot(n.cost)
matplotlib.pyplot.title('Cost vs. Iteration')
matplotlib.pyplot.subplot(222)
matplotlib.pyplot.scatter(n.predict(nfeatures).tolist(), targets.tolist())
matplotlib.pyplot.plot(targets.tolist(), targets.tolist(), c = 'r')
matplotlib.pyplot.title('Data vs. Predicted')
matplotlib.pyplot.savefig('Report.png', format = 'png')
matplotlib.pyplot.close()