C++ 在 C++ 中从头开始让一个简单的神经网络工作
我一直在尝试让一个简单的双异或神经网络工作,但我在用反向传播来训练一个非常简单的前馈神经网络时遇到了问题。C++ 在 C++ 中从头开始让一个简单的神经网络工作,c++,neural-network,C++,Neural Network,我一直在尝试让一个简单的双异或神经网络工作,但我在用反向传播来训练一个非常简单的前馈神经网络时遇到了问题。 我一直在尝试按照指南来实现神经网络,但充其量只是做出了学习速度极慢的程序。 据我所知,神经网络: 通过对该神经元所有输入之和应用 sigmoid 函数来计算值,然后使用每个神经元的权重将其馈送到下一层。 在运行结束时,为输出神经元计算误差,然后使用权重,通过简单地乘以值并在每个神经元处求和,将误差反向传播。 当计算所有误差时,通过δ=连接权重*S形导数调整权重(神经元权重值将变为)*连
我一直在尝试按照指南来获得神经网络,但充其量只是制作了学习速度极慢的程序 据我所知,神经网络:
是我目前正在努力工作的一堆代码。我还有很多其他尝试,但我尝试使用的主要反向传播函数在 Net.cpp 的第 293 行。查看一下,它应该可以让您开始使用。我觉得您正在与 backprop 进行斗争,您上面描述的内容与我对其工作方式的理解不太相符,而且你的描述有点模棱两可。 将要反向传播的输出误差项计算为预测值与实际值之间的差值乘以传递函数的导数。正是这个误差值随后被向后传播。sigmoid 的导数可以简单地计算为 y(1-y),其中 y 是输出值。网络上有很多这方面的证明。 对于内层上的节点,将输出误差乘以两个节点之间的权重,并将所有这些乘积相加,作为从外层传播到内层节点的总误差。然后,将与内部节点相关联的误差乘以应用于原始输出值的传递函数的导数。下面是一些伪代码:
total_error = sum(output_errors * weights)
node_error = sigmoid_derivative(node_output) * total_error
然后,该错误以相同的方式通过输入层权重向后传播
使用这些误差项和节点的输出值调整权重
weight_change = outer_error * inner_output_value
学习率很重要,因为输入数据中的每个模式/行/观察值都会计算权重变化。您希望缓和每一行的权重变化,这样任何一行都不会过度改变权重,并且所有行都会对权重产生影响。学习率会给你这个结果,你可以通过乘以它来调整权重的变化
weight_change = outer_error * inner_output_value * learning_rate
在不同时期(迭代)之间记住这些更改并将其中的一小部分添加到更改中也是正常的。所加的分数称为动量,它可以使你在误差面上没有太多变化的区域加速,在有细节的区域减慢速度
weight_change = (outer_error*inner_output_value*learning_rate) + (last_change*momentum)
有一些算法可以在训练过程中调整学习速度和动量
然后通过添加更改来更新权重
new_weight = old_weight + weight_change
我看了一下你们的代码,但并没有更正它,而是说我认为最好为你们描述一下back-prop,这样你们就可以自己编写代码了。如果你理解它,你也可以根据自己的情况调整它
祝你好运。这个开源代码怎么样?它定义了一个简单的单隐藏层网络(2 输入,2 隐藏,1 输出),并解决了 XOR 问题: 我写了一个简单的“教程”,你可以在下面查看。 这是感知器模型的简单实现。你可以把感知器想象成只有一个神经元的神经网络。当然还有代码,是我用 C++ 编写的,你可以拿来测试。我会一步一步地讲解代码,这样您就不会有任何问题。 虽然感知器并不是真正的“神经网络”,但如果你想入门,它确实很有帮助,可能会帮助你更好地理解完整的神经网络是如何工作的。 希望有帮助! 干杯^_^
在这个例子中,我将通过C++中的感知器模型的实现,以便您可以更好地了解它是如何工作的。 首先,写下一个简单的算法是一个很好的实践
// Algorithm: interface of the perceptron classifier (the steps it needs are
// listed right after this declaration).
class perceptron
{
public:
    // eta is the learning rate, epochs the number of passes over the training set.
    perceptron(float eta,int epochs);
    // Net input of one sample (weighted sum of the features plus the bias).
    float netInput(vector<float> X);
    // Step function on the net input: returns 1 or -1.
    int predict(vector<float> X);
    // Learns the weights from the training matrix X and the labels y.
    void fit(vector< vector<float> > X, vector<float> y);
private:
};
- 我们需要一个函数来计算网络的输入(例如,将输入乘以权重)
- 一个阶跃函数,我们可以得到1或-1的预测
- 以及一个函数,用于查找权重的理想值
// Bare skeleton of the perceptron class: both access sections are still empty
// at this step of the walkthrough.
class perceptron
{
public:
private:
};
现在,让我们添加所需的函数
// Perceptron classifier interface with the four functions the tutorial needs.
class perceptron
{
public:
// eta is the learning rate, epochs the number of passes over the training set.
perceptron(float eta,int epochs);
// Net input of one sample: weighted sum of its features plus the bias.
float netInput(vector<float> X);
// Step function applied to the net input; returns 1 or -1.
int predict(vector<float> X);
// Learns the weights from the training matrix X (one row per sample) and labels y.
void fit(vector< vector<float> > X, vector<float> y);
private:
};
正如你所看到的,我们将要做的是非常简单的事情。让我们继续讨论另一个简单的函数:预测函数(int predict(vector<float> X);)。记住,predict 函数所做的全部工作就是获取净输入:如果 netInput 大于 0,则返回 1,否则返回 -1
// Step activation: a sample is classified as 1 when its net input is strictly
// positive and as -1 otherwise.
int perceptron::predict(vector<float> X)
{
    if (netInput(X) > 0)
    {
        return 1;
    }
    return -1;
}
int perceptron::predict(vector<float> X)
{
return netInput(X) > 0 ? 1 : -1; //Step Function
}
注意,我们使用了一个内联if语句来简化我们的生活。以下是内联if语句的工作原理:
// Maps the net input of a sample onto the two class labels:
// strictly positive -> 1, everything else -> -1.
int perceptron::predict(vector<float> X)
{
    const bool fires = netInput(X) > 0;
    return fires ? 1 : -1;
}
// Net input of one sample: the bias m_w[0] plus every feature multiplied by
// its weight. Feature i uses m_w[i + 1] because slot 0 is taken by the bias.
float perceptron::netInput(vector<float> X)
{
    float weightedSum = m_w[0]; // start from the bias term
    int feature = 0;
    while (feature < X.size())
    {
        weightedSum += X[feature] * m_w[feature + 1];
        feature++;
    }
    return weightedSum;
}
// Trains the perceptron with the perceptron learning rule.
//
// X holds one training sample per row, y the matching labels (the tutorial
// uses -1 / 1). For every sample the weights move by
// eta * (label - prediction) * feature and the bias by eta * (label - prediction).
//
// Fixes over the earlier version:
//  - the bias is accumulated (+=) instead of being overwritten by the last
//    update, which made it forget everything learned before;
//  - an empty training set returns immediately instead of reading X[0];
//  - the weight vector is only (re)built when its size does not match the
//    feature count, so a second call no longer appends extra weights and then
//    reads past the end of each sample.
// NOTE(review): every row of X is assumed to have X[0].size() features and y
// at least X.size() entries - confirm at the call sites.
void perceptron::fit(vector< vector<float> > X, vector<float> y)
{
    if (X.empty())
    {
        return;
    }
    // One weight per feature plus one slot (index 0) for the bias term.
    if (m_w.size() != X[0].size() + 1)
    {
        m_w.assign(X[0].size() + 1, 0.0f);
    }
    for (int pass = 0; pass < m_epochs; pass++) // one pass per epoch
    {
        for (size_t j = 0; j < X.size(); j++) // every training sample
        {
            // Zero when the sample is already classified correctly.
            float update = m_eta * (y[j] - predict(X[j]));
            for (size_t w = 1; w < m_w.size(); w++)
            {
                m_w[w] += update * X[j][w - 1];
            }
            m_w[0] += update; // bias behaves like a weight on a constant input of 1
        }
    }
}
// Perceptron classifier: full declaration including persistence/debug helpers
// and the private training state.
class perceptron
{
public:
// eta is the learning rate, epochs the number of passes over the training set.
perceptron(float eta,int epochs);
// Net input of one sample: bias plus the weighted sum of its features.
float netInput(vector<float> X);
// Step function applied to the net input; returns 1 or -1.
int predict(vector<float> X);
// Learns the weights from the training matrix X (one row per sample) and labels y.
void fit(vector< vector<float> > X, vector<float> y);
// Prints the per-epoch error counts collected by fit().
void printErrors();
// Writes the weights to the given file, one value per line.
void exportWeights(string filename);
// Reads weights back from the given file.
void importWeights(string filename);
// Prints the current weights to standard output.
void printWeights();
private:
float m_eta;                // learning rate
int m_epochs;               // number of training passes
vector < float > m_w;       // m_w[0] is the bias, m_w[i + 1] the weight of feature i
vector < float > m_errors;  // one entry per epoch: number of non-zero updates
};
// Stores the learning rate and the number of training epochs; the weights
// are created later by fit().
perceptron::perceptron(float eta, int epochs)
    : m_eta(eta), m_epochs(epochs)
{
}
// Trains the perceptron with the perceptron learning rule and records, for
// every epoch, how many samples triggered a non-zero update.
//
// X holds one training sample per row, y the matching labels (the tutorial
// uses -1 / 1). For every sample the weights move by
// eta * (label - prediction) * feature and the bias by eta * (label - prediction).
//
// Fixes over the earlier version:
//  - the bias is accumulated (+=) instead of being overwritten by the last
//    update, which made it forget everything learned before;
//  - an empty training set returns immediately instead of reading X[0];
//  - the weight vector is only (re)built when its size does not match the
//    feature count, so a second call no longer appends extra weights and then
//    reads past the end of each sample.
// NOTE(review): every row of X is assumed to have X[0].size() features and y
// at least X.size() entries - confirm at the call sites.
void perceptron::fit(vector< vector<float> > X, vector<float> y)
{
    if (X.empty())
    {
        return;
    }
    // One weight per feature plus one slot (index 0) for the bias term.
    if (m_w.size() != X[0].size() + 1)
    {
        m_w.assign(X[0].size() + 1, 0.0f);
    }
    for (int pass = 0; pass < m_epochs; pass++)
    {
        int errors = 0; // samples that changed the weights during this epoch
        for (size_t j = 0; j < X.size(); j++)
        {
            // Zero when the sample is already classified correctly.
            float update = m_eta * (y[j] - predict(X[j]));
            for (size_t w = 1; w < m_w.size(); w++)
            {
                m_w[w] += update * X[j][w - 1];
            }
            m_w[0] += update; // bias behaves like a weight on a constant input of 1
            errors += update != 0 ? 1 : 0;
        }
        m_errors.push_back(errors);
    }
}
// Returns bias + sum(feature * weight) for one sample. The bias lives in
// m_w[0], so the weight of feature i is stored at m_w[i + 1].
float perceptron::netInput(vector<float> X)
{
    float net = m_w[0];
    for (vector<float>::size_type idx = 0; idx < X.size(); ++idx)
    {
        net += X[idx] * m_w[idx + 1];
    }
    return net;
}
// Step function: 1 for a strictly positive net input, -1 otherwise.
int perceptron::predict(vector<float> X)
{
    int label = -1;
    if (netInput(X) > 0)
    {
        label = 1;
    }
    return label;
}
// Prints the per-epoch error counts stored in m_errors by handing them to
// printVector (declared outside this listing).
void perceptron::printErrors()
{
printVector(m_errors);
}
// Writes every weight (bias first) to `filename`, one value per line.
void perceptron::exportWeights(string filename)
{
    ofstream outFile(filename);
    for (vector<float>::const_iterator it = m_w.begin(); it != m_w.end(); ++it)
    {
        outFile << *it << endl;
    }
    outFile.close();
}
// Loads the weights (bias first) from `filename`, as written by exportWeights().
//
// The previous version read exactly m_w.size() values, so calling it on a
// freshly constructed perceptron (empty m_w) loaded nothing, and a missing or
// short file overwrote the existing weights with zeros. Now every value in the
// file is read, and the weights are replaced only if at least one value was
// parsed; otherwise the current weights are left untouched.
void perceptron::importWeights(string filename)
{
    ifstream inFile(filename);
    vector<float> loaded;
    float value = 0.0f;
    while (inFile >> value) // stops at end of file or at the first unparsable token
    {
        loaded.push_back(value);
    }
    if (!loaded.empty())
    {
        m_w = loaded;
    }
}
// Prints all weights (bias first) on one line, each followed by a space.
void perceptron::printWeights()
{
    cout << "weights: ";
    for (float weight : m_w)
    {
        cout << weight << " ";
    }
    cout << endl;
}
#include <iostream>
#include <vector>
#include <algorithm>
#include <fstream>
#include <string>
#include <math.h>
#include "MachineLearning.h"
using namespace std;
using namespace MachineLearning;
vector< vector<float> > getIrisX();
vector<float> getIrisy();
// Demo driver: loads the Iris samples and labels, trains a perceptron on
// them, prints the per-epoch error counts and classifies two extra samples.
int main()
{
    vector< vector<float> > X = getIrisX();
    vector<float> y = getIrisy();

    // Two hand-picked probes; their expected classes are named in the output below.
    vector<float> test1{5.0, 3.3, 1.4, 0.2};
    vector<float> test2{6.0, 2.2, 5.0, 1.5};

    perceptron clf(0.1, 14); // learning rate 0.1, 14 epochs
    clf.fit(X, y);
    clf.printErrors();

    cout << "Now Predicting: 5.0,3.3,1.4,0.2(CorrectClass=-1,Iris-setosa) -> " << clf.predict(test1) << endl;
    cout << "Now Predicting: 6.0,2.2,5.0,1.5(CorrectClass=1,Iris-virginica) -> " << clf.predict(test2) << endl;

    system("PAUSE");
    return 0;
}
// Reads 100 class names from "y.data" and converts them to labels:
// "Iris-setosa" becomes -1, any other name becomes 1.
vector<float> getIrisy()
{
    ifstream labelFile("y.data");
    vector<float> labels;
    string className;
    int remaining = 100;
    while (remaining-- > 0)
    {
        labelFile >> className;
        labels.push_back(className == "Iris-setosa" ? -1 : 1);
    }
    return labels;
}
// Builds the feature matrix from the four column files "a.data", "b.data",
// "c.data" and "d.data". Each record in a file is read as
// <integer><single character><float>; only the float is kept. Row i of the
// result is {a_i, b_i, c_i, d_i}.
//
// At most 100 rows are read. Unlike the previous version, reading stops at the
// first record that cannot be parsed from any of the four files, so missing or
// short files yield fewer rows instead of rows filled with stale or
// indeterminate values.
vector< vector<float> > getIrisX()
{
    ifstream af("a.data");
    ifstream bf("b.data");
    ifstream cf("c.data");
    ifstream df("d.data");

    vector< vector<float> > X;
    for (int row = 0; row < 100; row++)
    {
        int index = 0;        // leading integer of the record, discarded
        char separator = 0;   // character between the integer and the value, discarded
        float a = 0.0f, b = 0.0f, c = 0.0f, d = 0.0f;

        const bool rowOk = (af >> index >> separator >> a)
                        && (bf >> index >> separator >> b)
                        && (cf >> index >> separator >> c)
                        && (df >> index >> separator >> d);
        if (!rowOk)
        {
            break;
        }
        X.push_back(vector < float > {a, b, c, d});
    }
    return X; // the streams close themselves when they go out of scope
}
//Had a lot of trouble with shuffle
#include <iostream>
#include<vector>
#include <list>
#include <cstdlib>
#include <math.h>
#define PI 3.141592653589793238463
#define N
#define epsilon 0.1
#define epoch 2000
using namespace std;
// Just for GNU Plot issues
extern "C" FILE *popen(const char *command, const char *mode);
// Defining activation functions
//double sigmoid(double x) { return 1.0f / (1.0f + exp(-x)); }
//double dsigmoid(double x) { return x * (1.0f - x); }
// Hyperbolic tangent activation.
//
// The textbook form (e^x - e^-x) / (e^x + e^-x) overflows to inf/inf = NaN once
// |x| exceeds roughly 710. This version works on |x| with
// tanh(u) = (1 - e^(-2u)) / (1 + e^(-2u)), so the exponential can only
// underflow towards 0, and uses expm1 to stay accurate for tiny inputs.
// NOTE(review): this definition shares its name and signature with the C
// library's tanh from <math.h>; it must not call tanh itself.
double tanh(double x)
{
    const double t = expm1(-2.0 * fabs(x)); // t = e^(-2|x|) - 1, in [-1, 0]
    const double magnitude = -t / (t + 2.0);
    return x < 0 ? -magnitude : magnitude;
}
// Derivative of tanh written in terms of the activation's OUTPUT value:
// for x = tanh(a) the derivative with respect to a is 1 - x^2.
double dtanh(double x)
{
    return 1.0 - x * x;
}
// Linear (identity) activation: hands its argument back unchanged.
double lin(double x)
{
    return x;
}
// Derivative of the linear activation: constant 1, independent of x.
double dlin(double x)
{
    (void)x; // the argument only exists to mirror the other derivative helpers
    return 1.0;
}
// Returns a pseudo-random initial weight in [-1, 1].
//
// The previous expression 2*rand()/RAND_MAX - 1 was evaluated entirely in
// integer arithmetic, so it could only produce whole numbers, and 2*rand()
// overflows int when RAND_MAX equals INT_MAX. Doing the arithmetic in double
// yields the intended continuous range.
double init_weight()
{
    return 2.0 * rand() / RAND_MAX - 1.0;
}
// Largest value seen across the training inputs and targets; main() uses it as
// the normalizing divisor. Starts very low so the first sample replaces it.
double MAXX = -9999999999999999; //maximum value of input example
// Network Configuration
static const int numInputs = 1;
static const int numHiddenNodes = 7;
static const int numOutputs = 1;
// Learning Rate
const double lr = 0.05f;
// Activations of the most recent forward pass.
double hiddenLayer[numHiddenNodes];
double outputLayer[numOutputs];
// Per-node biases.
double hiddenLayerBias[numHiddenNodes];
double outputLayerBias[numOutputs];
// Weights, indexed [source node][destination node].
double hiddenWeights[numInputs][numHiddenNodes];
double outputWeights[numHiddenNodes][numOutputs];
// Training set: one row per sample.
static const int numTrainingSets = 50;
double training_inputs[numTrainingSets][numInputs];
double training_outputs[numTrainingSets][numOutputs];
// Shuffling the data with each epoch
// Shuffles the first n entries of `array` in place: walking from the front,
// each position is swapped with a position drawn via rand() from itself up to
// the end of the array. Arrays with fewer than two entries are left untouched.
void shuffle(int *array, size_t n)
{
    if (n <= 1)
    {
        return;
    }
    for (size_t pos = 0; pos + 1 < n; ++pos)
    {
        // Dividing rand() by this bucket width maps it onto 0 .. n - pos - 1.
        const size_t bucket = RAND_MAX / (n - pos) + 1;
        const size_t pick = pos + rand() / bucket;
        const int held = array[pick];
        array[pick] = array[pos];
        array[pos] = held;
    }
}
// Forward Propagation. Only used after training is done.
// Forward pass for a single sample, used once training is finished.
// Reads numInputs values from test_sample and leaves the result in the global
// outputLayer (hiddenLayer is overwritten as a side effect).
void predict(double test_sample[])
{
    // Hidden layer: bias + weighted inputs, squashed by tanh.
    for (int node = 0; node < numHiddenNodes; ++node)
    {
        double sum = hiddenLayerBias[node];
        for (int in = 0; in < numInputs; ++in)
        {
            sum += test_sample[in]*hiddenWeights[in][node];
        }
        hiddenLayer[node] = tanh(sum);
    }

    // Output layer: bias + weighted hidden activations, linear activation.
    for (int out = 0; out < numOutputs; ++out)
    {
        double sum = outputLayerBias[out];
        for (int node = 0; node < numHiddenNodes; ++node)
        {
            sum += hiddenLayer[node]*outputWeights[node][out];
        }
        outputLayer[out] = lin(sum);
    }
}
/// Trains the network declared above on samples of sin(x) taken over one
/// period, prints the learned parameters and plots both the fit and the
/// per-epoch error through gnuplot.
///
/// Changes over the earlier version:
///  - trainingSetOrder is initialised for all numTrainingSets entries (the loop
///    used numInputs as its bound, leaving most entries uninitialised) and the
///    shuffled order is actually used to pick the sample, so every epoch visits
///    the samples in a fresh random order;
///  - a failed popen() no longer leads to fprintf on a null FILE*;
///  - the plot title names the function that is really being fitted, sin(x).
int main(int argc, const char * argv[])
{
    ///TRAINING DATA GENERATION
    for (int i = 0; i < numTrainingSets; i++)
    {
        const double p = (2*PI*(double)i/numTrainingSets);
        training_inputs[i][0] = p;
        training_outputs[i][0] = sin(p);

        ///FINDING NORMALIZING FACTOR
        for (int m = 0; m < numInputs; ++m)
            if (MAXX < training_inputs[i][m])
                MAXX = training_inputs[i][m];
        for (int m = 0; m < numOutputs; ++m)
            if (MAXX < training_outputs[i][m])
                MAXX = training_outputs[i][m];
    }

    ///NORMALIZING
    for (int i = 0; i < numTrainingSets; i++)
    {
        for (int m = 0; m < numInputs; ++m)
            training_inputs[i][m] /= 1.0f*MAXX;
        for (int m = 0; m < numOutputs; ++m)
            training_outputs[i][m] /= 1.0f*MAXX;
        cout<<"In: "<<training_inputs[i][0]<<" out: "<<training_outputs[i][0]<<endl;
    }

    ///WEIGHT & BIAS INITIALIZATION
    for (int i = 0; i < numInputs; i++) {
        for (int j = 0; j < numHiddenNodes; j++) {
            hiddenWeights[i][j] = init_weight();
        }
    }
    for (int i = 0; i < numHiddenNodes; i++) {
        hiddenLayerBias[i] = init_weight();
        for (int j = 0; j < numOutputs; j++) {
            outputWeights[i][j] = init_weight();
        }
    }
    for (int i = 0; i < numOutputs; i++) {
        outputLayerBias[i] = 0;
    }

    ///FOR INDEX SHUFFLING
    int trainingSetOrder[numTrainingSets];
    for (int j = 0; j < numTrainingSets; ++j)
        trainingSetOrder[j] = j;

    ///TRAINING
    vector<double> performance, epo; ///STORE MSE, EPOCH
    for (int n = 0; n < epoch; n++)
    {
        double MSE = 0;
        shuffle(trainingSetOrder, numTrainingSets);
        std::cout<<"epoch :"<<n<<"\n";

        for (int x = 0; x < numTrainingSets; x++)
        {
            // Index of the sample visited at this step of the shuffled order.
            const int i = trainingSetOrder[x];

            /// Forward pass
            for (int j = 0; j < numHiddenNodes; j++)
            {
                double activation = hiddenLayerBias[j];
                for (int k = 0; k < numInputs; k++) {
                    activation += training_inputs[i][k]*hiddenWeights[k][j];
                }
                hiddenLayer[j] = tanh(activation);
            }
            for (int j = 0; j < numOutputs; j++) {
                double activation = outputLayerBias[j];
                for (int k = 0; k < numHiddenNodes; k++)
                {
                    activation += hiddenLayer[k]*outputWeights[k][j];
                }
                outputLayer[j] = lin(activation);
            }

            // Squared error of this sample, averaged over the output nodes.
            for (int k = 0; k < numOutputs; ++k)
            {
                const double diff = training_outputs[i][k] - outputLayer[k];
                MSE += (1.0/numOutputs)*diff*diff;
            }

            /// Backprop
            /// For V (hidden -> output weights)
            double deltaOutput[numOutputs];
            for (int j = 0; j < numOutputs; j++) {
                const double errorOutput = (training_outputs[i][j]-outputLayer[j]);
                deltaOutput[j] = errorOutput*dlin(outputLayer[j]);
            }

            /// For W (input -> hidden weights); uses the output weights from
            /// before this sample's update.
            double deltaHidden[numHiddenNodes];
            for (int j = 0; j < numHiddenNodes; j++) {
                double errorHidden = 0.0;
                for (int k = 0; k < numOutputs; k++) {
                    errorHidden += deltaOutput[k]*outputWeights[j][k];
                }
                deltaHidden[j] = errorHidden*dtanh(hiddenLayer[j]);
            }

            ///Updation
            /// For V and b
            for (int j = 0; j < numOutputs; j++) {
                //b
                outputLayerBias[j] += deltaOutput[j]*lr;
                for (int k = 0; k < numHiddenNodes; k++)
                {
                    outputWeights[k][j] += hiddenLayer[k]*deltaOutput[j]*lr;
                }
            }

            /// For W and c
            for (int j = 0; j < numHiddenNodes; j++) {
                //c
                hiddenLayerBias[j] += deltaHidden[j]*lr;
                //W
                for (int k = 0; k < numInputs; k++) {
                    hiddenWeights[k][j] += training_inputs[i][k]*deltaHidden[j]*lr;
                }
            }
        }

        //Averaging the MSE
        MSE /= 1.0f*numTrainingSets;

        ///Steps to PLOT PERFORMANCE PER EPOCH
        performance.push_back(MSE*100);
        epo.push_back(n);
    }

    // Print weights
    std::cout << "Final Hidden Weights\n[ ";
    for (int j = 0; j < numHiddenNodes; j++) {
        std::cout << "[ ";
        for (int k = 0; k < numInputs; k++) {
            std::cout << hiddenWeights[k][j] << " ";
        }
        std::cout << "] ";
    }
    std::cout << "]\n";

    std::cout << "Final Hidden Biases\n[ ";
    for (int j = 0; j < numHiddenNodes; j++) {
        std::cout << hiddenLayerBias[j] << " ";
    }
    std::cout << "]\n";

    std::cout << "Final Output Weights";
    for (int j = 0; j < numOutputs; j++) {
        std::cout << "[ ";
        for (int k = 0; k < numHiddenNodes; k++) {
            std::cout << outputWeights[k][j] << " ";
        }
        std::cout << "]\n";
    }

    std::cout << "Final Output Biases\n[ ";
    for (int j = 0; j < numOutputs; j++) {
        std::cout << outputLayerBias[j] << " ";
    }
    std::cout << "]\n";

    /* This part is just for plotting the results.
       This requires installing GNU Plot. You can also comment it out.
    */
    // Evaluate the trained network on a grid four times finer than the training set.
    vector<float> x;
    vector<float> y1, y2;
    int numTestSets = numTrainingSets;
    for (float i = 0; i < numTestSets; i = i + 0.25)
    {
        const double p = (2*PI*(double)i/numTestSets);
        x.push_back(p);
        y1.push_back(sin(p));
        double test_input[numInputs];
        test_input[0] = p/MAXX;               // same scaling as the training inputs
        predict(test_input);
        y2.push_back(outputLayer[0]*MAXX);    // undo the target scaling
    }

    FILE * gp = popen("gnuplot", "w");
    if (gp)
    {
        fprintf(gp, "set terminal wxt size 600,400 \n");
        fprintf(gp, "set grid \n");
        fprintf(gp, "set title '%s' \n", "f(x) = sin(x)");
        fprintf(gp, "set style line 1 lt 3 pt 7 ps 0.1 lc rgb 'green' lw 1 \n");
        fprintf(gp, "set style line 2 lt 3 pt 7 ps 0.1 lc rgb 'red' lw 1 \n");
        fprintf(gp, "plot '-' w p ls 1, '-' w p ls 2 \n");
        ///Exact f(x) = sin(x) -> Green Graph
        for (size_t k = 0; k < x.size(); k++) {
            fprintf(gp, "%f %f \n", x[k], y1[k]);
        }
        fprintf(gp, "e\n");
        ///Neural network approximation of f(x) = sin(x) -> Red Graph
        for (size_t k = 0; k < x.size(); k++) {
            fprintf(gp, "%f %f \n", x[k], y2[k]);
        }
        fprintf(gp, "e\n");
        fflush(gp);
    }
    else
    {
        std::cerr << "Could not start gnuplot; skipping the function plot\n";
    }

    ///FILE POINTER FOR SECOND PLOT (PERFORMANCE GRAPH)
    FILE * gp1 = popen("gnuplot", "w");
    if (gp1)
    {
        fprintf(gp1, "set terminal wxt size 600,400 \n");
        fprintf(gp1, "set grid \n");
        fprintf(gp1, "set title '%s' \n", "Performance");
        fprintf(gp1, "set style line 1 lt 3 pt 7 ps 0.1 lc rgb 'green' lw 1 \n");
        fprintf(gp1, "set style line 2 lt 3 pt 7 ps 0.1 lc rgb 'red' lw 1 \n");
        fprintf(gp1, "plot '-' w p ls 1 \n");
        for (size_t k = 0; k < epo.size(); k++) {
            fprintf(gp1, "%f %f \n", epo[k], performance[k]);
        }
        fprintf(gp1, "e\n");
        fflush(gp1);
    }
    else
    {
        std::cerr << "Could not start gnuplot; skipping the performance plot\n";
    }

    // Keeps the process (and with it the gnuplot pipes) alive until a key is pressed.
    // NOTE(review): the pipes are never closed explicitly; no pclose declaration
    // is visible in this file.
    system("pause");
    return 0;
}