Python 完成第一个神经网络后,下一步是什么?

Python 完成第一个神经网络后,下一步是什么?,python,neural-network,mnist,Python,Neural Network,Mnist,我是 YouTube 上 3Blue1Brown 频道的超级粉丝,他关于神经网络的系列视频真的让我对这个话题感到兴奋。 我决定用 Python 从零开始实现自己的神经网络,并深入研究其中的数学。因此,借助 MNIST 数据库,我着手实现手写数字识别,并在两周后成功完成了这项任务。 从那以后,我一直在进一步完善我的代码,以便可以方便地调整神经元和隐藏层的数量。 我还尝试了不同的激活函数。 我得到的最好的准确率是 95% 左右(2 个隐藏层,每层 16 个神经元,训练 5 分钟)。 现在,我的问题相当模糊,但我正在寻找该领域的下一步。

import pickle
import gzip
import numpy as np
import random
import time

class mnistClass:
    """Fully connected sigmoid network trained on MNIST by mini-batch gradient descent.

    The network has ``layers`` weight matrices (hidden layers plus the output
    layer; the input layer is not counted).  Hidden layers use the shifted
    sigmoid ``2*sigmoid(x) - 1`` (``shiftType`` 1) and the output layer uses
    the plain sigmoid (``shiftType`` 0).  The cost of one sample is the sum of
    squared differences between the output activations and the one-hot label.

    Constructing an instance loads ``mnist.pkl.gz`` from the working
    directory, initialises the parameters, trains, and prints the accuracy on
    the validation set.
    """

    def __init__(self, inputAmount=784, layers=2, layerSize=16, outputSize=10, loops=1, sampleSize=100):
        """Load the data set, build the network, train it and report accuracy.

        Args:
            inputAmount: number of input nodes (length of one sample vector).
            layers: number of layers, counting the output layer but not the
                input layer.
            layerSize: number of nodes in every hidden layer.
            outputSize: number of output nodes.
            loops: number of passes over the training set.
            sampleSize: number of samples per averaged gradient step.
        """
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file.  Only run this against a trusted copy of mnist.pkl.gz.
        with gzip.open('mnist.pkl.gz', 'rb') as f:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        self.A, self.y = train_set      # training samples and their labels
        self.V, self.v2 = valid_set     # validation samples and their labels
        self.dataSize = len(self.A)
        self.inputAmount = inputAmount
        self.layers = layers
        self.layerSize = layerSize
        self.outputSize = outputSize
        self.loops = loops
        self.sampleSize = sampleSize
        # Samples left over after the last full batch are not used.
        self.iterations = self.dataSize // self.sampleSize
        self.clock = time.time()
        self.Weights = []
        self.Biases = []
        # The parameters must exist and be trained before accuracy is measured.
        self.initializeArrays()
        self.initializeTraining()
        print("Accuracy: " + str(self.getAccuracy()) + "%")

    def _shiftTypeFor(self, layer):
        """Return the activation variant of ``layer``: 0 for the output layer, 1 otherwise."""
        return 0 if layer == self.layers - 1 else 1

    def _forwardPass(self, sample):
        """Propagate one input vector through the network.

        Returns:
            ``(Z, R)`` where ``Z[i]`` is ``Weights[i]`` applied to the input of
            layer ``i`` (bias not yet added) and ``R[i]`` is the activation of
            layer ``i``.
        """
        Z = []
        R = []
        activation = sample
        for layer in range(self.layers):
            Z.append(np.dot(self.Weights[layer], activation))
            activation = self.sigmoid(Z[layer] + self.Biases[layer], self._shiftTypeFor(layer))
            R.append(activation)
        return Z, R

    def _outputDependance(self, Z, layer):
        """Return the Jacobian of the output activations w.r.t. the activations of ``layer``."""
        if layer == self.layers - 1:
            # The output layer depends on itself through the identity.
            return np.identity(self.outputSize)
        return self.Rdependance(Z, self.layers - 1, layer)

    def initializeArrays(self):
        """Create ``self.Weights`` and ``self.Biases`` with uniform values in [-0.5, 0.5).

        ``Weights[i]`` has shape ``(nodes in layer i, nodes feeding layer i)``
        and ``Biases[i]`` has one entry per node of layer ``i``.  Any existing
        parameters are replaced.
        """
        hiddenWeights = []
        biases = []
        for i in range(self.layers):
            if self.layers - i > 2:     # hidden-to-hidden connections
                hiddenWeights.append(np.random.rand(self.layerSize, self.layerSize) - 0.5)
            if self.layers - i > 1:     # one bias vector per hidden layer
                biases.append(np.random.rand(self.layerSize) - 0.5)
        if self.layers > 1:
            firstWeights = np.random.rand(self.layerSize, self.inputAmount) - 0.5
            lastWeights = np.random.rand(self.outputSize, self.layerSize) - 0.5
            weights = [firstWeights] + hiddenWeights + [lastWeights]
        else:
            # Single layer: the input feeds the output nodes directly.
            weights = [np.random.rand(self.outputSize, self.inputAmount) - 0.5]
        biases.append(np.random.rand(self.outputSize) - 0.5)
        self.Weights = weights
        self.Biases = biases

    def sigmoid(self, x, shiftType):
        """Return the activation of ``x``.

        Args:
            x: scalar or array of pre-activations.
            shiftType: 0 for ``1/(1+exp(-x))`` (range 0..1), 1 for
                ``2/(1+exp(-x)) - 1`` (range -1..1).

        Raises:
            ValueError: if ``shiftType`` is neither 0 nor 1.
        """
        if shiftType == 0:
            return 1 / (1 + np.exp(-x))
        if shiftType == 1:
            return 2 * (1 / (1 + np.exp(-x))) - 1
        raise ValueError("shiftType must be 0 or 1, got " + repr(shiftType))

    def sigmoidPrime(self, x, shiftType):
        """Return the derivative of ``sigmoid(x, shiftType)`` with respect to ``x``.

        Raises:
            ValueError: if ``shiftType`` is neither 0 nor 1.
        """
        if shiftType == 0:
            activation = self.sigmoid(x, 0)
            return activation - activation**2
        if shiftType == 1:
            return 2 * np.exp(-x) / (1 + np.exp(-x))**2
        raise ValueError("shiftType must be 0 or 1, got " + repr(shiftType))

    def Rdependance(self, Z, layer1, layer2, multi=False):  #How R depends on a preceeding R
        """Return the Jacobian of the activations of ``layer1`` w.r.t. those of ``layer2``.

        Args:
            Z: per-layer weighted inputs as returned by the forward pass.
            layer1: index of the later layer.
            layer2: index of the earlier layer.
            multi: ignored; kept for backward compatibility.  Whether several
                layers are spanned is derived from ``layer1 - layer2``.

        Returns:
            Array of shape ``(nodes in layer1, nodes in layer2)``.  When the
            layers are not more than one apart, this is the single-step
            Jacobian of ``layer1`` with respect to the layer feeding it.
        """
        if layer1 - layer2 > 1:
            # Chain rule: multiply the single-step Jacobians from layer1 down
            # to layer2 + 1.
            result = self.Rdependance(Z, layer1, layer1 - 1)
            for layer in reversed(range(layer2 + 1, layer1)):
                result = np.dot(result, self.Rdependance(Z, layer, layer - 1))
            return result
        slope = self.sigmoidPrime(Z[layer1] + self.Biases[layer1], self._shiftTypeFor(layer1))
        # Row j of the weight matrix is scaled by the activation slope of node j.
        return np.multiply(self.Weights[layer1], slope[:, np.newaxis])

    def RWdependance(self, R, Z, dataCaseNo, layer):   #How R depends on connecting Weights
        """Return how each activation of ``layer`` depends on its own incoming weights.

        Entry ``[j, k]`` is the derivative of activation ``j`` of ``layer``
        with respect to ``Weights[layer][j, k]``: the activation slope of node
        ``j`` times input ``k`` of the layer.

        Args:
            R: per-layer activations from the forward pass.
            Z: per-layer weighted inputs from the forward pass.
            dataCaseNo: index into ``self.A`` of the sample being processed
                (supplies the input of layer 0).
            layer: index of the layer.
        """
        slope = self.sigmoidPrime(Z[layer] + self.Biases[layer], self._shiftTypeFor(layer))
        layerInput = self.A[dataCaseNo] if layer == 0 else R[layer - 1]
        # np.outer avoids building a ones matrix via Weights/Weights, which
        # produced NaN whenever a weight was exactly zero.
        return np.outer(slope, layerInput)

    def RBdependance(self, Z, layer):   #How R depends on internal Biases
        """Return the Jacobian of the output activations w.r.t. ``Biases[layer]``.

        Returns:
            Array of shape ``(outputSize, nodes in layer)``.
        """
        slope = self.sigmoidPrime(Z[layer] + self.Biases[layer], self._shiftTypeFor(layer))
        # Column j is scaled by the activation slope of node j.  For the output
        # layer this yields diag(slope), so the slope is not dropped there.
        return np.multiply(self._outputDependance(Z, layer), slope[np.newaxis, :])

    def integralWeightCost(self, R, Z, dataCaseNo, quadDifferential, layer): # Cost of system for weights
        """Return the gradient of the sample cost w.r.t. ``Weights[layer]``.

        Args:
            R: per-layer activations from the forward pass.
            Z: per-layer weighted inputs from the forward pass.
            dataCaseNo: index into ``self.A`` of the sample being processed.
            quadDifferential: derivative of the cost w.r.t. the output
                activations.
            layer: index of the layer.

        Returns:
            Array with the same shape as ``Weights[layer]``.
        """
        # Derivative of the cost w.r.t. each activation of this layer.
        cost_differential = np.dot(quadDifferential, self._outputDependance(Z, layer))
        return np.multiply(self.RWdependance(R, Z, dataCaseNo, layer), cost_differential[:, np.newaxis])

    def integralBiasCost(self, Z, quadDifferential, layer): # Cost of system for biases
        """Return the gradient of the sample cost w.r.t. ``Biases[layer]``.

        Args:
            Z: per-layer weighted inputs from the forward pass.
            quadDifferential: derivative of the cost w.r.t. the output
                activations.
            layer: index of the layer.

        Returns:
            Array with the same shape as ``Biases[layer]``.
        """
        return np.dot(quadDifferential, self.RBdependance(Z, layer))

    def initializeTraining(self):
        """Train on ``self.A``/``self.y`` and print progress and a summary.

        Every batch of ``sampleSize`` consecutive samples contributes one
        update: the per-sample gradients are averaged and subtracted from the
        parameters (step size 1).  The whole set is traversed ``loops`` times.
        """
        for loop in range(self.loops):
            for iteration in range(self.iterations):
                avg_cost = 0
                avg_deltaWeights = [np.zeros(weights.shape) for weights in self.Weights]
                avg_deltaBiases = [np.zeros(biases.shape) for biases in self.Biases]
                batchStart = iteration * self.sampleSize
                for dataCaseNo in range(batchStart, batchStart + self.sampleSize):
                    # One-hot target for this sample's label.
                    Y1 = np.zeros(self.outputSize)
                    Y1[self.y[dataCaseNo]] = 1
                    Z, R = self._forwardPass(self.A[dataCaseNo])

                    C = np.sum((R[-1] - Y1)**2)
                    avg_cost += C
                    quadDifferential = 2 * (R[-1] - Y1)

                    for i in range(self.layers):
                        avg_deltaWeights[i] += self.integralWeightCost(R, Z, dataCaseNo, quadDifferential, i)
                        avg_deltaBiases[i] += self.integralBiasCost(Z, quadDifferential, i)

                avg_cost = avg_cost/self.sampleSize
                for i in range(self.layers):
                    self.Weights[i] = self.Weights[i] - avg_deltaWeights[i]/self.sampleSize
                    self.Biases[i] = self.Biases[i] - avg_deltaBiases[i]/self.sampleSize
                print("Average cost: " + str(round(avg_cost, 4)))
            print("\n" + "*"*25 + " " + str(loop+1) +" " + "*"*25 + "\n")
        executionEndTime = round((time.time() - self.clock), 2)
        print("Completed " + str(self.loops) + " rounds of " + str(self.sampleSize*self.iterations) + " samples (sampleSize: " + str(self.sampleSize) + "), " + " in " + str(executionEndTime) + " seconds..")
        print("Layers: " + str(self.layers))
        print("Middle layer nodes: " + str(self.layerSize))
        print("Input amount: " + str(self.inputAmount))
        amountVariables = 0
        for i in range(self.layers):
            amountVariables += self.Weights[i].size
            amountVariables += self.Biases[i].size
        print("Variables: " + str(amountVariables))
        print("Output size: " + str(self.outputSize))

    def getAccuracy(self):
        """Return the percentage of validation samples classified correctly.

        At most the first 10000 samples of ``self.V`` are evaluated.  The
        prediction is the index of the largest output activation.  Returns
        ``0.0`` when there are no validation samples.
        """
        runs = min(10000, len(self.V))
        correct = 0
        print("Testing validation set accuracy over " + str(runs) + " samples...\n")
        if runs == 0:
            return 0.0
        for ran in range(runs):
            Z, R = self._forwardPass(self.V[ran])
            # argmax returns the first index of the maximum.
            maxNum = np.argmax(R[-1])
            if int(self.v2[ran]) == int(maxNum):
                correct += 1

        accuracy = correct*100/runs
        return accuracy

# Constructor arguments:
#   inputAmount - amount of nodes in input data
#   layers      - amount of layers including last output layer but not first input layer
#   layerSize   - amount of nodes in hidden layers
#   outputSize  - amount of nodes in output layer
#   loops       - how many times to train through the entire data set
#   sampleSize  - what quantity of data samples to average the gradient on
instance = mnistClass(
    inputAmount=784,
    layers=3,
    layerSize=16,
    outputSize=10,
    loops=2,
    sampleSize=100,
)

很高兴看到有新面孔加入机器学习领域。你能独立取得这样的成果,确实很了不起,首先向你致敬。 至于你的问题,我建议你先退后一步,了解数据探索和特征提取的概念,以及它们为什么重要。具体做法是:浏览一些 Kaggle 上的机器学习教程,并尝试在那里的数据集(例如泰坦尼克号数据集)上做一些基本的分类任务。


