What's next after completing your first neural network in Python?

Tags: python, neural-network, mnist

I'm a big fan of the YouTube channel 3Blue1Brown, and his series on neural networks got me really excited about the topic. I decided to build my own neural network from scratch in Python and dig into the math. So, with the help of the MNIST database, I took on handwritten digit recognition, and after two weeks I successfully completed the task. Since then I have kept developing my code so that the number of neurons and hidden layers can be adjusted neatly in the code. I have also experimented with different activation functions. The best accuracy I have achieved is around 95%, with 2 hidden layers of 16 neurons each, after 5 minutes of training.

Now, my question is rather vague, but I am looking for the next challenge in this field. Do you have any suggestions?

I have the framework built now, so I would love either a bigger dataset or some other new type of problem. Or should I do more work on my existing problem to push the output accuracy even higher?

What do you think?

Yours, Emil

Here is the code, if anyone is interested:

import pickle
import gzip
import numpy as np
import random
import time

class mnistClass:
    def __init__(self, inputAmount=784, layers=2, layerSize=16, outputSize=10, loops=1, sampleSize=100):
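        # Load the classic MNIST pickle (train/validation/test split);
        # the file 'mnist.pkl.gz' is expected in the working directory.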
        with gzip.open('mnist.pkl.gz', 'rb') as f:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        self.A, self.y = train_set
        self.V, self.v2 = valid_set
        self.dataSize = len(self.A)
        self.inputAmount = inputAmount
        self.layers = layers
        self.layerSize = layerSize
        self.outputSize = outputSize
        self.loops = loops
        self.sampleSize = sampleSize
        self.iterations = int(self.dataSize/self.sampleSize)
        self.clock = time.time()
        self.Weights = []
        self.Biases = []
        self.initializeArrays()
        self.initializeTraining()
        print("Accuracy: " + str(self.getAccuracy()) + "%")

    def initializeArrays(self):
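        # Weights and biases start uniform in [-0.5, 0.5); weight matrices are
        # shaped (to_layer, from_layer) so a forward step is np.dot(W, x).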
        for i in range(self.layers):
            if self.layers - i > 2:     #Adding middle layers
                self.Weights.append(np.random.rand(self.layerSize, self.layerSize)-0.5) 
            if self.layers - i > 1: 
                self.Biases.append(np.random.rand(self.layerSize)-0.5)
        if self.layers > 1:
            self.Weights.insert(0, np.random.rand(self.layerSize, self.inputAmount)-0.5)
            self.Weights.insert(len(self.Weights), np.random.rand(self.outputSize, self.layerSize)-0.5)
        else:
            self.Weights.insert(len(self.Weights), np.random.rand(self.outputSize, self.inputAmount)-0.5)
        self.Biases.insert(len(self.Biases), np.random.rand(self.outputSize)-0.5)

    def sigmoid(self, x, shiftType):
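        # shiftType 0: standard logistic sigmoid with range (0, 1)
        # shiftType 1: rescaled to range (-1, 1), equivalent to tanh(x/2)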
        if shiftType == 0:
            result = 1/(1+np.exp(-x))
        elif shiftType == 1:
            result = 2 * (1/(1+np.exp(-x))) - 1
        return result

    def sigmoidPrime(self, x, shiftType):
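        # Derivative of the matching sigmoid variant above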
        if shiftType == 0:
            result = self.sigmoid(x, 0) - self.sigmoid(x, 0)**2
        elif shiftType == 1:
            result = 2*np.exp(-x)/(1+np.exp(-x))**2
        return result

    def Rdependance(self, Z, layer1, layer2):  #How R depends on a preceding R (chain rule across layers); the old multi=False parameter was dead, it was always overwritten below
        multi = layer1 - layer2 > 1
        if not multi:
            if layer1 == self.layers-1:
                shiftType = 0
            else:
                shiftType = 1           
            R1_R2_differential = np.multiply(self.Weights[layer1], self.sigmoidPrime(Z[layer1]+self.Biases[layer1], shiftType)[:, np.newaxis])
            result = R1_R2_differential
        else:
            chainRule = []
            for i in reversed(range(layer2, layer1)):
                chainRule.append(self.Rdependance(Z, i+1, i))
            result = chainRule[0]
            for i in range(len(chainRule)-1):
                result = np.dot(result, chainRule[i+1])
        return result

    def RWdependance(self, R, Z, dataCaseNo, layer):   #How R depends on connecting Weights
        if layer == self.layers-1:
            shiftType = 0
        else:
            shiftType = 1
        R_W_differential = np.ones_like(self.Weights[layer])   #Matrix of ones in the weights' shape (the original W/W trick divides by zero for any weight that is exactly 0)
        mergeW_Z = np.multiply(R_W_differential, self.sigmoidPrime(Z[layer]+self.Biases[layer], shiftType)[:, np.newaxis])
        if layer == 0:
            R_W_differential = np.multiply(mergeW_Z.T, self.A[dataCaseNo][:, np.newaxis]).T
        else:
            R_W_differential = np.multiply(mergeW_Z.T, R[layer-1][:, np.newaxis]).T
        return R_W_differential

    def RBdependance(self, Z, layer):   #How R depends on internal Biases
        if layer == self.layers-1:
            shiftType = 0
        else:
            shiftType = 1
        R_B_differential = np.multiply(self.Rdependance(Z, self.layers-1, layer).T, self.sigmoidPrime(Z[layer]+self.Biases[layer], shiftType)[:, np.newaxis]).T
        return R_B_differential

    def integralWeightCost(self, R, Z, dataCaseNo, quadDifferential, layer): # Cost of system for weights
        if layer == self.layers-1:
            nodes = np.identity(self.outputSize)
        else:
            nodes = self.Rdependance(Z, self.layers-1, layer)
        cost_differential = np.multiply(nodes, quadDifferential[:, np.newaxis])
        cost_differential = np.sum(cost_differential, 0)
        result = np.multiply(self.RWdependance(R, Z, dataCaseNo, layer), cost_differential[:, np.newaxis])
        return result

    def integralBiasCost(self, Z, quadDifferential, layer): # Cost of system for biases
        if layer == self.layers-1:
            nodes = np.identity(self.outputSize)
        else:
            nodes = self.RBdependance(Z, layer)
        cost_differential = np.multiply(nodes, quadDifferential[:, np.newaxis])
        result = np.sum(cost_differential, 0)
        return result



    def initializeTraining(self):
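        # Mini-batch gradient descent: for each batch of sampleSize examples,
        # forward-propagate, accumulate the weight/bias gradients, then apply
        # the averaged update once per batch.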
        for loop in range(self.loops):
            for iteration in range(self.iterations):
                avg_cost = 0
                avg_deltaWeights = []
                avg_deltaBiases = []
                for i in range(len(self.Weights)):  #Creating zeros of weight arrays           
                    avg_deltaWeights.append(self.Weights[i]*0)
                for i in range(len(self.Biases)):                 
                    avg_deltaBiases.append(self.Biases[i]*0)
                for dataCaseNo in range(iteration*self.sampleSize, iteration*self.sampleSize + self.sampleSize):
                    if self.layers == 1:
                        shiftType = 0
                    else:
                        shiftType = 1
                    Y1 = np.zeros(self.outputSize)
                    Y1[self.y[dataCaseNo]] = 1
                    Z = []
                    Z.append(np.dot(self.Weights[0], self.A[dataCaseNo]))
                    R = []
                    R.append(self.sigmoid(Z[0]+self.Biases[0], shiftType))
                    for i in range(1, self.layers):
                        if i == self.layers-1:
                            shiftType = 0
                        else:
                            shiftType = 1
                        Z.append(np.dot(self.Weights[i], R[i-1]))
                        R.append(self.sigmoid(Z[i]+self.Biases[i], shiftType))

                    C = np.sum((R[-1] - Y1)**2)
                    avg_cost += C
                    quadDifferential = 2 * (R[-1]-Y1)

                    for i in range(self.layers):
                        avg_deltaWeights[i] += self.integralWeightCost(R, Z, dataCaseNo, quadDifferential, i)
                        avg_deltaBiases[i] += self.integralBiasCost(Z, quadDifferential, i)

                avg_cost = avg_cost/self.sampleSize
                for i in range(self.layers):
                    self.Weights[i] = self.Weights[i] - avg_deltaWeights[i]/self.sampleSize
                    self.Biases[i] = self.Biases[i] - avg_deltaBiases[i]/self.sampleSize
                print("Average cost: " + str(round(avg_cost, 4)))
            print("\n" + "*"*25 + " " + str(loop+1) +" " + "*"*25 + "\n")
        executionEndTime = round((time.time() - self.clock), 2)
        print("Completed " + str(self.loops) + " rounds of " + str(self.sampleSize*self.iterations) + " samples (sampleSize: " + str(self.sampleSize) + "), " + " in " + str(executionEndTime) + " seconds..")
        print("Layers: " + str(self.layers))
        print("Middle layer nodes: " + str(self.layerSize))
        print("Input amount: " + str(self.inputAmount))
        amountVariables = 0
        for i in range(self.layers):
            amountVariables += self.Weights[i].size
            amountVariables += self.Biases[i].size
        print("Variables: " + str(amountVariables))
        print("Output size: " + str(self.outputSize))
        time.sleep(2)

    def getAccuracy(self):
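        # Forward-propagate each validation example and count how often the
        # most activated output node matches the label.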
        runs = 10000
        correct = 0
        print("Testing validation set accuracy over " + str(runs) + " samples...\n")
        for i in range(runs):
            if self.layers == 1:
                shiftType = 0
            else:
                shiftType = 1
            ran = i
            Y1 = np.zeros(self.outputSize)
            Y1[self.v2[ran]] = 1
            Z = []
            Z.append(np.dot(self.Weights[0], self.V[ran]))
            R = []
            R.append(self.sigmoid(Z[0]+self.Biases[0], shiftType))
            for j in range(1, self.layers):     #renamed from i so it no longer shadows the outer loop variable
                if j == self.layers-1:
                    shiftType = 0
                else:
                    shiftType = 1
                Z.append(np.dot(self.Weights[j], R[j-1]))
                R.append(self.sigmoid(Z[j]+self.Biases[j], shiftType))

            maxNum = np.argmax(R[-1])   #index of the most activated output node
            if int(self.v2[ran]) == int(maxNum):
                correct += 1

        accuracy = correct*100/runs
        return accuracy              


instance = mnistClass(784, 3, 16, 10, 2, 100)
#(input, layers, layer size, output, loops, sample subsize)

#input          - amount of nodes in input data
#layers         - amount of layers including last output layer but not first input layer
#layer size     - amount of nodes in hidden layers
#output         - amount of nodes in output layer
#loops          - how many times to train through the entire data set
#sample subsize - what quantity of data samples to average the gradient on

I'm always happy to see new faces joining the ML field, and what you describe is a real accomplishment, so first of all, hats off to you. As for your question, I would suggest taking a step back to learn about the concepts of data exploration and feature extraction, and why they matter. The way I would suggest doing that is to work through some of the Kaggle tutorials on machine learning and try some basic classification on the datasets there, such as the Titanic dataset, as a way into the field of machine learning.
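If it helps, here is a minimal sketch of the kind of first steps those tutorials walk through, using the Titanic dataset as an example. This is an illustration under assumptions, not a definitive recipe: it assumes you have downloaded train.csv from the Kaggle Titanic competition into your working directory and have pandas and scikit-learn installed; the column names (Pclass, Sex, Age, Fare, Survived) come from that dataset.

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Sketch only: assumes train.csv from the Kaggle Titanic competition
df = pd.read_csv('train.csv')
print(df.describe())        # data exploration: summary statistics per column
print(df.isna().sum())      # data exploration: missing values per column

# Basic feature extraction: encode sex numerically, fill missing ages
df['Sex'] = (df['Sex'] == 'female').astype(int)
df['Age'] = df['Age'].fillna(df['Age'].median())

features = ['Pclass', 'Sex', 'Age', 'Fare']
X_train, X_val, y_train, y_val = train_test_split(
    df[features], df['Survived'], test_size=0.2, random_state=0)

clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)
print("Validation accuracy:", clf.score(X_val, y_val))

The point of the exercise is less the classifier itself and more the workflow: look at the data, handle missing values, turn raw columns into usable features, and only then train and validate a model.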


Good luck!

Thanks, I'll look into it. I honestly didn't know the difference between ML and DL before. DL is a subtopic of ML, of course. In my experience the Kaggle tutorials are great! By the way, I'd appreciate it if you could upvote my answer, or accept it, for the reputation ;)