Python softmax函数导数的计算

Python softmax函数导数的计算(标签:python、python-3.x、machine-learning、math、neural-network)

我试图理解神经网络的数学,一直在推导 softmax 函数的导数,并找到了一篇这样的解释(原文中的链接在转载时丢失)。下面是 cartpole 示例的代码:

# Third-party dependencies: numpy for the linear algebra, gym for CartPole.
import numpy as np
import gym
env = gym.make('CartPole-v0')
env = env.unwrapped          # bypass wrappers (e.g. the default episode time limit)
env.seed(1)                  # deterministic environment rollouts

max_episodes=1000            # number of training episodes to run
state_size = 4               # length of the CartPole observation vector
action_size = env.action_space.n   # number of discrete actions (2 for CartPole)



def relu(x, deriv):
    """ReLU activation or its elementwise derivative.

    Parameters
    ----------
    x : np.ndarray of pre-activations.
    deriv : bool — False returns max(0, x); True returns the derivative
        (1.0 where x > 0, else 0.0).

    Fix: the original deriv branch overwrote the caller's array in place
    (x[x<=0]=0 then x[x>0]=1), so `relu(l1, deriv=True)` inside `nn`
    destroyed the hidden activations `l1` before they were used in the
    `syn1 += l1.T.dot(...)` weight update.  A fresh array is returned
    instead, leaving the input untouched.
    """
    if deriv:
        # Derivative of ReLU: step function (0 at x == 0 by convention).
        return (x > 0).astype(float)
    return np.maximum(0, x)



def softmax(X, train):
    """Numerically stable softmax along the last axis.

    Works for a single 1-d logit vector and for a 2-d batch of logits
    (one distribution per row).

    Parameters
    ----------
    X : array-like of logits.
    train : kept for interface compatibility with existing callers; both
        paths now use the same correct computation.

    Fixes two defects in the original:
    - train branch took the max along axis=1 but summed along axis=0,
      producing wrongly normalized rows for 2-d input;
    - `np.exp(X, X)` and the in-place subtract/divide mutated the
      caller's array.  A new array is returned instead.
    """
    X = np.asarray(X, dtype=float)
    # Subtract the per-row max before exponentiating to avoid overflow.
    shifted = X - np.max(X, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

def softmax_grad(s):
    """Jacobian of the softmax for a single 1-d probability vector `s`.

    Returns diag(s) - s s^T, shape (n, n): entry (i, j) is
    s_i * (delta_ij - s_j).
    """
    # Column-vector view so the outer product below is a true matrix product.
    col = s.reshape(-1, 1)
    jac = np.diagflat(col) - col @ col.T
    return jac

np.random.seed(1)  # reproducible weight initialization

# Weight matrices drawn uniformly from [-1, 1):
# syn0: input layer (4) -> hidden layer (8); syn1: hidden (8) -> output (2).
syn0 = 2*np.random.random((4,8)) - 1
syn1 = 2*np.random.random((8,2)) - 1



def nn(x,y,train):
    """Two-layer policy network: state -> ReLU hidden -> softmax action probs.

    x: input state(s); y: one-hot target actions (only read when train=True);
    train: when True, performs one manual backprop step that mutates the
    global weight matrices syn0/syn1 in place and returns None; when False,
    returns the softmax output l2.
    """
    global syn1,syn0
    l0 = x
    l1 = relu(np.dot(l0,syn0),deriv=False) 
    # NOTE(review): softmax is called with train=False even on the training
    # path — presumably unintended; confirm against the softmax axis logic.
    l2 = softmax(np.dot(l1,syn1),train=False)
    if train:
        y=np.array(y)
        l2_error = y - l2  
        # Hard-coded reshape to (20, 1) assumes a fixed number of
        # (timesteps x actions) elements per episode — TODO confirm;
        # breaks for episodes of any other length.
        l2_error = l2_error.reshape(20,1)#<--- shape change for multiplication
        # softmax_grad expects a single 1-d distribution; feeding the whole
        # batch l2 is where the author reports the error.
        l2_delta = l2_error*softmax_grad(l2)#<--- ERROR
        l1_error = l2_delta.dot(syn1.T)
        # relu(..., deriv=True) mutates l1 in place, so the syn1 update below
        # sees the derivative mask rather than the activations — likely a bug.
        l1_delta = l1_error * relu(l1,deriv=True)
        syn1 += l1.T.dot(l2_delta)
        syn0 += l0.T.dot(l1_delta)
    else:
        return l2

# Training loop: roll out episodes, collect (state, action, reward) triples,
# then run one supervised update on the whole trajectory when the episode ends.
allRewards = []
total_rewards = 0
maximumRewardRecorded = 0
episode = 0
episode_states, episode_actions, episode_rewards = [], [], []

for episode in range(max_episodes):
    episode_rewards_sum = 0
    state = env.reset()
    env.render()
    while True:
        # Forward pass only; y is ignored when train=False.
        # Fix: the original passed y=_, and `_` was never defined,
        # raising NameError on the first iteration.
        action_probability_distribution = nn(x=state, y=None, train=False)
        # Re-normalize so the probabilities sum to exactly 1, as required
        # by np.random.choice's `p` argument.
        action_probability_distribution = (
            action_probability_distribution / sum(action_probability_distribution)
        )
        action = np.random.choice(
            range(len(action_probability_distribution)),
            p=action_probability_distribution.ravel(),
        )
        new_state, reward, done, info = env.step(action)

        episode_states.append(state)
        action_ = np.zeros(action_size)  # one-hot encode the chosen action
        action_[action] = 1
        episode_actions.append(action_)
        episode_rewards.append(reward)

        if done:
            episode_rewards_sum = np.sum(episode_rewards)
            allRewards.append(episode_rewards_sum)
            total_rewards = np.sum(allRewards)
            mean_reward = np.divide(total_rewards, episode + 1)
            maximumRewardRecorded = np.amax(allRewards)

            print("==========================================")
            print("Episode: ", episode)
            print("Reward: ", episode_rewards_sum)
            print("Mean Reward", mean_reward)
            print("Max reward so far: ", maximumRewardRecorded)

            # One training step on the full episode trajectory.
            episode_states = np.array(episode_states)
            nn(x=episode_states, y=episode_actions, train=True)
            episode_states, episode_actions, episode_rewards = [], [], []

            break

        state = new_state
(以下为上述代码清单的机器翻译残留,已按原代码恢复:)

import numpy as np
import gym
env = gym.make('CartPole-v0')
env = env.unwrapped
env.seed(1)

max_episodes=1000
state_size = 4
action_size = env.action_space.n

def relu(x,deriv):
    if deriv:
        x[x<=0] = 0
        x[x>0] = 1
        return x
    else:
        return np.maximum(0,x)

def softmax(X, train):
    if train:
        max_prob = np.max(X, axis=1).reshape((-1, 1))
        X -= max_prob
        np.exp(X, X)
        sum_prob = np.sum(X, axis=0).reshape((-1, 1))
        X /= sum_prob[0]
    else:
        max_prob = np.max(X, axis=0).reshape((-1, 1))
        X -= max_prob[0]
        np.exp(X, X)
        sum_prob = np.sum(X, axis=0).reshape((-1, 1))
        X /= sum_prob[0]
    return X

def softmax_grad(s):
    # 将 1-d softmax 重塑为 2-d,以便 np.dot 执行矩阵乘法
    s = s.reshape(-1,1)
    return np.diagflat(s) - np.dot(s, s.T)

np.random.seed(1)
syn0 = 2*np.random.random((4,8)) - 1
syn1 = 2*np.random.random((8,2)) - 1

def nn(x,y,train):
    global syn1,syn0
    l0 = x
    l1 = relu(np.dot(l0,syn0),deriv=False)
    l2 = softmax(np.dot(l1,syn1),train=False)
    if train:
        y=np.array(y)
        l2_error = y - l2
        l2_error = l2_error.reshape(20,1)

我发现了这个问题,它帮助了我。