Python Keras:ArgMax预测
我试图通过强化学习训练一个物体从A点移动到B点。我正在创建模型并用Python进行测试,但在完成后将其转移到Netlogo。我发现了这个网站:我正试图利用在那里找到的部分代码。对我来说最大的区别是我输入到模型中的变量只是整数而不是数组Python Keras:ArgMax预测,python,tensorflow,keras,model,reinforcement-learning,Python,Tensorflow,Keras,Model,Reinforcement Learning,我试图通过强化学习训练一个物体从A点移动到B点。我正在创建模型并用Python进行测试,但在完成后将其转移到Netlogo。我发现了这个网站:我正试图利用在那里找到的部分代码。对我来说最大的区别是我输入到模型中的变量只是整数而不是数组 class DQN: def __init__(self): self.memory = deque(maxlen=2000) self.gamma = 0.85 self.epsilon = 1.0
class DQN:
def __init__(self):
self.memory = deque(maxlen=2000)
self.gamma = 0.85
self.epsilon = 1.0
self.epsilon_min = 0.01
self.epsilon_decay = 0.995
self.learning_rate = 0.005
self.tau = .125
self.model = self.create_model()
self.target_model = self.create_model()
def create_model(self):
model = Sequential()
model.add(Dense(24, input_dim=1, activation='relu'))
model.add(Dense(48, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(4))
model.compile(loss='mean_squared_error', optimizer=Adam(lr=self.learning_rate))
return model
def act(self, state):
self.epsilon *= self.epsilon_decay
self.epsilon = max(self.epsilon_min, self.epsilon)
if np.random.random() < self.epsilon:
print('Grabbed a random act')
return randrange(0,4)
print('Grabbed a predicted act')
print(str(state))
#state = [25,25]
return np.argmax(self.model.predict(state)[0])
def remember(self, state, action, reward, new_state, done):
self.memory.append([state, action, reward, new_state, done])
def replay(self):
batch_size = 32
if len(self.memory) < batch_size:
return
samples = random.sample(self.memory, batch_size)
for sample in samples:
state, action, reward, new_state, done = sample
target = self.target_model.predict(state)
if done:
target[0][action] = reward
else:
Q_future = max(self.target_model.predict(new_state)[0])
target[0][action] = reward + Q_future * self.gamma
self.model.fit(state, target, epochs=1, verbose=0)
def target_train(self):
weights = self.model.get_weights()
target_weights = self.target_model.get_weights()
for i in range(len(target_weights)):
target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)
self.target_model.set_weights(target_weights)
def save_model(self, fn):
self.model.save(fn)
gamma = 0.9
epsilon = .95
dqn_agent = DQN()
steps = []
currentDistance = 10
nlReward = 25
nlDone = False
previousDistance = 25
while True:
action = dqn_agent.act(previousDistance)
dqn_agent.remember(previousDistance, action, nlReward, currentDistance, nlDone)
dqn_agent.replay()
dqn_agent.target_train()
DQN类:
定义初始化(自):
self.memory=deque(maxlen=2000)
self.gamma=0.85
self.epsilon=1.0
self.epsilon\u min=0.01
自ε衰变=0.995
自学习率=0.005
self.tau=.125
self.model=self.create_model()
self.target\u model=self.create\u model()
def创建_模型(自):
模型=顺序()
添加(密集(24,输入尺寸=1,激活=relu'))
model.add(密集(48,activation='relu'))
model.add(密集(24,activation='relu'))
模型.添加(密度(4))
compile(loss='mean_squared_error',optimizer=Adam(lr=self.learning_rate))
回归模型
def法案(自身、州):
self.epsilon*=self.epsilon\u衰变
self.epsilon=max(self.epsilon\u min,self.epsilon)
如果np.random.random()
我正处于模型的初始测试阶段,遇到了返回np.argmax(self.model.predict(state)[0])的问题。给了我一个列表索引超出范围的错误。经过一些测试,它看起来像是在寻找一个长度为2的数组,但我不知道为什么。我要传递的所有值,currentDistance
,previousDistance
,nlReward
和nlDone
都是单整数