Python: How do I create a custom OpenAI Gym environment with different actions in each state and run an agent from keras-rl?


I'm trying to build a custom OpenAI Gym environment for a university project. I created a custom environment that works with Q-learning, but I'm having trouble training an agent from keras-rl (DQNAgent) on it. After some research and trying many things, including writing my own agent, I concluded that the main problem is that each state has a different set of actions, which makes my environment incompatible with the agent.

My idea is to make the action space contain all possible actions and "punish" the agent with a negative reward whenever an invalid action is chosen, as sketched just below. But is there a better solution?
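For what it's worth, here is a rough sketch of what I mean, written against the step() method of the environment shown further down. The fixed action-space size and the -100 penalty are placeholders I made up, not values I have tested:

# Sketch only: a step() variant for a fixed action space,
# where the -100 penalty for invalid actions is a placeholder value.
def step(self, action):
  valid_actions = self.state.nextStates
  if action >= len(valid_actions):
    # the chosen index does not exist in this state: stay put and punish
    state = getState_number(self.state.state, self.middle_states)
    return (state, -100, False, "invalid action")
  next_node, reward = valid_actions[action]
  self.state = next_node
  state = getState_number(self.state.state, self.middle_states)
  done = (self.state.state == "Sn")
  return (state, reward, done, "all good")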

This is the environment I have to implement.

Note that the number of intermediate states (Sa1, Sa2, ...) is not fixed; there can be up to n intermediate states.

The custom environment I created looks like this:

import networkx as nx
import matplotlib.pyplot as plt
import random


#classes for the linked list
class Node:
  def __init__(self,state):
    self.state = state
    self.nextStates = []

class LinkedList:
  def __init__(self):
    self.head = None

def getState_number(s,n): # s = state name, n = number of middle states; returns the index of the state
    if s=="Sn":
      return 0
    elif s=="Sd":
      return 1
    elif s=="Se":
      return n+2
    else:
      return int(s[2:])+1

class RL_Module():

  def __init__(self, states, cost):

    #creating a networkx Graph for visualization
    self.G = nx.Graph()
    self.G.add_edges_from([("Sn","Sd"),("Se","Sn"), ("Sd","Se")])
    self.nodes = ["Sn","Sd","Se"]
    self.edges = [("Sn","Sd"),("Se","Sn"), ("Sd","Se")]

    #Creating linked list
    self.start = LinkedList()

    #Creating our 3 constant states
    self.start.head = Node("Sn") #Stable state
    self.failure = Node("Se")   #State of failure
    self.decision = Node("Sd") #State to take a decision
    self.state = self.start.head
    self.reward = 0

    #Number of middle states
    self.middle_states = len(states)

    self.start.head.nextStates.append((self.decision,0))
    self.decision.nextStates.append((self.failure,0))
    self.failure.nextStates.append((self.start.head,cost))

    #appending middle states
    i = 1
    for s in states: # states = [...] -> Si = (reward for Sd->Si, reward for Si->Se, reward for Si->Sn)
      name = "Sa"+ str(i)
      temp = Node(name)
      temp.nextStates.append((self.failure,s[1]))
      temp.nextStates.append((self.start.head,s[2]))    
      self.decision.nextStates.append((temp,s[0]))
      i = i + 1
      #Graph
      self.nodes.append(name)
      self.G.add_edges_from([("Se",name), ("Sd",name), ("Sn",name)])
      self.edges.extend([("Se",name), ("Sd",name),("Sn",name)])

  def step(self,action):
    # follow the chosen transition from the current state
    next_node, reward = self.state.nextStates[action]
    self.state = next_node
    state = getState_number(self.state.state, self.middle_states)
    done = self.state.state == "Sn" # back at the stable state -> episode ends
    return (state, reward, done, "all good")

  def action_space_n(self): # indices of the actions available in the current state
    return list(range(len(self.state.nextStates)))

  def action_space(self): # (next node, reward) pairs available in the current state
    return self.state.nextStates

  def reset(self):
    self.state = self.start.head
    return 0

  # helper functions (not needed by the agent)
  
  def render(self): #plots the Graph
    G = self.G
    pos = nx.circular_layout(G) 
    nx.draw_networkx_nodes(G, pos, node_size=700, nodelist=self.nodes)
    nx.draw_networkx_labels(G, pos, font_size=20, font_family='sans-serif')
    nx.draw_networkx_edges(G, pos, edgelist=self.edges,width=6)

    nx.draw_networkx_nodes(G, pos, node_size=1400, nodelist=[self.state.state], node_color='red')
    nx.draw_networkx_edge_labels(G,pos,edge_labels={('Se','Sn'):'Hi'},font_color='red')

    plt.axis('off')
    plt.show()

  def plot_frame(self): #returns the plotted Graph
    G = self.G
    pos = nx.circular_layout(G) 
    nx.draw_networkx_nodes(G, pos, node_size=700, nodelist=self.nodes)
    nx.draw_networkx_labels(G, pos, font_size=20, font_family='sans-serif')
    nx.draw_networkx_edges(G, pos, edgelist=self.edges,width=6)

    nx.draw_networkx_nodes(G, pos, node_size=1400, nodelist=[self.state.state], node_color='red')
    nx.draw_networkx_edge_labels(G,pos,edge_labels={('Se','Sn'):'Hi'},font_color='red')

    plt.axis('off')
    return plt

  def random_step(self): #Random step
    s = self.state
    print(getState_number(s.state,self.middle_states))
    next = random.choice(s.nextStates)
    print("s:",s)
    print("next:",next)
    s_next = next[0]
    # self.reward += next[1]
    reward = next[1]
    self.state = s_next
    state=getState_number(self.state.state,self.middle_states)
    print(getState_number(s_next.state,self.middle_states))
    return (state,reward,False,"all good")
networkx is only used to visualize the environment.

You can run the environment as follows:

model = RL_Module([(-1,0,0),(-3,0,0),(-5,0,0)],-10)  # [(-1,0,0),(-3,0,0),(-5,0,0)] are the middle states
model.render()
step = model.random_step()
#or .step() given a valid action index, e.g.:
model.step(0)
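In case it helps to see what keras-rl expects: DQNAgent wants a gym.Env-style object with an action_space and an observation_space. Below is a minimal sketch of how RL_Module could be wrapped; the class name GraphEnv, the fixed action-space size, and the -100 penalty for invalid actions are my own assumptions, not part of the environment above.

import gym
from gym import spaces

class GraphEnv(gym.Env):  # hypothetical wrapper, not part of the original code
  def __init__(self, states, cost):
    super().__init__()
    self.module = RL_Module(states, cost)
    # fixed action space: the largest number of transitions any state offers
    self.max_actions = max(len(self.module.decision.nextStates), 2)
    self.action_space = spaces.Discrete(self.max_actions)
    # one discrete observation per state: Sn, Sd, Sa1..San, Se
    self.observation_space = spaces.Discrete(self.module.middle_states + 3)

  def step(self, action):
    valid = self.module.state.nextStates
    if action >= len(valid):
      # invalid action in the current state: penalize and stay (placeholder penalty)
      obs = getState_number(self.module.state.state, self.module.middle_states)
      return obs, -100, False, {}
    obs, reward, done, _ = self.module.step(action)
    return obs, reward, done, {}

  def reset(self):
    return self.module.reset()

And a rough sketch of training a DQNAgent on that wrapper (assuming keras-rl2 with tf.keras; the layer sizes and hyperparameters are placeholders, not recommendations):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

env = GraphEnv([(-1, 0, 0), (-3, 0, 0), (-5, 0, 0)], -10)
nb_actions = env.action_space.n

# small feed-forward Q-network over the single integer observation
model = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(nb_actions, activation='linear'),
])

dqn = DQNAgent(model=model, nb_actions=nb_actions,
               memory=SequentialMemory(limit=10000, window_length=1),
               policy=EpsGreedyQPolicy(), nb_steps_warmup=100)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=5000, visualize=False, verbose=1)

I don't know whether this penalty trick is actually better than proper action masking, which is part of what I'm asking.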
As I said, I'm new to RL, so any comments on my code, or links that would help me understand things better, would be greatly appreciated! Thank you all!