Python 3.x &引用;发生异常:AttributeError';列表';对象没有属性';获取'&引用;当迁移到稳定的_基线3时

Python 3.x &引用;发生异常:AttributeError';列表';对象没有属性';获取'&引用;当迁移到稳定的_基线3时,python-3.x,stable-baselines,Python 3.x,Stable Baselines,我已经在稳定的基础上进行了试验,效果不错,并且一直想在稳定的基础上进行试验3 我正在使用A2C模型,来训练一个股票环境。所使用的自定义环境目前正在使用稳定的_基线。我看到了训练过程中的不稳定性,并希望迁移到稳定的_基线3以防万一 def train_A2C(env_train, model_name,timesteps = 50000, i=0 ): start = time.time() # policy_kwargs = dict(net_arch=[128, 128])

我已经在稳定的基础上进行了试验,效果不错,并且一直想在稳定的基础上进行试验3

我正在使用A2C模型,来训练一个股票环境。所使用的自定义环境目前正在使用稳定的_基线。我看到了训练过程中的不稳定性,并希望迁移到稳定的_基线3以防万一

def train_A2C(env_train, model_name,timesteps = 50000, i=0 ):
    start = time.time()
    # policy_kwargs = dict(net_arch=[128, 128])
    policy_kwargs=dict(optimizer_class=RMSpropTFLike)
    model = A2C(MlpPolicy,env_train,verbose = 1,tensorboard_log='./tensorboard/tensorboard_A2C/', 
            learning_rate =0.0001, vf_coef = 0.05, ent_coef = 0.005, policy_kwargs=policy_kwargs)
        model.learn(total_timesteps = timesteps,tb_log_name = f"A2C_{i}")
    end = time.time()

    model.save(f'{config.TRAINED_MODEL_DIR}/{model_name}')
    print(f'Training Time A2C : ', (end - start) /60, ' minutes')
    return model
自定义环境如下所示。调试时,错误会在环境中的第一个“步骤”之后立即弹出。可能是因为退货问题。我确实将状态的返回类型更改为np.array,但问题没有得到解决

STOCK_DIM = 422 # INITIAL_BALANCE, Amount_of_shares,  60 days * (Norm_close, 1m/2m/3m/1y ret, macd,rsi) 
INITIAL_ACCOUNT_BALANCE = 1000000
TRANSACTION_FEE_PERCENT = 0.01
REWARD_SCALING = 1e-5
HMAX_NORMALIZE = 100
N = config.PREV_DATA_POINTS


class StockEnv_Train(gym.Env):
    def __init__(self,df,day = 374):
        # day set to 260 since we need to account for the annual returns that only start around 252 days,
        # macd only starts from 254
        # we add 63 days to this to account for the historical data
        self.day = day
        self.df = df
        self.action_space = spaces.Box(low = -1,high = 1,shape =(1,) )
        self.observation_space = spaces.Box(low = 0, high = np.inf, shape = (STOCK_DIM,))
        self.terminal = False

        self.state = [INITIAL_ACCOUNT_BALANCE] + [0] + self.df.iloc[self.day-N:self.day,1].tolist() + \
                        self.df.iloc[self.day-N:self.day,2].tolist() + self.df.iloc[self.day-N:self.day,3].tolist() + \
                        self.df.iloc[self.day-N:self.day,4].tolist() + self.df.iloc[self.day-N:self.day,5].tolist() + \
                        self.df.iloc[self.day-N:self.day,6].tolist() + self.df.iloc[self.day-N:self.day,7].tolist()

        self.reward = 0
        self.asset_memory = [INITIAL_ACCOUNT_BALANCE]
        self.cost = 0
        self.rewards_memory = []
        self.trades = 0

        self._seed()

    
    def _sell_stock(self,action):
        if self.state[1] > 0:
            self.state[0] += (self.df.iloc[self.day,-1]*min(abs(action),self.state[1]) * \
                            (1- TRANSACTION_FEE_PERCENT)).item()
            self.cost +=self.df.iloc[self.day,-1]*min(abs(action),self.state[1]) * \
                            TRANSACTION_FEE_PERCENT
            self.state[1] -= (min(abs(action), self.state[1]))
            self.trades+=1
        else:
            pass

    def _buy_stock(self,action):
        # perform buy action based on the sign of the action
        available_amount = self.state[0] // self.df.iloc[self.day,-1]
        # print('available_amount:{}'.format(available_amount))

        #update balance
        self.state[0] -= (self.df.iloc[self.day,-1]*min(available_amount, action)* \
                          (1+ TRANSACTION_FEE_PERCENT)).item()

        self.state[1] += (min(available_amount, action))

        self.cost+=self.df.iloc[self.day,-1]*min(available_amount, action)* \
                          TRANSACTION_FEE_PERCENT
        self.trades+=1

    def step(self,action):
        self.terminal = self.day >= len(self.df.Date.unique()) - 1

        if self.terminal:
            plt.plot(self.asset_memory,'r')
            plt.savefig('results/account_value_train.png')
            plt.close()

            df_total_value = pd.DataFrame(self.asset_memory)
            df_total_value.to_csv('results/account_value_train.csv')

            df_total_value.columns = ['account_value']
            df_total_value['daily_return'] = df_total_value.pct_change(1)
            
            df_rewards = pd.DataFrame(self.rewards_memory)
            
            return self.state, self.reward*REWARD_SCALING,self.terminal, {}

        else:
            # print(np.array(self.state[1:29]))

            action = action * HMAX_NORMALIZE
            #actions = (actions.astype(int))
            
            begin_total_asset = self.state[0]+ self.state[1]*self.df.iloc[self.day,-1]
            
            if action<0:
                self._sell_stock( action)
            else:
                self._buy_stock(action)

            self.day += 1        
            #load next state
            # print("stock_shares:{}".format(self.state[29:]))

            self.state = [self.state[0]] + [self.state[1]] + self.df.iloc[self.day-N:self.day,1].tolist() + \
                        self.df.iloc[self.day-N:self.day,2].tolist() + self.df.iloc[self.day-N:self.day,3].tolist() + \
                        self.df.iloc[self.day-N:self.day,4].tolist() + self.df.iloc[self.day-N:self.day,5].tolist() + \
                        self.df.iloc[self.day-N:self.day,6].tolist() + self.df.iloc[self.day-N:self.day,7].tolist()
            
            end_total_asset = self.state[0]+ self.state[1]*self.df.iloc[self.day,-1]

            try:
                self.asset_memory.append(end_total_asset.item())
            except:
                self.asset_memory.append(end_total_asset)

            #print("end_total_asset:{}".format(end_total_asset))
            
            self.reward = end_total_asset - begin_total_asset            
            self.rewards_memory.append(self.reward)
            self.reward = self.reward*REWARD_SCALING
            
        return np.array([self.state]),np.array([self.reward]),self.terminal,{}

    
    def reset(self):
        self.asset_memory = [INITIAL_ACCOUNT_BALANCE]
        self.day = 374
        self.cost = 0
        self.trades = 0
        self.terminal = False
        self.rewards_memory = []

        self.state = [INITIAL_ACCOUNT_BALANCE] + [0] + self.df.iloc[self.day-N:self.day,1].tolist() + \
                        self.df.iloc[self.day-N:self.day,2].tolist() + self.df.iloc[self.day-N:self.day,3].tolist() + \
                        self.df.iloc[self.day-N:self.day,4].tolist() + self.df.iloc[self.day-N:self.day,5].tolist() + \
                        self.df.iloc[self.day-N:self.day,6].tolist() + self.df.iloc[self.day-N:self.day,7].tolist()
        return self.state

    def render(self,mode='human'):
        return self.state

    def _seed(self,seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

STOCK DIM=422#初始余额,股份金额,60天*(正常收盘,100万/200万/300万/年回购,macd,rsi)
初始账户余额=1000000
交易费用百分比=0.01
奖励比例=1e-5
HMAX_NORMALIZE=100
N=config.PREV_数据_点
班级StockEnv_Train(gym.Env):
定义初始(自我,df,日=374):
#天数设置为260,因为我们需要考虑仅从252天左右开始的年度回报,
#macd仅从254开始
#我们在此基础上增加了63天来解释历史数据
self.day=天
self.df=df
self.action_space=spaces.Box(低=1,高=1,形状=(1,))
self.observation\u space=spaces.Box(低=0,高=np.inf,形状=(库存尺寸,)
self.terminal=False
self.state=[INITIAL\u ACCOUNT\u BALANCE]+[0]+self.df.iloc[self.day-N:self.day,1].tolist()+\
self.df.iloc[self.day-N:self.day,2].tolist()+self.df.iloc[self.day-N:self.day,3].tolist()+\
self.df.iloc[self.day-N:self.day,4].tolist()+self.df.iloc[self.day-N:self.day,5].tolist()+\
self.df.iloc[self.day-N:self.day,6].tolist()+self.df.iloc[self.day-N:self.day,7].tolist()
自我奖励=0
self.asset\u memory=[初始账户\u余额]
自身成本=0
self.rewards_memory=[]
self.trades=0
self._seed()
def_出售_股票(自我、行动):
如果self.state[1]>0:
self.state[0]+=(self.df.iloc[self.day,-1]*min(abs(动作),self.state[1])*\
(1-交易费用(百分比))。项目()
self.cost+=self.df.iloc[self.day,-1]*min(abs(动作),self.state[1])*\
交易费用百分比
自状态[1]-=(最小值(abs(动作),自状态[1]))
自我交易+=1
其他:
通过
定义购买股票(自我、行动):
#根据操作的符号执行购买操作
可用金额=self.state[0]//self.df.iloc[self.day,-1]
#打印('可用金额:{}'。格式(可用金额))
#更新余额
self.state[0]-=(self.df.iloc[self.day,-1]*min(可用量,动作)*\
(1+交易费(百分比))。项目()
自身状态[1]+=(最小值(可用金额,动作))
self.cost+=self.df.iloc[self.day,-1]*分钟(可用金额,行动)*\
交易费用百分比
自我交易+=1
def步骤(自我、行动):
self.terminal=self.day>=len(self.df.Date.unique())-1
如果是自助终端:
plt.plot(自资源_内存,'r')
plt.savefig('results/account\u value\u train.png'))
plt.close()
df_total_value=pd.DataFrame(self.asset_内存)
df_总值到csv('results/account_value_train.csv'))
df_total_value.columns=['account_value']
df_总值['daily_return']=df_总值。pct_变化(1)
df_奖励=pd.DataFrame(自我奖励_内存)
返回self.state,self.return*return\u SCALING,self.terminal,{}
其他:
#打印(np.array(self.state[1:29]))
动作=动作*HMAX\u正常化
#actions=(actions.astype(int))
begin\u total\u asset=self.state[0]+self.state[1]*self.df.iloc[self.day,-1]
如果行动