Python 蒙特卡罗树搜索随机选择

Python 蒙特卡罗树搜索随机选择(标签:python、tree、poker、monte-carlo-tree-search)。我的 IS-MCTS 实现总是选择 all-in,我不知道为什么。也许你们能帮我。我已经尝试把节点中保存的值从胜场数(wins)改为价值(value,即赢得的筹码数),但结果同样糟糕:该算法甚至会输给随机玩家和只会跟注的玩家。是 mcts 方法有问题吗?如果不是,那可能是 ucb1 方法或 Node 类。相关代码见下文。

我的IS-MCTS实现总是选择allin,我不知道为什么。也许你们能帮我

我已经尝试将节点中保存的值从wins更改为value,这意味着赚取的芯片数量,但结果也很糟糕。该算法甚至输给了一个随机玩家和唯一的调用玩家

mcts方法有什么问题吗?如果不是,则可能是ucb1方法或“节点”类

我猜一定是 Node 类中选择了错误的子节点(child node)

import math
import random
class Node:
    """Tree node for (Information Set) Monte Carlo Tree Search.

    Each node records the action that led to it, the player who took
    that action, the accumulated reward (``wins``) and the visit count
    used by UCB1-based child selection.
    """

    def __init__(self, action=None, parent=None, acted_player=None):
        self.action = action              # action that led to this node (None for root)
        self.parent_node = parent         # None for the root node
        self.child_nodes = []
        self.wins = 0                     # accumulated reward for acted_player
        self.visits = 0                   # number of times this node was traversed
        self.acted_player = acted_player  # player who performed `action`

    def get_untried_actions(self, valid_actions):
        """Return the subset of valid_actions that has no child node yet."""
        tried_actions = [child.action for child in self.child_nodes]
        return [action for action in valid_actions if action not in tried_actions]

    def select_child(self, exploration=0.7):
        """Return the child with the highest UCB1 score."""
        return max(self.child_nodes,
                   key=lambda node: node.calc_ucb1_score(exploration))

    def add_child(self, a, p):
        """Create, attach and return a child node for action `a` taken by player `p`."""
        n = Node(action=a, parent=self, acted_player=p)
        self.child_nodes.append(n)
        return n

    def update(self, terminal_state):
        """Backpropagation step: count one visit and add this player's reward.

        `terminal_state` must expose get_result(player); the root has no
        acted_player, so only its visit count is updated.
        """
        self.visits += 1
        if self.acted_player is not None:
            self.wins += terminal_state.get_result(self.acted_player)

    def calc_ucb1_score(self, exploration):
        """UCB1 value of this node, as seen from its parent.

        FIX 1: `sqrt`/`log` were called unqualified although only the
        `math` module is imported, raising NameError; now qualified.
        FIX 2: unvisited nodes scored 0, so they could never win max()
        in select_child() once any sibling had a positive average;
        standard UCB1 gives unvisited nodes infinite priority instead.
        """
        if self.visits == 0:
            return float('inf')
        return self._calc_avg_wins() + exploration * math.sqrt(
            2 * math.log(self.parent_node.visits) / float(self.visits))

    def _calc_avg_wins(self):
        """Mean reward per visit; 0.0 for an unvisited node."""
        if self.visits == 0:
            return 0.0
        return float(self.wins) / float(self.visits)

评论:你有没有导入 random 模块?—— @FrasherGray:我导入了 random 和 math。
import math
import random
class Node:
    """MCTS search-tree node (duplicate listing from the question).

    Stores the edge action, the player who acted, and the reward/visit
    statistics that drive UCB1 child selection.
    """

    def __init__(self, action=None, parent=None, acted_player=None):
        self.action = action              # action leading to this node
        self.parent_node = parent         # None at the root
        self.child_nodes = []
        self.wins = 0                     # total reward for acted_player
        self.visits = 0                   # traversal count
        self.acted_player = acted_player  # player who took `action`

    def get_untried_actions(self, valid_actions):
        """Actions from valid_actions not yet expanded into children."""
        tried_actions = [child.action for child in self.child_nodes]
        return [action for action in valid_actions if action not in tried_actions]

    def select_child(self, exploration=0.7):
        """Pick the child maximizing the UCB1 score."""
        return max(self.child_nodes,
                   key=lambda node: node.calc_ucb1_score(exploration))

    def add_child(self, a, p):
        """Append and return a new child for action `a` by player `p`."""
        n = Node(action=a, parent=self, acted_player=p)
        self.child_nodes.append(n)
        return n

    def update(self, terminal_state):
        """Record one visit and accumulate terminal_state.get_result()."""
        self.visits += 1
        if self.acted_player is not None:
            self.wins += terminal_state.get_result(self.acted_player)

    def calc_ucb1_score(self, exploration):
        """UCB1 score used by the parent's select_child().

        FIX 1: qualify sqrt/log as math.sqrt/math.log (only `math` is
        imported; the bare names raised NameError).
        FIX 2: return +inf for unvisited nodes so they are always
        explored first, per standard UCB1 — returning 0 starved them.
        """
        if self.visits == 0:
            return float('inf')
        return self._calc_avg_wins() + exploration * math.sqrt(
            2 * math.log(self.parent_node.visits) / float(self.visits))

    def _calc_avg_wins(self):
        """Average reward per visit (0.0 when never visited)."""
        if self.visits == 0:
            return 0.0
        return float(self.wins) / float(self.visits)