Python 蒙特卡罗树搜索随机选择
我的IS-MCTS实现总是选择allin,我不知道为什么。也许你们能帮我 我已经尝试将节点中保存的值从wins更改为value,这意味着赚取的芯片数量,但结果也很糟糕。该算法甚至输给了一个随机玩家和唯一的调用玩家 mcts方法有什么问题吗?如果不是,则可能是ucb1方法或“节点”类 我猜它一定是node类中选择了错误的子类Python 蒙特卡罗树搜索随机选择,python,tree,poker,monte-carlo-tree-search,Python,Tree,Poker,Monte Carlo Tree Search,我的IS-MCTS实现总是选择allin,我不知道为什么。也许你们能帮我 我已经尝试将节点中保存的值从wins更改为value,这意味着赚取的芯片数量,但结果也很糟糕。该算法甚至输给了一个随机玩家和唯一的调用玩家 mcts方法有什么问题吗?如果不是,则可能是ucb1方法或“节点”类 我猜它一定是node类中选择了错误的子类 import math import random class Node: def __init__(self, action=None, parent=None, act
import math
import random
class Node:
def __init__(self, action=None, parent=None, acted_player=None):
self.action = action
self.parent_node = parent
self.child_nodes = []
self.wins = 0
self.visits = 0
self.acted_player = acted_player
def get_untried_actions(self, valid_actions):
tried_actions = [child.action for child in self.child_nodes]
return [action for action in valid_actions if action not in tried_actions]
def select_child(self, exploration=0.7):
# Get the child with the highest UCB score
c = max(self.child_nodes, key=lambda node: node.calc_ucb1_score(exploration))
return c
def add_child(self, a, p):
n = Node(action=a, parent=self, acted_player=p)
self.child_nodes.append(n)
return n
def update(self, terminal_state):
self.visits += 1
if self.acted_player is not None:
self.wins += terminal_state.get_result(self.acted_player)
def calc_ucb1_score(self, exploration):
if self.visits == 0:
return 0
else:
return self._calc_avg_wins() + exploration * sqrt(2 * log(self.parent_node.visits) / float(self.visits))
def _calc_avg_wins(self):
if self.wins == 0:
return 0.0
elif self.visits == 0:
return 0.0
else:
return float(self.wins) / float(self.visits)
你在导入随机数吗?@FrasherGray我在导入随机数和数学
import math
import random
class Node:
def __init__(self, action=None, parent=None, acted_player=None):
self.action = action
self.parent_node = parent
self.child_nodes = []
self.wins = 0
self.visits = 0
self.acted_player = acted_player
def get_untried_actions(self, valid_actions):
tried_actions = [child.action for child in self.child_nodes]
return [action for action in valid_actions if action not in tried_actions]
def select_child(self, exploration=0.7):
# Get the child with the highest UCB score
c = max(self.child_nodes, key=lambda node: node.calc_ucb1_score(exploration))
return c
def add_child(self, a, p):
n = Node(action=a, parent=self, acted_player=p)
self.child_nodes.append(n)
return n
def update(self, terminal_state):
self.visits += 1
if self.acted_player is not None:
self.wins += terminal_state.get_result(self.acted_player)
def calc_ucb1_score(self, exploration):
if self.visits == 0:
return 0
else:
return self._calc_avg_wins() + exploration * sqrt(2 * log(self.parent_node.visits) / float(self.visits))
def _calc_avg_wins(self):
if self.wins == 0:
return 0.0
elif self.visits == 0:
return 0.0
else:
return float(self.wins) / float(self.visits)