Java Alpha-beta修剪没有产生好的结果 -------- 实际问题 --------
好吧,真正的问题不是alpha-beta剪枝和minimax算法。问题是,当在树中时,minimax算法将只给出最佳解决方案,而alpha-beta将给出正确的值,但多个子项具有该最佳值,其中一些子项不应具有该值 我想最终的问题是,在根节点的tie子节点的情况下,获得最佳结果的最有效方法可能是多个 该算法生成正确的值,但多个节点与该值关联,即使某些移动明显错误 例如: 滴答声Java Alpha-beta修剪没有产生好的结果 -------- 实际问题 --------,java,algorithm,performance,graph-algorithm,alpha-beta-pruning,Java,Algorithm,Performance,Graph Algorithm,Alpha Beta Pruning,好吧,真正的问题不是alpha-beta剪枝和minimax算法。问题是,当在树中时,minimax算法将只给出最佳解决方案,而alpha-beta将给出正确的值,但多个子项具有该最佳值,其中一些子项不应具有该值 我想最终的问题是,在根节点的tie子节点的情况下,获得最佳结果的最有效方法可能是多个 该算法生成正确的值,但多个节点与该值关联,即使某些移动明显错误 例如: 滴答声 -|-|O -|X|- -|X|- 将生成以下值: 0,1和1,0,其值为-0.06 0,1是正确的值,因为它将阻止我
-|-|O
-|X|-
-|X|-
将生成以下值:
0,1和1,0,其值为-0.06
0,1是正确的值,因为它将阻止我的X,但0,1是错误的,因为下一步我可以将X设置为0,1并获胜
当我运行相同的算法时,没有
if(beta<=alpha)
break;
我想你可能误解了修剪 AB修剪应该会给你和MinMax一样的结果,这只是一种不走下某些树枝的方法,因为你知道这样做会比你检查的另一个更糟糕,当你有巨大的树时,这会有帮助
此外,如果不使用启发式并切断搜索,MinMax将始终是不可抵抗的,因为您已经计算了到达每个终止状态的每个可能路径。所以我本以为AB修剪和MinMax都是无敌的,所以我认为你的AB修剪有问题。如果你的最小-最大值是不可抵抗的,那么你的方法也应该使用AB修剪。我肯定我的AB修剪有问题,但我很难弄清楚那是什么。你能解释一下我的实现有什么问题吗?这并不能回答我的问题。我意识到AB修剪应该得到与MinMax相同的结果,这只是切割更差的分支。我知道到达每一条可能的终止状态的路径都是不可抵抗的。仅供参考,我确实使用了一种启发式方法,尤其是在checkers实现中,它可能不是最好的启发式方法,但它仍然是一种启发式方法。我真正的问题是我的方法有什么问题?我很确定我的错误在于如何获得最佳价值。我还不确定如何修复它,或者到底是什么问题,但我仍在努力解决它。我遇到了与您相同的关于坏节点绑定最佳正确值的问题。你解决问题了吗?没有,我只是放弃了alpha-beta,坚持使用minimax
if(beta<=alpha) break;
private static double minimax(Node<Integer,Integer> parent, int player, final int[][] board, double alpha, double beta, int depth) {
List<Pair<Integer, Integer>> moves = getAvailableMoves(board);
int bs = getBoardScore(board);
if (moves.isEmpty() || Math.abs(bs) == board.length)//leaf node
return bs+(player==X?-1:1)*depth/10.;
double bestVal = player == X ? -Integer.MAX_VALUE : Integer.MAX_VALUE;
for(Pair<Integer, Integer> s : moves){
int[][] b = clone(board);
b[s.getFirst()][s.getSecond()]=player;
Node<Integer, Integer> n = new Node<>(bs,b.hashCode());
parent.getChildren().add(n);
n.setParent(parent);
double score = minimax(n,player==O?X:O,b,alpha,beta, depth+1);
n.getValues().put("score",score);
n.getValues().put("pair",s);
if(player == X) {
bestVal = Math.max(bestVal, score);
alpha = Math.max(alpha,bestVal);
} else {
bestVal = Math.min(bestVal, score);
beta = Math.min(beta,bestVal);
}
/*
If i comment these two lines out it works as expected
if(beta<= alpha)
break;
*/
}
return bestVal;
}
private double alphaBeta(BitCheckers checkers, int depth, int absDepth, double alpha, double beta){
if(checkers.movesWithoutAnything >= 40)
return 0;//tie game//needs testing
if(depth == 0 || checkers.getVictoryState() != INVALID)
return checkers.getVictoryState()==INVALID?checkers.getBoardScore()-checkers.getPlayer()*moves/100.:
checkers.getPlayer() == checkers.getVictoryState() ? Double.MAX_VALUE*checkers.getPlayer():
-Double.MAX_VALUE*checkers.getPlayer();
List<Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>> moves;
if(absDepth == maxDepth)
moves = (List<Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>>) node.getValues().get("moves");
else
moves = checkers.getAllPlayerMoves();
if(moves.isEmpty()) //no moves left? then this player loses
return checkers.getPlayer() * -Double.MAX_VALUE;
double v = checkers.getPlayer() == WHITE ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
for(Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> i : moves){
BitCheckers c = checkers.clone();
c.movePiece(i.getFirst().getFirst(),i.getFirst().getSecond(),i.getSecond().getFirst(),i.getSecond().getSecond());
int newDepth = c.getPlayer() == checkers.getPlayer() ? depth : depth - 1;
if(checkers.getPlayer() == WHITE) {
v = Math.max(v, alphaBeta(c, newDepth, absDepth - 1, alpha, beta));
alpha = Math.max(alpha,v);
}else {
v = Math.min(v, alphaBeta(c, newDepth, absDepth - 1, alpha, beta));
beta = Math.min(beta,v);
}
if(absDepth == maxDepth) {
double finalScore = v;
for(Node n : node.getChildren())
if(n.getData().equals(i)){
n.setValue(finalScore);
break;
}
}
/*
If i comment these two lines out it works as expected
if(beta<= alpha)
break;
*/
}
return v;
}
public double pvs(BitCheckers checkers, int depth, int absDepth, double alpha, double beta){
if(checkers.movesWithoutAnything >= 40)
return 0;//tie game//needs testing
if(depth == 0 || checkers.getVictoryState() != INVALID)
return checkers.getVictoryState()==INVALID?checkers.getBoardScore()-checkers.getPlayer()*moves/100.:
checkers.getPlayer() == checkers.getVictoryState() ? Double.MAX_VALUE*checkers.getPlayer():
-Double.MAX_VALUE*checkers.getPlayer();
List<Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>> moves;
if(absDepth == maxDepth)
moves = (List<Pair<Pair<Integer, Integer>, Pair<Integer, Integer>>>) node.getValues().get("moves");
else
moves = checkers.getAllPlayerMoves();
if(moves.isEmpty()) //no moves left? then this player loses
return checkers.getPlayer() * -Double.MAX_VALUE;
int j = 0;
double score;
for(Pair<Pair<Integer, Integer>, Pair<Integer, Integer>> i : moves){
BitCheckers c = checkers.clone();
c.movePiece(i.getFirst().getFirst(),i.getFirst().getSecond(),i.getSecond().getFirst(),i.getSecond().getSecond());
int newDepth = c.getPlayer() == checkers.getPlayer() ? depth : depth - 1;
double sign = c.getPlayer() == checkers.getPlayer()? -1 : 1;
if(j++==0)
score = -pvs(c,newDepth,absDepth-1,sign*-beta,sign*-alpha);
else {
score = -pvs(c,newDepth, absDepth-1,sign*-(alpha+1),sign*-alpha);
if(alpha<score || score<beta)
score = -pvs(c,newDepth,absDepth-1,sign*-beta,sign*-score);
}
if(absDepth == maxDepth) {
double finalScore = score;
for(Node n : node.getChildren())
if(n.getData().equals(i)){
n.setValue(finalScore);
break;
}
}
alpha = Math.max(alpha,score);
if(alpha>=beta)
break;
}
return alpha;
}
public class MinimaxAlphaBetaTest {
public static void main(String[] args) {
Node<Double,Double> parent = new Node<>(0.,0.);
int depth = 10;
createTree(parent,depth);
Timer t = new Timer().start();
double ab = alphabeta(parent,depth+1,Double.NEGATIVE_INFINITY,Double.POSITIVE_INFINITY,true);
t.stop();
System.out.println("Alpha Beta: "+ab+", time: "+t.getTime());
t = new Timer().start();
double mm = minimax(parent,depth+1,true);
t.stop();
System.out.println("Minimax: "+mm+", time: "+t.getTime());
t = new Timer().start();
double pv = pvs(parent,depth+1,Double.NEGATIVE_INFINITY,Double.POSITIVE_INFINITY,1);
t.stop();
System.out.println("PVS: "+pv+", time: "+t.getTime());
if(ab != mm)
System.out.println(ab+"!="+mm);
}
public static void createTree(Node n, int depth){
if(depth == 0) {
n.getChildren().add(new Node<>(0.,(double) randBetween(1, 100)));
return;
}
for (int i = 0; i < randBetween(2,10); i++) {
Node nn = new Node<>(0.,0.);
n.getChildren().add(nn);
createTree(nn,depth-1);
}
}
public static Random r = new Random();
public static int randBetween(int min, int max){
return r.nextInt(max-min+1)+min;
}
public static double pvs(Node<Double,Double> node, int depth, double alpha, double beta, int color){
if(depth == 0 || node.getChildren().isEmpty())
return color*node.getValue();
int i = 0;
double score;
for(Node<Double,Double> child : node.getChildren()){
if(i++==0)
score = -pvs(child,depth-1,-beta,-alpha,-color);
else {
score = -pvs(child,depth-1,-alpha-1,-alpha,-color);
if(alpha<score || score<beta)
score = -pvs(child,depth-1,-beta,-score,-color);
}
alpha = Math.max(alpha,score);
if(alpha>=beta)
break;
}
return alpha;
}
public static double alphabeta(Node<Double,Double> node, int depth, double alpha, double beta, boolean maximizingPlayer){
if(depth == 0 || node.getChildren().isEmpty())
return node.getValue();
double v = maximizingPlayer ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
for(Node<Double,Double> child : node.getChildren()){
if(maximizingPlayer) {
v = Math.max(v, alphabeta(child, depth - 1, alpha, beta, false));
alpha = Math.max(alpha, v);
}else {
v = Math.min(v,alphabeta(child,depth-1,alpha,beta,true));
beta = Math.min(beta,v);
}
if(beta <= alpha)
break;
}
return v;
}
public static double minimax(Node<Double,Double> node, int depth, boolean maximizingPlayer){
if(depth == 0 || node.getChildren().isEmpty())
return node.getValue();
double v = maximizingPlayer ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
for(Node<Double,Double> child : node.getChildren()){
if(maximizingPlayer)
v = Math.max(v,minimax(child,depth-1,false));
else
v = Math.min(v,minimax(child,depth-1,true));
}
return v;
}
}
Alpha Beta: 28.0, time: 25.863126 milli seconds
Minimax: 28.0, time: 512.6119160000001 milli seconds
PVS: 28.0, time: 93.357653 milli seconds