卡住用alpha beta修剪的minmax算法

我正在嘗試用alpha beta修剪在java的tic tac toe遊戲中實現minmax算法。當我編寫代碼時，我立即發現了一個例外，因此我試圖讓一些終端輸出自己找到錯誤，並且發現它是由於最終返回中的錯誤結果導致的：算法最終返回[-1][-1]，得分-2147483646，當代碼的其餘部分嘗試移動並將座標放在字段中時，會導致異常。我做了一些方案來模擬一些動作和一些可能的樹，但我找不到錯誤。卡住用alpha beta修剪的minmax算法

/* 
    * int field[][] is the board array, it may contains 0(empty), 1(opponent's seed), 2(computer's seed) 
    * nComputer = 2 (computer's seed) 
    * nPlayer = 1 (opponent's seed) 
    * computerMove = new int[3] 
    * remainingMoves has been calculated before the main call 
    */ 

    // Main call 
    computerMove = cMove(remainingMoves, nComputer,Integer.MIN_VALUE + 1, Integer.MAX_VALUE - 1); 
    field[computerMove[1]][computerMove[2]] = nComputer; // This line cause the exception!! 
    // MinMax alpha-beta pruning algorithm 
    private static int[] cMove(int depth, int player, int alpha, int beta) { 

     int[][] moveList = new int[3][10]; 
     moveList = generateMoves(field); // See below for details 

     int temp; 
     int score; 
     int bestR = -1; 
     int bestC = -1; 

     // check function retunrns 1(opponent wins), 2(computer wins), 0(draw) or -1(nothing) 
     if(moveList[0][0] == 0 || depth == 0) { 
      score = cScore(player); 

      return new int[] { score, bestR, bestC }; 
     } else { 
      for (int i = 1;i < moveList[0][0]; i++) { 
       // Trying to make a move 
       field[moveList[1][i]][moveList[2][i]] = player; 

       if(player == nComputer) { // Maximazing player 
        score = cMove(depth -1, nPlayer, alpha, beta)[0]; 
        if(score > alpha) { 
         alpha = score; 
         bestR = moveList[1][i]; 
         bestC = moveList[2][i]; 
        } 
       } else { // Minimizing player 
        score = cMove(depth -1, nComputer, alpha, beta)[0]; 
        if(score < beta) { 
         beta = score; 
         bestR = moveList[1][i]; 
         bestC = moveList[2][i]; 
        } 
       } 

       field[moveList[1][i]][moveList[2][i]] = 0; // Undo move 
       if(alpha >= beta) i = 10; // Cut-off 
      } 

      if(player == nComputer) temp = alpha; 
      else temp = beta; 

      return new int[] { temp, bestR, bestC }; 

     } 
    } 

    /* 
    * generateMoves function returns an array 3x10 where [0][0] is the number 
    * of possible moves and [0,1,2][1-9] are the score and the 
    * coordinates(rows and columns) of all the possible moves 
    */ 
    private static int[][] generateMoves(int[][] field) { 
     int[][] result = new int[3][10]; 
     int k = 0; 

     if(check(4) != -1) { 
      return result; 
     } 

     for (int i = 0; i < field.length; i++) { 
      for (int j = 0; j < field[0].length; j++) { 
       if (field[i][j] == 0) { 
        k++; 
        result[1][k] = i; 
        result[2][k] = j; 
       } 
      } 
     } 

     result[0][0] = k; 

     return result; 
    } 

    // cScore function assign a score for the actual node with an heuristic evaluation 
private static int cScore(int p) { 
    int score = 0; 
    score += cRow(p, 0, 0, 0, 1, 0, 2); 
    score += cRow(p, 1, 0, 1, 1, 1, 2); 
    score += cRow(p, 2, 0, 2, 1, 2, 2); 
    score += cRow(p, 0, 0, 1, 0, 2, 0); 
    score += cRow(p, 0, 1, 1, 1, 2, 1); 
    score += cRow(p, 0, 2, 1, 2, 2, 2); 
    score += cRow(p, 0, 0, 1, 1, 2, 2); 
    score += cRow(p, 0, 2, 1, 1, 2, 0); 
    return score; 
} 

private static int cRow(int player, int rOne, int cOne, int rTwo, int cTwo, int rThr, int cThr) { 
    int score = 0; 

    if (field[rOne][cOne] == nComputer) { 
     score = 1; 
    } else if (field[rOne][cOne] == nPlayer) { 
     score = -1; 
    } 

    if (field[rTwo][cTwo] == nComputer) { 
     if (score == 1) { 
      score = 10; 
     } else if (score == -1) { 
      return 0; 
     } else { 
      score = 1; 
     } 
    } else if (field[rTwo][cTwo] == nPlayer) { 
     if (score == -1) { 
      score = -10; 
     } else if (score == 1) { 
      return 0; 
     } else { 
      score = -1; 
     } 
    } 

    if (field[rThr][cThr] == nComputer) { 
     if (score > 0) { 
      score *= 10; 
     } else if (score < 0) { 
      return 0; 
     } else { 
      score = 1; 
     } 
    } else if (field[rThr][cThr] == nPlayer) { 
     if (score < 0) { 
      score *= 10; 
     } else if (score > 1) { 
      return 0; 
     } else { 
      score = -1; 
     } 
    } 

    return score; 
}

我被困在這個問題上一個星期，我要瘋了！在此先感謝和抱歉的英語不好，但它不是我的主要語言，我慢慢地想學習它

--------------------- - - - - - - - - - - - - - - - - - - - - - - 編輯 - - - --------------------------------------------------根據要求-----

添加校驗功能：

// check function first check the state of 5 cells that needs to be filled to won([0,0][0,1][0,2][1,0][2,0]) 
public static int check(int nMove) { 
    int state = -1; 

    if(field[0][0] != 0) { 
     state = col(0,1); 
     if(state == 1 || state == 2) return state; // Win on first col 
     state = row(0,1); 
     if(state == 1 || state == 2) return state; // Win on first row 
     state = diagonal(1); 
     if(state == 1 || state == 2) return state; // Win on first diagonal 
    } 
    if (field[0][1] != 0) { 
     state = col(1,2); 
     if(state == 1 || state == 2) return state; // Win on second col 
    } 
    if (field[0][2] != 0) { 
     state = col(2,3); 
     if(state == 1 || state == 2) return state; // Win on third col 
     state = diagonal(2); 
     if(state == 1 || state == 2) return state; // Win on second diagonal 
    } 
    if (field[1][0] != 0) { 
     state = row(1,2); 
     if(state == 1 || state == 2) return state; // Win on second row 
    } 
    if (field[2][0] != 0) { 
     state = row(2,3); 
     if(state == 1 || state == 2) return state; // Win on third row 
    } 

    if(nMove == 8) return 0; // Draw 

    return state; 
} 
// Check if the entire row is filled (check rows from starting to n points) 
private static int row(int start, int n) { 
    int s = -1; 
    int k = 0; 
    int h = 0; 

    for (int i = start; i < n; i++) { 
     for (int j = 0; j < (field[0]).length; j++) { 
      if(field[i][j] == 1) { 
       k++; 
       if(k==3) s = 1; 
      } else if(field[i][j] == 2) { 
        h++; 
        if(h==3) s = 2; 
      } 
     } 
     k=0; 
     h=0; 
    } 

    return s; 
} 
// Check if the entire col is filled (check cols from starting to n points) 
private static int col(int start, int n) { 
    int s = -1; 
    int k = 0; 
    int h = 0; 

    for (int i = start; i < n; i++) { 
     for (int j = 0; j < (field).length; j++) { 
      if(field[j][i] == 1) { 
       k++; 
       if(k==3) s = 1; 
      } else if(field[j][i] == 2) { 
        h++; 
        if(h==3) s = 2; 
      } 
     } 
     k=0; 
     h=0; 
    } 

    return s; 
} 
// Check if the entire diagonal is filled (check first diagonal if n=1 and second diagonal if n=2) 
private static int diagonal(int n) { 
    int s = -1; 
    int k = 0; 
    int h = 0; 

    if(n == 1) { 
     for (int i = 0; i < (field).length; i++) { 
      int j = i; 
      if(field[i][j]== 1) { 
       k++; 
       if(k==3) s = 1; 
      } else if(field[i][j] == 2) { 
       h++; 
       if(h==3) s = 2; 
      } 
     } 
    } else if (n == 2) { 
     int j = 2; 
     for (int i = 0; i < (field).length; i++) { 
      if(field[i][j] == 1) { 
       k++; 
       if(k==3) s = 1; 
      } 
      else if(field[i][j] == 2) { 
       h++; 
       if(h==3) s = 2; 
      } 
      j--; 
     } 
    } else { } 

    return s; 
}

來源

2016-03-08 ludovico

在此爲你解答帶病上班，但你可以給我你所有的班？有很多東西缺失，我不知道它是什麼。只需複製並粘貼它們。讓我知道你是否導入了任何罐子，或者類似的東西。 –

這個測試的整個代碼很長（大約1k行），爲了更容易理解它，我決定只分享感興趣的部分。這個函數是一個名爲IA的類的一部分，我成功編譯了它。但整個算法結果總是[-1] [ - 1]，得分大約爲2 * 10^9。這讓我覺得在遞歸中存在一個問題，但我無法理解在哪裏。如果它可以幫助我也可以發佈檢查算法，如果有必要，也可以發佈整個代碼，但我認爲它只能混淆。感謝您的快速回答 – ludovico

使用調試器。如果您知道故障以及您的代碼應該做什麼，那麼在您逐步運行代碼時，調試器實際上會爲您提供值。我無法在腦海中做到這一點。 –

-1

假設你的板子比3×3大，否則蜱粘性腳趾有4個爲中獎條件並沒有什麼太大的意義，你的例外將在這裏引起：

for (int i = 0; i < field.length; i++) { 
     for (int j = 0; j < field[0].length; j++) { 
      if (field[i][j] == 0) { 
       k++; 
       result[1][k] = i; // out of bounds 
       result[2][k] = j; // out of bounds 
      } 
     } 
    }

對於大小爲A×B的字段，當板卡爲空時，k將變得與A*B - 1一樣大。對於A = B = 7，這將變爲48，大於9，這是result[i]中允許的最大索引。

//編輯==================== =============
我有點困惑和不確定，你試圖優化什麼（計算機或玩家的最佳分數？），但我找到了解釋結果的東西。

在每次遞歸調用中，您都有一個變量player。
根據其值更新alpha或beta。
在遞歸步驟結束時，您根據player的值返回alpha或beta。
但是，您更新alpha如果player == 2

if(player == 2) { // Maximazing player 
    score = cMove(depth -1, nPlayer, alpha, beta)[0]; 
    if(score > alpha) {

但返回beta如果player == 2

if(player == 1) temp = alpha; 
    else temp = beta;

所以，你總是alpha或MAX_VALUE - 1返回MIN_VALUE + 1爲beta。
因此，if(score < alpha) {或if(score < beta) {對於每個步驟都將是錯誤的，不會調用基本情況。你的遞歸看起來是這樣的：

深度= 4，調用深度= 3
- 深度= 3，調用深度= 2
  - 深度= 2，PLAYER1奪得，成績是42
- 深度= 3，更新阿爾法= 42，返回的β= MAX_VALUE - 1作爲得分
深度= 4，call3返回MAX_VALUE - 1這是我測試所以沒有什麼改變，bestR和bestC逗留-1

來源

2016-03-08 18:12:47 Aracurunir

目前我第一次嘗試使它與3x3板一起使用。我只包含了generateMoves函數，因爲它可能是異常的起源超出界限是在我嘗試執行結果爲[-1] [ - 1]的移動時，界限。澄清當我調用函數之後，'field [cMove [1]] [cMove [2]] = nComputer;'無論如何回答這個問題並且對於缺乏清晰度感到抱歉 – ludovico

我用建議的改進更新了代碼，但它仍然返回與以前相同的結果 – ludovico

卡住用alpha beta修剪的minmax算法

回答

相關問題