返回bestMove用於tictactoe的minimax算法

我正在嘗試爲井字遊戲（tic-tac-toe）編寫 Russell 與 Norvig 的人工智能教科書中給出的 minimax 算法。除了把 bestMove 返回給用戶的方式之外，其他部分都已完成。我想返回 bestMove，但無法確定應該在什麼時候選定 bestMove。有人能幫忙嗎？

// Top-level entry point of the search: runs max_move on `state` and returns
// the move that max_move stored through its reference argument. The numeric
// value returned by max_move is not used here.
moveT MiniMax(stateT state)
{
    moveT selected;
    max_move(state, selected);
    return selected;
}

// Maximizing half of the minimax recursion.
//
// Returns EvaluateStaticPosition(state) when the game is over; otherwise
// tries every move from GenerateMoveList, recurses into min_move, and
// returns the largest value seen.
//
// bestMove is an out-parameter: whenever a move yields a new maximum at this
// level, it is written to bestMove.
//
// NOTE(review): the very same bestMove reference is forwarded to min_move
// below, so deeper levels of the recursion can write to the caller's variable
// too. This is the behavior the question is asking about.
int max_move(stateT state,int & bestMove) 
{ 
    // Running maximum; any evaluated value greater than this replaces it.
    int v = -10000; 
    if(GameIsOver(state)) 
    { 
     // Terminal position: bestMove is left untouched on this path.
     return EvaluateStaticPosition(state); 

    } 

    vector<moveT> moveList; 
    GenerateMoveList(state, moveList); 
    int nMoves = moveList.size(); 

    for(int i = 0 ; i < nMoves ; i++) 
    { 
     moveT move = moveList[i]; 
     // Apply the move, evaluate the resulting position, then undo it below.
     MakeMove(state, move); 

     // NOTE(review): passes this level's bestMove down into the recursion.
     int curValue = min_move(state,bestMove); 

      if(curValue > v) 
      { 
       v = curValue; 
       bestMove = move; 
      } 
     RetractMove(state, move); 

    } 

    return v; 

} 

// Minimizing half of the minimax recursion, as posted in the question.
//
// Returns EvaluateStaticPosition(state) when the game is over; otherwise
// iterates over every move from GenerateMoveList and recurses into max_move.
//
// NOTE(review): as written, this block has two visible problems:
//   * the recursive call passes three arguments and uses `depth`, which is
//     not declared in this function, while max_move as posted takes two
//     parameters;
//   * inside the `if`, the assignment goes from v into curValue, so v is
//     never updated and the function returns its initial 10000 for every
//     non-terminal position.
int min_move(stateT state, int &bestMove) 
{ 
    // Running minimum; see the note above: it is never lowered in the loop.
    int v = 10000; 
    if(GameIsOver(state)) 
    { 
     return EvaluateStaticPosition(state); 

    } 
    vector<moveT> moveList; 
    GenerateMoveList(state, moveList); 

    int nMoves = moveList.size(); 

    for(int i = 0 ; i < nMoves; i++) 
    { 
     moveT move = moveList[i]; 
     // Apply the move, evaluate the resulting position, then undo it below.
     MakeMove(state, move); 

     // NOTE(review): `depth` is undeclared here, and the caller's bestMove
     // reference is forwarded into the recursion.
     int curValue = max_move(state,depth+1,bestMove); 

      if(curValue < v) 
      { 
       // NOTE(review): overwrites curValue with v; v itself is unchanged.
       curValue = v; 
      } 
     RetractMove(state, move); 

    } 
    return v; 
}

P.S .:還有其他的僞碼可以找到minmax值。不過，他們只關注井字遊戲，我正試圖將其擴展到其他遊戲。謝謝。

更新：整個代碼可以在這裏找到：http://ideone.com/XPswCl

來源

2012-11-23 motiur

您上面發佈的代碼是最新的嗎？因爲它看起來無法通過編譯。在'min_move'中，你用三個參數調用'max_move'，但max_move只接受兩個參數。 – Kevin

@Kevin：糟糕，它現在已更新。我試圖在某個時候限制深度。 – motiur

感謝您的更新，但不正確的行仍然存在：'int curValue = max_move（state，depth + 1，bestMove）;'這讓我擔心;它讓我懷疑你發佈的代碼不是你正在編譯的代碼。這使潛在的回答者發現問題變得更具挑戰性。我們將在發佈的代碼中確定實際代碼中不存在的錯誤，如果它們不在發佈的代碼中，我們將無法找到真實代碼中的錯誤。 – Kevin

在 minimax 最簡單的版本中，第一個玩家希望最大化自己的得分，第二個玩家則希望最小化第一個玩家的得分。由於雙方都只關心第一個玩家的得分，EvaluateStaticPosition 應該返回一個值，表示當前棋盤狀態對第一個玩家有多有利。此時輪到誰走並不相關。

// Rates a position from FIRST_PLAYER's point of view, independent of whose
// turn it is. A win for FIRST_PLAYER is checked first; only if that fails is
// the opponent's win checked. Everything else is rated neutral.
int EvaluateStaticPosition(stateT state)
{
    if (CheckForWin(state, FIRST_PLAYER))
        return WINNING_POSITION;

    return CheckForWin(state, Opponent(FIRST_PLAYER)) ? LOSING_POSITION
                                                      : NEUTRAL_POSITION;
}

現在，當你想要對第一個玩家最有利的走法時，調用 MaxMove；當你想要對第二個玩家最有利的走法時，調用 MinMove。

// Chooses a move for whichever player is to move in `state`.
//
// When it is FIRST_PLAYER's turn the maximizing search (MaxMove) is used;
// otherwise the minimizing search (MinMove) is used. The value found by the
// search is printed to standard output as "i is <value>".
//
// Returns the move the search stored in its out-parameter. If the game is
// already over, the search returns before writing a move, and the
// value-initialized moveT is returned instead; callers should check
// GameIsOver(state) before relying on the result.
moveT MiniMax(stateT state)
{
    // Value-initialize: MaxMove/MinMove leave their out-parameter untouched
    // on a terminal position, and returning an uninitialized built-in value
    // would be undefined behavior.
    moveT bestMove = moveT();
    int score = 0;
    if (state.whoseTurn == FIRST_PLAYER)
    {
        score = MaxMove(state, bestMove);
    }
    else
    {
        score = MinMove(state, bestMove);
    }
    cout << "i is " << score << endl;
    return bestMove;
}

最後，你的 MinMove 和 MaxMove 內部還有一些問題。在這兩個函數中給 curRating 賦值時，不應該把 bestMove 作爲第二個參數傳給 MaxMove 或 MinMove，否則對手的最佳走法會被寫進 bestMove，這沒有任何意義。相反，應該聲明一個 opponentsBestMove 對象，並把它作爲第二個參數傳入。（你實際上不會使用這個對象，甚至不會查看它的值，但這沒有關係。）做了這個改動之後，MinMove 內部就不再有任何地方給 bestMove 賦值，所以你應該在 if(curRating < v) 塊內部給它賦值。

// Maximizing player's search.
//
// On a finished game, returns the static evaluation and leaves bestMove
// untouched. Otherwise plays each generated move in turn, asks MinMove for
// the value of the resulting position, undoes the move, and keeps the move
// with the strictly highest value. Returns that highest value, or -1000 if
// no move beat it.
int MaxMove(stateT state, moveT &bestMove)
{
    if (GameIsOver(state))
        return EvaluateStaticPosition(state);

    vector<moveT> candidates;
    GenerateMoveList(state, candidates);

    int highest = -1000;
    for (int idx = 0, count = candidates.size(); idx < count; ++idx)
    {
        moveT candidate = candidates[idx];
        MakeMove(state, candidate);

        // The opponent's reply is needed only as a throwaway out-parameter,
        // so the recursion cannot overwrite this level's bestMove.
        moveT ignoredReply;
        int rating = MinMove(state, ignoredReply);
        if (highest < rating)
        {
            highest = rating;
            bestMove = candidate;
        }

        RetractMove(state, candidate);
    }
    return highest;
}
// Minimizing player's search; the mirror image of MaxMove.
//
// On a finished game, returns the static evaluation and leaves bestMove
// untouched. Otherwise plays each generated move, asks MaxMove for the value
// of the resulting position, undoes the move, and keeps the move with the
// strictly lowest value. Returns that lowest value, or 1000 if no move went
// below it.
int MinMove(stateT state, moveT &bestMove)
{
    if (GameIsOver(state))
        return EvaluateStaticPosition(state);

    vector<moveT> options;
    GenerateMoveList(state, options);
    const int total = options.size();

    int lowest = 1000;
    int next = 0;
    while (next < total)
    {
        moveT trial = options[next];
        ++next;

        MakeMove(state, trial);

        // Throwaway out-parameter: the reply chosen by the maximizer is not
        // used at this level.
        moveT ignoredReply;
        int rating = MaxMove(state, ignoredReply);
        if (rating < lowest)
        {
            lowest = rating;
            bestMove = trial;
        }

        RetractMove(state, trial);
    }
    return lowest;
}

到這一步，你應該就擁有一個不可戰勝的 AI 了！

The final position looks like this: 

O | O | X 
---+---+--- 
X | X | O 
---+---+--- 
O | X | X 

Cat's game.

另一種方法利用了井字棋是零和博弈這一事實。換句話說，在遊戲結束時，所有玩家的得分之和等於零。對於雙人遊戲，這意味着一個玩家的得分永遠是另一個玩家得分的相反數。這對我們來說很方便，因爲最小化對方的得分就等同於最大化自己的得分。所以，我們不必讓一個玩家最大化自己的得分、另一個玩家最小化對方的得分，而是可以讓兩個玩家都試圖最大化各自的得分。

把 EvaluateStaticPosition 改回原來的形式，讓它根據棋盤狀態對當前玩家有多有利來給出得分。

// Rates a position from the point of view of the player whose turn it is
// (state.whoseTurn). A win for that player is checked first; only if that
// fails is a win for the opponent checked. Anything else is neutral.
int EvaluateStaticPosition(stateT state)
{
    int rating = NEUTRAL_POSITION;
    if (CheckForWin(state, state.whoseTurn))
        rating = WINNING_POSITION;
    else if (CheckForWin(state, Opponent(state.whoseTurn)))
        rating = LOSING_POSITION;
    return rating;
}

刪除MinMove，因爲我們只關心最大化。重寫MaxMove，以便它選擇讓對手得到最差分數的動作。最佳動作的分數是其他玩家最差分數的負值。

// Single search routine used for both players.
//
// On a finished game, returns the static evaluation and leaves bestMove
// untouched. Otherwise plays each generated move, recurses on the resulting
// position, negates the value that comes back, undoes the move, and keeps
// the move with the strictly highest negated value. Returns that value, or
// -1000 if no move beat it.
int MaxMove(stateT state, moveT &bestMove)
{
    if (GameIsOver(state))
        return EvaluateStaticPosition(state);

    vector<moveT> replies;
    GenerateMoveList(state, replies);

    int bestSoFar = -1000;
    for (vector<moveT>::iterator it = replies.begin(); it != replies.end(); ++it)
    {
        moveT attempt = *it;
        MakeMove(state, attempt);

        // The recursive call's own choice of move is irrelevant here; only
        // its value, with the sign flipped, is used.
        moveT ignoredReply;
        int rating = -MaxMove(state, ignoredReply);
        if (bestSoFar < rating)
        {
            bestSoFar = rating;
            bestMove = attempt;
        }

        RetractMove(state, attempt);
    }
    return bestSoFar;
}

由於 MaxMove 同時用於兩個玩家，我們不再需要在 MiniMax 函數中區分玩家。

// Chooses a move for the player to move in `state` using MaxMove, which
// serves both players in this variant.
//
// The value found by the search is printed to standard output as
// "i is <value>".
//
// Returns the move the search stored in its out-parameter. If the game is
// already over, MaxMove returns before writing a move, and the
// value-initialized moveT is returned instead; callers should check
// GameIsOver(state) before relying on the result.
moveT MiniMax(stateT state)
{
    // Value-initialize: MaxMove leaves its out-parameter untouched on a
    // terminal position, and returning an uninitialized built-in value would
    // be undefined behavior.
    moveT bestMove = moveT();
    const int score = MaxMove(state, bestMove);
    cout << "i is " << score << endl;
    return bestMove;
}

來源

2012-12-28 14:34:20 Kevin

如果我沒有弄錯，你是不是在 MinMove 中當 curRating < v 時執行 bestMove = move？ – motiur

是的，我在 'if (curRating < v)' 塊內執行了 'bestMove = move'。 – Kevin

成功了，老兄，真希望能給你一個擁抱；無論你在哪裏，謝謝。讓我再進一步測試看看。 – motiur

嗯，看起來 MiniMax 已經能替你正確選出它，只需用初始狀態和深度調用它即可。（除非按照該狀態先走的是第二個玩家，那樣的話你應該在 MiniMax 中調用 min_move。）

編輯：是的，我忽略了一些東西，bestMove目前沒什麼意義。在max_move中的程序中，您可以像這樣更改循環：

for(int i = 0 ; i < nMoves ; i++) 
{ 
    moveT move = moveList[i]; 
    MakeMove(state, move); 

    int new_value = min_move(state, depth+1); 
    if(new_value > v) 
    { 
     v=new_value; 
    } 
    RetractMove(state, move); 

}

之後，您可以考慮一下bestMove的含義了嗎？我的想法是，你有興趣爲井字遊戲找到「最好的」一系列動作之一。爲此，你需要一個矢量或更好的stack。但是這也意味着有std::stack<int>* best_moves作爲最後一個參數。

對於堆棧實現，在min_move中返回下一個步驟，如果它們的值最好，則將move放在best_moves堆棧的頂部。當然，在遊戲結束時，你只需返回空棧。它需要一個面向對象的方法才能正確地完成它，當我有一些時間時我會這樣做。

如果你需要的僅僅是最好的下一步走法，那麼我建議你把 min_move 和 max_move 的返回類型改成類似下面這樣的結構：

// Search result returned by value from max_move/min_move: a position value
// together with the move that produced it.
struct Value_move{ 
    int value;          // minimax value of the position
    moveT best_move;    // move that achieved `value` at this level
};

這樣一來，max_move 的新實現看起來如下：

// Sentinel values stored in Value_move::best_move.
// NOTE(review): assigning these to best_move assumes moveT is an integer
// type (or convertible from int) — confirm against moveT's definition.
const int MOVE_INVALID = -12345;  // no move has been selected yet
const int MOVE_NOTHING = -12346;  // game is over, so there is no move to make

// Maximizing half of the minimax recursion, returning both the value and the
// move that achieves it instead of writing the move through a reference.
//
// state: position to search from.
// depth: current recursion depth; it is only forwarded (as depth+1) to
//        min_move and is not used to cut the search off here.
//
// Returns {static evaluation, MOVE_NOTHING} when the game is over. Otherwise
// returns the highest value found among the generated moves, paired with the
// move at this level that led to it. If no move scores above -10000, the
// result is {-10000, MOVE_INVALID}.
Value_move max_move(stateT state, int depth)
{
    Value_move best;
    best.value = -10000;
    best.best_move = MOVE_INVALID;

    if (GameIsOver(state))
    {
        best.value = EvaluateStaticPosition(state);
        best.best_move = MOVE_NOTHING;
        return best;
    }

    vector<moveT> moveList;
    GenerateMoveList(state, moveList);
    int nMoves = moveList.size();

    for (int i = 0; i < nMoves; i++)
    {
        moveT move = moveList[i];
        MakeMove(state, move);

        // Only the value of the opponent's reply matters; the move recorded
        // is the one made at this level, not the reply.
        Value_move curr = min_move(state, depth + 1);
        if (curr.value > best.value)
        {
            best.value = curr.value;
            best.best_move = move;
        }

        RetractMove(state, move);
    }

    // Fixed: this was `return v;`, but no `v` exists in this function and
    // the return type is Value_move.
    return best;
}

您只需在MiniMax函數的返回結構中選取best_move字段即可。

備註：
不過你得承認，這段代碼在許多方面並不像 C++ 程序，反而更像 C 程序。否則，所有以 CapitalCamelCase 命名的函數都應該是類方法，而且應該通過（const）引用而不是按值傳遞狀態——不過，只有當 state 實際上是隱藏在 typedef 後面的指針時，整段代碼纔說得通。

來源

2012-11-23 05:46:09

...但cout和向量作爲C代碼沒有意義，只有C++。 – Mike

真實，更正，抱歉，+1。不過，我希望我們同意在這裏使用C++程序的不良做法。這段代碼介於兩者之間。它也開始在一些地方使用參考。 :) –

@BarnabasSzabolcs我很關心curValue> v，它是否正確放置在for循環中。 curValue未初始化，它不可能大於v，即迄今爲止獲得的最大值，例如+10。怎麼樣，我可以改變代碼，使'curValue'代表v = max（v，min_move（state，depth + 1，bestMove）），也是一種存儲和比較迄今爲止獲得的最佳值與curValue的方法。我在這裏有點模糊。 – motiur

您的代碼找到了正確的值，但隨後又因爲把同一個引用向下傳遞而把它覆蓋掉了。

// Call as posted in the question: the caller's own bestMove reference is
// handed to the recursive call.
int curValue = min_move(state,bestMove);

應該成爲

// Give the recursive call its own scratch out-parameter so that it does not
// write to this level's bestMove.
moveT nextMove; // its value is never read afterwards
int curValue = min_move(state,nextMove);

你還需要在 min_move 函數中做同樣的修改。

注意：在min_move您的代碼調用max_move的參數多於您爲該函數定義的參數。

來源

2012-12-27 06:42:46

返回bestMove用於tictactoe的minimax算法

回答

相關問題