game_player/
minimax.rs

1//! Minimax Game Tree Search Implementation Module
2//!
3//! This module implements a game tree search using min-max strategy, alpha-beta pruning, and a transposition table. The
4//! game-specific components are provided by the user using the traits defined in other modules.
5//!
6//! # Example
7//!
8//! ```rust,ignore
9//! use std::cell::RefCell;
10//! use std::rc::Rc;
11//! use crate::minimax::{search, ResponseGenerator};
12//! use crate::transposition_table::TranspositionTable;
13//!
14//! // Assuming you have implemented the required traits for your game
15//! let tt = Rc::new(RefCell::new(TranspositionTable::new(1000, 100)));
16//! let static_evaluator = MyStaticEvaluator::new();
17//! let response_generator = MyResponseGenerator::new();
18//! let initial_state = Rc::new(MyGameState::new());
19//!
20//! if let Some(best_move) = search(&tt, &static_evaluator, &response_generator, &initial_state, 6) {
21//!     println!("Best move found: {:?}", best_move);
22//! }
23//! ```
24//!
25//! # Notes
26//! - The search assumes a two-player zero-sum game with perfect information.
27//! - The transposition table can be reused across multiple searches for efficiency and support of iterative deepening.
28
29use std::cell::RefCell;
30use std::rc::Rc;
31
32use crate::state::*;
33use crate::static_evaluator::*;
34use crate::transposition_table::*;
35
/// Quality of a value produced directly by the static evaluation function: zero plies of search.
/// Declared `const` rather than `static` because it is an immutable compile-time scalar with no
/// identity requirements (idiomatic Rust; `static` is reserved for items needing a fixed address
/// or interior mutability).
const SEF_QUALITY: i16 = 0;
37
/// Holds evaluation information about a single response (a state reachable in one ply).
struct Response<S> {
    /// Shared reference to the resulting state.
    state: Rc<S>,
    /// Minimax value of the state. Alice's search maximizes this value; Bob's search minimizes it.
    value: f32,
    /// Quality of the value: the number of plies searched to obtain it.
    /// A value taken straight from the static evaluator has quality `SEF_QUALITY` (0).
    quality: i16,
}
47
/// Holds static information pertaining to the search, shared by the recursive search functions.
struct Context<'a, S: State> {
    /// Maximum search depth in plies.
    max_depth: i32,
    /// Generates all possible responses to a state.
    rg: &'a dyn ResponseGenerator<State = S>,
    /// Static evaluation function used for leaf values and preliminary move ordering.
    sef: &'a dyn StaticEvaluator<S>,
    /// Shared transposition table caching previously computed state values.
    tt: &'a Rc<RefCell<TranspositionTable>>,
}
/// Response generator function object trait.
///
/// This trait defines the interface for generating all possible responses from a given state. Implementers should provide
/// game-specific logic for move generation.
///
/// # Examples
/// ```rust,ignore
/// use std::rc::Rc;
/// use crate::minimax::ResponseGenerator;
///
/// struct MyResponseGenerator;
///
/// impl ResponseGenerator for MyResponseGenerator {
///     type State = MyGameState;
///
///     fn generate(&self, state: &Rc<Self::State>, depth: i32) -> Vec<Box<Self::State>> {
///         // Generate all valid moves for the current player
///         let mut responses = Vec::new();
///
///         // Game-specific logic to generate moves
///         for possible_move in get_all_valid_moves(state) {
///             let new_state = state.apply_move(possible_move);
///             responses.push(Box::new(new_state));
///         }
///
///         responses
///     }
/// }
/// ```
///
/// # Implementation Notes
/// - Return an empty vector if no moves are available (player cannot respond)
/// - If passing is allowed in the game, include a "pass" move as a valid response
/// - The depth parameter can be used for depth-dependent move generation optimizations
pub trait ResponseGenerator {
    /// The type representing game states that this generator works with
    type State: State;

    /// Generates a list of all possible responses to the given state.
    ///
    /// This method should return all legal moves available to the current player in the given state. The implementation
    /// should be game-specific and handle all rules and constraints of the particular game being played.
    ///
    /// # Arguments
    /// * `state` - The current state to generate responses for
    /// * `depth` - Current search depth (ply number), useful for optimizations
    ///
    /// # Returns
    /// A vector of boxed game states representing all possible moves.
    /// Returns an empty vector if no moves are available.
    ///
    /// # Examples
    /// ```rust,ignore
    /// let response_gen = MyResponseGenerator::new();
    /// let current_state = Rc::new(MyGameState::new());
    ///
    /// let possible_moves = response_gen.generate(&current_state, 0);
    /// println!("Found {} possible moves", possible_moves.len());
    /// ```
    ///
    /// # Note
    /// The caller gains ownership of the returned states.
    ///
    /// # Note
    /// Returning no responses indicates that the player cannot respond. It does not necessarily indicate that the game is
    /// over or that the player has passed. If passing is allowed, then a "pass" state should be a valid response.
    fn generate(&self, state: &Rc<Self::State>, depth: i32) -> Vec<Box<Self::State>>;
}
123
124/// A minimax search implementation using alpha-beta pruning and a transposition table.
125///
126/// This function performs a complete minimax search to find the best move for the current player. It uses alpha-beta pruning for
127/// efficiency and a transposition table to avoid redundant calculations.
128///
129/// # Arguments
130/// * `tt` - A transposition table for caching previously computed positions. Can be reused across multiple searches.
131/// * `sef` - The static evaluation function
132/// * `rg` - The response generator
133/// * `s0` - The state to search from
134/// * `max_depth` - Maximum search depth in plies
135///
136/// # Returns
137/// `Some(Rc<S>)` containing the best move found, or `None` if no valid moves exist
138///
139/// # Examples
140///
141/// ```rust,ignore
142/// use std::cell::RefCell;
143/// use std::rc::Rc;
144/// use crate::minimax::search;
145/// use crate::transposition_table::TranspositionTable;
146///
147/// // Set up the search components
148/// let transposition_table = Rc::new(RefCell::new(TranspositionTable::new(1000, 100)));
149/// let evaluator = MyStaticEvaluator::new();
150/// let move_generator = MyResponseGenerator::new();
151/// let game_state = Rc::new(MyGameState::initial_position());
152///
153/// // Search to depth 6
154/// match search(&transposition_table, &evaluator, &move_generator, &game_state, 6) {
155///     Some(best_move) => println!("Best move: {:?}", best_move),
156///     None => println!("No moves available"),
157/// }
158/// ```
159///
160/// # Algorithm Details
161///
162/// The search uses:
163/// - **Minimax**: Alternating maximizing and minimizing players
164/// - **Alpha-beta pruning**: Early termination of unpromising branches
165/// - **Transposition table**: Caching of previously evaluated positions
166/// - **Move ordering**: Better moves searched first for more effective pruning
167pub fn search<S: State>(
168    tt: &Rc<RefCell<TranspositionTable>>,
169    sef: &impl StaticEvaluator<S>,
170    rg: &impl ResponseGenerator<State = S>,
171    s0: &Rc<S>,
172    max_depth: i32,
173) -> Option<Rc<S>> {
174    let context = Context { tt, sef, rg, max_depth };
175    if s0.whose_turn() == PlayerId::ALICE as u8 {
176        if let Some(response) = alice_search(&context, s0, -f32::INFINITY, f32::INFINITY, 0) {
177            return Some(Rc::clone(&response.state));
178        }
179    } else if let Some(response) = bob_search(&context, s0, -f32::INFINITY, f32::INFINITY, 0) {
180        return Some(Rc::clone(&response.state));
181    }
182    None
183}
184
// Evaluates all of Alice's possible responses to the given state. The returned response is the one with the highest value.
//
// Arguments:
// * `context` - static search configuration (SEF, response generator, T-table, max depth)
// * `state`   - the state whose responses are evaluated; Alice is the player responding to it
// * `alpha`   - highest value found so far for Alice in this line of play (mutated locally as better responses are found)
// * `beta`    - lowest value found so far for Bob in the ancestor ply; used for the beta cutoff
// * `depth`   - ply number of `state` (0 at the root)
//
// Returns `None` if Alice has no responses; otherwise the best response together with its value and quality.
fn alice_search<S: State>(context: &Context<S>, state: &Rc<S>, mut alpha: f32, beta: f32, depth: i32) -> Option<Response<S>> {
    // Depth of responses to this state
    let response_depth = depth + 1;
    // Quality of a response as a result of a search at this depth.
    let search_quality = (context.max_depth - response_depth) as i16;

    // Generate a list of the possible responses to this state by Alice. The responses are initialized with preliminary values.
    let mut responses = generate_responses(context, state, depth);

    // If there are no responses, return without a response. It's up to the caller to decide how to handle this case.
    if responses.is_empty() {
        return None;
    }

    // Sort from highest to lowest in order to increase the chance of triggering a beta cutoff earlier.
    // NOTE(review): NaN values compare as Equal here, so ordering is best-effort if the SEF can return NaN.
    responses.sort_by(|a, b| b.value.partial_cmp(&a.value).unwrap_or(std::cmp::Ordering::Equal));

    // Evaluate each of the responses and choose the one with the highest value
    let mut best_state: Option<&Rc<S>> = None;
    let mut best_value = -f32::INFINITY;
    let mut best_quality = -1;
    let mut pruned = false;

    for response in &responses {
        // Replace the preliminary value and quality of this response with the value and quality of Bob's subsequent response
        // to it.
        // The following conditions will cause the search to be skipped:
        // 1. The preliminary value indicates a win for Alice.
        // 2. The preliminary quality is more than the quality of a search. This can be a result of obtaining the preliminary
        //    value from the result of a previous search stored in the transposition table.
        // 3. The search has reached its maximum depth.
        let mut value = response.value;
        let mut quality = response.quality;
        if value < context.sef.alice_wins_value() && response_depth < context.max_depth && quality < search_quality {
            // Update the value of Alice's response by evaluating Bob's responses to it. If Bob has no response, then
            // leave the response's value and quality as is.
            if let Some(bob_response) = bob_search(context, &response.state, alpha, beta, response_depth) {
                value = bob_response.value;
                quality = bob_response.quality;
            }
        }

        // Determine if this response's value is the best so far. If so, then save the value and do alpha-beta pruning
        if value > best_value {
            // Save it
            best_state = Some(&response.state);
            best_value = value;
            best_quality = quality;

            // If Alice wins with this response, then there is no reason to look for anything better
            if best_value >= context.sef.alice_wins_value() {
                break;
            }

            // alpha-beta pruning (beta cutoff) Here's how it works:
            //
            // Bob is looking for the lowest value. The 'beta' is the value of Bob's best response found so far in the previous
            // ply. If the value of this response is higher than the beta, then Bob will never choose a response leading to this
            // response because the result is worse than the result of a response Bob has already found. As such, there is no
            // reason to continue.
            if best_value > beta {
                // Beta cutoff
                pruned = true;
                break;
            }

            // alpha-beta pruning (alpha) Here's how it works:
            //
            // Alice is looking for the highest value. The 'alpha' is the value of Alice's best response found so far. If the
            // value of this response is higher than the alpha, then it is a better response for Alice. The alpha is
            // subsequently passed to Bob's search so that if Bob finds a response with a lower value than the alpha, then
            // there is no reason to continue because Alice already has a better response and will choose it instead of allowing
            // Bob to make a move with a lower value.
            if best_value > alpha {
                alpha = best_value;
            }
        }
    }

    assert!(best_value > -f32::INFINITY); // Sanity check
    assert!(best_quality >= 0); // Sanity check
    assert!(best_state.is_some()); // Sanity check

    // Just in case: bail out with None if no best state was recorded (unreachable after the asserts above).
    best_state.as_ref()?;

    // At this point, the value of this state becomes the value of the best response to it, and the quality becomes its
    // quality + 1.
    //
    // Save the value of this state in the T-table if the ply was not pruned. Pruning results in an incorrect value because the
    // search was interrupted and potentially better responses were not considered.
    if !pruned {
        context
            .tt
            .borrow_mut()
            .update(state.fingerprint(), best_value, best_quality + 1);
    }

    Some(Response::<S> {
        state: Rc::clone(best_state?),
        value: best_value,
        quality: best_quality + 1,
    })
}
290
// Evaluates all of Bob's possible responses to the given state. The returned response is the one with the lowest value.
//
// Arguments:
// * `context` - static search configuration (SEF, response generator, T-table, max depth)
// * `state`   - the state whose responses are evaluated; Bob is the player responding to it
// * `alpha`   - highest value found so far for Alice in the ancestor ply; used for the alpha cutoff
// * `beta`    - lowest value found so far for Bob in this line of play (mutated locally as better responses are found)
// * `depth`   - ply number of `state` (0 at the root)
//
// Returns `None` if Bob has no responses; otherwise the best response together with its value and quality.
fn bob_search<S: State>(context: &Context<S>, state: &Rc<S>, alpha: f32, mut beta: f32, depth: i32) -> Option<Response<S>> {
    // Depth of responses to this state
    let response_depth = depth + 1;
    // Quality of a response as a result of a search at this depth.
    let search_quality = (context.max_depth - response_depth) as i16;

    // Generate a list of the possible responses to this state by Bob. The responses are initialized with preliminary values.
    let mut responses = generate_responses(context, state, depth);

    // If there are no responses, return without a response. It's up to the caller to decide how to handle this case.
    if responses.is_empty() {
        return None;
    }

    // Sort from lowest to highest in order to increase the chance of triggering an alpha cutoff earlier
    // NOTE(review): NaN values compare as Equal here, so ordering is best-effort if the SEF can return NaN.
    responses.sort_by(|a, b| a.value.partial_cmp(&b.value).unwrap_or(std::cmp::Ordering::Equal));

    // Evaluate each of the responses and choose the one with the lowest value
    let mut best_state: Option<&Rc<S>> = None;
    let mut best_value = f32::INFINITY;
    let mut best_quality = -1;
    let mut pruned = false;

    for response in &responses {
        // Replace the preliminary value and quality of this response with the value and quality of Alice's subsequent response
        // to it.
        // The following conditions will cause the search to be skipped:
        // 1. The preliminary value indicates a win for Bob.
        // 2. The preliminary quality is more than the quality of a search. This can be a result of obtaining the preliminary
        //    value from the result of a previous search stored in the transposition table.
        // 3. The search has reached its maximum depth.
        let mut value = response.value;
        let mut quality = response.quality;
        if value > context.sef.bob_wins_value() && response_depth < context.max_depth && quality < search_quality {
            // Update the value of Bob's response by evaluating Alice's responses to it. If Alice has no response, then
            // leave the response's value and quality as is.
            if let Some(alice_response) = alice_search(context, &response.state, alpha, beta, response_depth) {
                value = alice_response.value;
                quality = alice_response.quality;
            }
        }

        // Determine if this response's value is the best so far. If so, then save the value and do alpha-beta pruning
        if value < best_value {
            // Save it
            best_state = Some(&response.state);
            best_value = value;
            best_quality = quality;

            // If Bob wins with this response, then there is no reason to look for anything better
            if best_value <= context.sef.bob_wins_value() {
                break;
            }

            // alpha-beta pruning (alpha cutoff) Here's how it works:
            //
            // Alice is looking for the highest value. The 'alpha' is the value of Alice's best move found so far in the previous
            // ply. If the value of this response is lower than the alpha, then Alice will never choose a response leading to this
            // response because the result is worse than the result of a response Alice has already found. As such, there is no
            // reason to continue.

            if best_value < alpha {
                // Alpha cutoff
                pruned = true;
                break;
            }

            // alpha-beta pruning (beta) Here's how it works:
            //
            // Bob is looking for the lowest value. The 'beta' is the value of Bob's best response found so far. If the
            // value of this response is lower than the beta, then it is a better response for Bob. The beta is subsequently passed
            // to Alice's search so that if Alice finds a response with a higher value than the beta, then there is no reason to
            // continue because Bob already has a better response and will choose it instead of allowing Alice to make a move with
            // a higher value.
            if best_value < beta {
                beta = best_value;
            }
        }
    }

    assert!(best_value < f32::INFINITY); // Sanity check
    assert!(best_quality >= 0); // Sanity check
    assert!(best_state.is_some()); // Sanity check

    // Just in case: bail out with None if no best state was recorded (unreachable after the asserts above).
    best_state.as_ref()?;

    // At this point, the value of this state becomes the value of the best response to it, and the quality becomes its
    // quality + 1.
    //
    // Save the value of this state in the T-table if the ply was not pruned. Pruning results in an incorrect value because the
    // search was interrupted and potentially better responses were not considered.
    if !pruned {
        context
            .tt
            .borrow_mut()
            .update(state.fingerprint(), best_value, best_quality + 1);
    }

    Some(Response::<S> {
        state: Rc::clone(best_state?),
        value: best_value,
        quality: best_quality + 1,
    })
}
397
398// Generates a list of responses to the given node
399fn generate_responses<S: State>(context: &Context<S>, state: &Rc<S>, depth: i32) -> Vec<Response<S>> {
400    // Handle the case where node.state might be None
401    let responses = context.rg.generate(state, depth);
402    responses
403        .into_iter()
404        .map(|state| {
405            let rc_state = Rc::from(state);
406            let (value, quality) = get_preliminary_value(context, &rc_state);
407            Response::<S> {
408                state: rc_state,
409                value,
410                quality,
411            }
412        })
413        .collect()
414}
415
416// Get a preliminary value of the state from the static evaluator or the transposition table
417fn get_preliminary_value<S: State>(context: &Context<S>, state: &Rc<S>) -> (f32, i16) {
418    // SEF optimization:
419    // Since any value of any state in the T-table has already been computed by search and/or SEF, it has a quality that is at
420    // least as good as the quality of the value returned by the SEF. So, if the state being evaluated is in the T-table, then
421    // the value in the T-table is used instead of running the SEF because T-table lookup is so much faster than the SEF.
422
423    // If it is in the T-table then use that value, otherwise evaluate the state and save the value.
424    let fingerprint = state.fingerprint();
425
426    // First, check if the value is in the transposition table
427    if let Some(cached_value) = context.tt.borrow_mut().check(fingerprint, -1) {
428        return cached_value;
429    }
430
431    // Value not in table, so evaluate with static evaluator and store result
432    let value = context.sef.evaluate(state);
433    context.tt.borrow_mut().update(fingerprint, value, SEF_QUALITY);
434    (value, SEF_QUALITY)
435}