// game_player/minimax.rs
1//! Minimax Game Tree Search Implementation Module
2//!
3//! This module implements a game tree search using min-max strategy, alpha-beta pruning, and a transposition table. The
4//! game-specific components are provided by the user using the traits defined in other modules.
5//!
6//! # Example
7//!
8//! ```rust,ignore
9//! use std::cell::RefCell;
10//! use std::rc::Rc;
11//! use crate::minimax::{search, ResponseGenerator};
12//! use crate::transposition_table::TranspositionTable;
13//!
14//! // Assuming you have implemented the required traits for your game
15//! let tt = Rc::new(RefCell::new(TranspositionTable::new(1000, 100)));
16//! let static_evaluator = MyStaticEvaluator::new();
17//! let response_generator = MyResponseGenerator::new();
18//! let initial_state = Rc::new(MyGameState::new());
19//!
//! if let Some(best_move) = search(&tt, &static_evaluator, &response_generator, &initial_state, 6) {
//!     println!("Best move found: {:?}", best_move);
//! }
23//! ```
24//!
25//! # Notes
26//! - The search assumes a two-player zero-sum game with perfect information.
27//! - The transposition table can be reused across multiple searches for efficiency and support of iterative deepening.
28
29use std::cell::RefCell;
30use std::rc::Rc;
31
32use crate::state::*;
33use crate::static_evaluator::*;
34use crate::transposition_table::*;
35
// Quality of a value returned by the static evaluation function: zero plies of search were performed
// to obtain it. Declared `const` rather than `static` because it is a plain compile-time value that
// is only ever read by value.
const SEF_QUALITY: i16 = 0;
37
// Holds evaluation information about a single response (move) during the search.
struct Response<S> {
    // The resulting state, shared via `Rc` so it can be handed back to the caller without copying.
    state: Rc<S>,
    // Value of the state. Alice's search maximizes this value and Bob's search minimizes it, so higher
    // values favor Alice and lower values favor Bob.
    value: f32,
    // Quality of the value: the number of plies searched to find the value. A value taken straight from
    // the static evaluator has quality SEF_QUALITY (0); each ply of search above it adds 1.
    quality: i16,
}
47
// Holds static information pertaining to the search. Built once per call to `search` and passed by
// reference through the recursive alice_search/bob_search calls.
struct Context<'a, S: State> {
    // Maximum search depth in plies.
    max_depth: i32,
    // Generates the possible responses (moves) for a given state.
    rg: &'a dyn ResponseGenerator<State = S>,
    // Static evaluation function used for leaf values and preliminary move ordering.
    sef: &'a dyn StaticEvaluator<S>,
    // Shared transposition table caching previously computed state values (keyed by state fingerprint).
    tt: &'a Rc<RefCell<TranspositionTable>>,
}
55/// Response generator function object trait.
56///
57/// This trait defines the interface for generating all possible responses from a given state. Implementers should provide
58/// game-specific logic for move generation.
59///
60/// # Examples
61/// ```rust,ignore
62/// use std::rc::Rc;
63/// use crate::minimax::ResponseGenerator;
64///
65/// struct MyResponseGenerator;
66///
67/// impl ResponseGenerator for MyResponseGenerator {
68/// type State = MyGameState;
69///
70/// fn generate(&self, state: &Rc<Self::State>, depth: i32) -> Vec<Box<Self::State>> {
71/// // Generate all valid moves for the current player
72/// let mut responses = Vec::new();
73///
74/// // Game-specific logic to generate moves
75/// for possible_move in get_all_valid_moves(state) {
76/// let new_state = state.apply_move(possible_move);
77/// responses.push(Box::new(new_state));
78/// }
79///
80/// responses
81/// }
82/// }
83/// ```
84///
85/// # Implementation Notes
86/// - Return an empty vector if no moves are available (player cannot respond)
87/// - If passing is allowed in the game, include a "pass" move as a valid response
88/// - The depth parameter can be used for depth-dependent move generation optimizations
pub trait ResponseGenerator {
    /// The type representing game states that this generator works with
    type State: State;

    /// Generates a list of all possible responses to the given state.
    ///
    /// This method should return all legal moves available to the current player in the given state. The implementation
    /// should be game-specific and handle all rules and constraints of the particular game being played.
    ///
    /// # Arguments
    /// * `state` - The current state to generate responses for
    /// * `depth` - Current search depth (ply number), useful for optimizations
    ///
    /// # Returns
    /// A vector of boxed game states representing all possible moves.
    /// Returns an empty vector if no moves are available.
    ///
    /// # Examples
    /// ```rust,ignore
    /// let response_gen = MyResponseGenerator::new();
    /// let current_state = Rc::new(MyGameState::new());
    ///
    /// let possible_moves = response_gen.generate(&current_state, 0);
    /// println!("Found {} possible moves", possible_moves.len());
    /// ```
    ///
    /// # Note
    /// The caller gains ownership of the returned states.
    ///
    /// # Note
    /// Returning no responses indicates that the player cannot respond. It does not necessarily indicate that the game is
    /// over or that the player has passed. If passing is allowed, then a "pass" state should be a valid response.
    fn generate(&self, state: &Rc<Self::State>, depth: i32) -> Vec<Box<Self::State>>;
}
123
124/// A minimax search implementation using alpha-beta pruning and a transposition table.
125///
126/// This function performs a complete minimax search to find the best move for the current player. It uses alpha-beta pruning for
127/// efficiency and a transposition table to avoid redundant calculations.
128///
129/// # Arguments
130/// * `tt` - A transposition table for caching previously computed positions. Can be reused across multiple searches.
131/// * `sef` - The static evaluation function
132/// * `rg` - The response generator
133/// * `s0` - The state to search from
134/// * `max_depth` - Maximum search depth in plies
135///
136/// # Returns
137/// `Some(Rc<S>)` containing the best move found, or `None` if no valid moves exist
138///
139/// # Examples
140///
141/// ```rust,ignore
142/// use std::cell::RefCell;
143/// use std::rc::Rc;
144/// use crate::minimax::search;
145/// use crate::transposition_table::TranspositionTable;
146///
147/// // Set up the search components
148/// let transposition_table = Rc::new(RefCell::new(TranspositionTable::new(1000, 100)));
149/// let evaluator = MyStaticEvaluator::new();
150/// let move_generator = MyResponseGenerator::new();
151/// let game_state = Rc::new(MyGameState::initial_position());
152///
153/// // Search to depth 6
154/// match search(&transposition_table, &evaluator, &move_generator, &game_state, 6) {
155/// Some(best_move) => println!("Best move: {:?}", best_move),
156/// None => println!("No moves available"),
157/// }
158/// ```
159///
160/// # Algorithm Details
161///
162/// The search uses:
163/// - **Minimax**: Alternating maximizing and minimizing players
164/// - **Alpha-beta pruning**: Early termination of unpromising branches
165/// - **Transposition table**: Caching of previously evaluated positions
166/// - **Move ordering**: Better moves searched first for more effective pruning
167pub fn search<S: State>(
168 tt: &Rc<RefCell<TranspositionTable>>,
169 sef: &impl StaticEvaluator<S>,
170 rg: &impl ResponseGenerator<State = S>,
171 s0: &Rc<S>,
172 max_depth: i32,
173) -> Option<Rc<S>> {
174 let context = Context { tt, sef, rg, max_depth };
175 if s0.whose_turn() == PlayerId::ALICE as u8 {
176 if let Some(response) = alice_search(&context, s0, -f32::INFINITY, f32::INFINITY, 0) {
177 return Some(Rc::clone(&response.state));
178 }
179 } else if let Some(response) = bob_search(&context, s0, -f32::INFINITY, f32::INFINITY, 0) {
180 return Some(Rc::clone(&response.state));
181 }
182 None
183}
184
// Evaluates all of Alice's possible responses to the given state. The returned response is the one with the highest value.
//
// Alice is the maximizing player. `alpha` is the value of Alice's best response found so far on the path to the root;
// `beta` is the value of Bob's best response found so far in the previous ply. `depth` is the ply of `state`.
// Returns `None` when Alice has no responses to `state`; the caller decides how to handle that.
fn alice_search<S: State>(context: &Context<S>, state: &Rc<S>, mut alpha: f32, beta: f32, depth: i32) -> Option<Response<S>> {
    // Depth of responses to this state
    let response_depth = depth + 1;
    // Quality of a response as a result of a search at this depth.
    let search_quality = (context.max_depth - response_depth) as i16;

    // Generate a list of the possible responses to this state by Alice. The responses are initialized with preliminary values.
    let mut responses = generate_responses(context, state, depth);

    // If there are no responses, return without a response. It's up to the caller to decide how to handle this case.
    if responses.is_empty() {
        return None;
    }

    // Sort from highest to lowest in order to increase the chance of triggering a beta cutoff earlier.
    // NaN-safe: incomparable values are treated as equal rather than panicking.
    responses.sort_by(|a, b| b.value.partial_cmp(&a.value).unwrap_or(std::cmp::Ordering::Equal));

    // Evaluate each of the responses and choose the one with the highest value
    let mut best_state: Option<&Rc<S>> = None;
    let mut best_value = -f32::INFINITY;
    let mut best_quality = -1;
    // Set when the loop is cut off by beta pruning; a pruned result must not be cached (see below).
    let mut pruned = false;

    for response in &responses {
        // Replace the preliminary value and quality of this response with the value and quality of Bob's subsequent response
        // to it.
        // The following conditions will cause the search to be skipped:
        // 1. The preliminary value indicates a win for Alice.
        // 2. The preliminary quality is at least as good as the quality of a search from this depth. This can be a result
        //    of obtaining the preliminary value from the result of a previous search stored in the transposition table.
        // 3. The search has reached its maximum depth.
        let mut value = response.value;
        let mut quality = response.quality;
        if value < context.sef.alice_wins_value() && response_depth < context.max_depth && quality < search_quality {
            // Update the value of Alice's response by evaluating Bob's responses to it. If Bob has no response, then
            // leave the response's value and quality as is.
            if let Some(bob_response) = bob_search(context, &response.state, alpha, beta, response_depth) {
                value = bob_response.value;
                quality = bob_response.quality;
            }
        }

        // Determine if this response's value is the best so far. If so, then save the value and do alpha-beta pruning
        if value > best_value {
            // Save it
            best_state = Some(&response.state);
            best_value = value;
            best_quality = quality;

            // If Alice wins with this response, then there is no reason to look for anything better
            if best_value >= context.sef.alice_wins_value() {
                break;
            }

            // alpha-beta pruning (beta cutoff) Here's how it works:
            //
            // Bob is looking for the lowest value. The 'beta' is the value of Bob's best response found so far in the previous
            // ply. If the value of this response is higher than the beta, then Bob will never choose a response leading to this
            // response because the result is worse than the result of a response Bob has already found. As such, there is no
            // reason to continue.
            if best_value > beta {
                // Beta cutoff
                pruned = true;
                break;
            }

            // alpha-beta pruning (alpha) Here's how it works:
            //
            // Alice is looking for the highest value. The 'alpha' is the value of Alice's best response found so far. If the
            // value of this response is higher than the alpha, then it is a better response for Alice. The alpha is
            // subsequently passed to Bob's search so that if Bob finds a response with a lower value than the alpha, then
            // there is no reason to continue because Alice already has a better response and will choose it instead of allowing
            // Bob to make a move with a lower value.
            if best_value > alpha {
                alpha = best_value;
            }
        }
    }

    // At least one response exists, so the loop must have improved on the initial -infinity/-1 values
    // (every preliminary quality is >= SEF_QUALITY, i.e. >= 0).
    assert!(best_value > -f32::INFINITY); // Sanity check
    assert!(best_quality >= 0); // Sanity check
    assert!(best_state.is_some()); // Sanity check

    // Just in case
    best_state.as_ref()?;

    // At this point, the value of this state becomes the value of the best response to it, and the quality becomes its
    // quality + 1.
    //
    // Save the value of this state in the T-table if the ply was not pruned. Pruning results in an incorrect value because the
    // search was interrupted and potentially better responses were not considered.
    if !pruned {
        context
            .tt
            .borrow_mut()
            .update(state.fingerprint(), best_value, best_quality + 1);
    }

    Some(Response::<S> {
        state: Rc::clone(best_state?),
        value: best_value,
        quality: best_quality + 1,
    })
}
290
// Evaluates all of Bob's possible responses to the given state. The returned response is the one with the lowest value.
//
// Bob is the minimizing player; this is the mirror image of `alice_search`. `alpha` is the value of Alice's best
// response found so far in the previous ply; `beta` is the value of Bob's best response found so far on the path
// to the root. `depth` is the ply of `state`.
// Returns `None` when Bob has no responses to `state`; the caller decides how to handle that.
fn bob_search<S: State>(context: &Context<S>, state: &Rc<S>, alpha: f32, mut beta: f32, depth: i32) -> Option<Response<S>> {
    // Depth of responses to this state
    let response_depth = depth + 1;
    // Quality of a response as a result of a search at this depth.
    let search_quality = (context.max_depth - response_depth) as i16;

    // Generate a list of the possible responses to this state by Bob. The responses are initialized with preliminary values.
    let mut responses = generate_responses(context, state, depth);

    // If there are no responses, return without a response. It's up to the caller to decide how to handle this case.
    if responses.is_empty() {
        return None;
    }

    // Sort from lowest to highest in order to increase the chance of triggering an alpha cutoff earlier.
    // NaN-safe: incomparable values are treated as equal rather than panicking.
    responses.sort_by(|a, b| a.value.partial_cmp(&b.value).unwrap_or(std::cmp::Ordering::Equal));

    // Evaluate each of the responses and choose the one with the lowest value
    let mut best_state: Option<&Rc<S>> = None;
    let mut best_value = f32::INFINITY;
    let mut best_quality = -1;
    // Set when the loop is cut off by alpha pruning; a pruned result must not be cached (see below).
    let mut pruned = false;

    for response in &responses {
        // Replace the preliminary value and quality of this response with the value and quality of Alice's subsequent response
        // to it.
        // The following conditions will cause the search to be skipped:
        // 1. The preliminary value indicates a win for Bob.
        // 2. The preliminary quality is at least as good as the quality of a search from this depth. This can be a result
        //    of obtaining the preliminary value from the result of a previous search stored in the transposition table.
        // 3. The search has reached its maximum depth.
        let mut value = response.value;
        let mut quality = response.quality;
        if value > context.sef.bob_wins_value() && response_depth < context.max_depth && quality < search_quality {
            // Update the value of Bob's response by evaluating Alice's responses to it. If Alice has no response, then
            // leave the response's value and quality as is.
            if let Some(alice_response) = alice_search(context, &response.state, alpha, beta, response_depth) {
                value = alice_response.value;
                quality = alice_response.quality;
            }
        }

        // Determine if this response's value is the best so far. If so, then save the value and do alpha-beta pruning
        if value < best_value {
            // Save it
            best_state = Some(&response.state);
            best_value = value;
            best_quality = quality;

            // If Bob wins with this response, then there is no reason to look for anything better
            if best_value <= context.sef.bob_wins_value() {
                break;
            }

            // alpha-beta pruning (alpha cutoff) Here's how it works:
            //
            // Alice is looking for the highest value. The 'alpha' is the value of Alice's best move found so far in the previous
            // ply. If the value of this response is lower than the alpha, then Alice will never choose a response leading to this
            // response because the result is worse than the result of a response Alice has already found. As such, there is no
            // reason to continue.

            if best_value < alpha {
                // Alpha cutoff
                pruned = true;
                break;
            }

            // alpha-beta pruning (beta) Here's how it works:
            //
            // Bob is looking for the lowest value. The 'beta' is the value of Bob's best response found so far. If the
            // value of this response is lower than the beta, then it is a better response for Bob. The beta is subsequently passed
            // to Alice's search so that if Alice finds a response with a higher value than the beta, then there is no reason to
            // continue because Bob already has a better response and will choose it instead of allowing Alice to make a move with
            // a higher value.
            if best_value < beta {
                beta = best_value;
            }
        }
    }

    // At least one response exists, so the loop must have improved on the initial +infinity/-1 values
    // (every preliminary quality is >= SEF_QUALITY, i.e. >= 0).
    assert!(best_value < f32::INFINITY); // Sanity check
    assert!(best_quality >= 0); // Sanity check
    assert!(best_state.is_some()); // Sanity check

    // Just in case
    best_state.as_ref()?;

    // At this point, the value of this state becomes the value of the best response to it, and the quality becomes its
    // quality + 1.
    //
    // Save the value of this state in the T-table if the ply was not pruned. Pruning results in an incorrect value because the
    // search was interrupted and potentially better responses were not considered.
    if !pruned {
        context
            .tt
            .borrow_mut()
            .update(state.fingerprint(), best_value, best_quality + 1);
    }

    Some(Response::<S> {
        state: Rc::clone(best_state?),
        value: best_value,
        quality: best_quality + 1,
    })
}
397
398// Generates a list of responses to the given node
399fn generate_responses<S: State>(context: &Context<S>, state: &Rc<S>, depth: i32) -> Vec<Response<S>> {
400 // Handle the case where node.state might be None
401 let responses = context.rg.generate(state, depth);
402 responses
403 .into_iter()
404 .map(|state| {
405 let rc_state = Rc::from(state);
406 let (value, quality) = get_preliminary_value(context, &rc_state);
407 Response::<S> {
408 state: rc_state,
409 value,
410 quality,
411 }
412 })
413 .collect()
414}
415
416// Get a preliminary value of the state from the static evaluator or the transposition table
417fn get_preliminary_value<S: State>(context: &Context<S>, state: &Rc<S>) -> (f32, i16) {
418 // SEF optimization:
419 // Since any value of any state in the T-table has already been computed by search and/or SEF, it has a quality that is at
420 // least as good as the quality of the value returned by the SEF. So, if the state being evaluated is in the T-table, then
421 // the value in the T-table is used instead of running the SEF because T-table lookup is so much faster than the SEF.
422
423 // If it is in the T-table then use that value, otherwise evaluate the state and save the value.
424 let fingerprint = state.fingerprint();
425
426 // First, check if the value is in the transposition table
427 if let Some(cached_value) = context.tt.borrow_mut().check(fingerprint, -1) {
428 return cached_value;
429 }
430
431 // Value not in table, so evaluate with static evaluator and store result
432 let value = context.sef.evaluate(state);
433 context.tt.borrow_mut().update(fingerprint, value, SEF_QUALITY);
434 (value, SEF_QUALITY)
435}