-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathminimax.py
233 lines (203 loc) · 9.6 KB
/
minimax.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
from bot import Bot
import copy
from common import MINIMAX, EMPTY, ROW_COUNT, COLUMN_COUNT, WINDOW_LENGTH
import random
import math
class MiniMax(Bot):
"""
This class is responsible for the Minimax algorithm.
At each depth, the algorithm will simulate up to 7 boards, each having a piece that has been dropped in a free column. So with depth 1, we will have 7 boards to analyse, with depth 2 : 49 ,...
Through a system of reward each board will be attributed a score. The Minimax will then either try to minimise or maximise the rewards depending on the depth (odd or even). Indeed, because we are using multiple
depth, the minimax algorithm will simulate in alternance the possible moves of the current player and the ones of the adversary (creating Min nodes and max nodes). The player that needs to decide where to
drop a piece on the current board is considered as the maximising player, hence trying to maximise the reward when a max nodes is encountered. The algorithm will also consider that the adversary plays as good as possible (with
the information available with the depth chosen) and hence try to minimise the reward when possible (minimizing player).
So after creating all the boards of the tree, at each depth, a board will be selected based on the reward and on the type of nodes (min or max node) starting from the bottom of the tree.
The final choice is made based on the 7 boards possible with the score updated through the reward procedure describe above.
Note that the larger the depth, the slower the execution.
In order to avoid unnecessary exploration of boards, an alpha beta pruning has been implemented.
"""
def __init__(self, game, depth, pruning=True):
super().__init__(game, bot_type=MINIMAX, depth=depth, pruning=pruning)
def drop_piece(self, board, row, col, piece):
"""
Drop a piece in the board at the specified position
:param board: board with all the pieces that have been placed
:param col: one of the row of the board
:param col: one of the column of the board
:param piece: 1 or -1 depending on whose turn it is
"""
board[col][row] = piece
def get_next_open_row(self, board, col):
"""
Return the first row which does not have a piece in the specified column (col)
:param board: board with all the pieces that have been placed
:param col: one of the column of the board
:return: row number
"""
for r in range(ROW_COUNT):
if board[col][r] == 0:
return r
def winning_move(self, board, piece):
"""
Check if the game has been won
:param board: board with all the pieces that have been placed
:param piece: 1 or -1 depending on whose turn it is
"""
# Check horizontal locations for win
for c in range(COLUMN_COUNT - 3):
for r in range(ROW_COUNT):
if (
board[c][r] == piece
and board[c + 1][r] == piece
and board[c + 2][r] == piece
and board[c + 3][r] == piece
):
return True
# Check vertical locations for win
for c in range(COLUMN_COUNT):
for r in range(ROW_COUNT - 3):
if (
board[c][r] == piece
and board[c][r + 1] == piece
and board[c][r + 2] == piece
and board[c][r + 3] == piece
):
return True
# Check positively sloped diaganols
for c in range(COLUMN_COUNT - 3):
for r in range(ROW_COUNT - 3):
if (
board[c][r] == piece
and board[c + 1][r + 1] == piece
and board[c + 2][r + 2] == piece
and board[c + 3][r + 3] == piece
):
return True
# Check negatively sloped diaganols
for c in range(COLUMN_COUNT - 3):
for r in range(3, ROW_COUNT):
if (
board[c][r] == piece
and board[c + 1][r - 1] == piece
and board[c + 2][r - 2] == piece
and board[c + 3][r - 3] == piece
):
return True
return False
def is_terminal_node(self, board):
"""
Determines wheter the game is finished or not
:param board: board with all the pieces that have been placed
:return: boolean that determines wheter the game is finish or not
"""
return (
self.winning_move(board, self._game._turn * -1)
or self.winning_move(board, self._game._turn)
or self.get_valid_locations(board) is None
)
def evaluate_window(self, window, piece):
"""
Evaluates the score of a portion of the board
:param window: portion of the board with all the pieces that have been placed
:param piece: 1 or -1 depending on whose turn it is
:return: score of the window
"""
score = 0
opp_piece = self._game._turn * -1
if piece == self._game._turn * -1:
opp_piece = self._game._turn
if window.count(piece) == 4:
score += 100
elif window.count(piece) == 3 and window.count(EMPTY) == 1:
score += 5
elif window.count(piece) == 2 and window.count(EMPTY) == 2:
score += 2
if window.count(opp_piece) == 3 and window.count(EMPTY) == 1:
score -= 4
return score
def score_position(self, board, piece):
"""
Main function that handles the scoring mechanism.
Handle the score for the minimax algorithm, the score is computed independently of which piece has just been dropped. This is a global score that looks at the whole board
:param board: board with all the pieces that have been placed
:param piece: 1 or -1 depending on whose turn it is
:return: score of the board
"""
score = 0
# Score center column
center_array = [int(i) for i in list(board[COLUMN_COUNT // 2][:])]
center_count = center_array.count(piece)
score += center_count * 3
# Score Horizontal
for r in range(ROW_COUNT):
row_array = [int(i) for i in list(board[:][r])]
for c in range(COLUMN_COUNT - 3):
window = row_array[c : c + WINDOW_LENGTH]
score += self.evaluate_window(window, piece)
# Score Vertical
for c in range(COLUMN_COUNT):
col_array = [int(i) for i in list(board[c][:])]
for r in range(ROW_COUNT - 3):
window = col_array[r : r + WINDOW_LENGTH]
score += self.evaluate_window(window, piece)
# Score posiive sloped diagonal
for r in range(ROW_COUNT - 3):
for c in range(COLUMN_COUNT - 3):
window = [board[c + i][r + i] for i in range(WINDOW_LENGTH)]
score += self.evaluate_window(window, piece)
for r in range(ROW_COUNT - 3):
for c in range(COLUMN_COUNT - 3):
window = [board[c + i][r + 3 - i] for i in range(WINDOW_LENGTH)]
score += self.evaluate_window(window, piece)
return score
def minimax(self, board, depth, alpha, beta, maximizingPlayer, pruning):
"""
Main function of minimax, called whenever a move is needed.
Recursive function, depth of the recursion being determined by the parameter depth.
:param depth: number of iterations the Minimax algorith will run for
(the larger the depth the longer the algorithm takes)
:alpha: used for the pruning, correspond to the lowest value of the range values of the node
:beta: used for the pruning, correspond to the hihest value of the range values of the node
:maximizingPlayer: boolean to specify if the algorithm should maximize or minimize the reward
:pruning: boolean to specify if the algorithm uses the pruning
:return: column where to place the piece
"""
valid_locations = self.get_valid_locations(board)
is_terminal = self.is_terminal_node(board)
if depth == 0:
return (None, self.score_position(board, self._game._turn))
elif is_terminal:
if self.winning_move(board, self._game._turn):
return (None, math.inf)
elif self.winning_move(board, self._game._turn * -1):
return (None, -math.inf)
else: # Game is over, no more valid moves
return (None, 0)
column = valid_locations[0]
if maximizingPlayer:
value = -math.inf
turn = 1
else:
value = math.inf
turn = -1
for col in valid_locations:
row = self.get_next_open_row(board, col)
b_copy = copy.deepcopy(board)
self.drop_piece(b_copy, row, col, self._game._turn * turn)
new_score = self.minimax(
b_copy, depth - 1, alpha, beta, not maximizingPlayer, pruning
)[1]
if maximizingPlayer:
if new_score > value:
value = new_score
column = col
alpha = max(alpha, value)
else:
if new_score < value:
value = new_score
column = col
beta = min(beta, value)
if pruning:
if alpha >= beta:
break
return column, value