Skip to content

Commit

Permalink
Introduce ELO rating system
Browse files Browse the repository at this point in the history
Implement the Elo rating system to evaluate the relative strengths of
various agents based on their performance. The Elo rating system,
commonly utilized in assessing the skill levels of chess players,
offers a reliable method to gauge the competitive abilities of agents
in our context. This addition enhances our ability to make informed
decisions and comparisons within our system.
  • Loading branch information
visitorckw committed Mar 7, 2024
1 parent 4126718 commit 8a5f32c
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ train
*.bin
rl
mcts
elo
9 changes: 8 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ MCTS = mcts
RL_CFLAGS := $(CFLAGS) -D USE_RL
MCTS_CFLAGS := $(CFLAGS) -D USE_MCTS
MCTS_LDFLAGS := $(LDFLAGS) -lm
# Elo evaluation program. elo.c calls pow(), so the math library is needed,
# but -lm is a linker flag: it is passed through ELO_LDFLAGS only and kept
# out of the compile flags.
ELO = elo
ELO_CFLAGS := $(CFLAGS)
ELO_LDFLAGS := $(LDFLAGS) -lm

GIT_HOOKS := .git/hooks/applied

Expand All @@ -27,6 +30,7 @@ deps := $(OBJS:%.o=%.d)
deps += $(RL).d
deps += $(TRAIN).d
deps += $(MCTS).d
deps += $(ELO).d

$(PROG): $(OBJS)
$(CC) -o $@ $^ $(LDFLAGS)
Expand All @@ -40,6 +44,9 @@ $(TRAIN): $(TRAIN).c agents/reinforcement_learning.c game.c
$(MCTS): main.c agents/mcts.c game.c
$(CC) -o $@ $^ $(MCTS_CFLAGS) $(MCTS_LDFLAGS)

# Build the Elo evaluation program: every listed prerequisite ($^) is compiled
# and linked in a single compiler invocation, with the link flags placed after
# the sources.
$(ELO): $(ELO).c agents/negamax.c agents/mcts.c agents/reinforcement_learning.c game.c mt19937-64.c zobrist.c
$(CC) -o $@ $^ $(ELO_CFLAGS) $(ELO_LDFLAGS)

clean:
-$(RM) $(PROG) $(OBJS) $(deps) $(TRAIN) $(RL) $(MCTS)
-$(RM) $(PROG) $(OBJS) $(deps) $(TRAIN) $(RL) $(MCTS) $(ELO)
-$(RM) *.bin
132 changes: 132 additions & 0 deletions elo.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#include <assert.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "agents/mcts.h"
#include "agents/negamax.h"
#include "agents/reinforcement_learning.h"
#include "game.h"
#include "zobrist.h"

/* Tournament parameters. */
#define N_GAMES 100   /* number of games main() plays */
#define ELO_INIT 1500 /* rating every agent starts with */
#define ELO_K 32      /* K-factor applied to each rating update */

/*
 * Per-agent tables. All of them are indexed by agent id:
 * 0 = Negamax, 1 = MCTS, 2 = RL.
 */
const char *agent_name[3] = {"Negamax", "MCTS", "RL"};

/* Game outcome counters, updated by main() after every game. */
long long win[3] = {0};
long long draw[3] = {0};
long long lose[3] = {0};

/* Current rating of each agent. */
double elo_rating[3] = {ELO_INIT, ELO_INIT, ELO_INIT};

/*
 * Play one complete game between two agents and report how it ended.
 *
 * player1 and player2 are agent ids: 0 selects negamax_predict(), 1 selects
 * mcts(), and any other value selects play_rl(). player1 moves first and
 * places 'X'; player2 places 'O'.
 *
 * Returns 1 when check_win() reports 'X', 0 when it reports 'D', and -1 for
 * any other non-blank result.
 */
static int play_game(int player1, int player2)
{
    char table[N_GRIDS];

    memset(table, ' ', N_GRIDS);

    printf("Start a game: %s v.s. %s\n", agent_name[player1],
           agent_name[player2]);

    /*
     * The RL agent is initialised and its model loaded for every game, even
     * when neither side is the RL agent. Its mark is 'X' if it is player1,
     * 'O' if it is player2, and 0 when it does not take part.
     */
    rl_agent_t agent;
    unsigned int state_num = 1;
    CALC_STATE_NUM(state_num);
    init_rl_agent(&agent, state_num,
                  'X' * (player1 == 2) + 'O' * (player2 == 2));
    load_model(&agent, state_num, MODEL_NAME);

    negamax_init();

    int current = player1;
    char outcome;

    for (;;) {
        outcome = check_win(table);
        if (outcome != ' ')
            break;

        /* The side to move keeps the same mark for the whole game. */
        const char mark = (current == player1) ? 'X' : 'O';
        int move;

        switch (current) {
        case 0:
            move = negamax_predict(table, mark).move;
            break;
        case 1:
            move = mcts(table, mark);
            break;
        default:
            move = play_rl(table, &agent);
            break;
        }

        table[move] = mark;

        /* Hand the turn to the other agent. */
        current = (current == player1) ? player2 : player1;
    }

    /* Release per-game resources before reporting the result. */
    zobrist_destroy_table();
    free(agent.state_value);

    if (outcome == 'D')
        return 0;
    return (outcome == 'X') ? 1 : -1;
}

/*
 * Print the final standings as a table: one row per agent showing its name,
 * Elo rating and win/draw/lose counts.
 *
 * Every column is 10 characters wide and columns are joined by " | ", so the
 * data rows line up with the header and with the horizontal rule.
 */
static void dump_elo(void)
{
    enum { COL_WIDTH = 10, N_COLS = 5, SEP_WIDTH = 3 };
    const int rule_width = N_COLS * COL_WIDTH + (N_COLS - 1) * SEP_WIDTH;
    const int n_agents = (int) (sizeof(agent_name) / sizeof(agent_name[0]));

    printf("%-10s | %-10s | %-10s | %-10s | %-10s\n", "Agent Name",
           "Elo Rating", "Win", "Draw", "Lose");

    /* The rule length is derived from the layout so it matches the header. */
    for (int i = 0; i < rule_width; ++i)
        putchar('-');
    putchar('\n');

    /* The rating uses the same 10-character width as its header cell. */
    for (int i = 0; i < n_agents; ++i)
        printf("%-10s | %-10.2f | %-10lld | %-10lld | %-10lld\n",
               agent_name[i], elo_rating[i], win[i], draw[i], lose[i]);
}

int main()
{
srand(time(NULL));

for (int i = 0; i < N_GAMES; i++) {
printf("Running Game #%d\n", i + 1);
double sa, sb, ra, rb, ea, eb;
int player1 = rand() % 3;
int player2 = rand() % 3;

while (player1 == player2) {
player1 = rand() % 3;
player2 = rand() % 3;
}

ra = elo_rating[player1];
rb = elo_rating[player2];

int result = play_game(player1, player2);

if (result == 1) {
win[player1]++;
lose[player2]++;
sa = 1;
sb = 0;
} else if (result == 0) {
draw[player1]++;
draw[player2]++;
sa = 0.5;
sb = 0.5;
} else {
win[player2]++;
lose[player1]++;
sa = 0;
sb = 1;
}

ea = 1 / (1 + pow(10, (rb - ra) / 400));
eb = 1 / (1 + pow(10, (ra - rb) / 400));

elo_rating[player1] = ra + ELO_K * (sa - ea);
elo_rating[player2] = rb + ELO_K * (sb - eb);
}

dump_elo();

return 0;
}

0 comments on commit 8a5f32c

Please sign in to comment.