Skip to content

Commit

Permalink
Disable logging messages for RL agent
Browse files Browse the repository at this point in the history
During gameplay and training, the RL agent would print Q values for
available moves as part of its move selection process. However, this
information, mainly useful for debugging purposes, is not required for
typical usage scenarios. To enhance readability and streamline output,
this patch disables the logging output by default. The related messages
will now appear only if the VERBOSE macro is explicitly defined at
compile time. This change reduces unnecessary verbosity during gameplay
and training, improving overall user experience.
  • Loading branch information
visitorckw committed Mar 21, 2024
1 parent d327beb commit a27cef0
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
9 changes: 9 additions & 0 deletions agents/reinforcement_learning.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
#include "reinforcement_learning.h"
#include "util.h"

// Uncomment the following line to enable the log output.
// #define VERBOSE

// TODO: Find a more efficient hash. The current one cannot store 5x5 or
// larger boards, since there could be 3^25 states and it might overflow.
int table_to_hash(char *table)
Expand Down Expand Up @@ -62,11 +65,15 @@ int get_action_exploit(char *table, rl_agent_t *agent)
float max_q = -FLT_MAX;
float *state_value = agent->state_value;
int candidate_count = 1;
#ifdef VERBOSE
printf("[ ");
#endif
for_each_empty_grid (i, table) {
table[i] = agent->player;
float new_q = state_value[table_to_hash(table)];
#ifdef VERBOSE
printf("%f ", new_q);
#endif
if (new_q == max_q) {
++candidate_count;
if (rand() % candidate_count == 0) {
Expand All @@ -79,8 +86,10 @@ int get_action_exploit(char *table, rl_agent_t *agent)
}
table[i] = ' ';
}
#ifdef VERBOSE
printf(" ]\n");
printf("exploit %d\n", max_act);
#endif
return max_act;
}

Expand Down
11 changes: 11 additions & 0 deletions train.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
#define EPSILON_START 0.5
#define EPSILON_END 0.001

// Uncomment the following line to enable the log output.
// #define VERBOSE

#define RAND_UNIFORM ((float) rand() / (float) RAND_MAX)

#if EPSILON_GREEDY
Expand Down Expand Up @@ -98,7 +101,9 @@ static int get_action_epsilon_greedy(char *table, rl_agent_t *agent)
if (RAND_UNIFORM < epsilon) { // explore
int *available_moves = get_available_moves(table, &move_cnt);
int act = available_moves[rand() % move_cnt];
#ifdef VERBOSE
printf("explore %d\n", act);
#endif
free(available_moves);
return act;
}
Expand Down Expand Up @@ -136,12 +141,16 @@ static void train(int iter)
char win = ' ';
while (1) {
if (win == 'D') {
#ifdef VERBOSE
draw_board(table);
printf("It is a draw!\n");
#endif
break;
} else if (win != ' ') {
#ifdef VERBOSE
draw_board(table);
printf("%c won!\n", win);
#endif
break;
}
#if EPSILON_GREEDY
Expand All @@ -156,7 +165,9 @@ static void train(int iter)
(1 - REWARD_TRADEOFF) * get_score(table, agent[turn].player) +
REWARD_TRADEOFF * calculate_win_value(win, agent[turn].player);
++episode_len;
#ifdef VERBOSE
draw_board(table);
#endif
turn = !turn;
}
turn = !turn; // the player who makes the last move.
Expand Down

0 comments on commit a27cef0

Please sign in to comment.