Disable logging messages for RL agent

During gameplay and training, the RL agent printed the Q values of the available moves as part of its move selection process. This information is mainly useful for debugging and is not needed in typical usage. To improve readability and streamline the output, this patch disables these log messages by default. They now appear only if the VERBOSE macro is explicitly defined at compile time. This change reduces unnecessary verbosity during gameplay and training, improving the overall user experience.
jserv · Mar 21, 2024 · a27cef0
1 parent d327beb
commit a27cef0
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 0 deletions.
diff --git a/agents/reinforcement_learning.c b/agents/reinforcement_learning.c
@@ -7,6 +7,9 @@
 #include "reinforcement_learning.h"
 #include "util.h"
 
+// Uncomment the following line to enable the log output.
+// #define VERBOSE
+
 // TODO: Find a more efficient hash. We cannot store a 5x5 or larger board,
 // since it could have 3^25 states and the hash might overflow.
 int table_to_hash(char *table)
@@ -62,11 +65,15 @@ int get_action_exploit(char *table, rl_agent_t *agent)
     float max_q = -FLT_MAX;
     float *state_value = agent->state_value;
     int candidate_count = 1;
+#ifdef VERBOSE
     printf("[ ");
+#endif
     for_each_empty_grid (i, table) {
         table[i] = agent->player;
         float new_q = state_value[table_to_hash(table)];
+#ifdef VERBOSE
         printf("%f ", new_q);
+#endif
         if (new_q == max_q) {
             ++candidate_count;
             if (rand() % candidate_count == 0) {
@@ -79,8 +86,10 @@ int get_action_exploit(char *table, rl_agent_t *agent)
         }
         table[i] = ' ';
     }
+#ifdef VERBOSE
     printf(" ]\n");
     printf("exploit %d\n", max_act);
+#endif
     return max_act;
 }
 

diff --git a/train.c b/train.c
@@ -28,6 +28,9 @@
 #define EPSILON_START 0.5
 #define EPSILON_END 0.001
 
+// Uncomment the following line to enable the log output.
+// #define VERBOSE
+
 #define RAND_UNIFORM ((float) rand() / (float) RAND_MAX)
 
 #if EPSILON_GREEDY
@@ -98,7 +101,9 @@ static int get_action_epsilon_greedy(char *table, rl_agent_t *agent)
     if (RAND_UNIFORM < epsilon) {  // explore
         int *available_moves = get_available_moves(table, &move_cnt);
         int act = available_moves[rand() % move_cnt];
+#ifdef VERBOSE
         printf("explore %d\n", act);
+#endif
         free(available_moves);
         return act;
     }
@@ -136,12 +141,16 @@ static void train(int iter)
     char win = ' ';
     while (1) {
         if (win == 'D') {
+#ifdef VERBOSE
             draw_board(table);
             printf("It is a draw!\n");
+#endif
             break;
         } else if (win != ' ') {
+#ifdef VERBOSE
             draw_board(table);
             printf("%c won!\n", win);
+#endif
             break;
         }
 #if EPSILON_GREEDY
@@ -156,7 +165,9 @@ static void train(int iter)
             (1 - REWARD_TRADEOFF) * get_score(table, agent[turn].player) +
             REWARD_TRADEOFF * calculate_win_value(win, agent[turn].player);
         ++episode_len;
+#ifdef VERBOSE
         draw_board(table);
+#endif
         turn = !turn;
     }
     turn = !turn;  // the player who makes the last move.