Add plotting script to analyze training logs
Akram authored and Akram committed Aug 7, 2024
1 parent e66ae2f commit 93d2b39
Showing 2 changed files with 100 additions and 22 deletions.
75 changes: 53 additions & 22 deletions webots/controllers/RL_Supervisor/agent.py
@@ -27,7 +27,8 @@
################################################################################
# Imports
################################################################################

import csv
import os
import struct
import numpy as np # pylint: disable=import-error
import tensorflow as tf # pylint: disable=import-error
@@ -65,7 +66,7 @@
]
MAX_SENSOR_VALUE = 1000
MIN_STD_DEV = 0.1 # Minimum standard deviation
-STD_DEV_FACTOR = 0.3995  # Discounter standard deviation factor
+STD_DEV_FACTOR = 0.99995  # Discounter standard deviation factor

################################################################################
# Classes
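The STD_DEV_FACTOR change above deserves a quick sanity check. Assuming the agent multiplies its exploration standard deviation by STD_DEV_FACTOR after each update and clips it at MIN_STD_DEV (the decay code itself is not shown in this diff), the old factor of 0.3995 would collapse exploration after only a handful of updates, while 0.99995 anneals it gradually. A rough illustration, not part of the commit:

# Rough illustration only; assumes a multiplicative per-update decay of the
# standard deviation, which is not shown in this diff.
import math

MIN_STD_DEV = 0.1
INITIAL_STD_DEV = 1.0  # assumed starting value

for factor in (0.3995, 0.99995):
    steps = math.log(MIN_STD_DEV / INITIAL_STD_DEV) / math.log(factor)
    print(f"factor={factor}: ~{steps:.0f} updates until MIN_STD_DEV")
# factor=0.3995:  ~3 updates (exploration collapses almost immediately)
# factor=0.99995: ~46000 updates (standard deviation anneals gradually)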
@@ -110,6 +111,9 @@ def __init__(
        self.state = "IDLE"
        self.data_sent = True
        self.unsent_data = []
+       self.critic_loss_history = []
+       self.actor_loss_history = []
+       self.reward_history = []

    def set_train_mode(self):
        """Set the Agent mode to train mode."""
@@ -155,30 +159,33 @@ def predict_action(self, state):
        # Calculation of probabilities by the Actor neural network
        probs = self.__neural_network.actor_network(state)

-       # Create a normal distribution with the calculated probabilities and the standard deviation
-       dist = tfp.distributions.Normal(probs, self.__std_dev)
+       if self.train_mode is True:
+           # Create a normal distribution with the calculated probabilities and the standard deviation
+           dist = tfp.distributions.Normal(probs, self.__std_dev)

-       # Sampling an action from the normal distribution
-       sampled_action = dist.sample()
+           # Sampling an action from the normal distribution
+           sampled_action = dist.sample()

-       # Apply the Tanh transformation to the sampled action
-       transformed_action = tf.tanh(sampled_action)
+           # Apply the Tanh transformation to the sampled action
+           transformed_action = tf.tanh(sampled_action)

-       # Calculation of the logarithm of the probability density of the sampled action
-       log_prob = dist.log_prob(sampled_action)
+           # Calculation of the logarithm of the probability density of the sampled action
+           log_prob = dist.log_prob(sampled_action)

-       # Calculation of the Jacobian determinant for the Tanh transformation
-       jacobian_log_det = tf.math.log(1 - tf.square(transformed_action) + 1e-6)
+           # Calculation of the Jacobian determinant for the Tanh transformation
+           jacobian_log_det = tf.math.log(1 - tf.square(transformed_action) + 1e-6)

-       # Calculation of Adjusted probabilities by the neural network
-       adjusted_log_prob = log_prob - jacobian_log_det
+           # Calculation of Adjusted probabilities by the neural network
+           adjusted_log_prob = log_prob - jacobian_log_det

-       # calculate the estimated value of a state, which is determined by the Critic network
-       value = self.__neural_network.critic_network(state)
+           # calculate the estimated value of a state, which is determined by the Critic network
+           value = self.__neural_network.critic_network(state)

-       self.action = transformed_action.numpy()[0]
-       self.value = value.numpy()[0]
-       self.adjusted_log_prob = adjusted_log_prob.numpy()[0]
+           self.action = transformed_action.numpy()[0]
+           self.value = value.numpy()[0]
+           self.adjusted_log_prob = adjusted_log_prob.numpy()[0]
+       else:
+           self.action = probs.numpy()[0]

        return self.action

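The jacobian_log_det term above is the standard change-of-variables correction for a tanh-squashed Gaussian policy: for u ~ Normal(mu, sigma) and a = tanh(u), log pi(a) = log N(u) - log(1 - tanh(u)^2 + epsilon), which is exactly what adjusted_log_prob computes. A minimal standalone sketch of the same calculation (the helper name and epsilon default are illustrative, not part of the commit):

import tensorflow as tf
import tensorflow_probability as tfp

def squashed_gaussian_log_prob(mean, std_dev, epsilon=1e-6):
    """Sample a = tanh(u) with u ~ Normal(mean, std_dev) and return the action
    together with its change-of-variables corrected log-probability."""
    dist = tfp.distributions.Normal(mean, std_dev)
    u = dist.sample()   # unbounded Gaussian sample
    a = tf.tanh(u)      # squashed into (-1, 1)
    # log pi(a) = log N(u) - log(1 - tanh(u)^2 + eps), per action dimension
    log_prob = dist.log_prob(u) - tf.math.log(1.0 - tf.square(a) + epsilon)
    return a, log_prob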
@@ -419,18 +426,42 @@ def learn(self, states, actions, old_probs, values, rewards, dones):
        with tf.GradientTape() as tape:

            critic_value = self.__neural_network.critic_network(states)
            critic_value = tf.squeeze(critic_value, 1)
            returns = advantages + values

            # Generate loss
-           critic_loss = tf.keras.losses.MSE(critic_value, returns)
+           critic_loss = tf.math.reduce_mean(tf.math.pow(returns - critic_value, 2))

        # calculate gradient
        critic_params = self.__neural_network.critic_network.trainable_variables
        critic_grads = tape.gradient(critic_loss, critic_params)
        self.__neural_network.critic_optimizer.apply_gradients(
            zip(critic_grads, critic_params)
        )
+       self.actor_loss_history.append(actor_loss.numpy())
+       self.critic_loss_history.append(critic_loss.numpy())
+       self.reward_history.append(sum(rewards))
+
+       # saving logs in a CSV file
+       self.save_logs_to_csv()

+   def save_logs_to_csv(self):
+       """Function for saving logs in a CSV file"""
+
+       # Ensure the directory exists
+       log_dir = "logs"
+       os.makedirs(log_dir, exist_ok=True)
+       log_file = os.path.join(log_dir, "training_logs.csv")
+
+       with open(log_file, mode="w", encoding="utf-8", newline="") as file:
+           writer = csv.writer(file)
+           writer.writerow(["Actor Loss", "Critic Loss", "Reward"])
+           for indx, reward in enumerate(self.reward_history):
+               writer.writerow(
+                   [
+                       self.actor_loss_history[indx],
+                       self.critic_loss_history[indx],
+                       reward,
+                   ]
+               )

    def perform_training(self):
        """Runs the training process."""
47 changes: 47 additions & 0 deletions webots/controllers/RL_Supervisor/plotting.py
@@ -0,0 +1,47 @@
""" Plotting script with Matplotlib """

# Imports
import matplotlib.pyplot as plt
import pandas as pd

# Define the path to the CSV file
LOG_FILE = "logs/training_logs.csv"

# Read the CSV file
data = pd.read_csv(LOG_FILE)

# Define length
data["Mini Batch"] = range(1, len(data) + 1)

# Plotting Actor Loss
plt.figure(figsize=(10, 5))
plt.plot(data["Mini Batch"], data["Actor Loss"], label="Actor Loss")
plt.xlabel("Mini Batch")
plt.ylabel("Loss")
plt.title("Actor Loss Over Mini Batches")
plt.legend()
plt.grid(True)
plt.savefig("logs/actor_loss_plot.png")
plt.show()

# Plotting Critic Loss
plt.figure(figsize=(10, 5))
plt.plot(data["Mini Batch"], data["Critic Loss"], label="Critic Loss")
plt.xlabel("Mini Batch")
plt.ylabel("Loss")
plt.title("Critic Loss Over Mini Batches")
plt.legend()
plt.grid(True)
plt.savefig("logs/critic_loss_plot.png")
plt.show()

# Plotting Total Rewards
plt.figure(figsize=(10, 5))
plt.plot(data["Mini Batch"], data["Reward"], label="Reward")
plt.xlabel("Mini Batch")
plt.ylabel("Total Reward")
plt.title("Total Rewards Over Mini Batches")
plt.legend()
plt.grid(True)
plt.savefig("logs/total_rewards_plot.png")
plt.show()
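The three plotting blocks above differ only in the column, y-label, title, and output file. A loop-driven variant (a sketch using the same columns and file names as the committed script, not part of the commit) keeps the script short as more metrics get logged:

import matplotlib.pyplot as plt
import pandas as pd

data = pd.read_csv("logs/training_logs.csv")
data["Mini Batch"] = range(1, len(data) + 1)

PLOTS = [
    ("Actor Loss", "Loss", "Actor Loss Over Mini Batches", "logs/actor_loss_plot.png"),
    ("Critic Loss", "Loss", "Critic Loss Over Mini Batches", "logs/critic_loss_plot.png"),
    ("Reward", "Total Reward", "Total Rewards Over Mini Batches", "logs/total_rewards_plot.png"),
]

for column, ylabel, title, out_file in PLOTS:
    plt.figure(figsize=(10, 5))
    plt.plot(data["Mini Batch"], data[column], label=column)
    plt.xlabel("Mini Batch")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.savefig(out_file)
    plt.show()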
