PPO development #309

Draft: wants to merge 15 commits into main
13 changes: 13 additions & 0 deletions .env.template
@@ -0,0 +1,13 @@
# .env template

# Path for logs
LOG_FOLDER=

# Your HPC account code
NYU_HPC_ACCOUNT=

# NYU ID
USERNAME=

SINGULARITY_IMAGE=
OVERLAY_FILE=
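
A minimal sketch of how these template variables could be consumed at runtime, assuming the python-dotenv package (not part of this PR); the fallback values are illustrative.

import os

from dotenv import load_dotenv  # assumption: python-dotenv is installed

# Read key=value pairs from .env into os.environ (already-set variables win).
load_dotenv()

log_folder = os.environ.get("LOG_FOLDER", "logs")
hpc_account = os.environ.get("NYU_HPC_ACCOUNT", "")
username = os.environ.get("USERNAME", "")
singularity_image = os.environ.get("SINGULARITY_IMAGE", "")
overlay_file = os.environ.get("OVERLAY_FILE", "")

print(f"Logging to {log_folder} as {username} on account {hpc_account}")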
10 changes: 8 additions & 2 deletions .gitignore
@@ -25,7 +25,7 @@ data/raw/*
data/processed/validation/*
data/processed/testing/*
data/processed/training/*
data/formatted_json_v2_no_tl_valid/*
data/processed/sampled/*

# Logging
/wandb
@@ -189,7 +189,6 @@ celerybeat.pid
*.sage.py

# Environments
.env
.venv
venv/
ENV/
@@ -239,4 +238,11 @@ pyrightconfig.json

*~

# Environment variables
# To be manually created using .env.template
.env

# Logs
examples/experiments/scripts/logs/*

# End of https://www.toptal.com/developers/gitignore/api/python,c++
25 changes: 12 additions & 13 deletions baselines/ippo/config/ippo_ff_puffer.yaml
@@ -2,33 +2,33 @@ mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: "data/processed/examples"
data_dir: "data/processed/training"

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 100 # Number of parallel environments
k_unique_scenes: 100 # Number of unique scenes to sample from
max_controlled_agents: 32 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
max_controlled_agents: 128 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
normalize_obs: true
remove_non_vehicles: true # If false, all agents are included (vehicles, pedestrians, cyclists)
use_lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "weighted_combination"
collision_weight: -0.035
off_road_weight: -0.035
reward_type: "weighted_combination" #"distance_to_logs"
collision_weight: -0.5
off_road_weight: -0.5
goal_achieved_weight: 1.0
dynamics_model: "classic"
collision_behavior: "ignore" # Options: "remove", "stop"
dist_to_goal_threshold: 3.0
dist_to_goal_threshold: 3.5
polyline_reduction_threshold: 0.2 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from is deterministic; if None, the set of scenes is random
obs_radius: 60.0 # Visibility radius of the agents
wandb:
entity: ""
project: "gpudrive"
group: "my_group"
group: "paper_rl_scale"
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

@@ -46,15 +46,15 @@ train:
compile_mode: "reduce-overhead"

# # # Data sampling # # #
resample_scenes: false
resample_scenes: true
resample_criterion: "global_step"
resample_dataset_size: 10_000 # Number of unique scenes to sample from
resample_interval: 5_000_000
resample_limit: 10000 # Resample until the limit is reached; set to a large number to continue resampling indefinitely
resample_mode: "random" # Options: random
resample_limit: 1000 # Resample until the limit is reached; set to a large number to continue resampling indefinitely

# # # PPO # # #
torch_deterministic: false
total_timesteps: 1_000_000_000
total_timesteps: 5_000_000_000
batch_size: 131_072
minibatch_size: 16_384
learning_rate: 3e-4
@@ -77,11 +77,10 @@ train:

# # # Rendering # # #
render: false # Determines whether to render the environment (note: will slow down training)
render_interval: 500 # Render every k iterations
render_interval: 20 # Render every k iterations
render_k_scenarios: 10 # Number of scenarios to render
render_simulator_state: true # Plot the simulator state from bird's eye view
render_agent_obs: false # Debugging tool, plot what an agent is seeing
render_fps: 15 # Frames per second
render_format: "mp4" # Options: gif, mp4

vec:
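
A note on the resampling options in this config: the sketch below shows one way resample_interval, resample_limit, and resample_criterion could gate scene resampling in a training loop. The helper and its arguments are illustrative assumptions, not this PR's actual implementation.

from dataclasses import dataclass
from typing import Callable


@dataclass
class ResampleConfig:
    # Mirrors the YAML fields above; values are the new defaults from this PR.
    resample_scenes: bool = True
    resample_criterion: str = "global_step"
    resample_dataset_size: int = 10_000
    resample_interval: int = 5_000_000
    resample_limit: int = 1000
    resample_mode: str = "random"


def maybe_resample(global_step: int, cfg: ResampleConfig, resample_count: int,
                   resample_fn: Callable[[int, str], None]) -> int:
    """Call resample_fn every `resample_interval` steps, at most `resample_limit` times."""
    if not cfg.resample_scenes or cfg.resample_criterion != "global_step":
        return resample_count
    if resample_count >= cfg.resample_limit:
        return resample_count
    if global_step > 0 and global_step % cfg.resample_interval == 0:
        resample_fn(cfg.resample_dataset_size, cfg.resample_mode)
        resample_count += 1
    return resample_count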
109 changes: 103 additions & 6 deletions baselines/ippo/ippo_pufferlib.py
@@ -7,20 +7,27 @@
"""

import os
from typing import Optional
from typing_extensions import Annotated
import yaml
from datetime import datetime
import torch
import wandb
from box import Box
from integrations.rl.puffer import ppo
from integrations.rl.puffer.puffer_env import env_creator
from integrations.rl.puffer.utils import Policy, LiDARPolicy
from integrations.rl.puffer.utils import Policy

import pufferlib
import pufferlib.vector
import pufferlib.frameworks.cleanrl
from rich.console import Console

import typer
from typer import Typer

app = Typer()


def load_config(config_path):
"""Load the configuration file."""
Expand All @@ -29,7 +36,18 @@ def load_config(config_path):
config = Box(yaml.safe_load(f))

datetime_ = datetime.now().strftime("%m_%d_%H_%M_%S")
config["train"]["exp_id"] = f'{config["train"]["exp_id"]}__S_{str(config["environment"]["k_unique_scenes"])}__{datetime_}'

if config["train"]["resample_scenes"]:
    dataset_size = config["train"]["resample_dataset_size"]
    config["train"]["exp_id"] = f'{config["train"]["exp_id"]}__R_{dataset_size}__{datetime_}'
else:
    dataset_size = str(config["environment"]["k_unique_scenes"])
    config["train"]["exp_id"] = f'{config["train"]["exp_id"]}__S_{dataset_size}__{datetime_}'

config["environment"]["dataset_size"] = dataset_size
config["train"]["device"] = config["train"].get("device", "cpu") # Default to 'cpu' if not set
if torch.cuda.is_available():
config["train"]["device"] = "cuda" # Set to 'cuda' if available
@@ -42,7 +60,7 @@ def make_policy(env):
return pufferlib.frameworks.cleanrl.Policy(Policy(env))


def train(args):
def train(args, make_env):
"""Main training loop for the PPO agent."""
args.wandb = init_wandb(args, args.train.exp_id, id=args.train.exp_id)
args.train.__dict__.update(dict(args.wandb.config.train))
@@ -131,9 +149,84 @@ def sweep(args, project="PPO", sweep_name="my_sweep"):
wandb.agent(sweep_id, lambda: train(args), count=100)


if __name__ == "__main__":
@app.command()
def run(
config_path: Annotated[
str, typer.Argument(help="The path to the default configuration file")
] = "baselines/ippo/config/ippo_ff_puffer.yaml",
*,
# fmt: off
# Environment options
num_worlds: Annotated[Optional[int], typer.Option(help="Number of parallel envs")] = None,
k_unique_scenes: Annotated[Optional[int], typer.Option(help="The number of unique scenes to sample")] = None,
collision_weight: Annotated[Optional[float], typer.Option(help="The weight for collision penalty")] = None,
off_road_weight: Annotated[Optional[float], typer.Option(help="The weight for off-road penalty")] = None,
goal_achieved_weight: Annotated[Optional[float], typer.Option(help="The weight for goal-achieved reward")] = None,
dist_to_goal_threshold: Annotated[Optional[float], typer.Option(help="The distance threshold for goal-achieved")] = None,
sampling_seed: Annotated[Optional[int], typer.Option(help="The seed for sampling scenes")] = None,
obs_radius: Annotated[Optional[float], typer.Option(help="The radius for the observation")] = None,
# Train options
seed: Annotated[Optional[int], typer.Option(help="The seed for training")] = None,
learning_rate: Annotated[Optional[float], typer.Option(help="The learning rate for training")] = None,
resample_scenes: Annotated[Optional[int], typer.Option(help="Whether to resample scenes during training; 0 or 1")] = None,
resample_interval: Annotated[Optional[int], typer.Option(help="The interval for resampling scenes")] = None,
resample_dataset_size: Annotated[Optional[int], typer.Option(help="The size of the dataset to sample from")] = None,
total_timesteps: Annotated[Optional[int], typer.Option(help="The total number of training steps")] = None,
ent_coef: Annotated[Optional[float], typer.Option(help="Entropy coefficient")] = None,
update_epochs: Annotated[Optional[int], typer.Option(help="The number of epochs for updating the policy")] = None,
batch_size: Annotated[Optional[int], typer.Option(help="The batch size for training")] = None,
minibatch_size: Annotated[Optional[int], typer.Option(help="The minibatch size for training")] = None,
# Wandb logging options
project: Annotated[Optional[str], typer.Option(help="WandB project name")] = None,
entity: Annotated[Optional[str], typer.Option(help="WandB entity name")] = None,
group: Annotated[Optional[str], typer.Option(help="WandB group name")] = None,
):
"""Run PPO training with the given configuration."""
# fmt: on

config = load_config("baselines/ippo/config/ippo_ff_puffer.yaml")
# Load default configs
config = load_config(config_path)

# Override configs with command-line arguments
env_config = {
"num_worlds": num_worlds,
"k_unique_scenes": k_unique_scenes,
"collision_weight": collision_weight,
"off_road_weight": off_road_weight,
"goal_achieved_weight": goal_achieved_weight,
"dist_to_goal_threshold": dist_to_goal_threshold,
"sampling_seed": sampling_seed,
"obs_radius": obs_radius,
}
config.environment.update(
{k: v for k, v in env_config.items() if v is not None}
)
train_config = {
"seed": seed,
"learning_rate": learning_rate,
"resample_scenes": None
if resample_scenes is None
else bool(resample_scenes),
"resample_interval": resample_interval,
"resample_dataset_size": resample_dataset_size,
"total_timesteps": total_timesteps,
"ent_coef": ent_coef,
"update_epochs": update_epochs,
"batch_size": batch_size,
"minibatch_size": minibatch_size,
}
config.train.update(
{k: v for k, v in train_config.items() if v is not None}
)

wandb_config = {
"project": project,
"entity": entity,
"group": group,
}
config.wandb.update(
{k: v for k, v in wandb_config.items() if v is not None}
)

make_env = env_creator(
data_dir=config.data_dir,
@@ -143,4 +236,8 @@ def sweep(args, project="PPO", sweep_name="my_sweep"):
)

if config.mode == "train":
train(config)
train(config, make_env)


if __name__ == "__main__":
app()
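
The override pattern used by the run command above, shown in isolation: CLI options that were not passed stay None and are filtered out, so the YAML defaults survive. A small self-contained example (the values are illustrative, taken from the config defaults).

# Only explicitly passed options (non-None) override the YAML defaults.
yaml_train = {"learning_rate": 3e-4, "total_timesteps": 5_000_000_000, "batch_size": 131_072}
cli_overrides = {"learning_rate": 1e-4, "total_timesteps": None, "batch_size": None}

yaml_train.update({k: v for k, v in cli_overrides.items() if v is not None})
assert yaml_train == {"learning_rate": 1e-4, "total_timesteps": 5_000_000_000, "batch_size": 131_072}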
20 changes: 0 additions & 20 deletions baselines/scripts/bash_exec_paper_fig.sh

This file was deleted.

4 changes: 0 additions & 4 deletions baselines/scripts/bash_exec_solve_n_scenes.sh

This file was deleted.

14 changes: 0 additions & 14 deletions baselines/scripts/sbatch_ippo.sh

This file was deleted.

17 changes: 0 additions & 17 deletions baselines/scripts/sbatch_paper_fig.sh

This file was deleted.

14 changes: 0 additions & 14 deletions baselines/scripts/sbatch_solve_n_scenes.sh

This file was deleted.


Binary file removed data/processed/waymax/scenario_ab2a72c63f8fd589.pkl
Binary file not shown.