PPO development #309

Draft: wants to merge 15 commits into main
13 changes: 13 additions & 0 deletions .env.template
@@ -0,0 +1,13 @@
# .env template

# Path for logs
LOG_FOLDER=

# Your HPC account code
NYU_HPC_ACCOUNT=

# NYU ID
USERNAME=

SINGULARITY_IMAGE=
OVERLAY_FILE=
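
A minimal sketch of how these template variables could be consumed at runtime, assuming the python-dotenv package (not part of this PR); the fallback values are illustrative.

import os

from dotenv import load_dotenv  # assumption: python-dotenv is installed

# Read key=value pairs from .env into os.environ (already-set variables win).
load_dotenv()

log_folder = os.environ.get("LOG_FOLDER", "logs")
hpc_account = os.environ.get("NYU_HPC_ACCOUNT", "")
username = os.environ.get("USERNAME", "")
singularity_image = os.environ.get("SINGULARITY_IMAGE", "")
overlay_file = os.environ.get("OVERLAY_FILE", "")

print(f"Logging to {log_folder} as {username} on account {hpc_account}")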
10 changes: 8 additions & 2 deletions .gitignore
@@ -25,7 +25,7 @@ data/raw/*
data/processed/validation/*
data/processed/testing/*
data/processed/training/*
data/formatted_json_v2_no_tl_valid/*
data/processed/sampled/*

# Logging
/wandb
@@ -189,7 +189,6 @@ celerybeat.pid
*.sage.py

# Environments
.env
.venv
venv/
ENV/
@@ -239,4 +238,11 @@ pyrightconfig.json

*~

# Environment variables
# To be manually created using .env.template
.env

# Logs
examples/experiments/scripts/logs/*

# End of https://www.toptal.com/developers/gitignore/api/python,c++
25 changes: 12 additions & 13 deletions baselines/ippo/config/ippo_ff_puffer.yaml
@@ -2,33 +2,33 @@ mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: "data/processed/examples"
data_dir: "data/processed/training"

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 100 # Number of parallel environments
k_unique_scenes: 100 # Number of unique scenes to sample from
max_controlled_agents: 32 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
max_controlled_agents: 128 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
normalize_obs: true
remove_non_vehicles: true # If false, all agents are included (vehicles, pedestrians, cyclists)
use_lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "weighted_combination"
collision_weight: -0.035
off_road_weight: -0.035
reward_type: "weighted_combination" #"distance_to_logs"
collision_weight: -0.5
off_road_weight: -0.5
goal_achieved_weight: 1.0
dynamics_model: "classic"
collision_behavior: "ignore" # Options: "remove", "stop"
dist_to_goal_threshold: 3.0
dist_to_goal_threshold: 3.5
polyline_reduction_threshold: 0.2 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from is deterministic; if None, the set of scenes is random
obs_radius: 60.0 # Visibility radius of the agents
wandb:
entity: ""
project: "gpudrive"
group: "my_group"
group: "paper_rl_scale"
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

@@ -46,15 +46,15 @@ train:
compile_mode: "reduce-overhead"

# # # Data sampling # # #
resample_scenes: false
resample_scenes: true
resample_criterion: "global_step"
resample_dataset_size: 10_000 # Number of unique scenes to sample from
resample_interval: 5_000_000
resample_limit: 10000 # Resample until the limit is reached; set to a large number to continue resampling indefinitely
resample_mode: "random" # Options: random
resample_limit: 1000 # Resample until the limit is reached; set to a large number to continue resampling indefinitely

# # # PPO # # #
torch_deterministic: false
total_timesteps: 1_000_000_000
total_timesteps: 5_000_000_000
batch_size: 131_072
minibatch_size: 16_384
learning_rate: 3e-4
@@ -77,11 +77,10 @@ train:

# # # Rendering # # #
render: false # Determines whether to render the environment (note: will slow down training)
render_interval: 500 # Render every k iterations
render_interval: 20 # Render every k iterations
render_k_scenarios: 10 # Number of scenarios to render
render_simulator_state: true # Plot the simulator state from bird's eye view
render_agent_obs: false # Debugging tool, plot what an agent is seeing
render_fps: 15 # Frames per second
render_format: "mp4" # Options: gif, mp4

vec:
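
A note on the resampling options in this config: the sketch below shows one way resample_interval, resample_limit, and resample_criterion could gate scene resampling in a training loop. The helper and its arguments are illustrative assumptions, not this PR's actual implementation.

from dataclasses import dataclass
from typing import Callable


@dataclass
class ResampleConfig:
    # Mirrors the YAML fields above; values are the new defaults from this PR.
    resample_scenes: bool = True
    resample_criterion: str = "global_step"
    resample_dataset_size: int = 10_000
    resample_interval: int = 5_000_000
    resample_limit: int = 1000
    resample_mode: str = "random"


def maybe_resample(global_step: int, cfg: ResampleConfig, resample_count: int,
                   resample_fn: Callable[[int, str], None]) -> int:
    """Call resample_fn every `resample_interval` steps, at most `resample_limit` times."""
    if not cfg.resample_scenes or cfg.resample_criterion != "global_step":
        return resample_count
    if resample_count >= cfg.resample_limit:
        return resample_count
    if global_step > 0 and global_step % cfg.resample_interval == 0:
        resample_fn(cfg.resample_dataset_size, cfg.resample_mode)
        resample_count += 1
    return resample_count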
109 changes: 103 additions & 6 deletions baselines/ippo/ippo_pufferlib.py
@@ -7,20 +7,27 @@
"""

import os
from typing import Optional
from typing_extensions import Annotated
import yaml
from datetime import datetime
import torch
import wandb
from box import Box
from integrations.rl.puffer import ppo
from integrations.rl.puffer.puffer_env import env_creator
from integrations.rl.puffer.utils import Policy, LiDARPolicy
from integrations.rl.puffer.utils import Policy

import pufferlib
import pufferlib.vector
import pufferlib.frameworks.cleanrl
from rich.console import Console

import typer
from typer import Typer

app = Typer()


def load_config(config_path):
"""Load the configuration file."""
Expand All @@ -29,7 +36,18 @@ def load_config(config_path):
config = Box(yaml.safe_load(f))

datetime_ = datetime.now().strftime("%m_%d_%H_%M_%S")
config["train"]["exp_id"] = f'{config["train"]["exp_id"]}__S_{str(config["environment"]["k_unique_scenes"])}__{datetime_}'

if config["train"]["resample_scenes"]:
    dataset_size = config["train"]["resample_dataset_size"]
    config["train"]["exp_id"] = f'{config["train"]["exp_id"]}__R_{dataset_size}__{datetime_}'
else:
    dataset_size = str(config["environment"]["k_unique_scenes"])
    config["train"]["exp_id"] = f'{config["train"]["exp_id"]}__S_{dataset_size}__{datetime_}'

config["environment"]["dataset_size"] = dataset_size
config["train"]["device"] = config["train"].get("device", "cpu") # Default to 'cpu' if not set
if torch.cuda.is_available():
config["train"]["device"] = "cuda" # Set to 'cuda' if available
@@ -42,7 +60,7 @@ def make_policy(env):
return pufferlib.frameworks.cleanrl.Policy(Policy(env))


def train(args):
def train(args, make_env):
"""Main training loop for the PPO agent."""
args.wandb = init_wandb(args, args.train.exp_id, id=args.train.exp_id)
args.train.__dict__.update(dict(args.wandb.config.train))
@@ -131,9 +149,84 @@ def sweep(args, project="PPO", sweep_name="my_sweep"):
wandb.agent(sweep_id, lambda: train(args), count=100)


if __name__ == "__main__":
@app.command()
def run(
config_path: Annotated[
str, typer.Argument(help="The path to the default configuration file")
] = "baselines/ippo/config/ippo_ff_puffer.yaml",
*,
# fmt: off
# Environment options
num_worlds: Annotated[Optional[int], typer.Option(help="Number of parallel envs")] = None,
k_unique_scenes: Annotated[Optional[int], typer.Option(help="The number of unique scenes to sample")] = None,
collision_weight: Annotated[Optional[float], typer.Option(help="The weight for collision penalty")] = None,
off_road_weight: Annotated[Optional[float], typer.Option(help="The weight for off-road penalty")] = None,
goal_achieved_weight: Annotated[Optional[float], typer.Option(help="The weight for goal-achieved reward")] = None,
dist_to_goal_threshold: Annotated[Optional[float], typer.Option(help="The distance threshold for goal-achieved")] = None,
sampling_seed: Annotated[Optional[int], typer.Option(help="The seed for sampling scenes")] = None,
obs_radius: Annotated[Optional[float], typer.Option(help="The radius for the observation")] = None,
# Train options
seed: Annotated[Optional[int], typer.Option(help="The seed for training")] = None,
learning_rate: Annotated[Optional[float], typer.Option(help="The learning rate for training")] = None,
resample_scenes: Annotated[Optional[int], typer.Option(help="Whether to resample scenes during training; 0 or 1")] = None,
resample_interval: Annotated[Optional[int], typer.Option(help="The interval for resampling scenes")] = None,
resample_dataset_size: Annotated[Optional[int], typer.Option(help="The size of the dataset to sample from")] = None,
total_timesteps: Annotated[Optional[int], typer.Option(help="The total number of training steps")] = None,
ent_coef: Annotated[Optional[float], typer.Option(help="Entropy coefficient")] = None,
update_epochs: Annotated[Optional[int], typer.Option(help="The number of epochs for updating the policy")] = None,
batch_size: Annotated[Optional[int], typer.Option(help="The batch size for training")] = None,
minibatch_size: Annotated[Optional[int], typer.Option(help="The minibatch size for training")] = None,
# Wandb logging options
project: Annotated[Optional[str], typer.Option(help="WandB project name")] = None,
entity: Annotated[Optional[str], typer.Option(help="WandB entity name")] = None,
group: Annotated[Optional[str], typer.Option(help="WandB group name")] = None,
):
"""Run PPO training with the given configuration."""
# fmt: on

config = load_config("baselines/ippo/config/ippo_ff_puffer.yaml")
# Load default configs
config = load_config(config_path)

# Override configs with command-line arguments
env_config = {
"num_worlds": num_worlds,
"k_unique_scenes": k_unique_scenes,
"collision_weight": collision_weight,
"off_road_weight": off_road_weight,
"goal_achieved_weight": goal_achieved_weight,
"dist_to_goal_threshold": dist_to_goal_threshold,
"sampling_seed": sampling_seed,
"obs_radius": obs_radius,
}
config.environment.update(
{k: v for k, v in env_config.items() if v is not None}
)
train_config = {
"seed": seed,
"learning_rate": learning_rate,
"resample_scenes": None
if resample_scenes is None
else bool(resample_scenes),
"resample_interval": resample_interval,
"resample_dataset_size": resample_dataset_size,
"total_timesteps": total_timesteps,
"ent_coef": ent_coef,
"update_epochs": update_epochs,
"batch_size": batch_size,
"minibatch_size": minibatch_size,
}
config.train.update(
{k: v for k, v in train_config.items() if v is not None}
)

wandb_config = {
"project": project,
"entity": entity,
"group": group,
}
config.wandb.update(
{k: v for k, v in wandb_config.items() if v is not None}
)

make_env = env_creator(
data_dir=config.data_dir,
@@ -143,4 +236,8 @@ def sweep(args, project="PPO", sweep_name="my_sweep"):
)

if config.mode == "train":
train(config)
train(config, make_env)


if __name__ == "__main__":
app()
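
The override pattern used by the run command above, shown in isolation: CLI options that were not passed stay None and are filtered out, so the YAML defaults survive. A small self-contained example (the values are illustrative, taken from the config defaults).

# Only explicitly passed options (non-None) override the YAML defaults.
yaml_train = {"learning_rate": 3e-4, "total_timesteps": 5_000_000_000, "batch_size": 131_072}
cli_overrides = {"learning_rate": 1e-4, "total_timesteps": None, "batch_size": None}

yaml_train.update({k: v for k, v in cli_overrides.items() if v is not None})
assert yaml_train == {"learning_rate": 1e-4, "total_timesteps": 5_000_000_000, "batch_size": 131_072}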
20 changes: 0 additions & 20 deletions baselines/scripts/bash_exec_paper_fig.sh

This file was deleted.

4 changes: 0 additions & 4 deletions baselines/scripts/bash_exec_solve_n_scenes.sh

This file was deleted.

14 changes: 0 additions & 14 deletions baselines/scripts/sbatch_ippo.sh

This file was deleted.

17 changes: 0 additions & 17 deletions baselines/scripts/sbatch_paper_fig.sh

This file was deleted.

14 changes: 0 additions & 14 deletions baselines/scripts/sbatch_solve_n_scenes.sh

This file was deleted.


Binary file removed data/processed/waymax/scenario_ab2a72c63f8fd589.pkl
Binary file not shown.