Release v0.4.12 (#450)
* Fix history vehicles key error + do not recreate SMARTS instance (#230) (#439)

* Do not recreate the SMARTS instance during imitation learning

* Fix bugs in imitation learning to run the interaction dataset

* Fixes to improve mini-city (#410)

* Fixes to make the mini-city work nicely

* Address comments

* Remove all vehicles in SUMO on reset (#435)

* Fix OpEn for large vehicles (#446)

* Option for resetting a scenario without restarting or reloading SUMO (#441)

* Remove all vehicles in SUMO on reset

* Reset without restarting or reloading SUMO

* Upgrade to ray 1.0.1.post1 with fix (#420)

* Upgrade ray version

* Update requirements

* Fix rllib_agent

* Add jupyter to requirements

* Remove jupyter in setup

* Remove aggressive mode args

* Fix analysis crash

* Fix mode param

* Fix pandas column error

* Add mode to get_best_logdir

* Bump version 0.4.11->0.4.12

* Fix rllib tests

* Fix minicity vehicle type generation

* Address unneeded * and self in callback signatures

Co-authored-by: Jeffer Jingfei Peng <[email protected]>
Co-authored-by: Liam Chen <[email protected]>
Co-authored-by: iman512003 <[email protected]>
4 people authored Jan 22, 2021
1 parent c4e17f4 commit 151c748
Showing 20 changed files with 4,201 additions and 8,479 deletions.
16 changes: 9 additions & 7 deletions examples/history_vehicles_replacement_for_imitation_learning.py
@@ -20,6 +20,12 @@ def act(self, obs):

def main(scenarios, headless, seed):
scenarios_iterator = Scenario.scenario_variations(scenarios, [])
smarts = SMARTS(
agent_interfaces={},
traffic_sim=SumoTrafficSimulation(headless=True, auto_start=True),
envision=Envision(),
)

for _ in scenarios:
scenario = next(scenarios_iterator)
agent_missions = scenario.discover_missions_of_traffic_histories()
@@ -33,14 +39,10 @@ def main(scenarios, headless, seed):
),
agent_builder=KeepLaneAgent,
)

agent = agent_spec.build_agent()

smarts = SMARTS(
agent_interfaces={agent_id: agent_spec.interface},
traffic_sim=SumoTrafficSimulation(headless=True, auto_start=True),
envision=Envision(),
)
smarts.switch_ego_agent({agent_id: agent_spec.interface})

observations = smarts.reset(scenario)

dones = {agent_id: False}
@@ -52,7 +54,7 @@
{agent_id: agent_action}
)

smarts.destroy()
smarts.destroy()


if __name__ == "__main__":
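Condensed, the hunks above amount to the following reuse pattern: one SMARTS instance is built before the scenario loop, the ego agent's interface is switched per scenario, and the instance is destroyed once at the end. The sketch below is assembled from the hunks shown; the import paths and the elided step loop are assumptions rather than part of the diff.

```python
from envision.client import Client as Envision  # import paths assumed from the SMARTS layout
from smarts.core.scenario import Scenario
from smarts.core.smarts import SMARTS
from smarts.core.sumo_traffic_simulation import SumoTrafficSimulation


def replay_scenarios(scenarios, agent_id, agent_spec):
    """Sketch of the reuse pattern introduced above, not the full example."""
    scenarios_iterator = Scenario.scenario_variations(scenarios, [])

    # One SMARTS instance for every scenario, instead of one per iteration.
    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=SumoTrafficSimulation(headless=True, auto_start=True),
        envision=Envision(),
    )

    for _ in scenarios:
        scenario = next(scenarios_iterator)
        agent = agent_spec.build_agent()

        # Swap the ego agent's interface in place of rebuilding the simulation.
        smarts.switch_ego_agent({agent_id: agent_spec.interface})
        observations = smarts.reset(scenario)
        # ... step `smarts` with `agent` actions until the episode ends (elided) ...

    # Tear down once, after all scenarios have been replayed.
    smarts.destroy()
```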
102 changes: 75 additions & 27 deletions examples/rllib.py
@@ -1,12 +1,21 @@
import argparse
from datetime import timedelta
import logging
import multiprocessing
from os import stat
import random
from pathlib import Path
from typing import Dict

import numpy as np
from ray import tune
from ray.rllib.env.base_env import BaseEnv
from ray.rllib.evaluation.episode import MultiAgentEpisode
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.policy.policy import Policy
from ray.rllib.utils.typing import PolicyID
from ray.tune.schedulers import PopulationBasedTraining
from ray.rllib.agents.callbacks import DefaultCallbacks

import smarts
from smarts.core.utils.file import copy_tree
@@ -18,28 +27,50 @@


# Add custom metrics to your tensorboard using these callbacks
# see: https://ray.readthedocs.io/en/latest/rllib-training.html#callbacks-and-custom-metrics
def on_episode_start(info):
episode = info["episode"]
episode.user_data["ego_speed"] = []
# See: https://ray.readthedocs.io/en/latest/rllib-training.html#callbacks-and-custom-metrics
class Callbacks(DefaultCallbacks):
@staticmethod
def on_episode_start(
worker: RolloutWorker,
base_env: BaseEnv,
policies: Dict[PolicyID, Policy],
episode: MultiAgentEpisode,
env_index: int,
**kwargs,
):

episode.user_data["ego_speed"] = []

def on_episode_step(info):
episode = info["episode"]
single_agent_id = list(episode._agent_to_last_obs)[0]
obs = episode.last_raw_obs_for(single_agent_id)
episode.user_data["ego_speed"].append(obs["speed"])
@staticmethod
def on_episode_step(
worker: RolloutWorker,
base_env: BaseEnv,
episode: MultiAgentEpisode,
env_index: int,
**kwargs,
):

single_agent_id = list(episode._agent_to_last_obs)[0]
obs = episode.last_raw_obs_for(single_agent_id)
episode.user_data["ego_speed"].append(obs["speed"])

def on_episode_end(info):
episode = info["episode"]
mean_ego_speed = np.mean(episode.user_data["ego_speed"])
print(
f"ep. {episode.episode_id:<12} ended;"
f" length={episode.length:<6}"
f" mean_ego_speed={mean_ego_speed:.2f}"
)
episode.custom_metrics["mean_ego_speed"] = mean_ego_speed
@staticmethod
def on_episode_end(
worker: RolloutWorker,
base_env: BaseEnv,
policies: Dict[PolicyID, Policy],
episode: MultiAgentEpisode,
env_index: int,
**kwargs,
):

mean_ego_speed = np.mean(episode.user_data["ego_speed"])
print(
f"ep. {episode.episode_id:<12} ended;"
f" length={episode.length:<6}"
f" mean_ego_speed={mean_ego_speed:.2f}"
)
episode.custom_metrics["mean_ego_speed"] = mean_ego_speed


def explore(config):
@@ -53,6 +84,8 @@ def main(
scenario,
headless,
time_total_s,
rollout_fragment_length,
train_batch_size,
seed,
num_samples,
num_agents,
@@ -62,17 +95,22 @@
checkpoint_num,
save_model_path,
):
assert train_batch_size > 0, "train_batch_size cannot be less than 1."
if rollout_fragment_length > train_batch_size:
rollout_fragment_length = train_batch_size

pbt = PopulationBasedTraining(
time_attr="time_total_s",
metric="episode_reward_mean",
mode="max",
perturbation_interval=300,
resample_probability=0.25,
# Specifies the mutations of these hyperparams
# See: `ray.rllib.agents.trainer.COMMON_CONFIG` for common hyperparams
hyperparam_mutations={
"lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
"rollout_fragment_length": lambda: random.randint(128, 16384),
"train_batch_size": lambda: random.randint(2000, 160000),
"rollout_fragment_length": lambda: rollout_fragment_length,
"train_batch_size": lambda: train_batch_size,
},
# Specifies additional mutations after hyperparam_mutations is applied
custom_explore_fn=explore,
@@ -104,11 +142,7 @@ def main(
},
},
"multiagent": {"policies": rllib_policies},
"callbacks": {
"on_episode_start": on_episode_start,
"on_episode_step": on_episode_step,
"on_episode_end": on_episode_end,
},
"callbacks": Callbacks,
}

experiment_name = "rllib_example_multi"
@@ -139,7 +173,7 @@ def main(

print(analysis.dataframe().head())

best_logdir = Path(analysis.get_best_logdir("episode_reward_max"))
best_logdir = Path(analysis.get_best_logdir("episode_reward_max", mode="max"))
model_path = best_logdir / "model"

copy_tree(str(model_path), save_model_path, overwrite=True)
Expand All @@ -165,11 +199,23 @@ def main(
default=1,
help="Number of times to sample from hyperparameter space",
)
parser.add_argument(
"--rollout_fragment_length",
type=int,
default=200,
help="Episodes are divided into fragments of this many steps for each rollout. In this example this will be ensured to be `1=<rollout_fragment_length<=train_batch_size`",
)
parser.add_argument(
"--train_batch_size",
type=int,
default=2000,
help="The training batch size. This value must be > 0.",
)
parser.add_argument(
"--time_total_s",
type=int,
default=1 * 60 * 60, # 1 hour
help="Total time in seconds to run the simulation for",
help="Total time in seconds to run the simulation for. This is a rough end time as it will be checked per training batch.",
)
parser.add_argument(
"--seed",
@@ -214,6 +260,8 @@ def main(
scenario=args.scenario,
headless=args.headless,
time_total_s=args.time_total_s,
rollout_fragment_length=args.rollout_fragment_length,
train_batch_size=args.train_batch_size,
seed=args.seed,
num_samples=args.num_samples,
num_agents=args.num_agents,
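Pulled together, the callback hunks above migrate from the old dict-of-functions registration to the DefaultCallbacks class API used by ray 1.0.1. A condensed sketch assuming only what is visible in the diff: on_episode_step and the print statement are omitted, and the trainer config is trimmed to the relevant key.

```python
from typing import Dict

import numpy as np
from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.env.base_env import BaseEnv
from ray.rllib.evaluation.episode import MultiAgentEpisode
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.policy.policy import Policy
from ray.rllib.utils.typing import PolicyID


class Callbacks(DefaultCallbacks):
    @staticmethod
    def on_episode_start(
        worker: RolloutWorker,
        base_env: BaseEnv,
        policies: Dict[PolicyID, Policy],
        episode: MultiAgentEpisode,
        env_index: int,
        **kwargs,
    ):
        # Collect the ego speed at every step of the episode.
        episode.user_data["ego_speed"] = []

    @staticmethod
    def on_episode_end(
        worker: RolloutWorker,
        base_env: BaseEnv,
        policies: Dict[PolicyID, Policy],
        episode: MultiAgentEpisode,
        env_index: int,
        **kwargs,
    ):
        # Report the episode-mean ego speed as a custom metric.
        episode.custom_metrics["mean_ego_speed"] = np.mean(
            episode.user_data["ego_speed"]
        )


tune_config = {
    # ... env, multiagent policies, etc. (elided) ...
    # The class itself is passed, replacing the old {"on_episode_start": fn, ...} dict.
    "callbacks": Callbacks,
}
```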
2 changes: 1 addition & 1 deletion examples/rllib_agent.py
@@ -4,7 +4,7 @@
import numpy as np

from ray.rllib.models import ModelCatalog
from ray.rllib.models.tf.fcnet_v2 import FullyConnectedNetwork
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.rllib.utils import try_import_tf

from smarts.core.agent_interface import AgentInterface, AgentType
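The single-line change above tracks ray's rename of the TF fully connected model module (fcnet_v2 to fcnet). A hypothetical registration with the updated import is sketched below; the model name "fc_model" is illustrative and not taken from the example file.

```python
from ray.rllib.models import ModelCatalog
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork  # new module path under ray 1.0.1

# Hypothetical registration call; only the import path change is shown in the diff above.
ModelCatalog.register_custom_model("fc_model", FullyConnectedNetwork)
```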
16 changes: 14 additions & 2 deletions requirements.txt
@@ -1,5 +1,7 @@
absl-py==0.10.0
aiohttp==3.6.3
aiohttp-cors==0.7.0
aioredis==1.3.1
apipkg==1.5
astor==0.8.1
astunparse==1.6.3
@@ -8,12 +10,14 @@ atari-py==0.2.6
attrs==20.2.0
Automat==20.2.0
beautifulsoup4==4.9.3
blessings==1.7
cachetools==4.1.1
certifi==2020.6.20
chardet==3.0.4
click==7.1.2
cloudpickle==1.3.0
colorama==0.4.4
colorful==0.5.4
commonmark==0.9.1
constantly==15.1.0
coverage==5.3
@@ -27,12 +31,16 @@ filelock==3.0.12
future==0.18.2
gast==0.3.3
google==3.0.0
google-api-core==1.24.1
google-auth==1.23.0
google-auth-oauthlib==0.4.2
google-pasta==0.2.0
googleapis-common-protos==1.52.0
gpustat==0.6.0
grpcio==1.30.0
gym==0.17.3
h5py==2.10.0
hiredis==1.1.0
hyperlink==20.0.1
idna==2.10
imageio==2.9.0
@@ -56,9 +64,12 @@ msgpack==1.0.1
multidict==4.7.6
networkx==2.5
numpy==1.18.0
nvidia-ml-py3==7.352.0
oauthlib==3.1.0
opencensus==0.7.11
opencensus-context==0.1.2
opencv-python==4.4.0.44
opencv-python-headless==4.4.0.46
opencv-python-headless==4.3.0.36
opt-einsum==3.3.0
packaging==20.4
panda3d==1.10.7
@@ -67,6 +78,7 @@ panda3d-simplepbr==0.7
pandas==1.1.3
Pillow==8.0.0
pluggy==0.13.1
prometheus-client==0.9.0
protobuf==3.13.0
psutil==5.7.2
py==1.9.0
@@ -93,7 +105,7 @@ PyWavelets==1.1.1
PyYAML==5.3.1
pyzmq==19.0.2
quadprog==0.1.7
ray==0.8.6
ray==1.0.1.post1
redis==3.4.1
requests==2.24.0
requests-oauthlib==1.3.0
