Merge pull request #154 from itwasabhi/update_to_gymnasium

Switch to gymnasium in favor of openai gym
HumanCompatibleAI · Feb 1, 2025 · 8bbf72e · 8bbf72e
2 parents 3138131 + 06090f2
commit 8bbf72e
Show file tree

Hide file tree

Showing 9 changed files with 145 additions and 168 deletions.
diff --git a/.github/workflows/pythonlint.yml b/.github/workflows/pythonlint.yml
@@ -8,10 +8,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python 3.7
+    - name: Set up Python 3.8
       uses: actions/setup-python@v4
       with:
-        python-version: '3.7' 
+        python-version: 3.8
         architecture: 'x64'
     - name: Install dependencies
       run: |

diff --git a/.github/workflows/pythontests.yml b/.github/workflows/pythontests.yml
@@ -21,10 +21,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python 3.7
+    - name: Set up Python 3.8
       uses: actions/setup-python@v4
       with:
-        python-version: '3.7'
+        python-version: 3.8
         architecture: 'x64'
     - name: Install dependencies
       run: |

diff --git a/README.md b/README.md
@@ -92,7 +92,7 @@ $ ./run_tests.sh
 
 ⚠️**Be sure to change your CWD to the human_aware_rl directory before running the script, as the test script uses the CWD to dynamically generate a path to save temporary training runs/checkpoints. The testing script will fail if not being run from the correct directory.**
 
-This will run all tests belonging to the human_aware_rl module. You can checkout the README in the submodule for instructions of running target-specific tests. This can be initiated from any directory.
+This will run all tests belonging to the human_aware_rl module. _These tests no longer work out of the box, due to package version issues_: if you fix them, feel free to make a PR. You can check out the README in the submodule for instructions on running target-specific tests. This can be initiated from any directory.
 
 If you're thinking of using the planning code extensively, you should run the full testing suite that verifies all of the Overcooked accessory tools (this can take 5-10 mins): 
 ```

diff --git a/setup.py b/setup.py
@@ -42,8 +42,7 @@
         "numpy",
         "scipy",
         "tqdm",
-        "gym",
-        "pettingzoo",
+        "gymnasium",
         "ipython",
         "pygame",
         "ipywidgets",

diff --git a/src/human_aware_rl/imitation/behavior_cloning_tf2.py b/src/human_aware_rl/imitation/behavior_cloning_tf2.py
@@ -474,8 +474,8 @@ def __init__(self, observation_space, action_space, config):
         """
         RLLib compatible constructor for initializing a behavior cloning model
 
-        observation_space (gym.Space|tuple)     Shape of the featurized observations
-        action_space (gym.space|tuple)          Shape of the action space (len(Action.All_ACTIONS),)
+        observation_space (gymnasium.Space|tuple)     Shape of the featurized observations
+        action_space (gymnasium.Space|tuple)          Shape of the action space (len(Action.ALL_ACTIONS),)
         config (dict)                           Dictionary of relevant bc params
             - model_dir (str)                   Path to pickled keras.Model used to map observations to action logits
             - stochastic (bool)                 Whether action should return logit argmax or sample over distribution
@@ -519,7 +519,7 @@ def __init__(self, observation_space, action_space, config):
         self.context = self._create_execution_context()
 
     def _setup_shapes(self):
-        # This is here to make the class compatible with both tuples or gym.Space objs for the spaces
+        # This is here to make the class compatible with both tuples or gymnasium.Space objs for the spaces
         # Note: action_space = (len(Action.ALL_ACTIONS),) is technically NOT the action space shape, which would be () since actions are scalars
         self.observation_shape = (
             self.observation_space

diff --git a/src/human_aware_rl/rllib/rllib.py b/src/human_aware_rl/rllib/rllib.py
@@ -6,7 +6,7 @@
 from datetime import datetime
 
 import dill
-import gym
+import gymnasium
 import numpy as np
 import ray
 from ray.rllib.agents.ppo import PPOTrainer
@@ -32,8 +32,8 @@
     OvercookedGridworld,
 )
 
-action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
-obs_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
+obs_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
 timestr = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
 
 
@@ -218,9 +218,13 @@ def _validate_schedule(self, schedule):
     def _setup_action_space(self, agents):
         action_sp = {}
         for agent in agents:
-            action_sp[agent] = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
-        self.action_space = gym.spaces.Dict(action_sp)
-        self.shared_action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
+            action_sp[agent] = gymnasium.spaces.Discrete(
+                len(Action.ALL_ACTIONS)
+            )
+        self.action_space = gymnasium.spaces.Dict(action_sp)
+        self.shared_action_space = gymnasium.spaces.Discrete(
+            len(Action.ALL_ACTIONS)
+        )
 
     def _setup_observation_space(self, agents):
         dummy_state = self.base_env.mdp.get_standard_start_state()
@@ -232,7 +236,7 @@ def _setup_observation_space(self, agents):
 
         high = np.ones(obs_shape) * float("inf")
         low = np.ones(obs_shape) * 0
-        self.ppo_observation_space = gym.spaces.Box(
+        self.ppo_observation_space = gymnasium.spaces.Box(
             np.float32(low), np.float32(high), dtype=np.float32
         )
 
@@ -243,7 +247,7 @@ def _setup_observation_space(self, agents):
         obs_shape = featurize_fn_bc(dummy_state)[0].shape
         high = np.ones(obs_shape) * 100
         low = np.ones(obs_shape) * -100
-        self.bc_observation_space = gym.spaces.Box(
+        self.bc_observation_space = gymnasium.spaces.Box(
             np.float32(low), np.float32(high), dtype=np.float32
         )
         # hardcode mapping between action space and agent
@@ -253,7 +257,7 @@ def _setup_observation_space(self, agents):
                 ob_space[agent] = self.ppo_observation_space
             else:
                 ob_space[agent] = self.bc_observation_space
-        self.observation_space = gym.spaces.Dict(ob_space)
+        self.observation_space = gymnasium.spaces.Dict(ob_space)
 
     def _get_featurize_fn(self, agent_id):
         if agent_id.startswith("ppo"):

diff --git a/src/overcooked_ai_py/__init__.py b/src/overcooked_ai_py/__init__.py
@@ -1,4 +1,4 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register
 
 register(
     id="Overcooked-v0",