Change pathfinding to use ego-centric ordering of player planes.

PiperOrigin-RevId: 430678310 Change-Id: I5d582008724ef8578d5dd915286b84f95b7bddda
google-deepmind · Feb 27, 2022 · f126e99 · f126e99
1 parent b4248ff
commit f126e99
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 8 deletions.
diff --git a/open_spiel/games/pathfinding.cc b/open_spiel/games/pathfinding.cc
@@ -433,6 +433,23 @@ std::string PathfindingState::ToString() const {
   return str;
 }
 
+int PathfindingState::PlayerPlaneIndex(int observing_player,
+                                       int actual_player) const {
+  // Ego-centric plane offset of actual_player relative to observing_player.
+  // num_players_ is added before % because C++ % keeps the dividend's sign.
+  return (actual_player - observing_player + num_players_) % num_players_;
+}
+
+// Note: the observations are currently non-Markovian because the time step is
+// not included and the horizon is finite.
+std::string PathfindingState::ObservationString(int player) const {
+  SPIEL_CHECK_GE(player, 0);
+  SPIEL_CHECK_LT(player, num_players_);
+  return ToString();
+}
+
+// Note: the observations are currently non-Markovian because the time step is
+// not included and the horizon is finite.
 void PathfindingState::ObservationTensor(int player,
                                          absl::Span<float> values) const {
   SPIEL_CHECK_GE(player, 0);
@@ -450,11 +467,18 @@ void PathfindingState::ObservationTensor(int player,
   //   - Third n planes refer to player's destination position
   //   - 1 plane for wall
   //   - 1 plane for empty
+  //
+  // The first three sets of n planes corresponding to the players are each
+  // ordered ego-centrically:
+  //   - the first plane is the observing player's plane, followed by the next
+  //     player, followed by the next etc. so in a 4-player game, if player 2
+  //     is the observing player, the planes would be ordered by player 2, 3, 0,
+  //     1.
   for (int r = 0; r < grid_spec_.num_rows; ++r) {
     for (int c = 0; c < grid_spec_.num_cols; ++c) {
       // Player on the position.
       if (grid_[r][c] >= 0 && grid_[r][c] < num_players_) {
-        view[{grid_[r][c], r, c}] = 1.0;
+        view[{PlayerPlaneIndex(player, grid_[r][c]), r, c}] = 1.0;
       }
 
       // Wall
@@ -472,8 +496,9 @@ void PathfindingState::ObservationTensor(int player,
   for (Player p = 0; p < num_players_; ++p) {
     const std::pair<int, int>& start_pos = grid_spec_.starting_positions[p];
     const std::pair<int, int>& dest_pos = grid_spec_.destinations[p];
-    view[{num_players_ + p, start_pos.first, start_pos.second}] = 1.0;
-    view[{2 * num_players_ + p, dest_pos.first, dest_pos.second}] = 1.0;
+    int pidx = PlayerPlaneIndex(player, p);
+    view[{num_players_ + pidx, start_pos.first, start_pos.second}] = 1.0;
+    view[{2 * num_players_ + pidx, dest_pos.first, dest_pos.second}] = 1.0;
   }
 }
 

diff --git a/open_spiel/games/pathfinding.h b/open_spiel/games/pathfinding.h
@@ -45,6 +45,10 @@ namespace pathfinding {
 //                          (default: 100.0).
 //   "step_reward"  double  The reward given to every agent on each step
 //                          (default: -0.01).
+//
+// Note: the observations are currently non-Markovian because the time step is
+// not included and the horizon is finite. Including the time step can easily
+// be added as an option if desired.
 
 inline constexpr char kDefaultSingleAgentGrid[] =
     "A.*..**\n"
@@ -131,11 +135,7 @@ class PathfindingState : public SimMoveState {
   bool IsTerminal() const override;
   std::vector<double> Rewards() const override;
   std::vector<double> Returns() const override;
-  std::string ObservationString(int player) const override {
-    SPIEL_CHECK_GE(player, 0);
-    SPIEL_CHECK_LT(player, num_players_);
-    return ToString();
-  }
+  std::string ObservationString(int player) const override;
   void ObservationTensor(int player, absl::Span<float> values) const override;
   int CurrentPlayer() const override {
     return IsTerminal() ? kTerminalPlayerId : cur_player_;
@@ -164,6 +164,7 @@ class PathfindingState : public SimMoveState {
   Player PlayerAt(const std::pair<int, int>& coord) const;
   int TryResolveContested();
   bool AllPlayersOnDestinations() const;
+  int PlayerPlaneIndex(int observing_player, int actual_player) const;
 
   const PathfindingGame& parent_game_;
   const GridSpec& grid_spec_;