Skip to content

Commit

Permalink
Change pathfinding to use ego-centric ordering of player planes.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 430678310
Change-Id: I5d582008724ef8578d5dd915286b84f95b7bddda
  • Loading branch information
lanctot committed Feb 27, 2022
1 parent b4248ff commit f126e99
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 8 deletions.
31 changes: 28 additions & 3 deletions open_spiel/games/pathfinding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,23 @@ std::string PathfindingState::ToString() const {
return str;
}

// Maps `actual_player` to its plane offset as seen by `observing_player`.
// For player ids in [0, num_players_), the observer itself maps to offset 0,
// the next player to offset 1, and so on, wrapping around after the last
// player.
int PathfindingState::PlayerPlaneIndex(int observing_player,
                                       int actual_player) const {
  // The raw difference is negative whenever actual_player < observing_player,
  // and C++'s % keeps the sign of the dividend, so shift by num_players_
  // before taking the remainder.
  const int shifted_offset = actual_player - observing_player + num_players_;
  return shifted_offset % num_players_;
}

// Returns the observation string for `player`, which is simply the string
// produced by ToString(); the `player` argument is only range-checked.
// Note: the observations are currently non-Markovian because the time step is
// not included and the horizon is finite.
std::string PathfindingState::ObservationString(int player) const {
  // Check that player lies in [0, num_players_).
  SPIEL_CHECK_GE(player, 0);
  SPIEL_CHECK_LT(player, num_players_);

  std::string observation = ToString();
  return observation;
}

// Note: the observations are currently non-Markovian because the time step is
// not included and the horizon is finite.
void PathfindingState::ObservationTensor(int player,
absl::Span<float> values) const {
SPIEL_CHECK_GE(player, 0);
Expand All @@ -450,11 +467,18 @@ void PathfindingState::ObservationTensor(int player,
// - Third n planes refer to player's destination position
// - 1 plane for wall
// - 1 plane for empty
//
// The first three sets of n planes corresponding to the players are each
// ordered ego-centrically:
// - the first plane is the observing player's plane, followed by the next
// player's plane, and so on, wrapping around after the last player. For
// example, in a 4-player game where player 2 is the observing player, the
// planes are ordered by player 2, 3, 0, 1.
for (int r = 0; r < grid_spec_.num_rows; ++r) {
for (int c = 0; c < grid_spec_.num_cols; ++c) {
// Player on the position.
if (grid_[r][c] >= 0 && grid_[r][c] < num_players_) {
view[{grid_[r][c], r, c}] = 1.0;
view[{PlayerPlaneIndex(player, grid_[r][c]), r, c}] = 1.0;
}

// Wall
Expand All @@ -472,8 +496,9 @@ void PathfindingState::ObservationTensor(int player,
for (Player p = 0; p < num_players_; ++p) {
const std::pair<int, int>& start_pos = grid_spec_.starting_positions[p];
const std::pair<int, int>& dest_pos = grid_spec_.destinations[p];
view[{num_players_ + p, start_pos.first, start_pos.second}] = 1.0;
view[{2 * num_players_ + p, dest_pos.first, dest_pos.second}] = 1.0;
int pidx = PlayerPlaneIndex(player, p);
view[{num_players_ + pidx, start_pos.first, start_pos.second}] = 1.0;
view[{2 * num_players_ + pidx, dest_pos.first, dest_pos.second}] = 1.0;
}
}

Expand Down
11 changes: 6 additions & 5 deletions open_spiel/games/pathfinding.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ namespace pathfinding {
// (default: 100.0).
// "step_reward" double The reward given to every agent on each step
// (default: -0.01).
//
// Note: the observations are currently non-Markovian because the time step is
// not included and the horizon is finite. Including the time step can easily
// be added as an option if desired.

inline constexpr char kDefaultSingleAgentGrid[] =
"A.*..**\n"
Expand Down Expand Up @@ -131,11 +135,7 @@ class PathfindingState : public SimMoveState {
bool IsTerminal() const override;
std::vector<double> Rewards() const override;
std::vector<double> Returns() const override;
std::string ObservationString(int player) const override {
SPIEL_CHECK_GE(player, 0);
SPIEL_CHECK_LT(player, num_players_);
return ToString();
}
std::string ObservationString(int player) const override;
void ObservationTensor(int player, absl::Span<float> values) const override;
int CurrentPlayer() const override {
return IsTerminal() ? kTerminalPlayerId : cur_player_;
Expand Down Expand Up @@ -164,6 +164,7 @@ class PathfindingState : public SimMoveState {
Player PlayerAt(const std::pair<int, int>& coord) const;
int TryResolveContested();
bool AllPlayersOnDestinations() const;
int PlayerPlaneIndex(int observing_player, int actual_player) const;

const PathfindingGame& parent_game_;
const GridSpec& grid_spec_;
Expand Down

0 comments on commit f126e99

Please sign in to comment.