From aae2c1e37b9978846876b44171bdaca5afa495e7 Mon Sep 17 00:00:00 2001
From: Marc Lanctot
Date: Fri, 20 Dec 2024 14:48:22 +0000
Subject: [PATCH] Change the string representations of Hex to support standard
 notation (and set it as the default). Keep support for the old string
 representation (under game parameter string_rep=explicit) for backwards
 compatibility, if needed.

PiperOrigin-RevId: 708308838
Change-Id: I3e3d39b9c08a4d76da05e71f8eca38e6b4de2146
---
 open_spiel/games/dark_hex/dark_hex.cc | 7 +-
 open_spiel/games/hex/hex.cc | 87 +++++++++++-
 open_spiel/games/hex/hex.h | 36 ++++--
 open_spiel/games/hex/hex_test.cc | 5 +
 .../dark_hex(num_rows=5,num_cols=3).txt | 120 +++++++++---------
 .../dark_hex_ir(board_size=3).txt | 54 ++++----
 .../dark_hex_reveal_turn_long.txt | 70 +++++-----
 .../playthroughs/hex(board_size=5).txt | 106 ++++++++--------
 8 files changed, 285 insertions(+), 200 deletions(-)

diff --git a/open_spiel/games/dark_hex/dark_hex.cc b/open_spiel/games/dark_hex/dark_hex.cc
index 9bdf28b04c..f36df74e50 100644
--- a/open_spiel/games/dark_hex/dark_hex.cc
+++ b/open_spiel/games/dark_hex/dark_hex.cc
@@ -107,7 +107,7 @@ DarkHexState::DarkHexState(std::shared_ptr<const Game> game, int num_cols,
                            int num_rows, GameVersion game_version,
                            ObservationType obs_type)
     : State(game),
-      state_(game, num_cols, num_rows),
+      state_(game, num_cols, num_rows, hex::StringRep::kStandard),
       obs_type_(obs_type),
       game_version_(game_version),
       num_cols_(num_cols),
@@ -145,7 +145,7 @@ void DarkHexState::DoApplyAction(Action move) {
     }
   }
 
-  SPIEL_CHECK_EQ(cur_view[move], CellState::kEmpty);
+  SPIEL_CHECK_TRUE(cur_view[move] == CellState::kEmpty);
   // Update the view - only using CellState::kBlack and CellState::kWhite
   if (state_.BoardAt(move) == CellState::kBlack ||
       state_.BoardAt(move) == CellState::kBlackNorth ||
@@ -185,7 +185,8 @@ std::string DarkHexState::ViewToString(Player player) const {
 
   for (int r = 0; r < num_rows_; ++r) {
     for (int c = 0; c < num_cols_; ++c) {
-      absl::StrAppend(&str, StateToString(cur_view[r * num_cols_ + c]));
+      absl::StrAppend(
+          &str, StateToString(cur_view[r * num_cols_ + c], state_.StringRep()));
     }
     if (r < (num_rows_ - 1)) {
       absl::StrAppend(&str, "\n");
diff --git a/open_spiel/games/hex/hex.cc b/open_spiel/games/hex/hex.cc
index 1bdabcfbbd..41630f143a 100644
--- a/open_spiel/games/hex/hex.cc
+++ b/open_spiel/games/hex/hex.cc
@@ -14,11 +14,15 @@
 
 #include "open_spiel/games/hex/hex.h"
 
-#include 
 #include 
-#include 
 #include 
 
+#include "open_spiel/abseil-cpp/absl/strings/str_cat.h"
+#include "open_spiel/game_parameters.h"
+#include "open_spiel/observer.h"
+#include "open_spiel/spiel.h"
+#include "open_spiel/spiel_utils.h"
 #include "open_spiel/utils/tensor_view.h"
 
 namespace open_spiel {
@@ -44,6 +48,7 @@ const GameType kGameType{/*short_name=*/"hex",
                              {"board_size", GameParameter(kDefaultBoardSize)},
                              {"num_cols", GameParameter(kDefaultBoardSize)},
                              {"num_rows", GameParameter(kDefaultBoardSize)},
+                             {"string_rep", GameParameter(kDefaultStringRep)},
                          }};
 
 std::shared_ptr<const Game> Factory(const GameParameters& params) {
@@ -54,6 +59,16 @@ REGISTER_SPIEL_GAME(kGameType, Factory);
 
 RegisterSingleTensorObserver single_tensor(kGameType.short_name);
 
+StringRep StringRepStrToEnum(const std::string& string_rep) {
+  if (string_rep == "standard") {
+    return StringRep::kStandard;
+  } else if (string_rep == "explicit") {
+    return StringRep::kExplicit;
+  } else {
+    SpielFatalError(absl::StrCat("Invalid string_rep ", string_rep));
+  }
+}
+
 }  // namespace
 
 CellState PlayerToState(Player player) {
@@ -133,7 +148,27 @@ CellState HexState::PlayerAndActionToState(Player player, Action move) const {
   }
 }
 
-std::string StateToString(CellState state) {
+std::string StateToStringStandard(CellState state) {
+  switch (state) {
+    case CellState::kEmpty:
+      return ".";
+    case CellState::kWhite:
+    case CellState::kWhiteWin:
+    case CellState::kWhiteWest:
+    case CellState::kWhiteEast:
+      return "o";
+    case CellState::kBlack:
+    case CellState::kBlackWin:
+    case CellState::kBlackNorth:
+    case CellState::kBlackSouth:
+      return "x";
+    default:
+      SpielFatalError("Unknown state.");
+      return "This will never return.";
+  }
+}
+
+std::string StateToStringExplicit(CellState state) {
   switch (state) {
     case CellState::kEmpty:
       return ".";
@@ -159,8 +194,18 @@ std::string StateToString(CellState state) {
   }
 }
 
+std::string StateToString(CellState state, StringRep string_rep) {
+  if (string_rep == StringRep::kExplicit) {
+    return StateToStringExplicit(state);
+  } else if (string_rep == StringRep::kStandard) {
+    return StateToStringStandard(state);
+  } else {
+    SpielFatalError("Unknown string_rep.");
+  }
+}
+
 void HexState::DoApplyAction(Action move) {
-  SPIEL_CHECK_EQ(board_[move], CellState::kEmpty);
+  SPIEL_CHECK_TRUE(board_[move] == CellState::kEmpty);
   CellState move_cell_state = PlayerAndActionToState(CurrentPlayer(), move);
   board_[move] = move_cell_state;
   if (move_cell_state == CellState::kBlackWin) {
@@ -208,11 +253,21 @@ std::vector<Action> HexState::LegalActions() const {
 }
 
 std::string HexState::ActionToString(Player player, Action action_id) const {
-  // This does not comply with the Hex Text Protocol
-  // TODO(author8): Make compliant with HTP
-  return absl::StrCat(StateToString(PlayerAndActionToState(player, action_id)),
-                      "(", action_id % num_cols_, ",", action_id / num_cols_,
-                      ")");
+  int col = action_id % num_cols_;  // Column index (the letter in "a1").
+  int row = action_id / num_cols_;  // Row index (the number in "a1").
+  if (StringRep() == StringRep::kStandard) {
+    char col_char = static_cast<char>(static_cast<int>('a') + col);
+    std::string col_str;
+    col_str += col_char;
+    std::string ret = absl::StrCat(col_str, row + 1);
+    return ret;
+  } else if (StringRep() == StringRep::kExplicit) {
+    return absl::StrCat(
+        StateToString(PlayerAndActionToState(player, action_id), StringRep()),
+        "(", col, ",", row, ")");
+  } else {
+    SpielFatalError("Unknown string_rep.");
+  }
 }
 
 std::vector<int> HexState::AdjacentCells(int cell) const {
@@ -230,8 +285,12 @@ std::vector<int> HexState::AdjacentCells(int cell) const {
   return neighbours;
 }
 
-HexState::HexState(std::shared_ptr<const Game> game, int num_cols, int num_rows)
-    : State(game), num_cols_(num_cols), num_rows_(num_rows) {
+HexState::HexState(std::shared_ptr<const Game> game, int num_cols, int num_rows,
+                   enum StringRep string_rep)
+    : State(game),
+      num_cols_(num_cols),
+      num_rows_(num_rows),
+      string_rep_(string_rep) {
  // For all num_cols and num_rows: num_cols_ >= num_rows_.
   board_.resize(num_cols * num_rows, CellState::kEmpty);
 }
@@ -249,7 +308,7 @@ std::string HexState::ToString() const {
       line_num++;
       absl::StrAppend(&str, std::string(line_num, ' '));
     }
-    absl::StrAppend(&str, StateToString(board_[cell]));
+    absl::StrAppend(&str, StateToString(board_[cell], string_rep_));
     absl::StrAppend(&str, " ");
   }
   return str;
@@ -296,7 +355,9 @@ HexGame::HexGame(const GameParameters& params)
       num_cols_(
           ParameterValue<int>("num_cols", ParameterValue<int>("board_size"))),
       num_rows_(
-          ParameterValue<int>("num_rows", ParameterValue<int>("board_size"))) {}
+          ParameterValue<int>("num_rows", ParameterValue<int>("board_size"))),
+      string_rep_(StringRepStrToEnum(
+          ParameterValue<std::string>("string_rep", kDefaultStringRep))) {}
 
 }  // namespace hex
 }  // namespace open_spiel
diff --git a/open_spiel/games/hex/hex.h b/open_spiel/games/hex/hex.h
index 55ad4a8967..4319a65968 100644
--- a/open_spiel/games/hex/hex.h
+++ b/open_spiel/games/hex/hex.h
@@ -15,13 +15,16 @@
 #ifndef OPEN_SPIEL_GAMES_HEX_H_
 #define OPEN_SPIEL_GAMES_HEX_H_
 
-#include 
-#include 
 #include 
+#include 
 #include 
 #include 
 
+#include "open_spiel/abseil-cpp/absl/types/optional.h"
+#include "open_spiel/game_parameters.h"
 #include "open_spiel/spiel.h"
+#include "open_spiel/spiel_globals.h"
+#include "open_spiel/spiel_utils.h"
 
 // The classic game of Hex: https://en.wikipedia.org/wiki/Hex_(board_game)
 // Does not implement pie rule to balance the game
@@ -30,6 +33,9 @@
 //   "board_size"   int      size of the board          (default = 11)
 //   "num_cols"     int      number of columns          (optional)
 //   "num_rows"     int      number of rows             (optional)
+//   "string_rep"   string   representation of the action and board strings
+//                           ("standard" (default) | "explicit"). See below
+//                           for details.
 
 namespace open_spiel {
 namespace hex {
@@ -41,6 +47,8 @@ inline constexpr int kMaxNeighbours =
     6;  // Maximum number of neighbours for a cell
 
 inline constexpr int kCellStates = 1 + 4 * kNumPlayers;
 inline constexpr int kMinValueCellState = -4;
+inline constexpr const char* kDefaultStringRep = "standard";
+
 // State of a cell.
 // Describes if a cell is
 //   - empty, black or white
@@ -62,10 +70,19 @@ enum class CellState {
   kBlack = 1,  // Black and not edge connected
 };
 
+// The string representations of the game. Standard uses plain x/o stones and
+// chess-like action coordinates ('a1'). Explicit uses a distinct stone
+// character for each cell state and the full (col,row) cell coordinates.
+enum class StringRep {
+  kStandard = 0,
+  kExplicit = 1,
+};
+
 // State of an in-play game.
 class HexState : public State {
  public:
-  HexState(std::shared_ptr<const Game> game, int num_cols, int num_rows);
+  HexState(std::shared_ptr<const Game> game, int num_cols, int num_rows,
+           StringRep string_rep);
 
   HexState(const HexState&) = default;
 
@@ -85,6 +102,7 @@ class HexState : public State {
   CellState BoardAt(int cell) const { return board_[cell]; }
   void ChangePlayer() { current_player_ = current_player_ == 0 ? 1 : 0; }
+  StringRep StringRep() const { return string_rep_; }
 
  protected:
   std::vector<CellState> board_;
@@ -92,12 +110,14 @@ class HexState : public State {
 
  private:
   CellState PlayerAndActionToState(Player player, Action move) const;
+
   Player current_player_ = 0;  // Player zero goes first
   double result_black_perspective_ = 0;  // 1 if Black (player 0) wins
   std::vector<int> AdjacentCells(int cell) const;  // Cells adjacent to cell
   const int num_cols_;  // x
   const int num_rows_;  // y
+  const enum StringRep string_rep_;
 };
 
 // Game object.
@@ -107,7 +127,7 @@ class HexGame : public Game {
   int NumDistinctActions() const override { return num_cols_ * num_rows_; }
   std::unique_ptr<State> NewInitialState() const override {
     return std::unique_ptr<State>(
-        new HexState(shared_from_this(), num_cols_, num_rows_));
+        new HexState(shared_from_this(), num_cols_, num_rows_, string_rep_));
   }
   int NumPlayers() const override { return kNumPlayers; }
   double MinUtility() const override { return -1; }
@@ -117,18 +137,16 @@
     return {kCellStates, num_cols_, num_rows_};
   }
   int MaxGameLength() const override { return num_cols_ * num_rows_; }
+  StringRep string_rep() const { return string_rep_; }
 
  private:
   const int num_cols_;
   const int num_rows_;
+  const enum StringRep string_rep_;
 };
 
 CellState PlayerToState(Player player);
-std::string StateToString(CellState state);
-
-inline std::ostream& operator<<(std::ostream& stream, const CellState& state) {
-  return stream << StateToString(state);
-}
+std::string StateToString(CellState state, StringRep string_rep);
 
 }  // namespace hex
 }  // namespace open_spiel
diff --git a/open_spiel/games/hex/hex_test.cc b/open_spiel/games/hex/hex_test.cc
index 36f2ba5225..3de26acc08 100644
--- a/open_spiel/games/hex/hex_test.cc
+++ b/open_spiel/games/hex/hex_test.cc
@@ -12,7 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include 
+#include 
+
+#include "open_spiel/game_parameters.h"
 #include "open_spiel/spiel.h"
+#include "open_spiel/spiel_utils.h"
 #include "open_spiel/tests/basic_tests.h"
 
 namespace open_spiel {
diff --git a/open_spiel/integration_tests/playthroughs/dark_hex(num_rows=5,num_cols=3).txt b/open_spiel/integration_tests/playthroughs/dark_hex(num_rows=5,num_cols=3).txt
index 08a9aeba15..296c3d74e3 100644
--- a/open_spiel/integration_tests/playthroughs/dark_hex(num_rows=5,num_cols=3).txt
+++ b/open_spiel/integration_tests/playthroughs/dark_hex(num_rows=5,num_cols=3).txt
@@ -56,13 +56,13 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯
 Rewards() = [0, 0]
 Returns() = [0, -0]
 LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
-StringLegalActions() = ["y(0,0)", "y(1,0)", "y(2,0)", "x(0,1)", "x(1,1)", "x(2,1)", "x(0,2)", "x(1,2)", "x(2,2)", "x(0,3)", "x(1,3)", "x(2,3)", "z(0,4)", "z(1,4)", "z(2,4)"]
+StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3", "a4", "b4", "c4", "a5", "b5", "c5"]
 
-# Apply action "y(0,0)"
+# Apply action "a1"
 action: 0
 
 # State 1
-# y . .
+# x . .
 # . . .
 # . . .
 # . . .
@@ -84,17 +84,17 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯
 Rewards() = [0, 0]
 Returns() = [0, -0]
 LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
-StringLegalActions() = ["p(0,0)", "o(1,0)", "q(2,0)", "p(0,1)", "o(1,1)", "q(2,1)", "p(0,2)", "o(1,2)", "q(2,2)", "p(0,3)", "o(1,3)", "q(2,3)", "p(0,4)", "o(1,4)", "q(2,4)"]
+StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3", "a4", "b4", "c4", "a5", "b5", "c5"]
 
-# Apply action "q(2,4)"
+# Apply action "c5"
 action: 14
 
 # State 2
-# y . .
+# x . .
 # . . .
 # . . .
 # . . .
-# . . q
+# . . 
o IsTerminal() = False History() = [0, 14] HistoryString() = "0, 14" @@ -112,17 +112,17 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] -StringLegalActions() = ["y(1,0)", "y(2,0)", "y(0,1)", "x(1,1)", "x(2,1)", "x(0,2)", "x(1,2)", "x(2,2)", "x(0,3)", "x(1,3)", "x(2,3)", "z(0,4)", "z(1,4)", "z(2,4)"] +StringLegalActions() = ["b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3", "a4", "b4", "c4", "a5", "b5", "c5"] -# Apply action "x(2,1)" +# Apply action "c2" action: 5 # State 3 -# y . . +# x . . # . . x # . . . # . . . -# . . q +# . . o IsTerminal() = False History() = [0, 14, 5] HistoryString() = "0, 14, 5" @@ -140,17 +140,17 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] -StringLegalActions() = ["p(0,0)", "o(1,0)", "q(2,0)", "p(0,1)", "o(1,1)", "q(2,1)", "p(0,2)", "o(1,2)", "q(2,2)", "p(0,3)", "o(1,3)", "q(2,3)", "p(0,4)", "q(1,4)"] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3", "a4", "b4", "c4", "a5", "b5"] -# Apply action "q(2,3)" +# Apply action "c4" action: 11 # State 4 -# y . . +# x . . # . . x # . . . -# . . q -# . . q +# . . o +# . . o IsTerminal() = False History() = [0, 14, 5, 11] HistoryString() = "0, 14, 5, 11" @@ -168,25 +168,25 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14] -StringLegalActions() = ["y(1,0)", "y(2,0)", "y(0,1)", "x(1,1)", "x(0,2)", "x(1,2)", "x(2,2)", "x(0,3)", "x(1,3)", "x(2,3)", "z(0,4)", "z(1,4)", "z(2,4)"] +StringLegalActions() = ["b1", "c1", "a2", "b2", "a3", "b3", "c3", "a4", "b4", "c4", "a5", "b5", "c5"] -# Apply action "z(2,4)" +# Apply action "c5" action: 14 # State 5 -# Apply action "x(2,3)" +# Apply action "c4" action: 11 # State 6 -# Apply action "x(1,1)" +# Apply action "b2" action: 4 # State 7 -# y . . +# x . . # . x x # . . . -# . . q -# . . q +# . . o +# . . o IsTerminal() = False History() = [0, 14, 5, 11, 14, 11, 4] HistoryString() = "0, 14, 5, 11, 14, 11, 4" @@ -204,57 +204,57 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13] -StringLegalActions() = ["p(0,0)", "o(1,0)", "q(2,0)", "p(0,1)", "o(1,1)", "q(2,1)", "p(0,2)", "o(1,2)", "q(2,2)", "p(0,3)", "q(1,3)", "p(0,4)", "q(1,4)"] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3", "a4", "b4", "a5", "b5"] -# Apply action "q(2,2)" +# Apply action "c3" action: 8 # State 8 -# Apply action "z(1,4)" +# Apply action "b5" action: 13 # State 9 -# Apply action "p(0,2)" +# Apply action "a3" action: 6 # State 10 -# Apply action "z(0,4)" +# Apply action "a5" action: 12 # State 11 -# Apply action "o(1,0)" +# Apply action "b1" action: 1 # State 12 -# Apply action "x(1,2)" +# Apply action "b3" action: 7 # State 13 -# Apply action "p(1,1)" +# Apply action "b2" action: 4 # State 14 -# Apply action "O(1,2)" +# Apply action "b3" action: 7 # State 15 -# Apply action "p(0,3)" +# Apply action "a4" action: 9 # State 16 -# Apply action "y(1,0)" +# Apply action "b1" action: 1 # State 17 -# Apply action "z(0,3)" +# Apply action "a4" action: 9 # State 18 -# y o . +# x o . # . x x -# p x q -# p . q -# z z q +# o x o +# o . 
o +# x x o IsTerminal() = False History() = [0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9] HistoryString() = "0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9" @@ -272,25 +272,25 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [2, 3, 6, 8, 10] -StringLegalActions() = ["y(2,0)", "y(0,1)", "x(0,2)", "x(2,2)", "z(1,3)"] +StringLegalActions() = ["c1", "a2", "a3", "c3", "b4"] -# Apply action "z(1,3)" +# Apply action "b4" action: 10 # State 19 -# Apply action "O(1,3)" +# Apply action "b4" action: 10 # State 20 -# Apply action "p(0,0)" +# Apply action "a1" action: 0 # State 21 -# y o . -# . z z -# p z q -# p z q -# z z q +# x o . +# . x x +# o x o +# o x o +# x x o IsTerminal() = False History() = [0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9, 10, 10, 0] HistoryString() = "0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9, 10, 10, 0" @@ -308,40 +308,40 @@ ObservationTensor(1): ◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [2, 3, 5, 12, 13] -StringLegalActions() = ["q(2,0)", "p(0,1)", "q(2,1)", "p(0,4)", "q(1,4)"] +StringLegalActions() = ["c1", "a2", "c2", "a5", "b5"] -# Apply action "q(2,0)" +# Apply action "c1" action: 2 # State 22 -# Apply action "X(2,0)" +# Apply action "c1" action: 2 # State 23 -# Apply action "z(2,2)" +# Apply action "c3" action: 8 # State 24 -# Apply action "X(0,1)" +# Apply action "a2" action: 3 # State 25 -# y q q -# X z z -# p z q -# p z q -# z z q +# x o o +# x x x +# o x o +# o x o +# x x o IsTerminal() = True History() = [0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9, 10, 10, 0, 2, 2, 8, 3] HistoryString() = "0, 14, 5, 11, 14, 11, 4, 8, 13, 6, 12, 1, 7, 4, 7, 9, 1, 9, 10, 10, 0, 2, 2, 8, 3" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = -4 -InformationStateString(0) = "xoo\nXxx\n.xo\noxo\nxxo\n25\n0,0 0,5 0,14 0,11 0,4 0,13 0,12 0,7 0,1 0,9 0,10 0,2 0,8 0,3 " +InformationStateString(0) = "xoo\nxxx\n.xo\noxo\nxxo\n25\n0,0 0,5 0,14 0,11 0,4 0,13 0,12 0,7 0,1 0,9 0,10 0,2 0,8 0,3 " InformationStateString(1) = "xoo\n.x.\noxo\noxo\n..o\n25\n1,14 1,11 1,8 1,6 1,1 1,4 1,7 1,9 1,10 1,0 1,2 " InformationStateTensor(0): binvec(360, 0x40804001040202008100801020040204100001000001001010000010004010080000100010200001004000000) InformationStateTensor(1): binvec(360, 0x40804010040404008100801020080404000040040040020080002000080004000420001000000000000000000) -ObservationString(0) = "xoo\nXxx\n.xo\noxo\nxxo" +ObservationString(0) = "xoo\nxxx\n.xo\noxo\nxxo" ObservationString(1) = "xoo\n.x.\noxo\noxo\n..o" ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯ ObservationTensor(1): ◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ diff --git a/open_spiel/integration_tests/playthroughs/dark_hex_ir(board_size=3).txt b/open_spiel/integration_tests/playthroughs/dark_hex_ir(board_size=3).txt index fbb89363c1..7b6c647691 100644 --- a/open_spiel/integration_tests/playthroughs/dark_hex_ir(board_size=3).txt +++ b/open_spiel/integration_tests/playthroughs/dark_hex_ir(board_size=3).txt @@ -54,9 +54,9 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] -StringLegalActions() = ["y(0,0)", "y(1,0)", "y(2,0)", "x(0,1)", "x(1,1)", "x(2,1)", "z(0,2)", "z(1,2)", "z(2,2)"] 
+StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3"] -# Apply action "x(1,1)" +# Apply action "b2" action: 4 # State 1 @@ -80,15 +80,15 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] -StringLegalActions() = ["p(0,0)", "o(1,0)", "q(2,0)", "p(0,1)", "o(1,1)", "q(2,1)", "p(0,2)", "o(1,2)", "q(2,2)"] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3"] -# Apply action "q(2,2)" +# Apply action "c3" action: 8 # State 2 # . . . # . x . -# . . q +# . . o IsTerminal() = False History() = [4, 8] HistoryString() = "4, 8" @@ -106,15 +106,15 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 5, 6, 7, 8] -StringLegalActions() = ["y(0,0)", "y(1,0)", "y(2,0)", "x(0,1)", "x(2,1)", "z(0,2)", "z(1,2)", "z(2,2)"] +StringLegalActions() = ["a1", "b1", "c1", "a2", "c2", "a3", "b3", "c3"] -# Apply action "z(0,2)" +# Apply action "a3" action: 6 # State 3 # . . . -# . z . -# z . q +# . x . +# x . o IsTerminal() = False History() = [4, 8, 6] HistoryString() = "4, 8, 6" @@ -132,15 +132,15 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7] -StringLegalActions() = ["p(0,0)", "o(1,0)", "q(2,0)", "p(0,1)", "o(1,1)", "q(2,1)", "p(0,2)", "q(1,2)"] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3"] -# Apply action "p(0,2)" +# Apply action "a3" action: 6 # State 4 # . . . -# . z . -# z . q +# . x . +# x . o IsTerminal() = False History() = [4, 8, 6, 6] HistoryString() = "4, 8, 6, 6" @@ -158,15 +158,15 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 5, 7] -StringLegalActions() = ["p(0,0)", "o(1,0)", "q(2,0)", "p(0,1)", "o(1,1)", "q(2,1)", "q(1,2)"] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "b3"] -# Apply action "q(2,1)" +# Apply action "c2" action: 5 # State 5 # . . . -# . z q -# z . q +# . x o +# x . o IsTerminal() = False History() = [4, 8, 6, 6, 5] HistoryString() = "4, 8, 6, 6, 5" @@ -184,38 +184,38 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 5, 7, 8] -StringLegalActions() = ["y(0,0)", "X(1,0)", "X(2,0)", "z(0,1)", "z(2,1)", "z(1,2)", "z(2,2)"] +StringLegalActions() = ["a1", "b1", "c1", "a2", "c2", "b3", "c3"] -# Apply action "y(0,0)" +# Apply action "a1" action: 0 # State 6 -# Apply action "o(1,0)" +# Apply action "b1" action: 1 # State 7 -# Apply action "z(2,1)" +# Apply action "c2" action: 5 # State 8 -# Apply action "X(0,1)" +# Apply action "a2" action: 3 # State 9 -# y o . -# X z q -# z . q +# x o . +# x x o +# x . o IsTerminal() = True History() = [4, 8, 6, 6, 5, 0, 1, 5, 3] HistoryString() = "4, 8, 6, 6, 5, 0, 1, 5, 3" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = -4 -InformationStateString(0) = "P0 x..\nXxo\nx.." +InformationStateString(0) = "P0 x..\nxxo\nx.." InformationStateString(1) = "P1 .o.\n..o\nx.o" InformationStateTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ InformationStateTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯ -ObservationString(0) = "x..\nXxo\nx.." 
+ObservationString(0) = "x..\nxxo\nx.." ObservationString(1) = ".o.\n..o\nx.o" ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◉◯◯◯◯◯ diff --git a/open_spiel/integration_tests/playthroughs/dark_hex_reveal_turn_long.txt b/open_spiel/integration_tests/playthroughs/dark_hex_reveal_turn_long.txt index 0a2a1b327b..f5891bda76 100644 --- a/open_spiel/integration_tests/playthroughs/dark_hex_reveal_turn_long.txt +++ b/open_spiel/integration_tests/playthroughs/dark_hex_reveal_turn_long.txt @@ -54,13 +54,13 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] -StringLegalActions() = ["y(0,0)", "y(1,0)", "y(2,0)", "x(0,1)", "x(1,1)", "x(2,1)", "z(0,2)", "z(1,2)", "z(2,2)"] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3"] -# Apply action "y(0,0)" +# Apply action "a1" action: 0 # State 1 -# y . . +# x . . # . . . # . . . IsTerminal() = False @@ -80,13 +80,13 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8] -StringLegalActions() = ["p(0,0)", "o(1,0)", "q(2,0)", "p(0,1)", "o(1,1)", "q(2,1)", "p(0,2)", "o(1,2)", "q(2,2)"] +StringLegalActions() = ["a1", "b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3"] -# Apply action "q(2,0)" +# Apply action "c1" action: 2 # State 2 -# y . q +# x . o # . . . # . . . IsTerminal() = False @@ -106,13 +106,13 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [1, 2, 3, 4, 5, 6, 7, 8] -StringLegalActions() = ["y(1,0)", "y(2,0)", "y(0,1)", "x(1,1)", "x(2,1)", "z(0,2)", "z(1,2)", "z(2,2)"] +StringLegalActions() = ["b1", "c1", "a2", "b2", "c2", "a3", "b3", "c3"] -# Apply action "y(1,0)" +# Apply action "b1" action: 1 # State 3 -# y y q +# x x o # . . . # . . . IsTerminal() = False @@ -132,14 +132,14 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 3, 4, 5, 6, 7, 8] -StringLegalActions() = ["p(0,0)", "q(1,0)", "p(0,1)", "q(1,1)", "q(2,1)", "p(0,2)", "o(1,2)", "q(2,2)"] +StringLegalActions() = ["a1", "b1", "a2", "b2", "c2", "a3", "b3", "c3"] -# Apply action "p(0,1)" +# Apply action "a2" action: 3 # State 4 -# y y q -# p . . +# x x o +# o . . # . . . IsTerminal() = False History() = [0, 2, 1, 3] @@ -158,14 +158,14 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [2, 3, 4, 5, 6, 7, 8] -StringLegalActions() = ["y(2,0)", "y(0,1)", "y(1,1)", "x(2,1)", "z(0,2)", "z(1,2)", "z(2,2)"] +StringLegalActions() = ["c1", "a2", "b2", "c2", "a3", "b3", "c3"] -# Apply action "y(1,1)" +# Apply action "b2" action: 4 # State 5 -# y y q -# p y . +# x x o +# o x . # . . . 
IsTerminal() = False History() = [0, 2, 1, 3, 4] @@ -184,70 +184,70 @@ ObservationTensor(1): ◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯ Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 4, 5, 6, 7, 8] -StringLegalActions() = ["p(0,0)", "O(1,0)", "O(1,1)", "q(2,1)", "p(0,2)", "o(1,2)", "q(2,2)"] +StringLegalActions() = ["a1", "b1", "b2", "c2", "a3", "b3", "c3"] -# Apply action "p(0,2)" +# Apply action "a3" action: 6 # State 6 -# Apply action "y(2,1)" +# Apply action "c2" action: 5 # State 7 -# Apply action "p(1,2)" +# Apply action "b3" action: 7 # State 8 -# Apply action "X(1,2)" +# Apply action "b3" action: 7 # State 9 -# Apply action "O(2,1)" +# Apply action "c2" action: 5 # State 10 -# Apply action "X(0,2)" +# Apply action "a3" action: 6 # State 11 -# Apply action "O(1,1)" +# Apply action "b2" action: 4 # State 12 -# Apply action "y(0,1)" +# Apply action "a2" action: 3 # State 13 -# Apply action "O(1,0)" +# Apply action "b1" action: 1 # State 14 -# Apply action "y(2,0)" +# Apply action "c1" action: 2 # State 15 -# Apply action "p(0,0)" +# Apply action "a1" action: 0 # State 16 -# Apply action "X(2,2)" +# Apply action "c3" action: 8 # State 17 -# y y q -# p y y -# p p X +# x x o +# o x x +# o o x IsTerminal() = True History() = [0, 2, 1, 3, 4, 6, 5, 7, 7, 5, 6, 4, 3, 1, 2, 0, 8] HistoryString() = "0, 2, 1, 3, 4, 6, 5, 7, 7, 5, 6, 4, 3, 1, 2, 0, 8" IsChanceNode() = False IsSimultaneousNode() = False CurrentPlayer() = -4 -InformationStateString(0) = "xxo\noxx\nooX\n17\n0,0 1,? 0,1 1,? 0,4 1,? 0,5 1,? 0,7 1,? 0,6 1,? 0,3 1,? 0,2 1,? 0,8 " +InformationStateString(0) = "xxo\noxx\noox\n17\n0,0 1,? 0,1 1,? 0,4 1,? 0,5 1,? 0,7 1,? 0,6 1,? 0,3 1,? 0,2 1,? 0,8 " InformationStateString(1) = "xxo\noxx\noo.\n17\n0,? 1,2 0,? 1,3 0,? 1,6 0,? 1,7 0,? 1,5 0,? 1,4 0,? 1,1 0,? 1,0 0,? 
" InformationStateTensor(0): binvec(268, 0x4020402004020402000a0080240200820802042008048020220084080220200802) InformationStateTensor(1): binvec(268, 0x4020402004020402008001900006200018100060200182000610001a0000700001) -ObservationString(0) = "xxo\noxx\nooX\nTotal turns: 17" +ObservationString(0) = "xxo\noxx\noox\nTotal turns: 17" ObservationString(1) = "xxo\noxx\noo.\nTotal turns: 17" ObservationTensor(0): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ ObservationTensor(1): ◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉ diff --git a/open_spiel/integration_tests/playthroughs/hex(board_size=5).txt b/open_spiel/integration_tests/playthroughs/hex(board_size=5).txt index 1fc3c7e400..d234e390aa 100644 --- a/open_spiel/integration_tests/playthroughs/hex(board_size=5).txt +++ b/open_spiel/integration_tests/playthroughs/hex(board_size=5).txt @@ -6,7 +6,7 @@ GameType.information = Information.PERFECT_INFORMATION GameType.long_name = "Hex" GameType.max_num_players = 2 GameType.min_num_players = 2 -GameType.parameter_specification = ["board_size", "num_cols", "num_rows"] +GameType.parameter_specification = ["board_size", "num_cols", "num_rows", "string_rep"] GameType.provides_information_state_string = True GameType.provides_information_state_tensor = False GameType.provides_observation_string = True @@ -19,7 +19,7 @@ GameType.utility = Utility.ZERO_SUM NumDistinctActions() = 25 PolicyTensorShape() = [25] MaxChanceOutcomes() = 0 -GetParameters() = {board_size=5,num_cols=5,num_rows=5} +GetParameters() = {board_size=5,num_cols=5,num_rows=5,string_rep=standard} NumPlayers() = 2 MinUtility() = -1.0 MaxUtility() = 1.0 @@ -61,9 +61,9 @@ ObservationTensor(1): Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] -StringLegalActions() = ["y(0,0)", "y(1,0)", "y(2,0)", "y(3,0)", "y(4,0)", "x(0,1)", "x(1,1)", "x(2,1)", "x(3,1)", "x(4,1)", "x(0,2)", "x(1,2)", "x(2,2)", "x(3,2)", "x(4,2)", "x(0,3)", "x(1,3)", "x(2,3)", "x(3,3)", "x(4,3)", "z(0,4)", "z(1,4)", "z(2,4)", "z(3,4)", "z(4,4)"] +StringLegalActions() = ["a1", "b1", "c1", "d1", "e1", "a2", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "b4", "c4", "d4", "e4", "a5", "b5", "c5", "d5", "e5"] -# Apply action "x(0,1)" +# Apply action "a2" action: 5 # State 1 @@ -97,9 +97,9 @@ ObservationTensor(1): Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] -StringLegalActions() = ["p(0,0)", "o(1,0)", "o(2,0)", "o(3,0)", "q(4,0)", "o(1,1)", "o(2,1)", "o(3,1)", "q(4,1)", "p(0,2)", "o(1,2)", "o(2,2)", "o(3,2)", "q(4,2)", "p(0,3)", "o(1,3)", "o(2,3)", "o(3,3)", "q(4,3)", "p(0,4)", "o(1,4)", "o(2,4)", "o(3,4)", "q(4,4)"] +StringLegalActions() = ["a1", "b1", "c1", "d1", "e1", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "b4", "c4", "d4", "e4", "a5", "b5", "c5", "d5", "e5"] -# Apply action "o(3,3)" +# Apply action "d4" action: 18 # State 2 @@ -133,9 +133,9 @@ ObservationTensor(1): Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24] -StringLegalActions() = ["y(0,0)", "y(1,0)", "y(2,0)", "y(3,0)", "y(4,0)", "x(1,1)", "x(2,1)", "x(3,1)", "x(4,1)", "x(0,2)", "x(1,2)", "x(2,2)", "x(3,2)", "x(4,2)", "x(0,3)", "x(1,3)", "x(2,3)", "x(4,3)", "z(0,4)", "z(1,4)", "z(2,4)", "z(3,4)", 
"z(4,4)"] +StringLegalActions() = ["a1", "b1", "c1", "d1", "e1", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "b4", "c4", "e4", "a5", "b5", "c5", "d5", "e5"] -# Apply action "z(3,4)" +# Apply action "d5" action: 23 # State 3 @@ -143,7 +143,7 @@ action: 23 # x . . . . # . . . . . # . . . o . -# . . . z . +# . . . x . IsTerminal() = False History() = [5, 18, 23] HistoryString() = "5, 18, 23" @@ -152,8 +152,8 @@ IsSimultaneousNode() = False CurrentPlayer() = 1 InformationStateString(0) = "5, 18, 23" InformationStateString(1) = "5, 18, 23" -ObservationString(0) = ". . . . . \n x . . . . \n . . . . . \n . . . o . \n . . . z . " -ObservationString(1) = ". . . . . \n x . . . . \n . . . . . \n . . . o . \n . . . z . " +ObservationString(0) = ". . . . . \n x . . . . \n . . . . . \n . . . o . \n . . . x . " +ObservationString(1) = ". . . . . \n x . . . . \n . . . . . \n . . . o . \n . . . x . " ObservationTensor(0): ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ @@ -169,9 +169,9 @@ ObservationTensor(1): Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 24] -StringLegalActions() = ["p(0,0)", "o(1,0)", "o(2,0)", "o(3,0)", "q(4,0)", "o(1,1)", "o(2,1)", "o(3,1)", "q(4,1)", "p(0,2)", "o(1,2)", "o(2,2)", "o(3,2)", "q(4,2)", "p(0,3)", "o(1,3)", "o(2,3)", "q(4,3)", "p(0,4)", "o(1,4)", "o(2,4)", "q(4,4)"] +StringLegalActions() = ["a1", "b1", "c1", "d1", "e1", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "b4", "c4", "e4", "a5", "b5", "c5", "e5"] -# Apply action "o(3,0)" +# Apply action "d1" action: 3 # State 4 @@ -179,7 +179,7 @@ action: 3 # x . . . . # . . . . . # . . . o . -# . . . z . +# . . . x . IsTerminal() = False History() = [5, 18, 23, 3] HistoryString() = "5, 18, 23, 3" @@ -188,8 +188,8 @@ IsSimultaneousNode() = False CurrentPlayer() = 0 InformationStateString(0) = "5, 18, 23, 3" InformationStateString(1) = "5, 18, 23, 3" -ObservationString(0) = ". . . o . \n x . . . . \n . . . . . \n . . . o . \n . . . z . " -ObservationString(1) = ". . . o . \n x . . . . \n . . . . . \n . . . o . \n . . . z . " +ObservationString(0) = ". . . o . \n x . . . . \n . . . . . \n . . . o . \n . . . x . " +ObservationString(1) = ". . . o . \n x . . . . \n . . . . . \n . . . o . \n . . . x . " ObservationTensor(0): ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ @@ -205,9 +205,9 @@ ObservationTensor(1): Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 24] -StringLegalActions() = ["y(0,0)", "y(1,0)", "y(2,0)", "y(4,0)", "x(1,1)", "x(2,1)", "x(3,1)", "x(4,1)", "x(0,2)", "x(1,2)", "x(2,2)", "x(3,2)", "x(4,2)", "x(0,3)", "x(1,3)", "x(2,3)", "z(4,3)", "z(0,4)", "z(1,4)", "z(2,4)", "z(4,4)"] +StringLegalActions() = ["a1", "b1", "c1", "e1", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "b4", "c4", "e4", "a5", "b5", "c5", "e5"] -# Apply action "x(1,3)" +# Apply action "b4" action: 16 # State 5 @@ -215,7 +215,7 @@ action: 16 # x . . . . # . . . . . # . x . o . -# . . . z . +# . . . x . IsTerminal() = False History() = [5, 18, 23, 3, 16] HistoryString() = "5, 18, 23, 3, 16" @@ -224,8 +224,8 @@ IsSimultaneousNode() = False CurrentPlayer() = 1 InformationStateString(0) = "5, 18, 23, 3, 16" InformationStateString(1) = "5, 18, 23, 3, 16" -ObservationString(0) = ". . . o . \n x . . . . \n . . . . . \n . x . o . \n . 
. . z . " -ObservationString(1) = ". . . o . \n x . . . . \n . . . . . \n . x . o . \n . . . z . " +ObservationString(0) = ". . . o . \n x . . . . \n . . . . . \n . x . o . \n . . . x . " +ObservationString(1) = ". . . o . \n x . . . . \n . . . . . \n . x . o . \n . . . x . " ObservationTensor(0): ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◉◉◉◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◉◉◉◉ ◉◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ @@ -241,73 +241,73 @@ ObservationTensor(1): Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 19, 20, 21, 22, 24] -StringLegalActions() = ["p(0,0)", "o(1,0)", "o(2,0)", "q(4,0)", "o(1,1)", "o(2,1)", "o(3,1)", "q(4,1)", "p(0,2)", "o(1,2)", "o(2,2)", "o(3,2)", "q(4,2)", "p(0,3)", "o(2,3)", "q(4,3)", "p(0,4)", "o(1,4)", "o(2,4)", "q(4,4)"] +StringLegalActions() = ["a1", "b1", "c1", "e1", "b2", "c2", "d2", "e2", "a3", "b3", "c3", "d3", "e3", "a4", "c4", "e4", "a5", "b5", "c5", "e5"] -# Apply action "p(0,3)" +# Apply action "a4" action: 15 # State 6 -# Apply action "x(0,2)" +# Apply action "a3" action: 10 # State 7 -# Apply action "o(3,2)" +# Apply action "d3" action: 13 # State 8 -# Apply action "x(4,2)" +# Apply action "e3" action: 14 # State 9 -# Apply action "q(4,3)" +# Apply action "e4" action: 19 # State 10 -# Apply action "y(2,0)" +# Apply action "c1" action: 2 # State 11 -# Apply action "q(2,4)" +# Apply action "c5" action: 22 # State 12 -# Apply action "z(0,4)" +# Apply action "a5" action: 20 # State 13 -# Apply action "q(3,1)" +# Apply action "d2" action: 8 # State 14 -# Apply action "z(2,3)" +# Apply action "c4" action: 17 # State 15 -# Apply action "q(2,1)" +# Apply action "c2" action: 7 # State 16 -# Apply action "z(4,4)" +# Apply action "e5" action: 24 # State 17 -# Apply action "q(2,2)" +# Apply action "c3" action: 12 # State 18 -# Apply action "y(1,1)" +# Apply action "b2" action: 6 # State 19 -# Apply action "q(4,1)" +# Apply action "e2" action: 9 # State 20 -# . . y q . -# y y q q q -# y . q q x -# p z z q q -# z . q z z +# . . x o . +# x x o o o +# x . o o x +# o x x o o +# x . o x x IsTerminal() = False History() = [5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9] HistoryString() = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9" @@ -316,8 +316,8 @@ IsSimultaneousNode() = False CurrentPlayer() = 0 InformationStateString(0) = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9" InformationStateString(1) = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9" -ObservationString(0) = ". . y q . \n y y q q q \n y . q q x \n p z z q q \n z . q z z " -ObservationString(1) = ". . y q . \n y y q q q \n y . q q x \n p z z q q \n z . q z z " +ObservationString(0) = ". . x o . \n x x o o o \n x . o o x \n o x x o o \n x . o x x " +ObservationString(1) = ". . x o . \n x x o o o \n x . o o x \n o x x o o \n x . o x x " ObservationTensor(0): ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◉◉◯◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◯◯◯ ◯◯◯◯◯ @@ -333,17 +333,17 @@ ObservationTensor(1): Rewards() = [0, 0] Returns() = [0, -0] LegalActions() = [0, 1, 4, 11, 21] -StringLegalActions() = ["y(0,0)", "y(1,0)", "y(4,0)", "X(1,2)", "z(1,4)"] +StringLegalActions() = ["a1", "b1", "e1", "b3", "b5"] -# Apply action "X(1,2)" +# Apply action "b3" action: 11 # State 21 -# . . y q . -# y y q q q -# y X q q x -# p z z q q -# z . q z z +# . . x o . +# x x o o o +# x x o o x +# o x x o o +# x . 
o x x IsTerminal() = True History() = [5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9, 11] HistoryString() = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9, 11" @@ -352,8 +352,8 @@ IsSimultaneousNode() = False CurrentPlayer() = -4 InformationStateString(0) = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9, 11" InformationStateString(1) = "5, 18, 23, 3, 16, 15, 10, 13, 14, 19, 2, 22, 20, 8, 17, 7, 24, 12, 6, 9, 11" -ObservationString(0) = ". . y q . \n y y q q q \n y X q q x \n p z z q q \n z . q z z " -ObservationString(1) = ". . y q . \n y y q q q \n y X q q x \n p z z q q \n z . q z z " +ObservationString(0) = ". . x o . \n x x o o o \n x x o o x \n o x x o o \n x . o x x " +ObservationString(1) = ". . x o . \n x x o o o \n x x o o x \n o x x o o \n x . o x x " ObservationTensor(0): ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◉◯ ◯◯◯◯◯ ◉◉◯◯◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◉◉◉ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◯◯◯◯◯ ◉◉◯◯◯ ◯◯◯◯◯
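
Usage sketch (not part of the patch): the snippet below shows how the "string_rep" game parameter introduced above is selected through OpenSpiel's LoadGame API. It assumes only what this diff shows plus the standard LoadGame/NewInitialState/ActionToString entry points from open_spiel/spiel.h; the file name is hypothetical, and the expected outputs are taken from the playthroughs above.

// hex_string_rep_example.cc -- hypothetical file name; minimal sketch only.
#include <iostream>
#include <memory>

#include "open_spiel/spiel.h"

int main() {
  // Default: "standard" notation, i.e. chess-like action strings and
  // plain x/o stones in the board string.
  std::shared_ptr<const open_spiel::Game> game =
      open_spiel::LoadGame("hex(board_size=5)");
  std::unique_ptr<open_spiel::State> state = game->NewInitialState();
  // On a 5-column board, action 5 is column 5 % 5 = 0 ('a') and
  // row 5 / 5 + 1 = 2, so this prints "a2".
  std::cout << state->ActionToString(/*player=*/0, /*action=*/5) << "\n";

  // Backwards compatibility: string_rep=explicit restores the old strings,
  // with per-state stone characters and full (col,row) coordinates.
  std::shared_ptr<const open_spiel::Game> legacy_game =
      open_spiel::LoadGame("hex(board_size=5,string_rep=explicit)");
  std::unique_ptr<open_spiel::State> legacy_state =
      legacy_game->NewInitialState();
  std::cout << legacy_state->ActionToString(0, 5) << "\n";  // Prints "x(0,1)".
  return 0;
}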