# Penalty for leaving the battle

import numpy as np
import gym
from gym import spaces


class GoLeftEnv(gym.Env):
    """
    Custom grid-world environment that follows the gym interface.

    The agent walks on a 2D grid (coordinates GRID_MIN..GRID_MAX on both
    axes) and has to defeat an enemy standing on the cell
    (ENEMY_X, ENEMY_Y). An ATTACK issued from that cell removes
    ATTACK_DAMAGE points from the enemy's health; the episode is done once
    the health is depleted. After the battle has started, every step that
    ends away from the enemy cell is penalised.

    Observation: float32 array ``[x, y, enemy_health, battle]``.
    Actions: LEFT (x - 1), RIGHT (x + 1), HIGH (y + 1), LOW (y - 1), ATTACK.
    Reward: -50 when the agent is off the enemy cell after the battle
    started, +100 when the enemy is defeated, -1 otherwise.
    """
    # Because of google colab, we cannot implement the GUI ('human' render mode)
    metadata = {'render.modes': ['console']}

    # Action identifiers
    LEFT = 0
    RIGHT = 1
    HIGH = 2
    LOW = 3
    ATTACK = 4

    # World layout and combat parameters
    GRID_MIN = 1
    GRID_MAX = 10
    START_X = 1
    START_Y = 1
    ENEMY_X = 7
    ENEMY_Y = 8
    ENEMY_HEALTH = 30
    ATTACK_DAMAGE = 10

    def __init__(self, grid_size=10):
        """
        Create the environment and define its action/observation spaces.

        :param grid_size: (int) stored on the instance.
            NOTE(review): the playable area is currently fixed to
            GRID_MIN..GRID_MAX and does not depend on this value.
        """
        super().__init__()

        self.grid_size = grid_size
        # Legacy attribute kept so that external code reading it keeps working;
        # the environment itself tracks agent_posX / agent_posY.
        self.agent_pos = grid_size - 1

        # Five discrete actions: four moves plus the attack.
        n_actions = 5
        self.action_space = spaces.Discrete(n_actions)
        # Observation is [x, y, enemy_health, battle]; every component fits
        # in [0, 100].
        self.observation_space = spaces.Box(low=0, high=100,
                                            shape=(4,), dtype=np.float32)

        # Give the state valid values right away so that step()/render()
        # do not fail with AttributeError when called before reset().
        self._reset_state()

    def _reset_state(self):
        """Put the agent on the start cell and restore the enemy."""
        self.agent_posX = self.START_X
        self.agent_posY = self.START_Y
        self.enemyhealth = self.ENEMY_HEALTH
        self.battle = 0

    def _at_enemy(self):
        """Return True when the agent stands on the enemy cell."""
        return (self.agent_posX == self.ENEMY_X
                and self.agent_posY == self.ENEMY_Y)

    def _get_obs(self):
        """Build the observation with the dtype declared in observation_space."""
        return np.array(
            [self.agent_posX, self.agent_posY, self.enemyhealth, self.battle],
            dtype=np.float32,
        )

    def reset(self):
        """
        Start a new episode.

        :return: (np.ndarray) initial float32 observation
            ``[x, y, enemy_health, battle]``
        """
        self._reset_state()
        return self._get_obs()

    def step(self, action):
        """
        Apply one action and advance the environment.

        :param action: (int) one of LEFT, RIGHT, HIGH, LOW, ATTACK
        :return: (np.ndarray, int, bool, dict) observation, reward, done, info
        :raises ValueError: if ``action`` is not one of the five actions
        """
        # The battle flag is raised from the position held *before* this
        # move, i.e. on the step that follows the arrival at the enemy cell.
        if self._at_enemy():
            self.battle = 1

        if action == self.LEFT:
            self.agent_posX = max(self.GRID_MIN, self.agent_posX - 1)
        elif action == self.RIGHT:
            self.agent_posX = min(self.GRID_MAX, self.agent_posX + 1)
        elif action == self.LOW:
            self.agent_posY = max(self.GRID_MIN, self.agent_posY - 1)
        elif action == self.HIGH:
            self.agent_posY = min(self.GRID_MAX, self.agent_posY + 1)
        elif action == self.ATTACK:
            # Attacks only connect from the enemy cell.
            if self._at_enemy():
                self.enemyhealth -= self.ATTACK_DAMAGE
        else:
            raise ValueError("Received invalid action={} which is not part of the action space".format(action))

        # `<= 0` keeps the episode terminating even if the damage does not
        # divide the enemy health evenly.
        done = bool(self.enemyhealth <= 0)

        # "Left the battle" means NOT (x == ENEMY_X and y == ENEMY_Y), which
        # is true as soon as either coordinate differs.
        if self.battle == 1 and not self._at_enemy():
            reward = -50
        elif done:
            reward = 100
        else:
            reward = -1

        # Optionally we can pass additional info, we are not using that for now
        info = {}

        return self._get_obs(), reward, done, info

    def render(self, mode='console'):
        """
        Print the agent's X and Y coordinates to stdout.

        :param mode: (str) only 'console' is supported
        :raises NotImplementedError: for any other mode
        """
        if mode != 'console':
            raise NotImplementedError()
        print("Координата X")
        print(self.agent_posX)
        print("Координата Y")
        print(self.agent_posY)

    def close(self):
        """Nothing to release for this environment."""
        pass