The current docstring for `reset()` seems to indicate that the environment will be identical after separate calls to `reset()`. However, the `reset()` function isn't supposed to reset the states of the environment's RNGs [1]. This change clarifies the relationship between the `reset()` function and the RNGs.

[1]: #250
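To make the intended contract concrete, here is a minimal sketch (assuming a registered environment such as CartPole-v1 is available): reproducibility comes from `seed()`, not from `reset()`.

import gym

env = gym.make('CartPole-v1')

env.seed(42)
first = env.reset()    # initial observation of a seeded episode

second = env.reset()   # starts a new episode; the RNG state is NOT reset,
                       # so this will generally differ from `first`

env.seed(42)
third = env.reset()    # re-seeding is what makes the initial state reproducible

env.close()

The updated file follows.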
import gym
from gym import error
from gym.utils import closer

env_closer = closer.Closer()


class Env(object):
    """The main OpenAI Gym class. It encapsulates an environment with
    arbitrary behind-the-scenes dynamics. An environment can be
    partially or fully observed.

    The main API methods that users of this class need to know are:

        step
        reset
        render
        close
        seed

    And set the following attributes:

        action_space: The Space object corresponding to valid actions
        observation_space: The Space object corresponding to valid observations
        reward_range: A tuple corresponding to the min and max possible rewards

    Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.

    The methods are accessed publicly as "step", "reset", etc...
    """

    # Set this in SOME subclasses
    metadata = {'render.modes': []}
    reward_range = (-float('inf'), float('inf'))
    spec = None

    # Set these in ALL subclasses
    action_space = None
    observation_space = None
    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.

        Accepts an action and returns a tuple (observation, reward, done, info).

        Args:
            action (object): an action provided by the agent

        Returns:
            observation (object): agent's observation of the current environment
            reward (float): amount of reward returned after previous action
            done (bool): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
        """
        raise NotImplementedError

    def reset(self):
        """Resets the environment to an initial state and returns an initial
        observation.

        Note that this function should not reset the environment's random
        number generator(s); random variables in the environment's state should
        be sampled independently between multiple calls to `reset()`. In other
        words, each call of `reset()` should yield an environment suitable for
        a new episode, independent of previous episodes.

        Returns:
            observation (object): the initial observation.
        """
        raise NotImplementedError

    def render(self, mode='human'):
        """Renders the environment.

        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.) By convention,
        if mode is:

        - human: render to the current display or terminal and
          return nothing. Usually for human consumption.
        - rgb_array: Return a numpy.ndarray with shape (x, y, 3),
          representing RGB values for an x-by-y pixel image, suitable
          for turning into a video.
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines
          and ANSI escape sequences (e.g. for colors).

        Note:
            Make sure that your class's metadata 'render.modes' key includes
            the list of supported modes. It's recommended to call super()
            in implementations to use the functionality of this method.

        Args:
            mode (str): the mode to render with

        Example:

            class MyEnv(Env):
                metadata = {'render.modes': ['human', 'rgb_array']}

                def render(self, mode='human'):
                    if mode == 'rgb_array':
                        return np.array(...)  # return RGB frame suitable for video
                    elif mode == 'human':
                        ...  # pop up a window and render
                    else:
                        super(MyEnv, self).render(mode=mode)  # just raise an exception
        """
        raise NotImplementedError

    def close(self):
        """Override close in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        pass
    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        return

    @property
    def unwrapped(self):
        """Completely unwrap this env.

        Returns:
            gym.Env: The base non-wrapped gym.Env instance
        """
        return self

    def __str__(self):
        if self.spec is None:
            return '<{} instance>'.format(type(self).__name__)
        else:
            return '<{}<{}>>'.format(type(self).__name__, self.spec.id)

    def __enter__(self):
        """Support with-statement for the environment."""
        return self

    def __exit__(self, *args):
        """Support with-statement for the environment."""
        self.close()
        # propagate exception
        return False


class GoalEnv(Env):
    """A goal-based environment. It functions just as any regular OpenAI Gym environment but it
    imposes a required structure on the observation_space. More concretely, the observation
    space is required to contain at least three elements, namely `observation`, `desired_goal`, and
    `achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve.
    `achieved_goal` is the goal that the agent has currently achieved instead. `observation` contains
    the actual observations of the environment as per usual.
    """

    def reset(self):
        # Enforce that each GoalEnv uses a Goal-compatible observation space.
        if not isinstance(self.observation_space, gym.spaces.Dict):
            raise error.Error('GoalEnv requires an observation space of type gym.spaces.Dict')
        for key in ['observation', 'achieved_goal', 'desired_goal']:
            if key not in self.observation_space.spaces:
                raise error.Error('GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(key))

    def compute_reward(self, achieved_goal, desired_goal, info):
        """Compute the step reward. This externalizes the reward function and makes
        it dependent on a desired goal and the one that was achieved. If you wish to include
        additional rewards that are independent of the goal, you can include the necessary values
        to derive it in 'info' and compute it accordingly.

        Args:
            achieved_goal (object): the goal that was achieved during execution
            desired_goal (object): the desired goal that we asked the agent to attempt to achieve
            info (dict): an info dictionary with additional information

        Returns:
            float: The reward that corresponds to the provided achieved goal w.r.t. the desired
            goal. Note that the following should always hold true:

                ob, reward, done, info = env.step(action)
                assert reward == env.compute_reward(ob['achieved_goal'], ob['desired_goal'], info)
        """
        raise NotImplementedError
class Wrapper(Env):
    """Wraps the environment to allow a modular transformation.

    This class is the base class for all wrappers. A subclass can override
    some methods to change the behavior of the original environment without touching the
    original code.

    .. note::

        Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
    """

    def __init__(self, env):
        self.env = env
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.reward_range = self.env.reward_range
        self.metadata = self.env.metadata

    def __getattr__(self, name):
        if name.startswith('_'):
            raise AttributeError("attempted to get missing private attribute '{}'".format(name))
        return getattr(self.env, name)

    @property
    def spec(self):
        return self.env.spec

    @classmethod
    def class_name(cls):
        return cls.__name__

    def step(self, action):
        return self.env.step(action)

    def reset(self, **kwargs):
        return self.env.reset(**kwargs)

    def render(self, mode='human', **kwargs):
        return self.env.render(mode, **kwargs)

    def close(self):
        return self.env.close()

    def seed(self, seed=None):
        return self.env.seed(seed)

    def compute_reward(self, achieved_goal, desired_goal, info):
        return self.env.compute_reward(achieved_goal, desired_goal, info)

    def __str__(self):
        return '<{}{}>'.format(type(self).__name__, self.env)

    def __repr__(self):
        return str(self)

    @property
    def unwrapped(self):
        return self.env.unwrapped


class ObservationWrapper(Wrapper):
    def reset(self, **kwargs):
        observation = self.env.reset(**kwargs)
        return self.observation(observation)

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        return self.observation(observation), reward, done, info

    def observation(self, observation):
        raise NotImplementedError


class RewardWrapper(Wrapper):
    def reset(self, **kwargs):
        return self.env.reset(**kwargs)

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        return observation, self.reward(reward), done, info

    def reward(self, reward):
        raise NotImplementedError


class ActionWrapper(Wrapper):
    def reset(self, **kwargs):
        return self.env.reset(**kwargs)

    def step(self, action):
        return self.env.step(self.action(action))

    def action(self, action):
        raise NotImplementedError

    def reverse_action(self, action):
        raise NotImplementedError
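
As a usage reference, here is a minimal sketch of a custom environment implementing the API documented above. `CorridorEnv` and its dynamics are invented for illustration and are not part of gym.

import numpy as np
import gym
from gym import spaces
from gym.utils import seeding


class CorridorEnv(gym.Env):
    """Walk left/right along a corridor of `length` cells; reward 1.0 at the far end."""
    metadata = {'render.modes': ['human', 'ansi']}

    def __init__(self, length=10):
        self.length = length
        self.action_space = spaces.Discrete(2)            # 0 = left, 1 = right
        self.observation_space = spaces.Discrete(length)  # current cell index
        self.reward_range = (0.0, 1.0)
        self.np_random = None
        self.position = 0
        self.seed()

    def seed(self, seed=None):
        # gym.utils.seeding returns a seeded RNG and the seed actually used
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        # Sample a fresh starting cell; note the RNG itself is NOT re-seeded here
        self.position = self.np_random.randint(self.length // 2)
        return self.position

    def step(self, action):
        assert self.action_space.contains(action)
        self.position = int(np.clip(self.position + (1 if action == 1 else -1),
                                    0, self.length - 1))
        done = self.position == self.length - 1
        reward = 1.0 if done else 0.0
        return self.position, reward, done, {}

    def render(self, mode='human'):
        row = ''.join('x' if i == self.position else '.' for i in range(self.length))
        if mode == 'ansi':
            return row
        elif mode == 'human':
            print(row)
        else:
            super(CorridorEnv, self).render(mode=mode)  # raises on unsupported modes

Note how `seed()` owns the RNG state while `reset()` only samples from it, matching the clarified docstring.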

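A goal-based environment follows the same pattern but must expose the Dict observation structure that `GoalEnv.reset()` checks for. `ReachGoalEnv` below is a hypothetical sketch showing the required keys and the `compute_reward` contract; a real environment would also route its randomness through `seed()`.

import numpy as np
import gym
from gym import spaces


class ReachGoalEnv(gym.GoalEnv):
    """Move a 2-D point toward a goal; reward is 0.0 on success, -1.0 otherwise."""

    def __init__(self, threshold=0.05):
        self.threshold = threshold
        box = spaces.Box(-1.0, 1.0, shape=(2,), dtype=np.float32)
        self.observation_space = spaces.Dict(
            {'observation': box, 'achieved_goal': box, 'desired_goal': box})
        self.action_space = spaces.Box(-0.1, 0.1, shape=(2,), dtype=np.float32)
        self.goal = np.zeros(2, dtype=np.float32)
        self.pos = np.zeros(2, dtype=np.float32)

    def reset(self):
        super(ReachGoalEnv, self).reset()  # validates the Dict observation space
        # np.random is used here only to keep the sketch short
        self.goal = np.random.uniform(-1.0, 1.0, size=2).astype(np.float32)
        self.pos = np.random.uniform(-1.0, 1.0, size=2).astype(np.float32)
        return self._obs()

    def step(self, action):
        self.pos = np.clip(self.pos + action, -1.0, 1.0).astype(np.float32)
        obs = self._obs()
        reward = self.compute_reward(obs['achieved_goal'], obs['desired_goal'], {})
        done = reward == 0.0
        return obs, reward, done, {}

    def compute_reward(self, achieved_goal, desired_goal, info):
        # Sparse reward: 0 when within `threshold` of the goal, -1 otherwise.
        distance = np.linalg.norm(np.asarray(achieved_goal) - np.asarray(desired_goal), axis=-1)
        return 0.0 if distance <= self.threshold else -1.0

    def _obs(self):
        return {'observation': self.pos.copy(),
                'achieved_goal': self.pos.copy(),
                'desired_goal': self.goal.copy()}

With this structure, `compute_reward(ob['achieved_goal'], ob['desired_goal'], info)` reproduces the reward returned by `step()`, as the docstring requires.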
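Wrappers only need to override the transformation they care about. `ClipReward` below is an illustrative `RewardWrapper` subclass, not a class shipped with gym, and the usage at the end assumes CartPole-v1 is registered.

import gym


class ClipReward(gym.RewardWrapper):
    def __init__(self, env, min_reward=-1.0, max_reward=1.0):
        super(ClipReward, self).__init__(env)
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.reward_range = (min_reward, max_reward)

    def reward(self, reward):
        # Only the reward is transformed; observations and dynamics pass through unchanged.
        return max(self.min_reward, min(self.max_reward, reward))


env = ClipReward(gym.make('CartPole-v1'))
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()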