The current docstring for `reset()` seems to indicate that the environment will be identical after separate calls to `reset()`. However, the `reset()` function isn't supposed to reset the states of the environment's RNGs [1]. This change clarifies the relationship between the `reset()` function and the RNGs.

[1]: #250
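To make the intended contract concrete, here is a minimal sketch (assuming a registered environment such as CartPole-v1 is available): reproducibility comes from `seed()`, not from `reset()`.

import gym

env = gym.make('CartPole-v1')

env.seed(42)
first = env.reset()    # initial observation of a seeded episode

second = env.reset()   # starts a new episode; the RNG state is NOT reset,
                       # so this will generally differ from `first`

env.seed(42)
third = env.reset()    # re-seeding is what makes the initial state reproducible

env.close()

The updated file follows.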
import gym
from gym import error
from gym.utils import closer

env_closer = closer.Closer()


class Env(object):
    """The main OpenAI Gym class. It encapsulates an environment with
    arbitrary behind-the-scenes dynamics. An environment can be
    partially or fully observed.

    The main API methods that users of this class need to know are:

        step
        reset
        render
        close
        seed

    And set the following attributes:

        action_space: The Space object corresponding to valid actions
        observation_space: The Space object corresponding to valid observations
        reward_range: A tuple corresponding to the min and max possible rewards

    Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.

    The methods are accessed publicly as "step", "reset", etc...
    """

    # Set this in SOME subclasses
    metadata = {'render.modes': []}
    reward_range = (-float('inf'), float('inf'))
    spec = None

    # Set these in ALL subclasses
    action_space = None
    observation_space = None
    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.

        Accepts an action and returns a tuple (observation, reward, done, info).

        Args:
            action (object): an action provided by the agent

        Returns:
            observation (object): agent's observation of the current environment
            reward (float): amount of reward returned after previous action
            done (bool): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
        """
        raise NotImplementedError

    def reset(self):
        """Resets the environment to an initial state and returns an initial
        observation.

        Note that this function should not reset the environment's random
        number generator(s); random variables in the environment's state should
        be sampled independently between multiple calls to `reset()`. In other
        words, each call of `reset()` should yield an environment suitable for
        a new episode, independent of previous episodes.

        Returns:
            observation (object): the initial observation.
        """
        raise NotImplementedError

    def render(self, mode='human'):
        """Renders the environment.

        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.) By convention,
        if mode is:

        - human: render to the current display or terminal and
          return nothing. Usually for human consumption.
        - rgb_array: Return a numpy.ndarray with shape (x, y, 3),
          representing RGB values for an x-by-y pixel image, suitable
          for turning into a video.
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines
          and ANSI escape sequences (e.g. for colors).

        Note:
            Make sure that your class's metadata 'render.modes' key includes
            the list of supported modes. It's recommended to call super()
            in implementations to use the functionality of this method.

        Args:
            mode (str): the mode to render with

        Example:

            class MyEnv(Env):
                metadata = {'render.modes': ['human', 'rgb_array']}

                def render(self, mode='human'):
                    if mode == 'rgb_array':
                        return np.array(...)  # return RGB frame suitable for video
                    elif mode == 'human':
                        ...  # pop up a window and render
                    else:
                        super(MyEnv, self).render(mode=mode)  # just raise an exception
        """
        raise NotImplementedError

    def close(self):
        """Override close in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        pass
    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        return

    @property
    def unwrapped(self):
        """Completely unwrap this env.

        Returns:
            gym.Env: The base non-wrapped gym.Env instance
        """
        return self

    def __str__(self):
        if self.spec is None:
            return '<{} instance>'.format(type(self).__name__)
        else:
            return '<{}<{}>>'.format(type(self).__name__, self.spec.id)

    def __enter__(self):
        """Support with-statement for the environment."""
        return self

    def __exit__(self, *args):
        """Support with-statement for the environment."""
        self.close()
        # propagate exception
        return False


class GoalEnv(Env):
    """A goal-based environment. It functions just as any regular OpenAI Gym environment but it
    imposes a required structure on the observation_space. More concretely, the observation
    space is required to contain at least three elements, namely `observation`, `desired_goal`, and
    `achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve.
    `achieved_goal` is the goal that the agent has currently achieved instead. `observation` contains
    the actual observations of the environment as per usual.
    """

    def reset(self):
        # Enforce that each GoalEnv uses a Goal-compatible observation space.
        if not isinstance(self.observation_space, gym.spaces.Dict):
            raise error.Error('GoalEnv requires an observation space of type gym.spaces.Dict')
        for key in ['observation', 'achieved_goal', 'desired_goal']:
            if key not in self.observation_space.spaces:
                raise error.Error('GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(key))

    def compute_reward(self, achieved_goal, desired_goal, info):
        """Compute the step reward. This externalizes the reward function and makes
        it dependent on a desired goal and the one that was achieved. If you wish to include
        additional rewards that are independent of the goal, you can include the necessary values
        to derive it in 'info' and compute it accordingly.

        Args:
            achieved_goal (object): the goal that was achieved during execution
            desired_goal (object): the desired goal that we asked the agent to attempt to achieve
            info (dict): an info dictionary with additional information

        Returns:
            float: The reward that corresponds to the provided achieved goal w.r.t. the desired
            goal. Note that the following should always hold true:

                ob, reward, done, info = env.step(action)
                assert reward == env.compute_reward(ob['achieved_goal'], ob['desired_goal'], info)
        """
        raise NotImplementedError
class Wrapper(Env):
    """Wraps the environment to allow a modular transformation.

    This class is the base class for all wrappers. A subclass can override
    some methods to change the behavior of the original environment without touching the
    original code.

    .. note::

        Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
    """

    def __init__(self, env):
        self.env = env
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.reward_range = self.env.reward_range
        self.metadata = self.env.metadata

    def __getattr__(self, name):
        if name.startswith('_'):
            raise AttributeError("attempted to get missing private attribute '{}'".format(name))
        return getattr(self.env, name)

    @property
    def spec(self):
        return self.env.spec

    @classmethod
    def class_name(cls):
        return cls.__name__

    def step(self, action):
        return self.env.step(action)

    def reset(self, **kwargs):
        return self.env.reset(**kwargs)

    def render(self, mode='human', **kwargs):
        return self.env.render(mode, **kwargs)

    def close(self):
        return self.env.close()

    def seed(self, seed=None):
        return self.env.seed(seed)

    def compute_reward(self, achieved_goal, desired_goal, info):
        return self.env.compute_reward(achieved_goal, desired_goal, info)

    def __str__(self):
        return '<{}{}>'.format(type(self).__name__, self.env)

    def __repr__(self):
        return str(self)

    @property
    def unwrapped(self):
        return self.env.unwrapped


class ObservationWrapper(Wrapper):
    def reset(self, **kwargs):
        observation = self.env.reset(**kwargs)
        return self.observation(observation)

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        return self.observation(observation), reward, done, info

    def observation(self, observation):
        raise NotImplementedError


class RewardWrapper(Wrapper):
    def reset(self, **kwargs):
        return self.env.reset(**kwargs)

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        return observation, self.reward(reward), done, info

    def reward(self, reward):
        raise NotImplementedError


class ActionWrapper(Wrapper):
    def reset(self, **kwargs):
        return self.env.reset(**kwargs)

    def step(self, action):
        return self.env.step(self.action(action))

    def action(self, action):
        raise NotImplementedError

    def reverse_action(self, action):
        raise NotImplementedError
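
As a usage reference, here is a minimal sketch of a custom environment implementing the API documented above. `CorridorEnv` and its dynamics are invented for illustration and are not part of gym.

import numpy as np
import gym
from gym import spaces
from gym.utils import seeding


class CorridorEnv(gym.Env):
    """Walk left/right along a corridor of `length` cells; reward 1.0 at the far end."""
    metadata = {'render.modes': ['human', 'ansi']}

    def __init__(self, length=10):
        self.length = length
        self.action_space = spaces.Discrete(2)            # 0 = left, 1 = right
        self.observation_space = spaces.Discrete(length)  # current cell index
        self.reward_range = (0.0, 1.0)
        self.np_random = None
        self.position = 0
        self.seed()

    def seed(self, seed=None):
        # gym.utils.seeding returns a seeded RNG and the seed actually used
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        # Sample a fresh starting cell; note the RNG itself is NOT re-seeded here
        self.position = self.np_random.randint(self.length // 2)
        return self.position

    def step(self, action):
        assert self.action_space.contains(action)
        self.position = int(np.clip(self.position + (1 if action == 1 else -1),
                                    0, self.length - 1))
        done = self.position == self.length - 1
        reward = 1.0 if done else 0.0
        return self.position, reward, done, {}

    def render(self, mode='human'):
        row = ''.join('x' if i == self.position else '.' for i in range(self.length))
        if mode == 'ansi':
            return row
        elif mode == 'human':
            print(row)
        else:
            super(CorridorEnv, self).render(mode=mode)  # raises on unsupported modes

Note how `seed()` owns the RNG state while `reset()` only samples from it, matching the clarified docstring.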

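A goal-based environment follows the same pattern but must expose the Dict observation structure that `GoalEnv.reset()` checks for. `ReachGoalEnv` below is a hypothetical sketch showing the required keys and the `compute_reward` contract; a real environment would also route its randomness through `seed()`.

import numpy as np
import gym
from gym import spaces


class ReachGoalEnv(gym.GoalEnv):
    """Move a 2-D point toward a goal; reward is 0.0 on success, -1.0 otherwise."""

    def __init__(self, threshold=0.05):
        self.threshold = threshold
        box = spaces.Box(-1.0, 1.0, shape=(2,), dtype=np.float32)
        self.observation_space = spaces.Dict(
            {'observation': box, 'achieved_goal': box, 'desired_goal': box})
        self.action_space = spaces.Box(-0.1, 0.1, shape=(2,), dtype=np.float32)
        self.goal = np.zeros(2, dtype=np.float32)
        self.pos = np.zeros(2, dtype=np.float32)

    def reset(self):
        super(ReachGoalEnv, self).reset()  # validates the Dict observation space
        # np.random is used here only to keep the sketch short
        self.goal = np.random.uniform(-1.0, 1.0, size=2).astype(np.float32)
        self.pos = np.random.uniform(-1.0, 1.0, size=2).astype(np.float32)
        return self._obs()

    def step(self, action):
        self.pos = np.clip(self.pos + action, -1.0, 1.0).astype(np.float32)
        obs = self._obs()
        reward = self.compute_reward(obs['achieved_goal'], obs['desired_goal'], {})
        done = reward == 0.0
        return obs, reward, done, {}

    def compute_reward(self, achieved_goal, desired_goal, info):
        # Sparse reward: 0 when within `threshold` of the goal, -1 otherwise.
        distance = np.linalg.norm(np.asarray(achieved_goal) - np.asarray(desired_goal), axis=-1)
        return 0.0 if distance <= self.threshold else -1.0

    def _obs(self):
        return {'observation': self.pos.copy(),
                'achieved_goal': self.pos.copy(),
                'desired_goal': self.goal.copy()}

With this structure, `compute_reward(ob['achieved_goal'], ob['desired_goal'], info)` reproduces the reward returned by `step()`, as the docstring requires.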
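Wrappers only need to override the transformation they care about. `ClipReward` below is an illustrative `RewardWrapper` subclass, not a class shipped with gym, and the usage at the end assumes CartPole-v1 is registered.

import gym


class ClipReward(gym.RewardWrapper):
    def __init__(self, env, min_reward=-1.0, max_reward=1.0):
        super(ClipReward, self).__init__(env)
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.reward_range = (min_reward, max_reward)

    def reward(self, reward):
        # Only the reward is transformed; observations and dynamics pass through unchanged.
        return max(self.min_reward, min(self.max_reward, reward))


env = ClipReward(gym.make('CartPole-v1'))
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()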