PettingZoo wrapper changed to Gymnasium interface #6211

Open · wants to merge 11 commits into develop · changes shown from 5 commits
8 changes: 6 additions & 2 deletions .pre-commit-config.yaml
@@ -56,9 +56,13 @@ repos:
(?x)^(
.*cs.meta|
.*.css|
.*.meta
.*.meta|
.*.asset|
.*.prefab|
.*.unity|
.*.json
)$
args: [--fix=lf]
args: [--fix=crlf]

- id: trailing-whitespace
name: trailing-whitespace-markdown
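The `exclude` pattern above is an ordinary Python regular expression that pre-commit compiles; the `(?x)` verbose flag lets the alternation span several lines. A minimal sketch of how the extended pattern behaves (the file paths below are made up for illustration and are not from the repo):

```python
import re

# Same pattern as in the hook's `exclude` field; (?x) makes whitespace and
# newlines inside the pattern insignificant, so each extension gets its own line.
exclude = re.compile(
    r"""(?x)^(
        .*cs.meta|
        .*.css|
        .*.meta|
        .*.asset|
        .*.prefab|
        .*.unity|
        .*.json
    )$"""
)

print(bool(exclude.match("Assets/Scenes/Example.unity")))    # True  -> skipped by the hook
print(bool(exclude.match("ProjectSettings/EditorSettings.asset")))  # True  -> skipped by the hook
print(bool(exclude.match("ml-agents-envs/setup.py")))        # False -> still checked
```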
19 changes: 12 additions & 7 deletions docs/Python-Gym-API-Documentation.md
@@ -59,18 +59,22 @@ Environment initialization
#### reset

```python
| reset() -> Union[List[np.ndarray], np.ndarray]
| reset(*, seed: int | None = None, options: dict[str, Any] | None = None) -> Tuple[np.ndarray, Dict]
```

Resets the state of the environment and returns an initial observation.
Returns: observation (object/list): the initial observation of the
space.
Resets the state of the environment and returns an initial observation and info.

**Returns**:

- `observation` _object/list_ - the initial observation of the
space.
- `info` _dict_ - contains auxiliary diagnostic information.

<a name="mlagents_envs.envs.unity_gym_env.UnityToGymWrapper.step"></a>
#### step

```python
| step(action: List[Any]) -> GymStepResult
| step(action: Any) -> GymStepResult
```

Run one timestep of the environment's dynamics. When end of
@@ -86,14 +90,15 @@ Accepts an action and returns a tuple (observation, reward, done, info).

- `observation` _object/list_ - agent's observation of the current environment
reward (float/list) : amount of reward returned after previous action
- `done` _boolean/list_ - whether the episode has ended.
- `terminated` _boolean/list_ - whether the episode has ended by termination.
- `truncated` _boolean/list_ - whether the episode has ended by truncation.
- `info` _dict_ - contains auxiliary diagnostic information.

<a name="mlagents_envs.envs.unity_gym_env.UnityToGymWrapper.render"></a>
#### render

```python
| render(mode="rgb_array")
| render()
```

Return the latest visual observations.
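Taken together, the documentation changes above describe the Gymnasium-style calling convention: `reset()` returns `(observation, info)` and `step()` returns a five-tuple. A minimal usage sketch under assumed names (the build path passed to `UnityEnvironment` is a placeholder, not part of this PR):

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper

# "3DBall" is a placeholder path to a built Unity environment.
unity_env = UnityEnvironment(file_name="3DBall")
env = UnityToGymWrapper(unity_env)

# reset() now returns (observation, info) and takes keyword-only seed/options.
observation, info = env.reset(seed=0)

terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()
    # step() now returns a five-tuple; the old single `done` flag is split into
    # `terminated` (episode ended) and `truncated` (episode cut short).
    observation, reward, terminated, truncated, info = env.step(action)

env.close()
```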
44 changes: 30 additions & 14 deletions ml-agents-envs/mlagents_envs/envs/unity_gym_env.py
@@ -3,8 +3,8 @@
import numpy as np
from typing import Any, Dict, List, Optional, Tuple, Union

import gym
from gym import error, spaces
import gymnasium as gym
from gymnasium import error, spaces

from mlagents_envs.base_env import ActionTuple, BaseEnv
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
@@ -20,7 +20,7 @@ class UnityGymException(error.Error):


logger = logging_util.get_logger(__name__)
GymStepResult = Tuple[np.ndarray, float, bool, Dict]
GymStepResult = Tuple[np.ndarray, float, bool, bool, Dict]


class UnityToGymWrapper(gym.Env):
@@ -151,21 +151,26 @@ def __init__(
else:
self._observation_space = list_spaces[0] # only return the first one

def reset(self) -> Union[List[np.ndarray], np.ndarray]:
"""Resets the state of the environment and returns an initial observation.
Returns: observation (object/list): the initial observation of the
def reset(
self, *, seed: int | None = None, options: dict[str, Any] | None = None
) -> Tuple[np.ndarray, Dict]:
"""Resets the state of the environment and returns an initial observation and info.
Returns:
observation (object/list): the initial observation of the
space.
info (dict): contains auxiliary diagnostic information.
"""
super().reset(seed=seed, options=options)
self._env.reset()
decision_step, _ = self._env.get_steps(self.name)
n_agents = len(decision_step)
self._check_agents(n_agents)
self.game_over = False

res: GymStepResult = self._single_step(decision_step)
return res[0]
return res[0], res[4]

def step(self, action: List[Any]) -> GymStepResult:
def step(self, action: Any) -> GymStepResult:
"""Run one timestep of the environment's dynamics. When end of
episode is reached, you are responsible for calling `reset()`
to reset this environment's state.
@@ -175,14 +180,15 @@ def step(self, action: List[Any]) -> GymStepResult:
Returns:
observation (object/list): agent's observation of the current environment
reward (float/list) : amount of reward returned after previous action
done (boolean/list): whether the episode has ended.
terminated (boolean/list): whether the episode has ended by termination.
truncated (boolean/list): whether the episode has ended by truncation.
info (dict): contains auxiliary diagnostic information.
"""
if self.game_over:
raise UnityGymException(
"You are calling 'step()' even though this environment has already "
"returned done = True. You must always call 'reset()' once you "
"receive 'done = True'."
"returned `terminated` or `truncated` as True. You must always call 'reset()' once you "
"receive `terminated` or `truncated` as True."
)
if self._flattener is not None:
# Translate action into list
@@ -227,9 +233,19 @@ def _single_step(self, info: Union[DecisionSteps, TerminalSteps]) -> GymStepResu
visual_obs = self._get_vis_obs_list(info)
self.visual_obs = self._preprocess_single(visual_obs[0][0])

done = isinstance(info, TerminalSteps)
if isinstance(info, TerminalSteps):
interrupted = info.interrupted
terminated, truncated = not interrupted, interrupted
else:
terminated, truncated = False, False

return (default_observation, info.reward[0], done, {"step": info})
return (
default_observation,
info.reward[0],
terminated,
truncated,
{"step": info},
)

def _preprocess_single(self, single_visual_obs: np.ndarray) -> np.ndarray:
if self.uint8_visual:
@@ -276,7 +292,7 @@ def _get_vec_obs_size(self) -> int:
result += obs_spec.shape[0]
return result

def render(self, mode="rgb_array"):
def render(self):
"""
Return the latest visual observations.
Note that it will not render a new frame of the environment.
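The `_single_step` change above maps ML-Agents' `interrupted` flag onto Gymnasium's two end-of-episode flags. A standalone sketch of that mapping, and of how callers that still want a single `done` flag can recombine them (not part of the PR):

```python
from typing import Tuple

def split_done(is_terminal_step: bool, interrupted: bool) -> Tuple[bool, bool]:
    """Mirror of the wrapper logic: an interrupted episode (e.g. the Agent's
    Max Step was reached) is reported as truncated, otherwise as terminated."""
    if not is_terminal_step:
        return False, False   # episode still in progress
    if interrupted:
        return False, True    # truncated
    return True, False        # terminated

terminated, truncated = split_done(is_terminal_step=True, interrupted=True)
done = terminated or truncated  # old single-flag behaviour, if still needed
```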
38 changes: 38 additions & 0 deletions ml-agents-envs/pyproject.toml
@@ -0,0 +1,38 @@
[tool.poetry]
name = "mlagents_envs"
version = "1.2.0.dev0"
description = "Unity Machine Learning Agents Interface"
homepage = "https://github.com/Unity-Technologies/ml-agents"
authors = ["Unity Technologies <[email protected]>"]
classifiers=[
"Intended Audience :: Developers",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
readme = "README.md"

packages = [
{ include = "mlagents_envs", from = "." },
]
include = ["mlagents_envs/*"]
exclude = ["*.tests", "*.tests.*", "tests.*", "tests", "colabs", "*.ipynb"]

[tool.poetry.dependencies]
python = "^3.9"
grpcio = ">=1.11.0,<=1.48.2"
Pillow = ">=4.2.1"
protobuf = ">=3.6,<3.21"
pyyaml = ">=3.1.0"
gymnasium = ">=0.25.0"
pettingzoo = ">=1.15.0"
numpy = ">=1.23.5,<2.0"
filelock = ">=3.4.0"
cloudpickle = "*"

[build-system]
requires = ["poetry-core>=1.9.0"]
build-backend = "poetry.core.masonry.api"
8 changes: 4 additions & 4 deletions ml-agents-envs/setup.py
@@ -58,12 +58,12 @@ def run(self):
"Pillow>=4.2.1",
"protobuf>=3.6,<3.21",
"pyyaml>=3.1.0",
"gym>=0.21.0",
"pettingzoo==1.15.0",
"numpy>=1.23.5,<1.24.0",
"gymnasium>=0.25.0",
"pettingzoo>=1.15.0",
"numpy>=1.23.5,<2.0",
"filelock>=3.4.0",
],
python_requires=">=3.10.1,<=3.10.12",
python_requires=">=3.9,<4",
# TODO: Remove this once mypy stops having spurious setuptools issues.
cmdclass={"verify": VerifyVersionCommand}, # type: ignore
)
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/subprocess_env_manager.py
@@ -12,7 +12,7 @@
UnityCommunicatorStoppedException,
)
from multiprocessing import Process, Pipe, Queue
from multiprocessing.connection import Connection
from multiprocessing.connection import Connection, PipeConnection
from queue import Empty as EmptyQueueException
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
from mlagents_envs import logging_util
@@ -77,7 +77,7 @@ class StepResponse(NamedTuple):


class UnityEnvWorker:
def __init__(self, process: Process, worker_id: int, conn: Connection):
def __init__(self, process: Process, worker_id: int, conn: PipeConnection):
self.process = process
self.worker_id = worker_id
self.conn = conn
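For reference, a small sketch of the pipe objects behind the annotation change (not part of the diff): `multiprocessing.Pipe()` returns `PipeConnection` objects on Windows, while on POSIX platforms it returns `Connection` objects and `PipeConnection` is not defined at all.

```python
from multiprocessing import Pipe

parent_conn, child_conn = Pipe()
# On Windows both ends are multiprocessing.connection.PipeConnection instances;
# on Linux/macOS they are multiprocessing.connection.Connection instances.
print(type(parent_conn).__name__)

parent_conn.send({"worker_id": 0, "cmd": "reset"})
print(child_conn.recv())  # {'worker_id': 0, 'cmd': 'reset'}
```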
7 changes: 3 additions & 4 deletions utils/generate_markdown_docs.py
@@ -6,7 +6,6 @@
import argparse
import hashlib


# pydoc-markdown -I . -m module_name --render_toc > doc.md


@@ -52,8 +51,8 @@ def remove_trailing_whitespace(filename):
# compare source and destination and write only if changed
if source_file != destination_file:
num_changed += 1
with open(filename, "wb") as f:
f.write(destination_file.encode())
with open(filename, "w", newline="\r\n") as f:
f.write(destination_file)


if __name__ == "__main__":
@@ -84,7 +83,7 @@ def remove_trailing_whitespace(filename):
for submodule in submodules:
module_args.append("-m")
module_args.append(f"{module_name}.{submodule}")
with open(output_file_name, "w") as output_file:
with open(output_file_name, "wb") as output_file:
subprocess_args = [
"pydoc-markdown",
"-I",