From 6bd65f14739ceb0ba95cf7c64ff8f211f6c51419 Mon Sep 17 00:00:00 2001
From: a_zap
Date: Wed, 28 May 2025 17:29:08 +0200
Subject: [PATCH 1/9] Adjusted .pre-commit-config.yaml for Windows usage

---
 .pre-commit-config.yaml | 8 ++++++--
 utils/generate_markdown_docs.py | 7 +++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 08a8e33e72..7d1fffd938 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -56,9 +56,13 @@ repos:
             (?x)^(
                 .*cs.meta|
                 .*.css|
-                .*.meta
+                .*.meta|
+                .*.asset|
+                .*.prefab|
+                .*.unity|
+                .*.json
             )$
-      args: [--fix=lf]
+      args: [--fix=crlf]

   - id: trailing-whitespace
     name: trailing-whitespace-markdown
diff --git a/utils/generate_markdown_docs.py b/utils/generate_markdown_docs.py
index 7566b1bdc7..5ce432b3a2 100755
--- a/utils/generate_markdown_docs.py
+++ b/utils/generate_markdown_docs.py
@@ -6,7 +6,6 @@
 import argparse
 import hashlib

-
 # pydoc-markdown -I . -m module_name --render_toc > doc.md


@@ -52,8 +51,8 @@ def remove_trailing_whitespace(filename):
     # compare source and destination and write only if changed
     if source_file != destination_file:
         num_changed += 1
-        with open(filename, "wb") as f:
-            f.write(destination_file.encode())
+        with open(filename, "w", newline="\r\n") as f:
+            f.write(destination_file)


 if __name__ == "__main__":
@@ -84,7 +83,7 @@ def remove_trailing_whitespace(filename):
         for submodule in submodules:
             module_args.append("-m")
             module_args.append(f"{module_name}.{submodule}")
-    with open(output_file_name, "w") as output_file:
+    with open(output_file_name, "wb") as output_file:
         subprocess_args = [
             "pydoc-markdown",
             "-I",

From cd2b649017dcebd1722b01fd70c68f90c1ff612c Mon Sep 17 00:00:00 2001
From: a_zap
Date: Wed, 28 May 2025 17:31:55 +0200
Subject: [PATCH 2/9] Fixed mypy issue

---
 ml-agents/mlagents/trainers/subprocess_env_manager.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py
index 43d468f2bc..8f767e23d0 100644
--- a/ml-agents/mlagents/trainers/subprocess_env_manager.py
+++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py
@@ -12,7 +12,7 @@
     UnityCommunicatorStoppedException,
 )
 from multiprocessing import Process, Pipe, Queue
-from multiprocessing.connection import Connection
+from multiprocessing.connection import Connection, PipeConnection
 from queue import Empty as EmptyQueueException
 from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
 from mlagents_envs import logging_util
@@ -77,7 +77,7 @@ class StepResponse(NamedTuple):


 class UnityEnvWorker:
-    def __init__(self, process: Process, worker_id: int, conn: Connection):
+    def __init__(self, process: Process, worker_id: int, conn: PipeConnection):
         self.process = process
         self.worker_id = worker_id
         self.conn = conn

From 48a29df751f9c357826c00807c73abfed2caf64d Mon Sep 17 00:00:00 2001
From: a_zap
Date: Wed, 28 May 2025 17:32:27 +0200
Subject: [PATCH 3/9] Switched to gymnasium interface

- added dependency to gymnasium instead of gym
- adjusted Unity gym interface to return `truncated` on step
- adjusted Unity gym interface to accept `seed` and `options` on reset (not used)
- adjusted Unity gym interface to not accept `mode` on render (not used anyway)
- relaxed dependencies to numpy, pettingzoo and Python version
---
 docs/Python-Gym-API-Documentation.md | 19 +++++---
 .../mlagents_envs/envs/unity_gym_env.py | 44 +++++++++++++------
 ml-agents-envs/setup.py | 8 ++--
 3
files changed, 46 insertions(+), 25 deletions(-) diff --git a/docs/Python-Gym-API-Documentation.md b/docs/Python-Gym-API-Documentation.md index b35771fc46..e92edce5e0 100644 --- a/docs/Python-Gym-API-Documentation.md +++ b/docs/Python-Gym-API-Documentation.md @@ -59,18 +59,22 @@ Environment initialization #### reset ```python - | reset() -> Union[List[np.ndarray], np.ndarray] + | reset(*, seed: int | None = None, options: dict[str, Any] | None = None) -> Tuple[np.ndarray, Dict] ``` -Resets the state of the environment and returns an initial observation. -Returns: observation (object/list): the initial observation of the -space. +Resets the state of the environment and returns an initial observation and info. + +**Returns**: + +- `observation` _object/list_ - the initial observation of the + space. +- `info` _dict_ - contains auxiliary diagnostic information. #### step ```python - | step(action: List[Any]) -> GymStepResult + | step(action: Any) -> GymStepResult ``` Run one timestep of the environment's dynamics. When end of @@ -86,14 +90,15 @@ Accepts an action and returns a tuple (observation, reward, done, info). - `observation` _object/list_ - agent's observation of the current environment reward (float/list) : amount of reward returned after previous action -- `done` _boolean/list_ - whether the episode has ended. +- `terminated` _boolean/list_ - whether the episode has ended by termination. +- `truncated` _boolean/list_ - whether the episode has ended by truncation. - `info` _dict_ - contains auxiliary diagnostic information. #### render ```python - | render(mode="rgb_array") + | render() ``` Return the latest visual observations. diff --git a/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py b/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py index df29a95c9a..3f0513ffb0 100644 --- a/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py +++ b/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py @@ -3,8 +3,8 @@ import numpy as np from typing import Any, Dict, List, Optional, Tuple, Union -import gym -from gym import error, spaces +import gymnasium as gym +from gymnasium import error, spaces from mlagents_envs.base_env import ActionTuple, BaseEnv from mlagents_envs.base_env import DecisionSteps, TerminalSteps @@ -20,7 +20,7 @@ class UnityGymException(error.Error): logger = logging_util.get_logger(__name__) -GymStepResult = Tuple[np.ndarray, float, bool, Dict] +GymStepResult = Tuple[np.ndarray, float, bool, bool, Dict] class UnityToGymWrapper(gym.Env): @@ -151,11 +151,16 @@ def __init__( else: self._observation_space = list_spaces[0] # only return the first one - def reset(self) -> Union[List[np.ndarray], np.ndarray]: - """Resets the state of the environment and returns an initial observation. - Returns: observation (object/list): the initial observation of the + def reset( + self, *, seed: int | None = None, options: dict[str, Any] | None = None + ) -> Tuple[np.ndarray, Dict]: + """Resets the state of the environment and returns an initial observation and info. + Returns: + observation (object/list): the initial observation of the space. + info (dict): contains auxiliary diagnostic information. 
""" + super().reset(seed=seed, options=options) self._env.reset() decision_step, _ = self._env.get_steps(self.name) n_agents = len(decision_step) @@ -163,9 +168,9 @@ def reset(self) -> Union[List[np.ndarray], np.ndarray]: self.game_over = False res: GymStepResult = self._single_step(decision_step) - return res[0] + return res[0], res[4] - def step(self, action: List[Any]) -> GymStepResult: + def step(self, action: Any) -> GymStepResult: """Run one timestep of the environment's dynamics. When end of episode is reached, you are responsible for calling `reset()` to reset this environment's state. @@ -175,14 +180,15 @@ def step(self, action: List[Any]) -> GymStepResult: Returns: observation (object/list): agent's observation of the current environment reward (float/list) : amount of reward returned after previous action - done (boolean/list): whether the episode has ended. + terminated (boolean/list): whether the episode has ended by termination. + truncated (boolean/list): whether the episode has ended by truncation. info (dict): contains auxiliary diagnostic information. """ if self.game_over: raise UnityGymException( "You are calling 'step()' even though this environment has already " - "returned done = True. You must always call 'reset()' once you " - "receive 'done = True'." + "returned `terminated` or `truncated` as True. You must always call 'reset()' once you " + "receive `terminated` or `truncated` as True." ) if self._flattener is not None: # Translate action into list @@ -227,9 +233,19 @@ def _single_step(self, info: Union[DecisionSteps, TerminalSteps]) -> GymStepResu visual_obs = self._get_vis_obs_list(info) self.visual_obs = self._preprocess_single(visual_obs[0][0]) - done = isinstance(info, TerminalSteps) + if isinstance(info, TerminalSteps): + interrupted = info.interrupted + terminated, truncated = not interrupted, interrupted + else: + terminated, truncated = False, False - return (default_observation, info.reward[0], done, {"step": info}) + return ( + default_observation, + info.reward[0], + terminated, + truncated, + {"step": info}, + ) def _preprocess_single(self, single_visual_obs: np.ndarray) -> np.ndarray: if self.uint8_visual: @@ -276,7 +292,7 @@ def _get_vec_obs_size(self) -> int: result += obs_spec.shape[0] return result - def render(self, mode="rgb_array"): + def render(self): """ Return the latest visual observations. Note that it will not render a new frame of the environment. diff --git a/ml-agents-envs/setup.py b/ml-agents-envs/setup.py index fcbee96151..bd40cb4c01 100644 --- a/ml-agents-envs/setup.py +++ b/ml-agents-envs/setup.py @@ -58,12 +58,12 @@ def run(self): "Pillow>=4.2.1", "protobuf>=3.6,<3.21", "pyyaml>=3.1.0", - "gym>=0.21.0", - "pettingzoo==1.15.0", - "numpy>=1.23.5,<1.24.0", + "gymnasium>=0.25.0", + "pettingzoo>=1.15.0", + "numpy>=1.23.5,<2.0", "filelock>=3.4.0", ], - python_requires=">=3.10.1,<=3.10.12", + python_requires=">=3.9,<4", # TODO: Remove this once mypy stops having spurious setuptools issues. 
cmdclass={"verify": VerifyVersionCommand}, # type: ignore ) From 98b22d7557b74b80bf859ab32449a3aabf88d6a7 Mon Sep 17 00:00:00 2001 From: a_zap Date: Wed, 28 May 2025 17:34:29 +0200 Subject: [PATCH 4/9] Created pyproject.toml to support installation of package via poetry (without publishing) --- ml-agents-envs/pyproject.toml | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 ml-agents-envs/pyproject.toml diff --git a/ml-agents-envs/pyproject.toml b/ml-agents-envs/pyproject.toml new file mode 100644 index 0000000000..51752d556d --- /dev/null +++ b/ml-agents-envs/pyproject.toml @@ -0,0 +1,38 @@ +[tool.poetry] +name = "mlagents_envs" +version = "1.2.0.dev0" +description = "Unity Machine Learning Agents Interface" +homepage = "https://github.com/Unity-Technologies/ml-agents" +authors = ["Unity Technologies "] +classifiers=[ + "Intended Audience :: Developers", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +readme = "README.md" + +packages = [ + { include = "mlagents_envs", from = "." }, +] +include = ["mlagents_envs/*"] +exclude = ["*.tests", "*.tests.*", "tests.*", "tests", "colabs", "*.ipynb"] + +[tool.poetry.dependencies] +python = "^3.9" +grpcio = ">=1.11.0,<=1.48.2" +Pillow = ">=4.2.1" +protobuf = ">=3.6,<3.21" +pyyaml = ">=3.1.0" +gymnasium = ">=0.25.0" +pettingzoo = ">=1.15.0" +numpy = ">=1.23.5,<2.0" +filelock = ">=3.4.0" +cloudpickle = "*" + +[build-system] +requires = ["poetry-core>=1.9.0"] +build-backend = "poetry.core.masonry.api" From 4ca9d8be7ef2a65d6e43a09654281005320bc091 Mon Sep 17 00:00:00 2001 From: alexander-zap Date: Wed, 28 May 2025 18:13:39 +0200 Subject: [PATCH 5/9] Updated imports to use gymnasium instead of gym --- colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb | 4 ++-- docs/Python-Gym-API.md | 2 +- ml-agents-envs/mlagents_envs/envs/unity_aec_env.py | 2 +- ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py | 2 +- .../mlagents_envs/envs/unity_pettingzoo_base_env.py | 2 +- ml-agents-envs/tests/test_gym.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb b/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb index e5d3d45c8b..83aad09aba 100644 --- a/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb +++ b/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb @@ -161,8 +161,8 @@ "from pathlib import Path\n", "from typing import Callable, Any\n", "\n", - "import gym\n", - "from gym import Env\n", + "import gymnasium as gym\n", + "from gymnasium import Env\n", "\n", "from stable_baselines3 import PPO\n", "from stable_baselines3.common.vec_env import VecMonitor, VecEnv, SubprocVecEnv\n", diff --git a/docs/Python-Gym-API.md b/docs/Python-Gym-API.md index 97869899ce..59ce44eeb6 100644 --- a/docs/Python-Gym-API.md +++ b/docs/Python-Gym-API.md @@ -93,7 +93,7 @@ observation, a single discrete action and a single Agent in the scene. 
Add the following code to the `train_unity.py` file: ```python -import gym +import gymnasium as gym from baselines import deepq from baselines import logger diff --git a/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py b/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py index 4bb6fdf390..bccae65c0f 100644 --- a/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py +++ b/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py @@ -1,5 +1,5 @@ from typing import Any, Optional -from gym import error +from gymnasium import error from mlagents_envs.base_env import BaseEnv from pettingzoo import AECEnv diff --git a/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py b/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py index 09398d27fa..906905e83b 100644 --- a/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py +++ b/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py @@ -1,5 +1,5 @@ from typing import Optional, Dict, Any, Tuple -from gym import error +from gymnasium import error from mlagents_envs.base_env import BaseEnv from pettingzoo import ParallelEnv diff --git a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py index 3457f18c88..c040050a2b 100644 --- a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py +++ b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py @@ -1,7 +1,7 @@ import atexit from typing import Optional, List, Set, Dict, Any, Tuple import numpy as np -from gym import error, spaces +from gymnasium import error, spaces from mlagents_envs.base_env import BaseEnv, ActionTuple from mlagents_envs.envs.env_helpers import _agent_id_to_behavior, _unwrap_batch_steps diff --git a/ml-agents-envs/tests/test_gym.py b/ml-agents-envs/tests/test_gym.py index 4fc2bf548c..21afdc0c9f 100644 --- a/ml-agents-envs/tests/test_gym.py +++ b/ml-agents-envs/tests/test_gym.py @@ -2,7 +2,7 @@ import pytest import numpy as np -from gym import spaces +from gymnasium import spaces from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper from mlagents_envs.base_env import ( From be47ce09d38bd0207379fda425dba72c284b4c30 Mon Sep 17 00:00:00 2001 From: alexander-zap Date: Fri, 13 Jun 2025 18:53:04 +0200 Subject: [PATCH 6/9] Adjusted pettingzoo environments to support gymnasium interface (reset returning obs and info; step returning terminated and truncated instead of done) --- docs/Python-PettingZoo-API.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/Python-PettingZoo-API.md b/docs/Python-PettingZoo-API.md index 2c62ed8415..651932a9e4 100644 --- a/docs/Python-PettingZoo-API.md +++ b/docs/Python-PettingZoo-API.md @@ -25,13 +25,13 @@ Here's an example of interacting with wrapped environment: ```python from mlagents_envs.environment import UnityEnvironment -from mlagents_envs.envs import UnityToPettingZooWrapper +from mlagents_envs.envs.unity_aec_env import UnityAECEnv unity_env = UnityEnvironment("StrikersVsGoalie") -env = UnityToPettingZooWrapper(unity_env) +env = UnityAECEnv(unity_env) env.reset() for agent in env.agent_iter(): - observation, reward, done, info = env.last() + observation, reward, terminated, truncated, info = env.last() action = policy(observation, agent) env.step(action) ``` From f3011030c1ad79151cb4a9f49b2f30ec93f91fc9 Mon Sep 17 00:00:00 2001 From: alexander-zap Date: Fri, 13 Jun 2025 21:20:18 +0200 Subject: [PATCH 7/9] Adjusted pettingzoo environments to support gymnasium interface (reset returning obs and info; step returning terminated and 
truncated instead of done)
---
 docs/Python-PettingZoo-API-Documentation.md | 23 ++-----
 ml-agents-envs/README.md | 37 +++++++---
 .../mlagents_envs/envs/env_helpers.py | 20 +++++-
 .../mlagents_envs/envs/unity_aec_env.py | 9 ++-
 .../mlagents_envs/envs/unity_gym_env.py | 2 +-
 .../mlagents_envs/envs/unity_parallel_env.py | 18 +++--
 .../envs/unity_pettingzoo_base_env.py | 69 ++++++++++++-------
 ml-agents-envs/pyproject.toml | 4 +-
 ml-agents-envs/setup.py | 6 +-
 9 files changed, 122 insertions(+), 66 deletions(-)

diff --git a/docs/Python-PettingZoo-API-Documentation.md b/docs/Python-PettingZoo-API-Documentation.md
index 233e45e805..423a09d64e 100644
--- a/docs/Python-PettingZoo-API-Documentation.md
+++ b/docs/Python-PettingZoo-API-Documentation.md
@@ -21,7 +21,6 @@
   * [action\_space](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.action_space)
   * [side\_channel](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.side_channel)
   * [reset](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.reset)
-  * [seed](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.seed)
   * [render](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.render)
   * [close](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.close)

@@ -137,7 +136,7 @@ Initializes a Unity Parallel environment wrapper.
 #### reset

 ```python
- | reset() -> Dict[str, Any]
+ | reset(seed: int | None = None, options: dict | None = None) -> Tuple[Dict[str, Any], Dict[str, Dict]]
 ```

 Resets the environment.

@@ -207,32 +206,24 @@ of an environment with `env.side_channel[]`.
 #### reset

 ```python
- | reset()
+ | reset(seed: int | None = None, options: dict | None = None) -> Any
 ```

 Resets the environment.

-
-#### seed
-
-```python
- | seed(seed=None)
-```
-
-Reseeds the environment (making the resulting environment deterministic).
-`reset()` must be called after `seed()`, and before `step()`.
-
 #### render

 ```python
- | render(mode="human")
+ | render()
 ```

 NOT SUPPORTED.

-Displays a rendered frame from the environment, if supported.
-Alternate render modes in the default environments are `'rgb_array'`
+Renders the environment as specified by self.render_mode, if supported.
+
+Render mode can be `human` to display a window.
+Other render modes in the default environments are `'rgb_array'`
 which returns a numpy array and is supported by all environments outside
 of classic, and `'ansi'` which returns the strings printed
 (specific to classic environments).
diff --git a/ml-agents-envs/README.md b/ml-agents-envs/README.md
index 4db68723d2..b6b94f7a18 100644
--- a/ml-agents-envs/README.md
+++ b/ml-agents-envs/README.md
@@ -12,14 +12,6 @@ The LLAPI is used by the trainer implementation in `mlagents`.
 `mlagents_envs` can be used independently of `mlagents` for Python
 communication.

-## Installation
-
-Install the `mlagents_envs` package with:
-
-```sh
-python -m pip install mlagents_envs==1.1.0
-```
-
 ## Usage & More Information

 See
@@ -42,3 +34,32 @@ scene with the ML-Agents SDK, check out the main
 - Communication between Unity and the Python `UnityEnvironment` is not
   secure.
 - On Linux, ports are not released immediately after the communication closes.
   As such, you cannot reuse ports right after closing a `UnityEnvironment`.
+
+## Development and publishing (Wargaming artifactory)
+
+Since this package no longer seems to be maintained by the official developers, we have forked it to the Wargaming GitLab and are maintaining it there.
+Publishing is done via the [Wargaming artifactory](https://ed.artifactory.wgdp.io:443/artifactory/api/pypi/mlopsbi-pypi/simple). + +To contribute to the `mlagents_envs` package, please work on a branch and create a merge request to `master` once ready. +Once the merge request is approved and merged to `master` branch, a gitlab pipeline will automatically create a new git tag and publish the new version to the Wargaming artifactory. + +## Installation (Wargaming artifactory) + +Since publishing is done via the Wargaming artifactory, you can use this package as dependency by adding the following to your `pyproject.toml`: + +```toml +[tool.poetry.dependencies] +mlagents-envs = { version = "^0.1", source = "artifactory" } + +[[tool.poetry.source]] +name = "artifactory" +url = "https://ed.artifactory.wgdp.io:443/artifactory/api/pypi/mlopsbi-pypi/simple" +priority = "explicit" +``` + + +Or you can install the `mlagents_envs` package from the Wargaming artifactory using pip: + +```bash +pip install mlagents-envs --extra-index-url https://ed.artifactory.wgdp.io:443/artifactory/api/pypi/mlopsbi-pypi/simple +``` diff --git a/ml-agents-envs/mlagents_envs/envs/env_helpers.py b/ml-agents-envs/mlagents_envs/envs/env_helpers.py index 768e670603..0c17c2b20d 100644 --- a/ml-agents-envs/mlagents_envs/envs/env_helpers.py +++ b/ml-agents-envs/mlagents_envs/envs/env_helpers.py @@ -40,8 +40,6 @@ def _unwrap_batch_steps(batch_steps, behavior_name): } ) obs = {k: v if len(v) > 1 else v[0] for k, v in obs.items()} - dones = {agent_id: True for agent_id in termination_id} - dones.update({agent_id: False for agent_id in decision_id}) rewards = { agent_id: termination_batch.reward[i] for i, agent_id in enumerate(termination_id) @@ -51,19 +49,35 @@ def _unwrap_batch_steps(batch_steps, behavior_name): ) cumulative_rewards = {k: v for k, v in rewards.items()} infos = {} + terminations = {} + truncations = {} for i, agent_id in enumerate(decision_id): infos[agent_id] = {} infos[agent_id]["behavior_name"] = behavior_name infos[agent_id]["group_id"] = decision_batch.group_id[i] infos[agent_id]["group_reward"] = decision_batch.group_reward[i] + truncations[agent_id] = False + terminations[agent_id] = False for i, agent_id in enumerate(termination_id): infos[agent_id] = {} infos[agent_id]["behavior_name"] = behavior_name infos[agent_id]["group_id"] = termination_batch.group_id[i] infos[agent_id]["group_reward"] = termination_batch.group_reward[i] infos[agent_id]["interrupted"] = termination_batch.interrupted[i] + truncated = termination_batch.interrupted[i] + truncations[agent_id] = truncated + terminations[agent_id] = not truncated id_map = {agent_id: i for i, agent_id in enumerate(decision_id)} - return agents, obs, dones, rewards, cumulative_rewards, infos, id_map + return ( + agents, + obs, + terminations, + truncations, + rewards, + cumulative_rewards, + infos, + id_map, + ) def _parse_behavior(full_behavior): diff --git a/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py b/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py index bccae65c0f..d7dea3fc10 100644 --- a/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py +++ b/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py @@ -53,7 +53,8 @@ def observe(self, agent_id): return ( self._observations[agent_id], self._cumm_rewards[agent_id], - self._dones[agent_id], + self._terminations[agent_id], + self._truncations[agent_id], self._infos[agent_id], ) @@ -61,8 +62,10 @@ def last(self, observe=True): """ returns observation, cumulative reward, done, info for the current 
agent (specified by self.agent_selection) """ - obs, reward, done, info = self.observe(self._agents[self._agent_index]) - return obs if observe else None, reward, done, info + obs, cumm_rewards, terminated, truncated, info = self.observe( + self._agents[self._agent_index] + ) + return obs if observe else None, cumm_rewards, terminated, truncated, info @property def agent_selection(self): diff --git a/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py b/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py index 3f0513ffb0..f4209ba837 100644 --- a/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py +++ b/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py @@ -234,7 +234,7 @@ def _single_step(self, info: Union[DecisionSteps, TerminalSteps]) -> GymStepResu self.visual_obs = self._preprocess_single(visual_obs[0][0]) if isinstance(info, TerminalSteps): - interrupted = info.interrupted + interrupted = info.interrupted[0] terminated, truncated = not interrupted, interrupted else: terminated, truncated = False, False diff --git a/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py b/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py index 906905e83b..85ce904f24 100644 --- a/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py +++ b/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py @@ -20,13 +20,17 @@ def __init__(self, env: BaseEnv, seed: Optional[int] = None): """ super().__init__(env, seed) - def reset(self) -> Dict[str, Any]: + def reset( + self, + seed: int | None = None, + options: dict | None = None, + ) -> Tuple[Dict[str, Any], Dict[str, Dict]]: """ Resets the environment. """ - super().reset() + super().reset(seed=seed, options=options) - return self._observations + return self._observations, self._infos def step(self, actions: Dict[str, Any]) -> Tuple: self._assert_loaded() @@ -50,4 +54,10 @@ def step(self, actions: Dict[str, Any]) -> Tuple: self._cleanup_agents() self._live_agents.sort() # unnecessary, only for passing API test - return self._observations, self._rewards, self._dones, self._infos + return ( + self._observations, + self._rewards, + self._terminations, + self._truncations, + self._infos, + ) diff --git a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py index c040050a2b..41c1cff8c1 100644 --- a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py +++ b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py @@ -32,7 +32,8 @@ def __init__( self._possible_agents: Set[str] = set() # all agents that have ever appear self._agent_id_to_index: Dict[str, int] = {} # agent_id: index in decision step self._observations: Dict[str, np.ndarray] = {} # agent_id: obs - self._dones: Dict[str, bool] = {} # agent_id: done + self._terminations: Dict[str, bool] = {} # agent_id: terminated + self._truncations: Dict[str, bool] = {} # agent_id: truncated self._rewards: Dict[str, float] = {} # agent_id: reward self._cumm_rewards: Dict[str, float] = {} # agent_id: reward self._infos: Dict[str, Dict] = {} # agent_id: info @@ -45,7 +46,7 @@ def __init__( if not self._env.behavior_specs: self._env.step() for behavior_name in self._env.behavior_specs.keys(): - _, _, _ = self._batch_update(behavior_name) + _, _, _, _ = self._batch_update(behavior_name) self._update_observation_spaces() self._update_action_spaces() @@ -162,7 +163,7 @@ def _process_action(self, current_agent, action): else: action = ActionTuple(action, None) - if not self._dones[current_agent]: + if not 
(self._terminations[current_agent] or self._truncations[current_agent]):
             current_behavior = _agent_id_to_behavior(current_agent)
             current_index = self._agent_id_to_index[current_agent]
             if action.continuous is not None:
@@ -176,7 +177,8 @@
         else:
             self._live_agents.remove(current_agent)
             del self._observations[current_agent]
-            del self._dones[current_agent]
+            del self._terminations[current_agent]
+            del self._truncations[current_agent]
             del self._rewards[current_agent]
             del self._cumm_rewards[current_agent]
             del self._infos[current_agent]
@@ -187,15 +189,22 @@ def _step(self):
         self._env.step()
         self._reset_states()
         for behavior_name in self._env.behavior_specs.keys():
-            dones, rewards, cumulative_rewards = self._batch_update(behavior_name)
-            self._dones.update(dones)
+            terminations, truncations, rewards, cumulative_rewards = self._batch_update(
+                behavior_name
+            )
+            self._terminations.update(terminations)
+            self._truncations.update(truncations)
             self._rewards.update(rewards)
             self._cumm_rewards.update(cumulative_rewards)
         self._agent_index = 0

     def _cleanup_agents(self):
-        for current_agent, done in self.dones.items():
-            if done:
+        for current_agent, terminated in self.terminations.items():
+            if terminated:
+                self._live_agents.remove(current_agent)
+
+        for current_agent, truncated in self.truncations.items():
+            if truncated:
                 self._live_agents.remove(current_agent)

     @property
@@ -226,25 +235,33 @@ def _reset_states(self):
         self._live_agents = []
         self._agents = []
         self._observations = {}
-        self._dones = {}
+        self._terminations = {}
+        self._truncations = {}
         self._rewards = {}
         self._cumm_rewards = {}
         self._infos = {}
         self._agent_id_to_index = {}

-    def reset(self):
+    def reset(
+        self,
+        seed: int | None = None,
+        options: dict | None = None,
+    ) -> Any:
         """
         Resets the environment.
         """
+        self._seed = seed
+
         self._assert_loaded()
         self._agent_index = 0
         self._reset_states()
         self._possible_agents = set()
         self._env.reset()
         for behavior_name in self._env.behavior_specs.keys():
-            _, _, _ = self._batch_update(behavior_name)
+            _, _, _, _ = self._batch_update(behavior_name)
         self._live_agents.sort()  # unnecessary, only for passing API test
-        self._dones = {agent: False for agent in self._agents}
+        self._terminations = {agent: False for agent in self._agents}
+        self._truncations = {agent: False for agent in self._agents}
         self._rewards = {agent: 0 for agent in self._agents}
         self._cumm_rewards = {agent: 0 for agent in self._agents}
@@ -256,7 +273,8 @@ def _batch_update(self, behavior_name):
         (
             agents,
             obs,
-            dones,
+            terminations,
+            truncations,
             rewards,
             cumulative_rewards,
             infos,
@@ -268,29 +286,28 @@ def _batch_update(self, behavior_name):
         self._infos.update(infos)
         self._agent_id_to_index.update(id_map)
         self._possible_agents.update(agents)
-        return dones, rewards, cumulative_rewards
+        return terminations, truncations, rewards, cumulative_rewards

-    def seed(self, seed=None):
-        """
-        Reseeds the environment (making the resulting environment deterministic).
-        `reset()` must be called after `seed()`, and before `step()`.
-        """
-        self._seed = seed
-
-    def render(self, mode="human"):
+    def render(self):
         """
         NOT SUPPORTED.

-        Displays a rendered frame from the environment, if supported.
-        Alternate render modes in the default environments are `'rgb_array'`
+        Renders the environment as specified by self.render_mode, if supported.
+
+        Render mode can be `human` to display a window.
+        Other render modes in the default environments are `'rgb_array'`
         which returns a numpy array and is supported by all environments outside
         of classic, and `'ansi'` which returns the strings printed
         (specific to classic environments).
         """
         pass

     @property
-    def dones(self):
-        return dict(self._dones)
+    def terminations(self):
+        return dict(self._terminations)
+
+    @property
+    def truncations(self):
+        return dict(self._truncations)

     @property
     def agents(self):
diff --git a/ml-agents-envs/pyproject.toml b/ml-agents-envs/pyproject.toml
index 51752d556d..104caf7a78 100644
--- a/ml-agents-envs/pyproject.toml
+++ b/ml-agents-envs/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "mlagents_envs"
-version = "1.2.0.dev0"
+version = "None"
 description = "Unity Machine Learning Agents Interface"
 homepage = "https://github.com/Unity-Technologies/ml-agents"
 authors = ["Unity Technologies "]
@@ -28,7 +28,7 @@ Pillow = ">=4.2.1"
 protobuf = ">=3.6,<3.21"
 pyyaml = ">=3.1.0"
 gymnasium = ">=0.25.0"
-pettingzoo = ">=1.15.0"
+pettingzoo = ">=1.22.0"
 numpy = ">=1.23.5,<2.0"
 filelock = ">=3.4.0"
 cloudpickle = "*"
diff --git a/ml-agents-envs/setup.py b/ml-agents-envs/setup.py
index bd40cb4c01..2eb0a0401d 100644
--- a/ml-agents-envs/setup.py
+++ b/ml-agents-envs/setup.py
@@ -4,7 +4,7 @@
 from setuptools.command.install import install
 import mlagents_envs

-VERSION = mlagents_envs.__version__
+VERSION = (None,)
 EXPECTED_TAG = mlagents_envs.__release_tag__

 here = os.path.abspath(os.path.dirname(__file__))
@@ -35,7 +35,7 @@ def run(self):

 setup(
     name="mlagents_envs",
-    version=VERSION,
+    version=None,
     description="Unity Machine Learning Agents Interface",
     long_description=long_description,
     long_description_content_type="text/markdown",
@@ -59,7 +59,7 @@ def run(self):
         "protobuf>=3.6,<3.21",
         "pyyaml>=3.1.0",
         "gymnasium>=0.25.0",
-        "pettingzoo>=1.15.0",
+        "pettingzoo>=1.22.0",
         "numpy>=1.23.5,<2.0",
         "filelock>=3.4.0",
     ],

From 440c5cf86a064551f7adc1171f8820e4e72469dc Mon Sep 17 00:00:00 2001
From: alexander-zap
Date: Tue, 17 Jun 2025 18:23:25 +0200
Subject: [PATCH 8/9] Set continuous action space as dtype float64 instead of int32

---
 ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py
index 41c1cff8c1..34fb60ca68 100644
--- a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py
+++ b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py
@@ -133,7 +133,7 @@ def _update_action_spaces(self) -> None:
                 continue
             if act_spec.continuous_size > 0:
                 c_space = spaces.Box(
-                    -1, 1, (act_spec.continuous_size,), dtype=np.int32
+                    -1, 1, (act_spec.continuous_size,), dtype=np.float64
                 )
                 if self._seed is not None:
                     c_space.seed(self._seed)

From f0d25ea1c22b676177b2e1d78a09074f2733ba70 Mon Sep 17 00:00:00 2001
From: alexander-zap
Date: Mon, 30 Jun 2025 15:27:59 +0200
Subject: [PATCH 9/9] Fixed interface problems with pettingzoo implementation

- all agents return observations synchronously (instead of having steps where only one agent returns an observation)
- in case the agent sends a TerminationStep *and* a DecisionStep after, the reward from the termination step should be taken (but for observations it's the other way around because of SB3 convention)
- `agents` attribute should store agents even if they have *just* finished (they should be removed the step after)
- fixed a bug where a continuous action vector was assigned
len(action) * action[0] - fixed double removal of an agent from _live_agents for ParallelEnv --- ml-agents-envs/mlagents_envs/base_env.py | 46 +++++++++++++++++++ .../mlagents_envs/envs/env_helpers.py | 18 +++++--- .../mlagents_envs/envs/unity_parallel_env.py | 3 +- .../envs/unity_pettingzoo_base_env.py | 32 +++++++++++-- 4 files changed, 88 insertions(+), 11 deletions(-) diff --git a/ml-agents-envs/mlagents_envs/base_env.py b/ml-agents-envs/mlagents_envs/base_env.py index 67deb26e85..eb3acdfff2 100644 --- a/ml-agents-envs/mlagents_envs/base_env.py +++ b/ml-agents-envs/mlagents_envs/base_env.py @@ -138,6 +138,30 @@ def __getitem__(self, agent_id: AgentId) -> DecisionStep: def __iter__(self) -> Iterator[Any]: yield from self.agent_id + def __add__(self, other: "DecisionSteps") -> "DecisionSteps": + assert isinstance(other, DecisionSteps) + + combined_terminal_steps = DecisionSteps( + list(np.hstack([self.obs, other.obs])), + np.hstack([self.reward, other.reward]), + np.hstack([self.agent_id, other.agent_id]), + list(np.hstack([self.action_mask, other.action_mask])) + if self.action_mask or other.action_mask + else None, + np.hstack([self.group_id, other.group_id]), + np.hstack([self.group_reward, other.group_reward]), + ) + combined_terminal_steps._agent_id_to_index = { + **self.agent_id_to_index, + # shift index of added termination steps because of appending + **{ + agent_id: index + len(self) + for agent_id, index in other.agent_id_to_index.items() + }, + } + + return combined_terminal_steps + @staticmethod def empty(spec: "BehaviorSpec") -> "DecisionSteps": """ @@ -245,6 +269,28 @@ def __getitem__(self, agent_id: AgentId) -> TerminalStep: def __iter__(self) -> Iterator[Any]: yield from self.agent_id + def __add__(self, other: "TerminalSteps") -> "TerminalSteps": + assert isinstance(other, TerminalSteps) + + combined_terminal_steps = TerminalSteps( + list(np.hstack([self.obs, other.obs])), + np.hstack([self.reward, other.reward]), + np.hstack([self.interrupted, other.interrupted]), + np.hstack([self.agent_id, other.agent_id]), + np.hstack([self.group_id, other.group_id]), + np.hstack([self.group_reward, other.group_reward]), + ) + combined_terminal_steps._agent_id_to_index = { + **self.agent_id_to_index, + # shift index of added termination steps because of appending + **{ + agent_id: index + len(self) + for agent_id, index in other.agent_id_to_index.items() + }, + } + + return combined_terminal_steps + @staticmethod def empty(spec: "BehaviorSpec") -> "TerminalSteps": """ diff --git a/ml-agents-envs/mlagents_envs/envs/env_helpers.py b/ml-agents-envs/mlagents_envs/envs/env_helpers.py index 0c17c2b20d..7d8ef10687 100644 --- a/ml-agents-envs/mlagents_envs/envs/env_helpers.py +++ b/ml-agents-envs/mlagents_envs/envs/env_helpers.py @@ -17,7 +17,11 @@ def _unwrap_batch_steps(batch_steps, behavior_name): termination_id = [ _behavior_to_agent_id(behavior_name, i) for i in termination_batch.agent_id ] - agents = decision_id + termination_id + agents = decision_id + for id in termination_id: + if id not in agents: + agents.append(id) + obs = { agent_id: [batch_obs[i] for batch_obs in termination_batch.obs] for i, agent_id in enumerate(termination_id) @@ -41,11 +45,13 @@ def _unwrap_batch_steps(batch_steps, behavior_name): ) obs = {k: v if len(v) > 1 else v[0] for k, v in obs.items()} rewards = { - agent_id: termination_batch.reward[i] - for i, agent_id in enumerate(termination_id) + agent_id: decision_batch.reward[i] for i, agent_id in enumerate(decision_id) } rewards.update( - {agent_id: 
decision_batch.reward[i] for i, agent_id in enumerate(decision_id)} + { + agent_id: termination_batch.reward[i] + for i, agent_id in enumerate(termination_id) + } ) cumulative_rewards = {k: v for k, v in rewards.items()} infos = {} @@ -63,8 +69,8 @@ def _unwrap_batch_steps(batch_steps, behavior_name): infos[agent_id]["behavior_name"] = behavior_name infos[agent_id]["group_id"] = termination_batch.group_id[i] infos[agent_id]["group_reward"] = termination_batch.group_reward[i] - infos[agent_id]["interrupted"] = termination_batch.interrupted[i] - truncated = termination_batch.interrupted[i] + truncated = bool(termination_batch.interrupted[i]) + infos[agent_id]["interrupted"] = truncated truncations[agent_id] = truncated terminations[agent_id] = not truncated id_map = {agent_id: i for i, agent_id in enumerate(decision_id)} diff --git a/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py b/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py index 85ce904f24..9121199e4a 100644 --- a/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py +++ b/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py @@ -50,8 +50,7 @@ def step(self, actions: Dict[str, Any]) -> Tuple: # Step environment self._step() - # Agent cleanup and sorting - self._cleanup_agents() + # Agent sorting self._live_agents.sort() # unnecessary, only for passing API test return ( diff --git a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py index 34fb60ca68..3c62885f12 100644 --- a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py +++ b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py @@ -169,7 +169,7 @@ def _process_action(self, current_agent, action): if action.continuous is not None: self._current_action[current_behavior].continuous[ current_index - ] = action.continuous[0] + ] = action.continuous if action.discrete is not None: self._current_action[current_behavior].discrete[ current_index @@ -186,7 +186,33 @@ def _process_action(self, current_agent, action): def _step(self): for behavior_name, actions in self._current_action.items(): self._env.set_actions(behavior_name, actions) - self._env.step() + + def step_and_return_steps(behavior_name): + self._env.step() + decision_steps, termination_steps = self._env.get_steps(behavior_name) + return decision_steps, termination_steps + + # DecisionSteps are assumed come in synchronously at every `DecisionPeriod` frame, + # but TerminationSteps can be sent inbetween. Therefore, to collect step information about all agents, + # we need to continue stepping the environment. + # NOTE: This can lead to returning TerminationSteps and subsequent DecisionSteps at the same time for an agent + # (but this was also possible before). 
+ for behavior_name in self._env.behavior_specs.keys(): + decision_steps, termination_steps = step_and_return_steps(behavior_name) + collected_decision_steps = decision_steps + collected_termination_steps = termination_steps + while not len(set(collected_decision_steps.agent_id)) >= len(self._agents): + decision_steps, termination_steps = step_and_return_steps(behavior_name) + if len(decision_steps) > 0: + collected_decision_steps += decision_steps + if len(termination_steps) > 0: + collected_termination_steps += termination_steps + + self._env._env_state[behavior_name] = ( + collected_decision_steps, + collected_termination_steps, + ) + self._reset_states() for behavior_name in self._env.behavior_specs.keys(): terminations, truncations, rewards, cumulative_rewards = self._batch_update( @@ -311,7 +337,7 @@ def truncations(self): @property def agents(self): - return sorted(self._live_agents) + return sorted(self._agents) @property def rewards(self):
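
For reference, a minimal usage sketch of the gymnasium-style single-agent API that these patches introduce; the executable path "UnityBuild" is a placeholder for a built Unity environment, and the wrapper's pre-existing `action_space` property and `close()` method are assumed unchanged by the patches above:

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper

# "UnityBuild" is a placeholder path to a built Unity environment executable.
unity_env = UnityEnvironment("UnityBuild")
env = UnityToGymWrapper(unity_env)

# reset() now returns (observation, info) and takes keyword-only seed/options.
observation, info = env.reset(seed=0)

terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()
    # step() now returns a gymnasium-style 5-tuple; terminated/truncated
    # replace the single done flag (truncated mirrors TerminalSteps.interrupted).
    observation, reward, terminated, truncated, info = env.step(action)

env.close()
```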