@@ -87,25 +87,46 @@ def feed_forward_generator(
         self,
         num_mini_batch: int,
         mini_batch_size: int,
+        allow_replacement: bool = True,
     ) -> Generator[Tuple[torch.Tensor, torch.Tensor], None, None]:
         """
-        Yield mini-batches of (state, next_state) tuples from the buffer.
-
-        Args:
-            num_mini_batch (int): Number of mini-batches to generate.
-            mini_batch_size (int): Number of samples per mini-batch.
-
-        Yields:
-            Tuple[Tensor, Tensor]: A mini-batch of states and next_states.
+        Yield `num_mini_batch` mini-batches of (state, next_state) tuples from the buffer,
+        each of length `mini_batch_size`.
+
+        If the total number of requested samples is larger than the number of
+        items currently stored (`len(self)`), the method will
+
+        * raise an error when `allow_replacement=False`;
+        * silently sample **with replacement** when `allow_replacement=True`
+          (the default).
+
+        Args
+        ----
+        num_mini_batch : int
+        mini_batch_size : int
+        allow_replacement : bool, optional
+            Whether to allow sampling with replacement when the request
+            exceeds the number of stored transitions.
         """
         total = num_mini_batch * mini_batch_size
-        assert (
-            total <= self.num_samples
-        ), f"Not enough samples in buffer: requested {total}, but have {self.num_samples}"

-        # Generate a random permutation of valid indices on-device
-        indices = torch.randperm(self.num_samples, device=self.device)[:total]
+        # Sampling with replacement might yield duplicate samples, which can affect training dynamics
+        if total > self.num_samples:
+            if not allow_replacement:
+                raise ValueError(
+                    f"Not enough samples in buffer: requested {total}, "
+                    f"but have {self.num_samples}"
+                )
+            # Permute-then-modulo
+            cycles = (total + self.num_samples - 1) // self.num_samples
+            big_size = self.num_samples * cycles
+            big_perm = torch.randperm(big_size, device=self.device)
+            indices = big_perm[:total] % self.num_samples
+        else:
+            # Sample WITHOUT replacement
+            indices = torch.randperm(self.num_samples, device=self.device)[:total]

+        # Yield the mini-batches
         for i in range(num_mini_batch):
             batch_idx = indices[i * mini_batch_size : (i + 1) * mini_batch_size]
             yield self.states[batch_idx], self.next_states[batch_idx]
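
The permute-then-modulo branch can be checked in isolation. Below is a minimal sketch with made-up sizes (`num_samples = 10`, `total = 25`; neither value comes from the PR) showing that the trick returns exactly `total` in-range indices while bounding how often any single index can repeat:

```python
import torch

# Illustrative sizes, not taken from the PR.
num_samples = 10   # transitions currently stored
total = 25         # num_mini_batch * mini_batch_size

# Number of full copies of the index range needed to cover the request.
cycles = (total + num_samples - 1) // num_samples   # ceil(25 / 10) == 3
big_size = num_samples * cycles                      # 30

# Permute the enlarged range, keep the first `total` entries, and fold them
# back into [0, num_samples) so indices may repeat.
big_perm = torch.randperm(big_size)
indices = big_perm[:total] % num_samples

assert indices.shape == (total,)
assert int(indices.max()) < num_samples

# Every residue class occurs exactly `cycles` times in [0, big_size),
# so no transition can be drawn more than `cycles` times.
counts = torch.bincount(indices, minlength=num_samples)
assert int(counts.max()) <= cycles
```

Note that this is not i.i.d. sampling with replacement: duplicates are capped at `cycles` per transition, which keeps the oversampling roughly balanced across the buffer instead of concentrating on a few indices.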
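For an end-to-end view of the new `allow_replacement` path, here is a self-contained sketch. The `_ToyBuffer` class, its field sizes, and the one-layer model are invented for illustration; only the generator logic mirrors the diff. The loop deliberately requests more samples than are stored so the with-replacement branch is exercised:

```python
from typing import Generator, Tuple

import torch
import torch.nn as nn


class _ToyBuffer:
    """Minimal stand-in exposing the attributes the generator relies on."""

    def __init__(self, num_samples: int, state_dim: int, device: str = "cpu") -> None:
        self.device = torch.device(device)
        self.num_samples = num_samples
        self.states = torch.randn(num_samples, state_dim, device=self.device)
        self.next_states = torch.randn(num_samples, state_dim, device=self.device)

    def feed_forward_generator(
        self,
        num_mini_batch: int,
        mini_batch_size: int,
        allow_replacement: bool = True,
    ) -> Generator[Tuple[torch.Tensor, torch.Tensor], None, None]:
        total = num_mini_batch * mini_batch_size
        if total > self.num_samples:
            if not allow_replacement:
                raise ValueError(f"Not enough samples: requested {total}, have {self.num_samples}")
            cycles = (total + self.num_samples - 1) // self.num_samples
            big_perm = torch.randperm(self.num_samples * cycles, device=self.device)
            indices = big_perm[:total] % self.num_samples
        else:
            indices = torch.randperm(self.num_samples, device=self.device)[:total]
        for i in range(num_mini_batch):
            batch_idx = indices[i * mini_batch_size : (i + 1) * mini_batch_size]
            yield self.states[batch_idx], self.next_states[batch_idx]


buffer = _ToyBuffer(num_samples=100, state_dim=8)
model = nn.Linear(8, 8)                                   # toy next-state predictor
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

# 4 * 64 = 256 > 100 stored transitions, so indices are drawn with replacement.
for state, next_state in buffer.feed_forward_generator(num_mini_batch=4, mini_batch_size=64):
    loss = loss_fn(model(state), next_state)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
```

Passing `allow_replacement=False` in the same situation would raise the `ValueError` instead, which is useful when silent duplication of transitions is undesirable.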