init

pytorch · vmoens · Jun 12, 2024 · Jun 11, 2024 · Jun 11, 2024 · Jun 11, 2024
commit 5967d006d26285eaadaffc47392f1e6b40cf4e03
diff --git a/torchrl/data/replay_buffers/samplers.py b/torchrl/data/replay_buffers/samplers.py
@@ -926,7 +926,9 @@ def __repr__(self):
         )
 
     @classmethod
-    def _find_start_stop_traj(cls, *, trajectory=None, end=None, at_capacity: bool):
+    def _find_start_stop_traj(
+        cls, *, trajectory=None, end=None, at_capacity: bool, cursor=None
+    ):
         if trajectory is not None:
             # slower
             # _, stop_idx = torch.unique_consecutive(trajectory, return_counts=True)
@@ -954,12 +956,22 @@ def _find_start_stop_traj(cls, *, trajectory=None, end=None, at_capacity: bool):
                 dim=0,
                 value=1,
             )
-        elif not end.any(0).all():
-            # we must have at least one end by traj to delimitate trajectories
+        else:
+            # we must have at least one end per trajectory to tell trajectories apart
             # so if no end can be found we set it manually
-            mask = ~end.any(0, True)
-            mask = torch.cat([torch.zeros_like(end[:-1]), mask])
-            end = torch.masked_fill(mask, end, 1)
+            if cursor is not None:
+                if isinstance(cursor, torch.Tensor):
+                    cursor = cursor[-1].item()
+                end = torch.index_fill(
+                    end,
+                    index=torch.tensor(cursor, device=end.device, dtype=torch.long),
+                    dim=0,
+                    value=1,
+                )
+            if not end.any(0).all():
+                mask = ~end.any(0, True)
+                mask = torch.cat([torch.zeros_like(end[:-1]), mask])
+                end = torch.masked_fill(mask, end, 1)
         ndim = end.ndim
         if ndim == 0:
             raise RuntimeError(
@@ -1032,6 +1044,7 @@ def _tensor_slices_from_startend(self, seq_length, start, storage_length):
         return result
 
     def _get_stop_and_length(self, storage, fallback=True):
+        last_cursor = getattr(storage, "_last_cursor", None)
         if self.cache_values and "stop-and-length" in self._cache:
             return self._cache.get("stop-and-length")
 
@@ -1072,7 +1085,9 @@ def _get_stop_and_length(self, storage, fallback=True):
                         "Could not get a tensordict out of the storage, which is required for SliceSampler to compute the trajectories."
                     )
                 vals = self._find_start_stop_traj(
-                    end=done.squeeze()[: len(storage)], at_capacity=storage._is_full
+                    end=done.squeeze()[: len(storage)],
+                    at_capacity=storage._is_full,
+                    cursor=getattr(storage, "_last_cursor", None),
                 )
                 if self.cache_values:
                     self._cache["stop-and-length"] = vals

diff --git a/torchrl/envs/common.py b/torchrl/envs/common.py
@@ -2609,9 +2609,10 @@ def rollout(
             for key in self.done_keys:
                 if _ends_with(key, "truncated"):
                     val = out_td.get(("next", key))
+                    done = out_td.get(("next", _replace_last(key, "done")))
                     val[(slice(None),) * (out_td.ndim - 1) + (-1,)] = True
                     out_td.set(("next", key), val)
-                    out_td.set(("next", _replace_last(key, "done")), val)
+                    out_td.set(("next", _replace_last(key, "done")), val | done)
                     found_truncated = True
             if not found_truncated:
                 raise RuntimeError(

diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py
@@ -3048,7 +3048,7 @@ def unfold_done(done, N):
             reset_unfold_list = [torch.zeros_like(reset_unfold_slice)]
             for r in reversed(reset_unfold.unbind(-1)):
                 reset_unfold_list.append(r | reset_unfold_list[-1])
-                reset_unfold_slice = reset_unfold_list[-1]
+                # reset_unfold_slice = reset_unfold_list[-1]
             reset_unfold = torch.stack(list(reversed(reset_unfold_list))[1:], -1)
             reset = reset[prefix + (slice(self.N - 1, None),)]
             reset[prefix + (0,)] = 1