
Commit 38ee37b

szaman19 and bvanessen authored
Edge placement update (#11)
Updated API for the NCCL and NVSHMEM backends for DGraph, with running OGB and benchmark code. Incorporates caching to reduce overhead. In detail:

* Add multi-partition function
* Add preprocessing code
* Add padding functionality to NVSHMEM operations, enabling arbitrarily shaped inputs to the function
* Revamp the distributed graph implementation to simplify the code and remove the old slicing-based code; also adds documentation
* Update Graphcast implementation with DGraph distributed
* Incorporate NCCL cache into the backend engine
* Update local tensor getter to use a placement tensor mask
* Add Graphcast distributed trainer
* Fix distributed graph object
* Save intermediate preprocessed file so it can be reused
* Remove unnecessary complexity of file passing and use a single torch tensor
* Update Graphcast static graph generator with preprocessing code
* Update mesh graph placement algorithm
* Fix scatter test with new API
* Update distributed GCN with edge placement tensor
* Add GatherCacheGenerator and ScatterCacheGenerator
* Add Graphcast preprocessing
* Add static method for mesh partitioning
* Add grid vertex placement logic to MeshGraph
* Add OGBN-products update
* Overly complicated but correct graph data
* Add separated-out benchmarking code for small tests
* Fix missing batch dim
* More general fix
* Disable some incomplete NVSHMEM caching optimizations; add code to set the default PyTorch device
* Add NVSHMEM benchmark code; append backend type to output files; add sample plot generation code
* Add torch distributed init with NVSHMEM communicator
* Apply suggestions from code review
* Fix plotting script to grab the right log files
* Fix the cached benchmarks
* Apply review suggestions, remove dead code

Co-authored-by: Brian C. Van Essen <vanessen1@llnl.gov>
1 parent 1692164 · commit 38ee37b

31 files changed: 2730 additions & 517 deletions
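Among the listed changes, padding was added to the NVSHMEM operations so that arbitrarily shaped inputs can be handled. The DGraph padding code itself is not shown in this diff; the sketch below is only a hypothetical illustration of the general idea — zero-pad a tensor along one dimension up to a fixed multiple (as symmetric communication buffers typically require), remembering the original size so the padding can be stripped afterwards:

```python
import torch

def pad_to_multiple(t: torch.Tensor, multiple: int, dim: int = 0):
    """Zero-pad `t` along `dim` to the next multiple of `multiple`.

    Returns the padded tensor and the original size along `dim`,
    so the padding can be stripped after communication.
    """
    size = t.size(dim)
    pad = (-size) % multiple
    if pad == 0:
        return t, size
    pad_shape = list(t.shape)
    pad_shape[dim] = pad
    return torch.cat([t, t.new_zeros(pad_shape)], dim=dim), size

x = torch.ones(5, 3)
padded, orig_size = pad_to_multiple(x, 4, dim=0)  # padded to 8 rows
local = padded.narrow(0, 0, orig_size)            # strip padding again
```

The helper name and return convention here are invented for illustration, not the actual DGraph API.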

DGraph/Communicator.py

Lines changed: 32 additions & 2 deletions
@@ -12,9 +12,9 @@
 #
 # SPDX-License-Identifier: (Apache-2.0)
 import torch
-from DGraph.distributed.mpi import MPIBackendEngine
+
 from DGraph.distributed.nccl import NCCLBackendEngine
-from DGraph.distributed.nvshmem import NVSHMEMBackendEngine
+
 from DGraph.CommunicatorBase import CommunicatorBase

 SUPPORTED_BACKENDS = ["nccl", "mpi", "nvshmem"]
@@ -38,8 +38,12 @@ def __init__(self, backend: str, **kwargs) -> None:
         if backend == "nccl":
             self.__backend_engine = NCCLBackendEngine()
         elif backend == "mpi":
+            from DGraph.distributed.mpi import MPIBackendEngine
+
             self.__backend_engine = MPIBackendEngine(**kwargs)
         elif backend == "nvshmem":
+            from DGraph.distributed.nvshmem import NVSHMEMBackendEngine
+
             self.__backend_engine = NVSHMEMBackendEngine()
         else:
             raise NotImplementedError(f"Backend {backend} not implemented")
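The hunk above defers the MPI and NVSHMEM imports into their `elif` branches, so an environment that lacks those optional dependencies can still construct the NCCL backend. A self-contained sketch of this deferred-import pattern, using stdlib modules as stand-ins for the real backend engines:

```python
def make_serializer(backend: str):
    # Import each optional module only when it is actually selected,
    # mirroring how Communicator defers the MPI/NVSHMEM imports.
    if backend == "json":
        import json          # stand-in for the NCCL backend import
        return json.dumps
    elif backend == "pickle":
        import pickle        # stand-in for the MPI backend import
        return pickle.dumps
    else:
        raise NotImplementedError(f"Backend {backend} not implemented")

print(make_serializer("json")({"a": 1}))  # → {"a": 1}
```

The cost of an unavailable import is paid only on the branch that needs it, which is the point of the change.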
@@ -65,6 +69,32 @@ def get_local_rank_slice(self, tensor: torch.Tensor, dim: int = -1) -> torch.Ten
         self.__check_init()
         return self.__backend_engine.get_local_rank_slice(tensor, dim)

+    def get_local_tensor(
+        self, tensor: torch.Tensor, placement_tensor: torch.Tensor, dim: int = -1
+    ) -> torch.Tensor:
+        """Returns the part of the tensor owned by the current process, as
+        selected by the placement tensor.
+
+        Args:
+            tensor: The tensor to be sliced.
+            placement_tensor: A 1-D tensor of rank indices along `dim`; the
+                elements whose entry equals the current rank are kept.
+            dim: The dimension along which the tensor should be sliced.
+
+        Returns:
+            (torch.Tensor): The local tensor corresponding to the current process.
+        """
+        self.__check_init()
+        mask = (placement_tensor == self.get_rank()).bool()
+        mask_shape = [1] * tensor.ndim
+        mask_shape[dim] = mask.size(0)
+        mask_expanded = mask.view(mask_shape).expand_as(tensor)
+        masked_tensor = tensor[mask_expanded]
+        new_shape = list(tensor.shape)
+        new_shape[dim] = int(mask.sum().item())
+        masked_tensor = masked_tensor.view(new_shape)
+
+        return masked_tensor
+
     def scatter(self, *args, **kwargs) -> torch.Tensor:
         self.__check_init()
         return self.__backend_engine.scatter(*args, **kwargs)
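The new `get_local_tensor` builds a boolean mask from the placement tensor, selects the matching elements, and reshapes them back to the tensor's original rank. A standalone sketch of the same masking logic, with the rank passed in explicitly rather than read from the communicator:

```python
import torch

def local_slice(tensor: torch.Tensor, placement: torch.Tensor,
                rank: int, dim: int = -1) -> torch.Tensor:
    # True where the placement tensor assigns the element along `dim`
    # to this rank.
    mask = (placement == rank).bool()
    mask_shape = [1] * tensor.ndim
    mask_shape[dim] = mask.size(0)
    # Broadcast the 1-D mask across all other dimensions.
    expanded = mask.view(mask_shape).expand_as(tensor)
    selected = tensor[expanded]
    new_shape = list(tensor.shape)
    new_shape[dim] = int(mask.sum().item())
    return selected.view(new_shape)

features = torch.arange(12.0).view(4, 3)   # 4 nodes, 3 features each
placement = torch.tensor([0, 1, 0, 1])     # node -> owning rank
local = local_slice(features, placement, rank=0, dim=0)  # rows 0 and 2
```

The function name is invented for this sketch; the method signature in the commit additionally checks communicator initialization.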

DGraph/CommunicatorBase.py

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ class CommunicatorBase:
     _is_initialized = False

     def __init__(self):
+        self.backend = ""
         pass

     def init_process_group(self, backend: str, **kwargs):
