Skip to content

Commit 82b48e9

Browse files
committed
Optimized cache generation with scatter call and inverse_indices
Add a standalone cache-generation script that asynchronously generates and saves caches; update the run code to load the pre-saved cache files.
1 parent 2ea4bca commit 82b48e9

3 files changed

Lines changed: 15 additions & 9 deletions

File tree

DGraph/distributed/RankLocalOps.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,13 @@ def RankLocalRenumberingWithMapping(_indices, rank_mapping):
140140
unique_indices, inverse_indices = torch.unique(_indices, return_inverse=True)
141141
rank_mapping = rank_mapping.to(_indices.device)
142142
renumbered_indices = inverse_indices
143+
unique_rank_mapping = torch.zeros_like(
144+
unique_indices, dtype=rank_mapping.dtype, device=rank_mapping.device
)
145+
144150
unique_rank_mapping.scatter_(0, inverse_indices, rank_mapping)
145151

146152
return renumbered_indices, unique_indices, unique_rank_mapping

experiments/OGB/GenerateCache.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
"ogbn-arxiv": "arxiv",
3131
"ogbn-products": "products",
3232
"ogbn-papers100M": "papers100M",
33+
"ogbn-proteins": "proteins",
3334
}
3435

3536

@@ -85,7 +86,7 @@ def generate_cache_file(
8586

8687

8788
def main(dset: str, world_size: int, node_rank_placement_file: str):
88-
assert dset in ["ogbn-arxiv", "ogbn-products", "ogbn-papers100M"]
89+
assert dset in ["ogbn-arxiv", "ogbn-products", "ogbn-papers100M", "ogbn-proteins"]
8990

9091
assert world_size > 0
9192
assert os.path.exists(

experiments/OGB/main.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def _run_experiment(
157157

158158
# This says where the edges are located
159159
edge_placement = rank_mappings[0]
160-
160+
161161
cache_prefix = f"cache/{dset_name}"
162162
scatter_cache_file = f"{cache_prefix}_scatter_cache_{world_size}_{rank}.pt"
163163
gather_cache_file = f"{cache_prefix}_gather_cache_{world_size}_{rank}.pt"
@@ -187,8 +187,8 @@ def _run_experiment(
187187
world_size,
188188
)
189189
with open(f"{log_prefix}_gather_cache_{world_size}_{rank}.pt", "wb") as f:
190-
torch.save(gather_cache, f)
191-
190+
torch.save(gather_cache, f)
191+
192192
if scatter_cache is None:
193193
nodes_per_rank = dataset.graph_obj.get_nodes_per_rank()
194194

@@ -230,12 +230,11 @@ def _run_experiment(
230230
end_time = perf_counter()
231231
print(f"Rank: {rank} Cache Generation Time: {end_time - start_time:.4f} s")
232232

233-
234-
#with open(f"{log_prefix}_gather_cache_{world_size}_{rank}.pt", "wb") as f:
233+
# with open(f"{log_prefix}_gather_cache_{world_size}_{rank}.pt", "wb") as f:
235234
# torch.save(gather_cache, f)
236-
#with open(f"{log_prefix}_scatter_cache_{world_size}_{rank}.pt", "wb") as f:
235+
# with open(f"{log_prefix}_scatter_cache_{world_size}_{rank}.pt", "wb") as f:
237236
# torch.save(scatter_cache, f)
238-
#print(f"Rank: {rank} Cache Generated")
237+
# print(f"Rank: {rank} Cache Generated")
239238

240239
training_times = []
241240
for i in range(epochs):
@@ -391,7 +390,7 @@ def main(
391390
use_cache=use_cache,
392391
num_classes=num_classes,
393392
dset_name=dset_name,
394-
in_dim=in_dims[dset_name]
393+
in_dim=in_dims[dset_name],
395394
)
396395
training_trajectores[i] = training_traj
397396
validation_trajectores[i] = val_traj

0 commit comments

Comments
 (0)