Fung-Lab
diff --git a/‎data/test_data/processed/data.pt‎
2.1 MB b/‎data/test_data/processed/data.pt‎
2.1 MB
diff --git a/‎matdeeplearn/common/data.py‎
Lines changed: 2 additions & 1 deletion b/‎matdeeplearn/common/data.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎matdeeplearn/preprocessor/datasets.py‎
Lines changed: 19 additions & 15 deletions b/‎matdeeplearn/preprocessor/datasets.py‎
Lines changed: 19 additions & 15 deletions
diff --git a/‎matdeeplearn/preprocessor/helpers.py‎
Lines changed: 55 additions & 70 deletions b/‎matdeeplearn/preprocessor/helpers.py‎
Lines changed: 55 additions & 70 deletions
@@ -103,6 +103,7 @@ def get_dataloader(
     batch_size: int,
     num_workers: int = 0,
     sampler=None,
+    shuffle=True
 ):
     """
     Returns a single dataloader for a given dataset
@@ -124,7 +125,7 @@ def get_dataloader(
     loader = DataLoader(
         dataset,
         batch_size=batch_size,
-        shuffle=(sampler is None),
+        shuffle=shuffle,
         num_workers=num_workers,
         sampler=sampler,
     )
 
@@ -1,29 +1,34 @@
-import os
+import torch, os
 
-import torch
 from torch_geometric.data import InMemoryDataset
 
-
 class StructureDataset(InMemoryDataset):
     def __init__(
         self,
-        root,
-        processed_data_path,
-        transform=None,
-        pre_transform=None,
+        root, 
+        processed_data_path, 
+        transform=None, 
+        pre_transform=None, 
         pre_filter=None,
+        device=None
     ):
         self.root = root
         self.processed_data_path = processed_data_path
-        super(StructureDataset, self).__init__(
-            root, transform, pre_transform, pre_filter
-        )
-        self.data, self.slices = torch.load(self.processed_paths[0])
-
+        super(StructureDataset, self).__init__(root, transform, pre_transform, pre_filter)
+
+        if not torch.cuda.is_available() or device == "cpu":
+            self.data, self.slices = torch.load(
+                self.processed_paths[0],
+                map_location=torch.device('cpu')
+            )
+        else:
+            self.data, self.slices = torch.load(self.processed_paths[0])
+            
+    
     @property
     def raw_file_names(self):
         """
-        The name of the files in the self.raw_dir folder
+        The name of the files in the self.raw_dir folder 
         that must be present in order to skip downloading.
         """
         return []
@@ -41,11 +46,10 @@ def processed_dir(self):
     @property
     def processed_file_names(self):
         """
-        The name of the files in the self.processed_dir
+        The name of the files in the self.processed_dir 
         folder that must be present in order to skip processing.
         """
         return ["data.pt"]
 
-
 class LargeStructureDataset(InMemoryDataset):
     pass
@@ -1,35 +1,36 @@
+import numpy as np
+import ase
+from ase import io
+import torch
 import itertools
 from pathlib import Path
 
-import ase
-import numpy as np
 import torch
 import torch.nn.functional as F
-from ase import io
+from torch_geometric.utils import dense_to_sparse, degree, add_self_loops
 from torch_geometric.data.data import Data
-from torch_geometric.utils import add_self_loops, degree, dense_to_sparse
-
 
 def threshold_sort(all_distances, r, n_neighbors):
-    A = all_distances.clone().detach()
+    # A = all_distances.clone().detach()
+    A = all_distances
 
     # set diagonal to zero to exclude self-loop distance
-    A.fill_diagonal_(0)
+    # A.fill_diagonal_(0)
 
     # keep n_neighbors only
     N = len(A) - n_neighbors - 1
     if N > 0:
         _, indices = torch.topk(A, N)
-        A.scatter_(
-            1,
-            indices,
-            torch.zeros(len(A), len(A), device=all_distances.device, dtype=torch.float),
+        A = torch.scatter(
+            A,
+            1, indices, torch.zeros(len(A), len(A), 
+            device=all_distances.device, 
+            dtype=torch.float)
         )
 
     A[A > r] = 0
     return A
 
-
 def one_hot_degree(data, max_degree, in_degree=False, cat=True):
     idx, x = data.edge_index[1 if in_degree else 0], data.x
     deg = degree(idx, data.num_nodes, dtype=torch.long)
@@ -48,10 +49,7 @@ class GaussianSmearing(torch.nn.Module):
     """
     slightly edited version from pytorch geometric to create edge from gaussian basis
     """
-
-    def __init__(
-        self, start=0.0, stop=5.0, resolution=50, width=0.05, device="cpu", **kwargs
-    ):
+    def __init__(self, start=0.0, stop=5.0, resolution=50, width=0.05, device="cpu", **kwargs):
         super(GaussianSmearing, self).__init__()
         offset = torch.linspace(start, stop, resolution, device=device)
         # self.coeff = -0.5 / (offset[1] - offset[0]).item() ** 2
@@ -62,7 +60,6 @@ def forward(self, dist):
         dist = dist.unsqueeze(-1) - self.offset.view(1, -1)
         return torch.exp(self.coeff * torch.pow(dist, 2))
 
-
 def normalize_edge(dataset, descriptor_label):
     mean, std, feature_min, feature_max = get_ranges(dataset, descriptor_label)
 
@@ -71,6 +68,9 @@ def normalize_edge(dataset, descriptor_label):
             data.edge_descriptor[descriptor_label] - feature_min
         ) / (feature_max - feature_min)
 
+def normalize_edge_cutoff(dataset, descriptor_label, r):
+    for data in dataset:
+        data.edge_descriptor[descriptor_label] = data.edge_descriptor[descriptor_label] / r
 
 def get_ranges(dataset, descriptor_label):
     mean = 0.0
@@ -91,42 +91,39 @@ def get_ranges(dataset, descriptor_label):
     std = std / len(dataset)
     return mean, std, feature_min, feature_max
 
-
 def clean_up(data_list, attr_list):
     if not attr_list:
-        return data_list
-
+        return
+    
+    # check which attributes in the list are removable
+    removable_attrs = [t for t in attr_list if t in data_list[0].to_dict()]
     for data in data_list:
-        for attr in attr_list:
-            try:
-                delattr(data, attr)
-            except AttributeError:
-                continue
-
+        for attr in removable_attrs:
+            delattr(data, attr)
 
 def get_distances(
     positions: torch.Tensor,
     offsets: torch.Tensor,
     device: str = "cpu",
-    mic: bool = True,
+    mic: bool = True
 ):
     """
     Get pairwise atomic distances
 
     Parameters
         positions:  torch.Tensor
                     positions of atoms in a unit cell
-
+        
         offsets:    torch.Tensor
                     offsets for the unit cell
-
+        
         device:     str
                     torch device type
-
+        
         mic:        bool
                     minimum image convention
     """
-
+    
     # convert numpy array to torch tensors
     n_atoms = len(positions)
     n_cells = len(offsets)
@@ -143,16 +140,14 @@ def get_distances(
     # this allows us to get the minimum self-loop distance
     # of an atom to itself in all other images
     origin_unit_cell_idx = 13
-    atomic_distances[:, :, origin_unit_cell_idx].fill_diagonal_(float("inf"))
+    # atomic_distances[:,:,origin_unit_cell_idx].fill_diagonal_(float("inf"))
 
     # get minimum
     min_atomic_distances, min_indices = torch.min(atomic_distances, dim=-1)
     expanded_min_indices = min_indices.clone().detach()
 
     atom_rij = pos1 - pos2
-    expanded_min_indices = expanded_min_indices[..., None, None].expand(
-        -1, -1, 1, atom_rij.size(3)
-    )
+    expanded_min_indices = expanded_min_indices[..., None, None].expand(-1, -1, 1, atom_rij.size(3))
     atom_rij = torch.gather(atom_rij, dim=2, index=expanded_min_indices).squeeze()
 
     return min_atomic_distances, min_indices
@@ -161,7 +156,7 @@ def get_distances(
 def get_pbc_cells(cell: torch.Tensor, offset_number: int, device: str = "cpu"):
     """
     Get the periodic boundary condition (PBC) offsets for a unit cell
-
+    
     Parameters
         cell:       torch.Tensor
                     unit cell vectors of ase.cell.Cell
@@ -172,25 +167,22 @@ def get_pbc_cells(cell: torch.Tensor, offset_number: int, device: str = "cpu"):
                     if == 1: 27-cell offsets (3x3x3)
     """
 
-    _range = np.arange(-offset_number, offset_number + 1)
+    _range = np.arange(-offset_number, offset_number+1)
     offsets = [list(x) for x in itertools.product(_range, _range, _range)]
     offsets = torch.tensor(offsets, device=device, dtype=torch.float)
     return offsets @ cell, offsets
 
-
-def get_cutoff_distance_matrix(
-    pos, cell, r, n_neighbors, device, image_selfloop, offset_number=1
-):
+def get_cutoff_distance_matrix(pos, cell, r, n_neighbors, device, image_selfloop, offset_number=1):
     """
     get the distance matrix
     TODO: need to tune this for elongated structures
 
     Parameters
     ----------
-        pos: np.ndarray
+        pos: np.ndarray 
             positions of atoms in a unit cell
             get from crystal.get_positions()
-
+        
         cell: np.ndarray
             unit cell of a ase Atoms object
 
@@ -206,11 +198,11 @@ def get_cutoff_distance_matrix(
 
     cutoff_distance_matrix = threshold_sort(distance_matrix, r, n_neighbors)
 
-    if image_selfloop:
-        # output of threshold sort has diagonal == 0
-        # fill in the original values
-        self_loop_diag = distance_matrix.diagonal()
-        cutoff_distance_matrix.diagonal().copy_(self_loop_diag)
+    # if image_selfloop:
+    #     # output of threshold sort has diagonal == 0
+    #     # fill in the original values
+    #     self_loop_diag = distance_matrix.diagonal()
+    #     cutoff_distance_matrix.diagonal().copy_(self_loop_diag)
 
     all_cell_offsets = cell_coors[torch.flatten(min_indices)]
     all_cell_offsets = all_cell_offsets.view(len(pos), -1, 3)
@@ -222,15 +214,12 @@ def get_cutoff_distance_matrix(
     # thus initialize a zero matrix of (M+N, 3) for cell offsets
     n_edges = torch.count_nonzero(cutoff_distance_matrix).item()
     cell_offsets = torch.zeros(n_edges + len(pos), 3, dtype=torch.float)
-    # get cells for edges except for self loops
+    # get cells for edges except for self loops 
     cell_offsets[:n_edges, :] = all_cell_offsets[cutoff_distance_matrix != 0]
 
     return cutoff_distance_matrix, cell_offsets
 
-
-def add_selfloop(
-    num_nodes, edge_indices, edge_weights, cutoff_distance_matrix, self_loop=True
-):
+def add_selfloop(num_nodes, edge_indices, edge_weights, cutoff_distance_matrix, self_loop=True):
     """
     add self loop (i, i) to graph structure
 
@@ -250,15 +239,16 @@ def add_selfloop(
     distance_matrix_masked = (cutoff_distance_matrix.fill_diagonal_(1) != 0).int()
     return edge_indices, edge_weights, distance_matrix_masked
 
-
 def load_node_representation(node_representation="onehot"):
     node_rep_path = Path(__file__).parent
-    default_reps = {"onehot": str(node_rep_path / "./node_representations/onehot.csv")}
+    default_reps = {
+        "onehot": str(node_rep_path / "./node_representations/onehot.csv")
+    }
 
     rep_file_path = node_representation
     if node_representation in default_reps:
         rep_file_path = default_reps[node_representation]
-
+    
     file_type = rep_file_path.split(".")[-1]
     loaded_rep = None
 
@@ -273,33 +263,28 @@ def load_node_representation(node_representation="onehot"):
 
     return loaded_rep
 
-
 def generate_node_features(input_data, n_neighbors, device):
     node_reps = load_node_representation()
     node_reps = torch.from_numpy(node_reps).to(device)
     n_elements, n_features = node_reps.shape
-
+    
     if isinstance(input_data, Data):
-        input_data.x = node_reps[input_data.z - 1].view(-1, n_features)
-        return one_hot_degree(input_data, n_neighbors + 1)
+        input_data.x = node_reps[input_data.z-1].view(-1,n_features)
+        return one_hot_degree(input_data, n_neighbors+1)
 
     for i, data in enumerate(input_data):
         # minus 1 as the reps are 0-indexed but atomic number starts from 1
-        data.x = node_reps[data.z - 1].view(-1, n_features)
+        data.x = node_reps[data.z-1].view(-1,n_features)
 
     for i, data in enumerate(input_data):
-        input_data[i] = one_hot_degree(data, n_neighbors + 1)
-
+        input_data[i] = one_hot_degree(data, n_neighbors+1)
 
-def generate_edge_features(input_data, edge_steps, device):
+def generate_edge_features(input_data, edge_steps, r, device):
     distance_gaussian = GaussianSmearing(0, 1, edge_steps, 0.2, device=device)
 
     if isinstance(input_data, Data):
-        input_data.edge_attr = distance_gaussian(input_data.edge_descriptor["distance"])
-        return
+        input_data = [input_data]
 
-    normalize_edge(input_data, "distance")
+    normalize_edge_cutoff(input_data, "distance", r)
     for i, data in enumerate(input_data):
-        input_data[i].edge_attr = distance_gaussian(
-            input_data[i].edge_descriptor["distance"]
-        )
+        input_data[i].edge_attr = distance_gaussian(input_data[i].edge_descriptor["distance"])