import math
import warnings

import torch
from torch.utils.data import random_split
from torch_geometric.loader import DataLoader

from matdeeplearn.preprocessor.datasets import LargeStructureDataset, StructureDataset
from matdeeplearn.preprocessor.transforms import GetY

# train / validation / test split
def dataset_split(
    dataset,
    train_size: float = 0.8,
    valid_size: float = 0.05,
    test_size: float = 0.15,
    seed: int = 1234,
):
    """
    Splits an input dataset into 3 subsets: train, validation, test.
    Requires train_size + valid_size + test_size = 1 (up to float tolerance);
    otherwise a warning is issued and the default 80/5/15 split is used.

    Parameters
    ----------
    dataset: matdeeplearn.preprocessor.datasets.StructureDataset
        a dataset object that contains the target data

    train_size: float
        a float between 0.0 and 1.0 that represents the proportion
        of the dataset to use as the training set

    valid_size: float
        a float between 0.0 and 1.0 that represents the proportion
        of the dataset to use as the validation set

    test_size: float
        a float between 0.0 and 1.0 that represents the proportion
        of the dataset to use as the test set

    seed: int
        seed for the split's random generator, for reproducibility
    """
    # Exact float comparison (!=) spuriously rejects valid splits such as
    # 0.7/0.2/0.1, whose binary-float sum is 0.9999999999999999, silently
    # replacing the caller's split with the default. Compare with tolerance.
    if not math.isclose(train_size + valid_size + test_size, 1.0, abs_tol=1e-8):
        warnings.warn("Invalid sizes detected. Using default split of 80/5/15.")
        train_size, valid_size, test_size = 0.8, 0.05, 0.15

    dataset_size = len(dataset)

    # int() truncation may leave a remainder; collect it in an unused split
    # so the lengths passed to random_split sum exactly to dataset_size.
    train_len = int(train_size * dataset_size)
    valid_len = int(valid_size * dataset_size)
    test_len = int(test_size * dataset_size)
    unused_len = dataset_size - train_len - valid_len - test_len

    train_dataset, val_dataset, test_dataset, _unused = random_split(
        dataset,
        [train_len, valid_len, test_len, unused_len],
        generator=torch.Generator().manual_seed(seed),
    )

    return train_dataset, val_dataset, test_dataset
6258
59+
def get_dataset(
    data_path, target_index: int = 0, transform_type="GetY", large_dataset=False
):
    """
    get dataset according to data_path
    this assumes that the data has already been processed and
    data.pt file exists in data_path/processed/ folder

    Parameters
    ----------

    data_path: str
        path to the folder containing data.pt file

    target_index: int
        passed to the transform as ``index`` — presumably selects which
        target column of the data to use for the current run/experiment

    transform_type: str
        name of the transformation function/class to be applied;
        only "GetY" is currently supported

    large_dataset: bool
        if True, use the LargeStructureDataset backend instead of
        StructureDataset

    Raises
    ------
    ValueError
        if transform_type does not name a known transform
    """

    # set transform method
    if transform_type == "GetY":
        T = GetY
    else:
        # Bug fix: the message was a plain string referencing a nonexistent
        # name, so it printed the literal text "{transform}". Use an f-string
        # with the actual parameter so the offending value is reported.
        raise ValueError(f"No such transform found for {transform_type}")

    # check if large dataset is needed
    if large_dataset:
        Dataset = LargeStructureDataset
    else:
        Dataset = StructureDataset

    transform = T(index=target_index)

    return Dataset(data_path, processed_data_path="", transform=transform)
99+
105100
def get_dataloader(
    dataset,
    batch_size: int,
    num_workers: int = 0,
    sampler=None,
):
    """
    Returns a single dataloader for a given dataset

    Parameters
    ----------
    dataset: matdeeplearn.preprocessor.datasets.StructureDataset
        a dataset object that contains the target data

    batch_size: int
        size of each batch

    num_workers: int
        how many subprocesses to use for data loading. 0 means that
        the data will be loaded in the main process.

    sampler: optional sampler defining how examples are drawn; when one
        is supplied, shuffling must be disabled (the two are mutually
        exclusive in DataLoader)
    """
    # Shuffle only when the caller has not provided a custom sampler.
    shuffle = sampler is None

    return DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
        sampler=sampler,
    )
0 commit comments