# Source: Fung-Lab/MatDeepLearn_dev (GitHub), commit 610f4652f20afd2c7b74a441cec89f4abdb1acff
# File: MatDeepLearn_dev/configs/config_templates/cgcnn_han_hmof_5k.yml

# Trainer selection. NOTE(review): presumably the name under which the trainer
# class is registered in the framework - confirm against the trainer registry.
trainer: property

# Run-level settings: identification, device, seeding, console output,
# Weights & Biases logging and sweep options.
task:
  # run_mode: train
  identifier: "cgcnn_han_vn_post_hmof"
  reprocess: false
  run_id: ""
  # Declared exactly once: a mapping must not repeat a key.
  parallel: true
  device: "cuda:0"
  # seed=0 means random initialization
  seed: 0
  write_output: true
  # Training print out frequency (print per n number of epochs)
  verbosity: 1
  wandb:
    use_wandb: true
    wandb_entity: "fung-lab"
    wandb_project: "cgcnn_vn_new"
    notes: ""
    tags: ["test"]
    # Dotted paths into this config file (section.key...) selected for tracking.
    track_params:
      - "model.hyperparams.mp_pattern"
      - "model.hyperparams.pool"
      - "model.hyperparams.virtual_pool"
      - "model.hyperparams.gc_count"
      - "model.hyperparams.attn_heads"
      - "optim.lr"
      - "optim.batch_size"
      - "optim.max_epochs"
      - "dataset.preprocess_params.num_offsets"
      - "dataset.preprocess_params.edge_calc_method"
      - "dataset.preprocess_params.all_neighbors"
      - "dataset.preprocess_params.edge_steps"
      - "dataset.preprocess_params.use_degree"
    # Files listed as artifacts; the commented entries are the same model file
    # under other filesystem roots.
    log_artifacts:
      # - "/nethome/sbaskaran31/projects/Sidharth/MatDeepLearn_dev/matdeeplearn/models/cgcnn_vn.py"
      # - "/storage/home/hcoda1/9/sbaskaran31/p-vfung3-0/MatDeepLearn_dev/matdeeplearn/models/cgcnn_vn.py"
      - "/global/cfs/projectdirs/m3641/Sidharth/MatDeepLearn_dev/matdeeplearn/models/cgcnn_vn.py"
    metadata:
      architecture: "CGCNN_VN"
      cluster: "fung-cluster"
      dataset: "hMOF-5K"
    # NOTE(review): job_config and sweep_file point under /nethome while the
    # active log_artifacts entry uses /global/cfs - confirm the paths before
    # enabling sweeps.
    sweep:
      parallel: true
      do_sweep: false # ignore rest of config if false
      system: "phoenix_slurm" # one of "local", "phoenix_slurm"
      job_config: "/nethome/sbaskaran31/projects/Sidharth/MatDeepLearn_dev/configs/jobs/phoenix_slurm.yml"
      count: 3
      sweep_file: "/nethome/sbaskaran31/projects/Sidharth/MatDeepLearn_dev/configs/sweeps/cgcnn_vn_sweep_d1.yml"
      # sweep_file: "/nethome/sbaskaran31/projects/Sidharth/MatDeepLearn_dev/configs/cgcnn_vn_sweep.yml"

# Model selection, checkpoint handling and architecture hyperparameters.
model:
  name: CGCNN_HAN_VN
  load_model: false
  save_model: true
  model_path: "cgcnn_han_vn.pth"
  # model hyperparams
  hyperparams:
    edge_steps: 25
    self_loop: true
    dim1: 100
    dim2: 150
    atomic_intermediate_layer_resolution: 0
    pre_fc_count: 1
    gc_count: 4
    post_fc_count: 3
    attn_heads: 6
    # pooling reduction scheme
    pool: "global_mean_pool"
    virtual_pool:
      # pooling method
      virtual_pool_name: "RealVirtualAttention"
      args:
        embed_dim: 100
        attn_size: 128
    # NOTE(review): presumably "rr" = real-real and "rv" = real-virtual edges,
    # matching dataset.transforms VirtualEdgeGeneration attrs - confirm.
    mp_pattern:
      - "rr"
      - "rv"
    pool_order: "early"
    batch_norm: true
    batch_track_stats: true
    act_fn: "relu"
    act_nn: "ReLU"
    dropout_rate: 0.0

# Optimisation settings: epoch budget, learning rate, loss, optimizer and
# learning-rate scheduler.
optim:
  max_epochs: 250
  lr: 0.002
  loss:
    loss_type: "TorchLossWrapper"
    loss_args:
      loss_fn: "l1_loss"
  batch_size: 64
  optimizer:
    optimizer_type: "AdamW"
    # No extra optimizer arguments are set here.
    optimizer_args: {}
  scheduler:
    scheduler_type: "ReduceLROnPlateau"
    scheduler_args:
      mode: "min"
      factor: 0.8
      patience: 10
      min_lr: 0.00001
      threshold: 0.0002

# Dataset location, transforms, graph preprocessing and train/val/test split.
dataset:
  # if false, the data needs to be preprocessed to generate the .pt file
  processed: false
  force_preprocess: false
  # set to 0 when using the full dataset, else the first "num_examples"
  # examples are taken
  num_examples: 0
  # Path to data files
  # src: "/nethome/sbaskaran31/projects/Sidharth/hMOF/raw_5k/data.json"
  # src: "/storage/home/hcoda1/9/sbaskaran31/p-vfung3-0/hMOF/raw_5k/raw_5k/data.json"
  src: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/hMOF/raw_5k/data.json"
  target_path: ""
  # pt_path: "/nethome/sbaskaran31/projects/Sidharth/hMOF/raw_5k/ocp"
  # pt_path: "/storage/home/hcoda1/9/sbaskaran31/p-vfung3-0/hMOF/raw_5k/raw_5k/ocp"
  pt_path: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/hMOF/raw_5k/ocp"
  # transforms
  transforms:
    - name: GetY
      args:
        # methane adsorption uptake
        index: 5
      otf: false
    - name: VirtualNodeGeneration
      args:
        virtual_box_increment: 3
        method: "ase"
      otf: false
    - name: VirtualEdgeGeneration
      args:
        attrs:
          - "rr"
          - "rv"
        rr_cutoff: 12
        rv_cutoff: 12
        vr_cutoff: 5.0
        vv_cutoff: 5.0
      otf: false
      batch: true
  # use for passing into global config
  # one of MDL, ASE, OCP
  # NOTE(review): the "one of MDL, ASE, OCP" remark does not obviously describe
  # the key that follows - confirm which setting it refers to.
  use_sweep_params: false
  apply_pre_transform_processing: false
  # use again for passing into global config
  data_format: "json"
  node_representation: "onehot"
  additional_attributes: []
  # Print out processing info
  verbose: true
  # graph specific settings: preprocessing hyperparams
  preprocess_params:
    cutoff_radius: 5.0
    n_neighbors: 250
    process_batch_size: 50
    edge_calc_method: "ocp"
    num_offsets: 1
    edge_steps: 25
    all_neighbors: true
    use_degree: false
  # Ratios for train/val/test split out of a total of 1
  train_ratio: 0.8
  val_ratio: 0.05
  test_ratio: 0.15