Skip to content

Commit 19bd557

Browse files
committed
Introduction of a YAML configuration file for CodeEntropy:
- Modularize 'main_mcc.py' to separate configuration and input stages
- Functions include: 'load_config', 'setup_argparse' and 'merge_configs'
- Improve error handling by adding specific checks on inputs that are required
- Improve error handling to catch and raise specific errors with clear messages
- Ensure YAML configuration values are correctly merged with CLI arguments
- Add detailed comments to explain the purpose and functionality of each section
1 parent 478280f commit 19bd557

2 files changed

Lines changed: 133 additions & 198 deletions

File tree

CodeEntropy/main_mcc.py

Lines changed: 95 additions & 198 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
# import numpy as np
77
import pandas as pd
8+
import yaml
89

910
from CodeEntropy import EntropyFunctions as EF
1011
from CodeEntropy import LevelFunctions as LF
@@ -13,222 +14,118 @@
1314
# from datetime import datetime
1415

1516

17+
def load_config(file_path):
    """Load the CodeEntropy section of a YAML configuration file.

    Args:
        file_path: Path to the YAML file. The document is expected to hold an
            ``arguments`` mapping that contains a ``CodeEntropy`` mapping.

    Returns:
        The value stored under ``arguments -> CodeEntropy``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is empty or the ``arguments -> CodeEntropy``
            section is missing.
    """
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    with open(file_path, "r", encoding="utf-8") as file:
        document = yaml.safe_load(file)

    try:
        return document["arguments"]["CodeEntropy"]
    except (KeyError, TypeError) as err:
        # safe_load returns None for an empty file (TypeError on indexing);
        # a missing section gives KeyError. Report both the same, clear way.
        raise ValueError(
            "Configuration file '{}' has no 'arguments: CodeEntropy:' "
            "section.".format(file_path)
        ) from err
21+
22+
23+
def _str_to_bool(value):
    """Convert a command-line token such as ``"True"`` or ``"false"`` to bool.

    ``type=bool`` cannot be used with argparse for this: ``bool("False")`` is
    ``True`` because every non-empty string is truthy.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1", "on"):
        return True
    if token in ("false", "f", "no", "n", "0", "off"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean (True/False), got {!r}".format(value)
    )


def setup_argparse():
    """Build the command-line parser for CodeEntropy.

    No option declares a default, so every option that is not given on the
    command line is parsed as ``None``.

    Returns:
        argparse.ArgumentParser: The configured parser.
    """
    parser = argparse.ArgumentParser(
        description="Override YAML defaults with CLI arguments"
    )
    parser.add_argument(
        "-f",
        "--top_traj_file",
        nargs="+",
        help="Path to Structure/topology file followed by Trajectory file(s)",
    )
    parser.add_argument(
        "--selection_string", type=str, help="Selection string for CodeEntropy"
    )
    parser.add_argument(
        "--start", type=int, help="Start analysing the trajectory from this frame index"
    )
    parser.add_argument(
        "--end", type=int, help="Stop analysing the trajectory at this frame index"
    )
    parser.add_argument(
        "--step",
        type=int,
        help="Interval between two consecutive frames to be read index",
    )
    parser.add_argument(
        "--bin_width", type=int, help="Bin width in degrees for making the histogram"
    )
    parser.add_argument(
        "--tempra", type=float, help="Temperature for entropy calculation (K)"
    )
    parser.add_argument(
        "--verbose",
        # Not type=bool: that would turn "--verbose False" into True.
        type=_str_to_bool,
        help="True/False flag for noisy or quiet output",
    )
    parser.add_argument("--thread", type=int, help="How many multiprocess to use")
    parser.add_argument(
        "--outfile", help="Name of the file where the output will be written"
    )
    parser.add_argument(
        "--resfile",
        help="Name of the file where the residue entropy output will be written",
    )
    parser.add_argument(
        "--mout", help="Name of the file where certain matrices will be written"
    )
    return parser
69+
70+
71+
def merge_configs(args, config):
    """Merge CLI arguments with the YAML configuration.

    Values given on the command line take precedence. Any argument still set
    to ``None`` is filled from the configuration: from the entry's
    ``"default"`` item when the entry is a mapping that has one, otherwise
    from the entry itself.

    Both inputs are updated in place: ``args`` receives the filled-in values
    and ``config`` ends up holding the resolved value for each of its keys.

    Args:
        args: Parsed command-line namespace (any object with attributes).
        config: Mapping of option name to either a plain value or a mapping
            with a ``"default"`` item.

    Returns:
        The same ``config`` object, with resolved values.
    """
    for key, value in config.items():
        # Default of None so a YAML key without a matching CLI option is
        # treated as "not given" instead of raising AttributeError.
        if getattr(args, key, None) is None:
            if isinstance(value, dict) and "default" in value:
                resolved = value["default"]
            else:
                resolved = value
            setattr(args, key, resolved)

    for key, value in vars(args).items():
        # Also overwrite existing keys whose resolved value is None, so that
        # config[key] never keeps the raw {"default": None} mapping.
        if value is not None or key in config:
            config[key] = value
    return config
88+
89+
1690
def main():
1791
"""
1892
Main function for calculating the entropy of a system using the multiscale cell
1993
correlation method.
2094
"""
2195

2296
try:
23-
parser = argparse.ArgumentParser(
24-
description="""
25-
CodeEntropy-POSEIDON is a tool to compute entropy using the
26-
multiscale-cell-correlation (MCC) theory and force/torque covariance
27-
methods with the ablity to compute solvent entropy.
28-
Version:
29-
0.3.1;
30-
31-
Authors:
32-
Arghya Chakravorty (arghya90),
33-
Jas Kalayan (jkalayan),
34-
Donald Chang,
35-
Sarah Fegan
36-
Ioana Papa;
37-
38-
Output:
39-
*.csv = results from different calculateion,
40-
*.pkl - Pickled reduced universe for further analysis,
41-
*.out - detailed output such as matrix and spectra"""
42-
)
43-
44-
parser.add_argument(
45-
"-f",
46-
"--top_traj_file",
47-
required=True,
48-
dest="filePath",
49-
action="store",
50-
nargs="+",
51-
help="Path to Structure/topology file (AMBER PRMTOP, GROMACS TPR which "
52-
"contains topology and dihedral information) followed by Trajectory "
53-
"file(s) (AMBER NETCDF or GROMACS TRR) you will need to output the "
54-
"coordinates and forces to the same file. Required.",
55-
)
56-
parser.add_argument(
57-
"-l",
58-
"--selectString",
59-
action="store",
60-
dest="selection_string",
61-
type=str,
62-
default="all",
63-
help="Selection string for CodeEntropy such as protein or resid, refer to "
64-
"MDAnalysis.select_atoms for more information.",
65-
)
66-
parser.add_argument(
67-
"-b",
68-
"--begin",
69-
action="store",
70-
dest="start",
71-
help="Start analysing the trajectory from this frame index. Defaults to 0",
72-
default=0,
73-
type=int,
74-
)
75-
parser.add_argument(
76-
"-e",
77-
"--end",
78-
action="store",
79-
dest="end",
80-
help="Stop analysing the trajectory at this frame index. Defaults to -1 "
81-
"(end of trajectory file)",
82-
default=-1,
83-
type=int,
84-
)
85-
parser.add_argument(
86-
"-d",
87-
"--step",
88-
action="store",
89-
dest="step",
90-
help="interval between two consecutive frames to be read index. "
91-
"Defaults to 1",
92-
default=1,
93-
type=int,
94-
)
95-
parser.add_argument(
96-
"-n",
97-
"--bin_width",
98-
action="store",
99-
dest="bin_width",
100-
default=30,
101-
type=int,
102-
help="Bin width in degrees for making the histogram of the dihedral angles "
103-
"for the conformational entropy. Default: 30",
104-
)
105-
parser.add_argument(
106-
"-k",
107-
"--tempra",
108-
action="store",
109-
dest="temp",
110-
help="Temperature for entropy calculation (K). Default to 298.0 K",
111-
default=298.0,
112-
type=float,
113-
)
114-
parser.add_argument(
115-
"-v",
116-
"--verbose",
117-
action="store",
118-
dest="verbose",
119-
default=False,
120-
type=bool,
121-
help="True/False flag for noisy or quiet output. Default: False",
122-
)
123-
parser.add_argument(
124-
"-t",
125-
"--thread",
126-
action="store",
127-
dest="thread",
128-
help="How many multiprocess to use. Default 1 for single core execution.",
129-
default=1,
130-
type=int,
131-
)
132-
parser.add_argument(
133-
"-o",
134-
"--out",
135-
action="store",
136-
dest="outfile",
137-
default="outfile.out",
138-
help="Name of the file where the output will be written. "
139-
"Default: outfile.out",
140-
)
141-
parser.add_argument(
142-
"-r",
143-
"--resout",
144-
action="store",
145-
dest="resfile",
146-
default="res_outfile.out",
147-
help="Name of the file where the residue entropy output will be written. "
148-
"Default: res_outfile.out",
149-
)
150-
parser.add_argument(
151-
"-m",
152-
"--mout",
153-
action="store",
154-
dest="moutfile",
155-
default=None,
156-
help="Name of the file where certain matrices will be written "
157-
"(default: None).",
158-
)
159-
160-
parser.add_argument(
161-
"-c",
162-
"--cutShell",
163-
action="store",
164-
dest="cutShell",
165-
default=None,
166-
type=float,
167-
help="include cutoff shell analysis, add cutoff distance in angstrom "
168-
"Default None will ust the RAD Algorithm",
169-
)
170-
parser.add_argument(
171-
"-p",
172-
"--pureAtomNum",
173-
action="store",
174-
dest="puteAtomNum",
175-
default=1,
176-
type=int,
177-
help="Reference molecule resid for system of pure liquid. " "Default to 1",
178-
)
179-
parser.add_argument(
180-
"-x",
181-
"--excludedResnames",
182-
dest="excludedResnames",
183-
action="store",
184-
nargs="+",
185-
default=None,
186-
help="exclude a list of molecule names from nearest non-like analysis. "
187-
"Default: None. Multiples are gathered into list.",
188-
)
189-
parser.add_argument(
190-
"-w",
191-
"--water",
192-
dest="waterResnames",
193-
action="store",
194-
default="WAT",
195-
nargs="+",
196-
help="resname for water molecules. "
197-
"Default: WAT. Multiples are gathered into list.",
198-
)
199-
parser.add_argument(
200-
"-s",
201-
"--solvent",
202-
dest="solventResnames",
203-
action="store",
204-
nargs="+",
205-
default=None,
206-
help="include resname of solvent molecules (case-sensitive) "
207-
"Default: None. Multiples are gathered into list.",
208-
)
209-
parser.add_argument(
210-
"--solContact",
211-
action="store_true",
212-
dest="doSolContact",
213-
default=False,
214-
help="Do solute contact calculation",
215-
)
216-
97+
config = load_config("config.yaml")
98+
parser = setup_argparse()
21799
args = parser.parse_args()
100+
config = merge_configs(args, config)
101+
102+
# Check for required arguments
103+
if not config.get("top_traj_file"):
104+
raise ValueError(
105+
"The 'top_traj_file' argument is required but not provided."
106+
)
107+
if not config.get("selection_string"):
108+
raise ValueError(
109+
"The 'selection_string' argument is required but not provided."
110+
)
111+
218112
except argparse.ArgumentError:
219113
print("Command line arguments are ill-defined, please check the arguments")
220114
raise
115+
except ValueError as e:
116+
print(e)
117+
raise
221118

222119
# REPLACE INPUTS
223-
print("printing all input")
120+
print("Printing all input")
224121
for arg in vars(args):
225-
print(" {} {}".format(arg, getattr(args, arg) or ""))
122+
print(" {}: {}".format(arg, getattr(args, arg) or ""))
226123

227124
# startTime = datetime.now()
228125

229126
# Get topology and trajectory file names and make universe
230-
tprfile = args.filePath[0]
231-
trrfile = args.filePath[1:]
127+
tprfile = args.top_traj_file[0]
128+
trrfile = args.top_traj_file[1:]
232129
u = mda.Universe(tprfile, trrfile)
233130

234131
# Define bin_width for histogram from inputs

config.yaml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
---

# Default values for the CodeEntropy command-line options.
#
# Each key under `arguments: CodeEntropy:` has the same name as an option of
# the argument parser in CodeEntropy/main_mcc.py. When an option is not given
# on the command line, its `default` entry from this file is used instead.
# Only `default` is read by the merge step; `type` and `required` are
# descriptive here.
arguments:
  CodeEntropy:
    # Structure/topology file followed by trajectory file(s).
    # The program raises an error if this is still empty after merging.
    top_traj_file:
      required: True
      default: []
    # Selection string for CodeEntropy.
    selection_string:
      type: str
      default: "all"
    # Frame index at which to start analysing the trajectory.
    start:
      type: int
      default: 0
    # Frame index at which to stop analysing the trajectory.
    end:
      type: int
      default: -1
    # Interval between two consecutive frames to be read.
    step:
      type: int
      default: 1
    # Bin width in degrees for making the histogram.
    bin_width:
      type: int
      default: 30
    # Temperature for the entropy calculation (K).
    tempra:
      type: float
      default: 298.0
    # Noisy (True) or quiet (False) output.
    verbose:
      type: bool
      default: False
    # Number of processes to use.
    thread:
      type: int
      default: 1
    # File where the output will be written.
    outfile:
      type: str
      default: "outfile.out"
    # File where the residue entropy output will be written.
    resfile:
      default: "res_outfile.out"
    # File where certain matrices will be written (null: no default file name).
    mout:
      default: null

0 commit comments

Comments
 (0)