Skip to content

Commit 10fdb87

Browse files
committed
#22: using pathlib to handle files and directories
1 parent 9c30050 commit 10fdb87

7 files changed

Lines changed: 77 additions & 67 deletions

File tree

docs/source/analyzing_instances.rst

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ provides an *instance loader* tool for importing workflow execution instances
3232
for analysis. For instance, the code snippet below shows how an instance can
3333
be loaded using the :class:`~wfcommons.wfinstances.instance.Instance` class: ::
3434

35+
import pathlib
3536
from wfcommons import Instance
36-
instance = Instance(input_instance='/path/to/instance/file.json')
37+
input_instance = pathlib.Path('/path/to/instance/file.json')
38+
instance = Instance(input_instance=input_instance)
3739

3840
The :class:`~wfcommons.wfinstances.instance.Instance` class provides a number of
3941
methods for interacting with the workflow instance, including:
@@ -115,20 +117,19 @@ distribution fitting for task *prefixes* of the Seismology workflow
115117
plots (runtime, and input and output files) into the :code:`fits` folder using
116118
:code:`seismology` as a prefix for each generated plot: ::
117119

120+
import pathlib
118121
from wfcommons import Instance, InstanceAnalyzer
119-
from os import listdir
120-
from os.path import isfile, join
121122

122123
# obtaining list of instance files in the folder
123-
INSTANCES_PATH = "/path/to/some/instance/folder/"
124-
instance_files = [f for f in listdir(INSTANCES_PATH) if isfile(join(INSTANCES_PATH, f))]
124+
INSTANCES_PATH = pathlib.Path('/path/to/some/instance/folder/')
125+
instance_files = [f for f in INSTANCES_PATH.glob('*') if f.is_file()]
125126

126127
# creating the instance analyzer object
127128
analyzer = InstanceAnalyzer()
128129

129130
# appending instance files to the instance analyzer
130131
for instance_file in instance_files:
131-
instance = Instance(input_instance=INSTANCES_PATH + instance_file)
132+
instance = Instance(input_instance=INSTANCES_PATH.joinpath(instance_file))
132133
analyzer.append_instance(instance)
133134

134135
# list of workflow task name prefixes to be analyzed in each instance

docs/source/generating_workflows.rst

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -86,48 +86,52 @@ The following example generates a *Seismology* synthetic workflow instance
8686
of 250 tasks, builds a synthetic workflow instance, and writes the
8787
synthetic instance to a JSON file. ::
8888

89+
import pathlib
8990
from wfcommons.wfchef.recipes import SeismologyRecipe
9091
from wfcommons import WorkflowGenerator
9192

92-
generator = WorkflowGenerator(SeismologyRecipe.from_num_tasks(250))
93+
generator = WorkflowGenerator(SeismologyRecipe.from_num_tasks(250))
9394
workflow = generator.build_workflow()
94-
workflow.write_json(f'seismology-workflow.json')
95+
workflow.write_json(pathlib.Path('seismology-workflow.json'))
9596

9697

9798
The example below generates 10 *Blast* synthetic
9899
workflow instances for every size defined in the array :code:`num_tasks`: ::
99100

101+
import pathlib
100102
from wfcommons.wfchef.recipes import BlastRecipe
101103
from wfcommons import WorkflowGenerator
102104

103105
num_tasks = [100, 250, 370, 800]
104-
106+
105107
for task in num_tasks:
106-
generator = WorkflowGenerator(BlastRecipe.from_num_tasks(task))
107-
workflows = generator.build_workflows(10)
108-
109-
for i, workflow in enumerate(workflows):
110-
workflow.write_json(f'blast-workflow-{task}-{i}.json')
108+
generator = WorkflowGenerator(BlastRecipe.from_num_tasks(task))
109+
workflows = generator.build_workflows(10)
110+
111+
for i, workflow in enumerate(workflows):
112+
workflow.write_json(pathlib.Path(f'blast-workflow-{task}-{i}.json'))
113+
111114

112115
The following example generates 10 *Epigenomics* synthetic workflow instances
113116
based on the number of tasks entered by the user (1000), builds the synthetic
114117
workflow instances, and writes the synthetic instances to JSON files. ::
115118

119+
import pathlib
116120
from wfcommons.wfchef.recipes import EpigenomicsRecipe
117121
from wfcommons import WorkflowGenerator
118122

119123
generator = WorkflowGenerator(EpigenomicsRecipe.from_num_tasks(1000))
120124
for i, workflow in enumerate(generator.build_workflows(10)):
121-
workflow.write_json(f'epigenomics-workflow-{i}.json')
125+
workflow.write_json(pathlib.Path(f'epigenomics-workflow-{i}.json'))
122126

123127
The example below generates a *Cycles* (agroecosystem) synthetic workflow instance
124128
based on the number of tasks entered by the user (250), builds the synthetic workflow
125129
instance, and writes the synthetic instance to a JSON file. ::
126130

131+
import pathlib
127132
from wfcommons.wfchef.recipes import CyclesRecipe
128133
from wfcommons import WorkflowGenerator
129134

130135
generator = WorkflowGenerator(CyclesRecipe.from_num_tasks(250))
131136
workflow = generator.build_workflow()
132-
workflow.write_json(f'cycles-workflow.json')
133-
137+
workflow.write_json(pathlib.Path('cycles-workflow.json'))

docs/source/parsing_logs.rst

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,21 @@ executions. The following example shows the analysis of Makeflow execution logs,
3232
stored in a local folder (:code:`execution_dir`), for a workflow execution using the
3333
:class:`~wfcommons.wfinstances.logs.makeflow.MakeflowLogsParser` class: ::
3434

35+
import pathlib
3536
from wfcommons.wfinstances import MakeflowLogsParser
3637

3738
# creating the parser for the Makeflow workflow
38-
parser = MakeflowLogsParser(execution_dir='/path/to/makeflow/execution/dir/blast/chameleon-small-001/',
39-
resource_monitor_logs_dir='/path/to/makeflow/resource/monitor/logs/dir')
39+
execution_dir = pathlib.Path('/path/to/makeflow/execution/dir/blast/chameleon-small-001/')
40+
resource_monitor_logs_dir = pathlib.Path('/path/to/makeflow/resource/monitor/logs/dir')
41+
parser = MakeflowLogsParser(execution_dir=execution_dir,
42+
resource_monitor_logs_dir=resource_monitor_logs_dir)
4043

4144
# generating the workflow instance object
4245
workflow = parser.build_workflow('makeflow-workflow-test')
4346

4447
# writing the workflow instance to a JSON file
45-
workflow.write_json('makeflow-workflow.json')
48+
workflow_path = pathlib.Path('./makeflow-workflow.json')
49+
workflow.write_json(workflow_path)
4650

4751
.. note::
4852
The :class:`~wfcommons.wfinstances.logs.makeflow.MakeflowLogsParser` class requires
@@ -63,16 +67,19 @@ based on the dataflow programming model. The following example shows the analysi
6367
Nextflow execution logs, stored in a local folder (:code:`execution_dir`), for a workflow
6468
execution using the :class:`~wfcommons.wfinstances.logs.nextflow.NextflowLogsParser` class: ::
6569

70+
import pathlib
6671
from wfcommons.wfinstances import NextflowLogsParser
6772

6873
# creating the parser for the Nextflow workflow
69-
parser = NextflowLogsParser(execution_dir='/path/to/nextflow/execution/dir/')
74+
execution_dir = pathlib.Path('/path/to/nextflow/execution/dir/')
75+
parser = NextflowLogsParser(execution_dir=execution_dir)
7076

7177
# generating the workflow instance object
7278
workflow = parser.build_workflow('nextflow-workflow-test')
7379

7480
# writing the workflow instance to a JSON file
75-
workflow.write_json('nextflow-workflow.json')
81+
workflow_path = pathlib.Path('./nextflow-workflow.json')
82+
workflow.write_json(workflow_path)
7683

7784
.. note::
7885
The :class:`~wfcommons.wfinstances.logs.nextflow.NextflowLogsParser` class assumes
@@ -95,16 +102,19 @@ the analysis of Pegasus execution logs, stored in a local folder (:code:`submit_
95102
workflow execution using the :class:`~wfcommons.wfinstances.logs.pegasus.PegasusLogsParser`
96103
class: ::
97104

105+
import pathlib
98106
from wfcommons.wfinstances import PegasusLogsParser
99107

100108
# creating the parser for the Pegasus workflow
101-
parser = PegasusLogsParser(submit_dir='/path/to/pegasus/submit/dir/seismology/chameleon-100p-001/')
109+
submit_dir = pathlib.Path('/path/to/pegasus/submit/dir/seismology/chameleon-100p-001/')
110+
parser = PegasusLogsParser(submit_dir=submit_dir)
102111

103112
# generating the workflow instance object
104113
workflow = parser.build_workflow('pegasus-workflow-test')
105114

106115
# writing the workflow instance to a JSON file
107-
workflow.write_json('pegasus-workflow.json')
116+
workflow_path = pathlib.Path('./pegasus-workflow.json')
117+
workflow.write_json(workflow_path)
108118

109119
.. warning::
110120
By default, the :class:`~wfcommons.wfinstances.logs.pegasus.PegasusLogsParser`

wfcommons/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import json
1212
import logging
1313
import math
14+
import pathlib
1415
import scipy.stats
1516
import warnings
1617
import numpy as np
@@ -27,7 +28,7 @@ def __repr__(self):
2728
return '<%s.%s>' % (self.__class__.__name__, self.name)
2829

2930

30-
def read_json(instance_filename: str) -> Dict[str, Any]:
31+
def read_json(instance_filename: pathlib.Path) -> Dict[str, Any]:
3132
"""
3233
Read the JSON from the file path.
3334

wfcommons/wfchef/chef.py

Lines changed: 25 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,9 @@
88
# the Free Software Foundation, either version 3 of the License, or
99
# (at your option) any later version.
1010

11-
import pathlib
12-
import argparse
13-
from email.mime import base
1411
import json
15-
import traceback
16-
from typing import Dict, Optional, Union
1712
import argparse
13+
import math
1814
import networkx as nx
1915
import numpy as np
2016
import pandas as pd
@@ -23,29 +19,29 @@
2319
import pkg_resources
2420
import subprocess
2521
import traceback
22+
2623
from typing import Dict, Optional, Union
2724
from stringcase import capitalcase
25+
2826
from .duplicate import duplicate, NoMicrostructuresError
2927
from .find_microstructures import save_microstructures
3028
from .utils import create_graph
31-
from ..wfgen.abstract_recipe import WorkflowRecipe
3229
from ..wfinstances.instance import Instance
3330
from ..wfinstances.instance_analyzer import InstanceAnalyzer
34-
import math
3531

3632
this_dir = pathlib.Path(__file__).resolve().parent
3733
skeleton_path = this_dir.joinpath("skeletons")
3834

3935

40-
def compare_rmse(synth_graph: nx.DiGraph, real_graph: nx.DiGraph):
36+
def compare_rmse(synth_graph: nx.DiGraph, real_graph: nx.DiGraph) -> float:
4137
"""
42-
Calculates the Root Mean Square Error of a synthetic instance created
38+
Calculate the Root Mean Square Error of a synthetic instance created
4339
based on the corresponding (in number of tasks) real-world sample.
4440
4541
:param synth_graph: a synthetic instance created by WfCommons.
46-
:type synth_graph: networkX DiGraph
42+
:type synth_graph: networkX.DiGraph
4743
:param real_graph: the corresponding (in number of tasks) real-world workflow instance.
48-
:type real_graph: networkX DiGraph
44+
:type real_graph: networkX.DiGraph
4945
5046
:return: The RMSE between the synthetic instance and the real instance.
5147
:rtype: float
@@ -72,10 +68,10 @@ def compare_rmse(synth_graph: nx.DiGraph, real_graph: nx.DiGraph):
7268
return mse / real_graph.order()
7369

7470

75-
def find_err(workflow: Union[str, pathlib.Path],
76-
err_savepath: Optional[Union[str, pathlib.Path]] = None,
77-
always_update: bool = False,
78-
runs: int = 1) -> pd.DataFrame:
71+
def find_err(workflow: pathlib.Path,
72+
err_savepath: Optional[pathlib.Path] = None,
73+
always_update: Optional[bool] = False,
74+
runs: Optional[int] = 1) -> pd.DataFrame:
7975
"""
8076
Creates a dataframe with the Root Mean Square Error of the synthetic instances created
8177
based on the corresponding, w.r.t. number of tasks, real-world samples available at
@@ -84,13 +80,13 @@ def find_err(workflow: Union[str, pathlib.Path],
8480
repositories.
8581
8682
:param workflow: path to the real workflow instances.
87-
:type workflow: str or pathlib.Path
83+
:type workflow: pathlib.Path
8884
:param err_savepath: path to save the err (rmse) of all instances available into a csv.
89-
:type real_graph: str or pathlib.Path
85+
:type err_savepath: Optional[pathlib.Path]
9086
:param always_update: flag to set if the err needs to be updated or not (True: if new instances are added, False: otherwise).
91-
:type real_graph: bool
87+
:type always_update: Optional[bool]
9288
:param runs: number of times to repeat the err calculation process (due to randomization).
93-
:type runs: bool
89+
:type runs: Optional[int]
9490
9591
:return: dataframe with RMSE of all available instances.
9692
:rtype: pd.DataFrame
@@ -100,7 +96,6 @@ def find_err(workflow: Union[str, pathlib.Path],
10096
key=lambda name: summary["base_graphs"][name]["order"])
10197

10298
if err_savepath:
103-
err_savepath = pathlib.Path(err_savepath)
10499
err_savepath.parent.mkdir(exist_ok=True, parents=True)
105100

106101
labels = [graph for graph in sorted_graphs]
@@ -147,17 +142,11 @@ def analyzer_summary(path_to_instances: pathlib.Path) -> Dict:
147142
and from Makeflow WMS GitHub <https://github.com/wfcommons/makeflow-instances>`_
148143
repositories.
149144
150-
:param workflow: name (for samples available in WfCommons) or path to the real workflow instances.
151-
:type workflow: str or pathlib.Path
152-
:param err_savepath: path to save the err (rmse) of all instances available into a csv.
153-
:type real_graph: str or pathlib.Path
154-
:param always_update: flag to set if the err needs to be updated or not (True: if new instances are added, False: otherwise).
155-
:type real_graph: bool
156-
:param runs: number of times to repeat the err calculation process (due to randomization).
157-
:type runs:bool
145+
:param path_to_instances: path to the folder containing the real workflow instances.
146+
:type path_to_instances: pathlib.Path
158147
159-
:return: dataframe with RMSE of all available instances.
160-
:rtype: pd.DataFrame
148+
:return: a dictionary with summary statistics of the analyzed instances.
149+
:rtype: Dict
161150
"""
162151
analyzer = InstanceAnalyzer()
163152
task_types = set()
@@ -173,12 +162,14 @@ def analyzer_summary(path_to_instances: pathlib.Path) -> Dict:
173162

174163
return stats_dict
175164

165+
176166
def get_recipe(recipe: str) -> "Module":
177167
for entry_point in pkg_resources.iter_entry_points('workflow_recipes'):
178-
att = entry_point.attrs[0]
168+
att = entry_point.attrs[0]
179169
if att == recipe:
180170
return entry_point.load()
181171

172+
182173
def get_recipes() -> pd.DataFrame:
183174
rows = []
184175
for entry_point in pkg_resources.iter_entry_points('workflow_recipes'):
@@ -191,6 +182,7 @@ def get_recipes() -> pd.DataFrame:
191182
print(f"Could not load {entry_point.module_name}")
192183
return pd.DataFrame(rows, columns=["name", "module", "import command"])
193184

185+
194186
def ls_recipe():
195187
"""
196188
Inspired by UNIX `ls` command, it lists the recipes already installed into the system and
@@ -219,7 +211,7 @@ def create_recipe(path_to_instances: Union[str, pathlib.Path],
219211
wf_name: str,
220212
cutoff: int = 4000,
221213
verbose: bool = False,
222-
runs: int = 1) -> "WorkflowRecipe":
214+
runs: int = 1):
223215
"""
224216
Creates a recipe for a workflow application by automatically replacing custom information
225217
from the recipe skeleton.
@@ -253,7 +245,7 @@ def create_recipe(path_to_instances: Union[str, pathlib.Path],
253245
err_savepath.parent.mkdir(exist_ok=True, parents=True)
254246
df = find_err(microstructures_path, runs=runs)
255247
err_savepath.write_text(df.to_csv())
256-
248+
257249
# Recipe
258250
with skeleton_path.joinpath("recipe.py").open() as fp:
259251
skeleton_str = fp.read()

wfcommons/wfchef/duplicate.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,16 @@
88
# the Free Software Foundation, either version 3 of the License, or
99
# (at your option) any later version.
1010

11-
import pathlib
1211
import json
13-
import pickle
1412
import networkx as nx
15-
from typing import Set, List, Union, Dict
16-
from uuid import uuid4
1713
import numpy as np
14+
import pathlib
15+
import pickle
1816
import random
1917

18+
from typing import Set, List, Union, Dict
19+
from uuid import uuid4
20+
2021
this_dir = pathlib.Path(__file__).resolve().parent
2122

2223

@@ -77,15 +78,14 @@ def duplicate(path: pathlib.Path,
7778
:rtype: networkX DiGraph.
7879
"""
7980
summary = json.loads(path.joinpath("summary.json").read_text())
80-
81+
8182
if base:
8283
base_path = pathlib.Path(base)
8384
if not base_path.is_absolute():
8485
base_path = path.joinpath(base_path)
8586
else:
8687
base_path = path.joinpath(min(summary["base_graphs"].keys(), key=lambda k: summary["base_graphs"][k]["order"]))
87-
88-
88+
8989
graph = pickle.loads(base_path.joinpath("base_graph.pickle").read_bytes())
9090
if num_nodes < graph.order():
9191
raise ValueError(

0 commit comments

Comments
 (0)