Skip to content

Commit ec6e892

Browse files
committed
code cleanup
1 parent ec2007d commit ec6e892

4 files changed

Lines changed: 74 additions & 66 deletions

File tree

wfcommons/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,15 @@
88
# the Free Software Foundation, either version 3 of the License, or
99
# (at your option) any later version.
1010

11-
1211
from .version import __version__
1312

1413
__author__ = 'WfCommons Team - https://wfcommons.org'
1514
__credits__ = 'University of Southern California, University of Hawaii at Manoa'
1615

1716
import logging
1817

18+
from .wfchef.recipes import BlastRecipe, BwaRecipe, CyclesRecipe, EpigenomicsRecipe, GenomeRecipe, MontageRecipe, \
19+
SeismologyRecipe, SoykbRecipe, SrasearchRecipe
1920
from .wfgen import WorkflowGenerator
2021
from .trace import Trace, TraceAnalyzer, TraceElement
2122

wfcommons/wfchef/duplicate.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import pathlib
1212
import json
13-
import pickle
13+
import pickle
1414
import networkx as nx
1515
from typing import Set, Optional, List, Union, Dict
1616
from uuid import uuid4
@@ -19,16 +19,17 @@
1919
from .utils import draw
2020
import random
2121
import argparse
22-
import pandas as pd
22+
import pandas as pd
2323
from functools import partial
2424

2525
this_dir = pathlib.Path(__file__).resolve().parent
2626

27+
2728
class NoMicrostructuresError(Exception):
28-
pass
29+
pass
30+
2931

3032
def duplicate_nodes(graph: nx.DiGraph, nodes: Set[str]) -> Dict:
31-
3233
"""
3334
Replicates nodes of a graph.
3435
@@ -46,7 +47,7 @@ def duplicate_nodes(graph: nx.DiGraph, nodes: Set[str]) -> Dict:
4647
graph.add_node(new_node, **graph.nodes[node])
4748
nx.set_node_attributes(graph, {new_node: node}, "duplicate_of")
4849
new_nodes[node] = new_node
49-
50+
5051
for node, new_node in new_nodes.items():
5152
for parent, _ in graph.in_edges(node):
5253
if parent in new_nodes:
@@ -59,11 +60,12 @@ def duplicate_nodes(graph: nx.DiGraph, nodes: Set[str]) -> Dict:
5960
graph.add_edge(new_node, new_nodes[child])
6061
else:
6162
graph.add_edge(new_node, child)
62-
63+
6364
return new_nodes
6465

65-
def duplicate(path: pathlib.Path,
66-
base: Union[str, pathlib.Path],
66+
67+
def duplicate(path: pathlib.Path,
68+
base: Union[str, pathlib.Path],
6769
num_nodes: int) -> nx.DiGraph:
6870
"""
6971
Attaches replicated nodes to base graph.
@@ -90,29 +92,29 @@ def duplicate(path: pathlib.Path,
9092

9193
graph = pickle.loads(base_path.joinpath("base_graph.pickle").read_bytes())
9294
if num_nodes < graph.order():
93-
raise ValueError(f"Cannot create synthentic graph with {num_nodes} nodes from base graph with {graph.order()} nodes")
95+
raise ValueError(
96+
f"Cannot create synthentic graph with {num_nodes} nodes from base graph with {graph.order()} nodes")
9497

9598
all_microstructures = json.loads(base_path.joinpath("microstructures.json").read_text())
9699
microstructures, freqs = map(list, zip(*[(ms, ms["frequency"]) for ms_hash, ms in all_microstructures.items()]))
97-
100+
98101
p: List[float] = (np.array(freqs) / np.sum(freqs)).tolist()
99102
while graph.order() < num_nodes and microstructures:
100103
i = random.choice(range(len(microstructures)))
101104
ms = microstructures[i]
102105
while ms["nodes"]:
103106
j = random.choice(range(len(ms["nodes"])))
104-
structure = ms["nodes"][j]
107+
structure = ms["nodes"][j]
105108
if graph.order() + len(structure) > num_nodes:
106109
del ms["nodes"][j]
107110
else:
108111
break
109-
110-
if not ms["nodes"]: # delete microstructure
112+
113+
if not ms["nodes"]: # delete microstructure
111114
del microstructures[i]
112115
del p[i]
113116
continue
114-
117+
115118
duplicate_nodes(graph, structure)
116119

117120
return graph
118-

wfcommons/wfchef/find_microstructures.py

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,15 @@
1616
import json
1717
from itertools import product
1818
from networkx.readwrite import read_gpickle, write_gpickle
19-
import numpy as np
19+
import numpy as np
2020
from itertools import chain, combinations
2121
import argparse
2222
from .utils import create_graph, string_hash, type_hash, combine_hashes, annotate, draw
23-
import math
23+
import math
2424

2525
this_dir = pathlib.Path(__file__).resolve().parent
2626

27+
2728
def comb(n: int, k: int) -> int:
2829
"""
2930
Calculates the combination of two integers.
@@ -38,11 +39,12 @@ def comb(n: int, k: int) -> int:
3839
"""
3940
return math.factorial(n) / (math.factorial(k) * math.factorial(n - k))
4041

42+
4143
class ImbalancedMicrostructureError(Exception):
42-
pass
44+
pass
45+
4346

4447
def get_children(graph: nx.DiGraph, node: str) -> List[str]:
45-
4648
"""
4749
Gets the children of a node.
4850
@@ -55,9 +57,9 @@ def get_children(graph: nx.DiGraph, node: str) -> List[str]:
5557
:rtype: List[str].
5658
"""
5759
return [child for _, child in graph.out_edges(node)]
58-
60+
61+
5962
def get_parents(graph: nx.DiGraph, node: str) -> List[str]:
60-
6163
"""
6264
Gets the parents of a node.
6365
@@ -71,8 +73,8 @@ def get_parents(graph: nx.DiGraph, node: str) -> List[str]:
7173
"""
7274
return [parent for parent, _ in graph.in_edges(node)]
7375

76+
7477
def get_relatives(graph: nx.DiGraph, node: str) -> Set[str]:
75-
7678
"""
7779
Gets all node's relatives (children and parents).
7880
@@ -86,6 +88,7 @@ def get_relatives(graph: nx.DiGraph, node: str) -> Set[str]:
8688
"""
8789
return set(chain(get_children(graph, node), get_parents(graph, node)))
8890

91+
8992
def find_microstructure(graph: nx.DiGraph, n1: str, n2: str):
9093
"""
9194
Detects a pattern (microstructure).
@@ -121,7 +124,6 @@ def find_microstructure(graph: nx.DiGraph, n1: str, n2: str):
121124
common_friends.update(n1_new_friends.intersection(n2_new_friends))
122125
all_friends.update(n1_new_friends.union(n2_new_friends))
123126

124-
125127
n1_new_friends -= common_friends
126128
n2_new_friends -= common_friends
127129

@@ -130,6 +132,7 @@ def find_microstructure(graph: nx.DiGraph, n1: str, n2: str):
130132

131133
return n1_friends, n2_friends, common_friends, all_friends
132134

135+
133136
def find_microstructures(graph: nx.DiGraph, verbose: bool = False):
134137
"""
135138
Detects the patterns (microstructures) that are used for replication and graph expansion.
@@ -142,7 +145,7 @@ def find_microstructures(graph: nx.DiGraph, verbose: bool = False):
142145
:return: patterns (microstructures)
143146
:rtype: Set[str].
144147
"""
145-
148+
146149
if verbose:
147150
print("Sorting nodes by type hash and parent")
148151
nodes_by_type_hash: Dict[str, Set[str]] = {}
@@ -174,9 +177,9 @@ def find_microstructures(graph: nx.DiGraph, verbose: bool = False):
174177

175178
return microstructures
176179

180+
177181
def sort_graphs(workflow_path: Union[pathlib.Path],
178182
verbose: bool = False) -> List[nx.DiGraph]:
179-
180183
"""
181184
Sort graphs in ascending order of number of tasks.
182185
@@ -197,7 +200,7 @@ def sort_graphs(workflow_path: Union[pathlib.Path],
197200
annotate(graph)
198201
graph.graph["name"] = path.stem
199202
graphs.append(graph)
200-
203+
201204
if not graphs:
202205
raise ValueError(f"No graphs found in {workflow_path}")
203206

@@ -207,22 +210,23 @@ def sort_graphs(workflow_path: Union[pathlib.Path],
207210
sorted_graphs = sorted(graphs, key=lambda graph: len(graph.nodes))
208211
return sorted_graphs
209212

210-
def save_microstructures(workflow_path: Union[pathlib.Path],
211-
savedir: pathlib.Path,
212-
verbose: bool = False,
213+
214+
def save_microstructures(workflow_path: Union[pathlib.Path],
215+
savedir: pathlib.Path,
216+
verbose: bool = False,
213217
img_type: Optional[str] = 'png',
214218
cutoff: int = 4000,
215219
highlight_all_instances: bool = False) -> List[nx.DiGraph]:
216220
summary = {
217221
"frequencies": {},
218222
"base_graphs": {}
219223
}
220-
224+
221225
for graph in sort_graphs(workflow_path, verbose):
222226
if graph.order() > cutoff:
223227
print(f'This and the next workflows have more than {cutoff} tasks')
224228
break
225-
229+
226230
if verbose:
227231
print(f"Running for {graph.name}")
228232
g_savedir = savedir.joinpath(graph.name)
@@ -239,7 +243,7 @@ def save_microstructures(workflow_path: Union[pathlib.Path],
239243
base_graph_image_path = g_savedir.joinpath(f"base_graph")
240244
if verbose:
241245
print(f"Drawing base graph to {base_graph_image_path}")
242-
draw(graph, close=True, legend=False, extension= img_type, save=str(base_graph_image_path))
246+
draw(graph, close=True, legend=False, extension=img_type, save=str(base_graph_image_path))
243247

244248
if verbose:
245249
print("Finding microstructures")
@@ -259,18 +263,17 @@ def save_microstructures(workflow_path: Union[pathlib.Path],
259263
if img_type:
260264
print(f"Drawing {ms_name}")
261265
draw(
262-
graph,
266+
graph,
263267
subgraph=list(instances)[0] if not highlight_all_instances else set.union(*instances),
264-
with_labels=False,
268+
with_labels=False,
265269
extension=img_type,
266-
save=str(g_savedir.joinpath(ms_name)),
270+
save=str(g_savedir.joinpath(ms_name)),
267271
close=True
268272
)
269-
273+
270274
if verbose:
271275
print()
272-
273-
g_savedir.joinpath("microstructures").with_suffix(".json").write_text(json.dumps(mdatas, indent=2))
274276

275-
savedir.joinpath("summary").with_suffix(".json").write_text(json.dumps(summary, indent=2))
276-
277+
g_savedir.joinpath("microstructures").with_suffix(".json").write_text(json.dumps(mdatas, indent=2))
278+
279+
savedir.joinpath("summary").with_suffix(".json").write_text(json.dumps(summary, indent=2))

0 commit comments

Comments
 (0)