Skip to content

Commit fe10083

Browse files
committed
Adds the pathway organizer back to the project
1 parent 209179f commit fe10083

12 files changed

Lines changed: 468 additions & 2 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
.coverage
22
.env/
3+
.pypi-env/
34
.idea/
45
__pycache__/
56
src/kegg_pull.egg-info/

dev/test_main.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import kegg_pull.rest_cli as r_cli
1010
import kegg_pull.pull_cli as p_cli
1111
import kegg_pull.map_cli as map_cli
12+
import kegg_pull.pathway_organizer_cli as po_cli
1213
import dev.utils as u
1314

1415

@@ -19,7 +20,7 @@ def test_help(mocker):
1920
delimiter: str = '-'*80
2021
expected_print_call_args = [
2122
(m.__doc__,), (delimiter,), (p_cli.__doc__,), (delimiter,), (ei_cli.__doc__,), (delimiter,), (map_cli.__doc__,),
22-
(delimiter,), (r_cli.__doc__,)]
23+
(delimiter,), (po_cli.__doc__,), (delimiter,), (r_cli.__doc__,)]
2324
u.assert_call_args(function_mock=print_mock, expected_call_args_list=expected_print_call_args, do_kwargs=False)
2425
for help_arg in (['--help'], ['-h'], []):
2526
help_args = ['kegg_pull']
@@ -198,3 +199,10 @@ def test_map(mocker, print_output: bool, args: list, stdin_mock_str: str, expect
198199
json_output=True)
199200
if stdin_mock:
200201
stdin_mock.assert_called_once_with()
202+
203+
204+
def test_pathway_organizer(mocker, print_output: bool):
    """Run the ``pathway-organizer`` subcommand end to end and compare against the stored JSON.

    The command line uses ``--tln=Metabolism`` and ``--fn=Global and overview maps``; the result is checked by
    ``_test_output`` against ``metabolic-pathways.json`` as JSON output.

    :param mocker: The pytest-mock fixture forwarded to ``_test_output``.
    :param print_output: Forwarded to ``_test_output`` unchanged.
    """
    expected_output_path = 'dev/test_data/pathway-organizer/metabolic-pathways.json'
    cli_args = ['kegg_pull', 'pathway-organizer']
    cli_args += ['--tln=Metabolism', '--fn=Global and overview maps']
    _test_output(
        mocker=mocker, args=cli_args, expected_output=expected_output_path, print_output=print_output, json_output=True)

dev/test_pathway_organizer.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# noinspection PyPackageRequirements
2+
import pytest as pt
3+
import json
4+
import typing as t
5+
import kegg_pull.pathway_organizer as po
6+
import dev.utils as u
7+
8+
9+
def test_load_from_kegg_warning(mocker, caplog):
    """Check that an unrecognized top level node name is warned about and ignored.

    With only an invalid name requested, the test expects one KEGG ``get`` call for ``br:br08901``, the warning
    message below, a single ``_parse_hierarchy`` call with no raw nodes, and an empty ``hierarchy_nodes`` mapping.

    :param mocker: The pytest-mock fixture used to patch the KEGG request and spy on the parser.
    :param caplog: The pytest log-capture fixture inspected by ``u.assert_warning``.
    """
    expected_warning = (
        'Top level node name "invalid-top-level-node" is not recognized and will be ignored. Valid values are: "Cellular '
        'Processes, Drug Development, Environmental Information Processing, Genetic Information Processing, '
        'Human Diseases, Metabolism, Organismal Systems"')
    kegg_get_mock = _get_get_mock(mocker=mocker)
    # The spy must be installed before loading so that the call made during loading is recorded.
    hierarchy_parser_spy = mocker.spy(po.PathwayOrganizer, '_parse_hierarchy')
    organizer = po.PathwayOrganizer.load_from_kegg(top_level_nodes={'invalid-top-level-node'})
    kegg_get_mock.assert_called_once_with(entry_ids=['br:br08901'], entry_field='json')
    u.assert_warning(message=expected_warning, caplog=caplog)
    hierarchy_parser_spy.assert_called_once_with(organizer, level=1, raw_hierarchy_nodes=[], parent_name=None)
    assert organizer.hierarchy_nodes == {}
20+
21+
22+
def _get_get_mock(mocker):
    """Patch ``KEGGrest.get`` (as referenced from ``kegg_pull.pathway_organizer``) with a file-backed stand-in.

    The stand-in ignores its keyword arguments and returns a ``MagicMock`` whose ``text_body`` attribute holds the
    contents of the stored ``pathway-hierarchy.json`` fixture.

    :param mocker: The pytest-mock fixture used to build the response mock and install the patch.
    :return: The object returned by ``mocker.patch``, which wraps the stand-in so that calls can be asserted on.
    """
    def get_mock(**_) -> mocker.MagicMock:
        # Decode the JSON fixture as UTF-8 explicitly instead of relying on the platform's locale encoding.
        with open('dev/test_data/pathway-organizer/pathway-hierarchy.json', 'r', encoding='utf-8') as file_:
            text_body_mock: str = file_.read()
        kegg_response_mock = mocker.MagicMock(text_body=text_body_mock)
        return kegg_response_mock
    return mocker.patch('kegg_pull.pathway_organizer.r.KEGGrest.get', wraps=get_mock)
29+
30+
31+
# Cases for test_load_from_kegg, each a (top_level_nodes, filter_nodes, hierarchy_nodes_file) tuple.
test_load_from_kegg_data = [
    # No restriction on either argument.
    (None, None, 'all-nodes.json'),
    # Only top level nodes specified.
    (
        {'Metabolism', 'Genetic Information Processing'},
        None,
        'top-level-nodes.json',
    ),
    # Only filter nodes specified.
    (
        None,
        {'Genetic Information Processing', 'Global and overview maps', '00010 Glycolysis / Gluconeogenesis'},
        'filter-nodes.json',
    ),
]
35+
36+
37+
@pt.mark.parametrize('top_level_nodes,filter_nodes,hierarchy_nodes_file', test_load_from_kegg_data)
def test_load_from_kegg(mocker, top_level_nodes: set, filter_nodes: set, hierarchy_nodes_file: str):
    """Load the hierarchy through the mocked KEGG request and compare it with a stored expectation.

    :param mocker: The pytest-mock fixture used to patch the KEGG request.
    :param top_level_nodes: Names passed as ``top_level_nodes``; when given, they must equal the level-1 node keys.
    :param filter_nodes: Names passed as ``filter_nodes``; when given, none may appear as a key of the result.
    :param hierarchy_nodes_file: Name of the fixture file holding the expected ``hierarchy_nodes`` mapping.
    """
    kegg_get_mock = _get_get_mock(mocker=mocker)
    organizer = po.PathwayOrganizer.load_from_kegg(top_level_nodes=top_level_nodes, filter_nodes=filter_nodes)
    kegg_get_mock.assert_called_once_with(entry_ids=['br:br08901'], entry_field='json')
    loaded_nodes = organizer.hierarchy_nodes
    if top_level_nodes is not None:
        level_one_keys = set()
        for node_key, node_val in loaded_nodes.items():
            if node_val['level'] == 1:
                level_one_keys.add(node_key)
        assert level_one_keys == top_level_nodes
    if filter_nodes is not None:
        assert not any(filter_node in loaded_nodes for filter_node in filter_nodes)
    assert loaded_nodes == _get_expected_hierarchy_nodes(hierarchy_nodes_file=hierarchy_nodes_file)
50+
51+
52+
def _get_expected_hierarchy_nodes(hierarchy_nodes_file: str) -> dict:
    """Load an expected hierarchy-nodes mapping from the pathway organizer test data directory.

    :param hierarchy_nodes_file: File name (not a path) inside ``dev/test_data/pathway-organizer/``.
    :return: The object parsed from the JSON file.
    """
    # JSON fixtures are decoded as UTF-8 explicitly so the result does not depend on the platform's locale encoding.
    with open(f'dev/test_data/pathway-organizer/{hierarchy_nodes_file}', encoding='utf-8') as file:
        expected_hierarchy_nodes: dict = json.load(file)
    return expected_hierarchy_nodes
56+
57+
58+
def test_save_to_json(mocker, json_file_path: str):
    """Check that ``save_to_json`` writes the organizer's ``hierarchy_nodes`` to the given path.

    :param mocker: The pytest-mock fixture passed to ``u.mock_non_instantiable``.
    :param json_file_path: Path the organizer is saved to and that ``u.test_save_to_json`` then inspects.
    """
    nodes_to_save = _get_expected_hierarchy_nodes(hierarchy_nodes_file='top-level-nodes.json')
    # Presumably lifts the "cannot be instantiated outside of its module" restriction so the class can be built here.
    u.mock_non_instantiable(mocker=mocker)
    organizer = po.PathwayOrganizer()
    organizer.hierarchy_nodes = nodes_to_save
    organizer.save_to_json(file_path=json_file_path)
    u.test_save_to_json(json_file_path=json_file_path, expected_saved_json_object=organizer.hierarchy_nodes)
64+
65+
66+
def test_load_from_json(json_file_path: str):
    """Check that ``PathwayOrganizer.load_from_json`` restores ``hierarchy_nodes`` from a saved mapping.

    The same fixture mapping is used both as the object handed to ``u.test_load_from_json`` for saving and as the
    expected value of the loaded organizer's ``hierarchy_nodes`` attribute.

    :param json_file_path: Path of the JSON file used for the round trip.
    """
    nodes = _get_expected_hierarchy_nodes(hierarchy_nodes_file='top-level-nodes.json')
    load_method = po.PathwayOrganizer.load_from_json
    u.test_load_from_json(
        json_file_path=json_file_path, saved_object=nodes, method=load_method, expected_loaded_object=nodes,
        loaded_object_attribute='hierarchy_nodes')
71+
72+
73+
# JSON objects that test_invalid_load_from_json expects PathwayOrganizer.load_from_json to reject.
test_invalid_load_from_json_data = [
    # Values that are not a mapping of node keys to node dictionaries.
    1,
    'a',
    [],
    [1, 2],
    ['a', 'b'],
    [[], []],
    [[1], [2]],
    [['a'], ['b']],
    [{}, {}],
    [{'a': {}, 'b': []}],
    {},
    {'a': []},
    {'a': {}},
    # Node dictionaries with missing or unexpected properties.
    {'a': {'b': 1}},
    {'a': {'name': 'b'}},
    {'a': {'level': 1, 'b': 'c'}},
    {
        'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': None, 'entry_id': 'x'},
        '': {'name': 'b', 'level': 1, 'parent': 'c', 'children': ['d'], 'entry_id': None},
    },
    {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': None, 'entry_id': None, 'x': 'y'}},
    # Variations of the "name" property.
    {'a': {'name': 2, 'level': 1, 'parent': 'c', 'children': None, 'entry_id': None}},
    {'a': {'name': '', 'level': 1, 'parent': 'c', 'children': None, 'entry_id': None}},
    {'a': {'name': None, 'level': 1, 'parent': 'c', 'children': None, 'entry_id': None}},
    # Variations of the "level" property.
    {'a': {'name': 'b', 'level': '1', 'parent': 'c', 'children': None, 'entry_id': None}},
    {'a': {'name': 'b', 'level': None, 'parent': 'c', 'children': None, 'entry_id': None}},
    {'a': {'name': 'b', 'level': 0, 'parent': 'c', 'children': None, 'entry_id': None}},
    # Variations of the "parent" property.
    {'a': {'name': 'b', 'level': 1, 'parent': '', 'children': None, 'entry_id': None}},
    {'a': {'name': 'b', 'level': 1, 'parent': 2, 'children': None, 'entry_id': None}},
    # Variations of the "children" property.
    {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': [], 'entry_id': None}},
    {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': [1], 'entry_id': None}},
    {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': [''], 'entry_id': None}},
    # Variations of the "entry_id" property.
    {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': ['a'], 'entry_id': 1}},
    {'a': {'name': 'b', 'level': 1, 'parent': 'c', 'children': ['a'], 'entry_id': ''}},
]
92+
93+
94+
@pt.mark.parametrize('invalid_json_object', test_invalid_load_from_json_data)
def test_invalid_load_from_json(caplog, json_file_path: str, invalid_json_object: list | dict | int | float | str):
    """Check that loading a malformed hierarchy-nodes JSON object reports the file as corrupted.

    :param caplog: The pytest log-capture fixture forwarded to ``u.test_invalid_load_from_json``.
    :param json_file_path: Path of the JSON file; it also appears in the expected error message.
    :param invalid_json_object: One malformed object from ``test_invalid_load_from_json_data``.
    """
    message_parts = [
        f'Failed to load the hierarchy nodes. The pathway organizer JSON file at {json_file_path} is ',
        'corrupted and will need to be re-created.']
    u.test_invalid_load_from_json(
        json_file_path=json_file_path, invalid_json_object=invalid_json_object, method=po.PathwayOrganizer.load_from_json,
        expected_error_message=''.join(message_parts), caplog=caplog)

dev/test_pathway_organizer_cli.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# noinspection PyPackageRequirements
2+
import pytest as pt
3+
import json
4+
import kegg_pull.pathway_organizer as po
5+
import kegg_pull.pathway_organizer_cli as po_cli
6+
import dev.utils as u
7+
8+
9+
def test_help(mocker):
    """Delegate to ``u.assert_help`` for the ``pathway-organizer`` subcommand of ``pathway_organizer_cli``.

    :param mocker: The pytest-mock fixture forwarded to ``u.assert_help``.
    """
    help_kwargs = {'mocker': mocker, 'module': po_cli, 'subcommand': 'pathway-organizer'}
    u.assert_help(**help_kwargs)
11+
12+
13+
# Dotted path of the method that the CLI tests hand to the dev.utils helpers as ``method``.
method = 'pathway_organizer_cli.po.PathwayOrganizer.load_from_kegg'

# Each case is (argv after the program name, expected keyword arguments for the method, mocked standard input or None).
# A value of "-" for --tln or --fn is paired with a stdin string in every case below.
test_data = [
    (
        ['pathway-organizer', '--tln=-', '--fn=-'],
        {'top_level_nodes': {'node1'}, 'filter_nodes': {'node2', 'node3'}},
        ' node1\n---\nnode2\t\nnode3 ',
    ),
    (
        ['pathway-organizer', '--tln=-', '--fn=node2,node3,node4'],
        {'top_level_nodes': {'node1', 'node5'}, 'filter_nodes': {'node2', 'node3', 'node4'}},
        '\nnode1\n node5\n',
    ),
    (
        ['pathway-organizer', '--tln=node1', '--fn=-'],
        {'top_level_nodes': {'node1'}, 'filter_nodes': {'node2'}},
        'node2',
    ),
    (
        ['pathway-organizer', '--tln=node1,node2', '--fn=node3'],
        {'top_level_nodes': {'node1', 'node2'}, 'filter_nodes': {'node3'}},
        None,
    ),
    (
        ['pathway-organizer', '--tln=-'],
        {'top_level_nodes': {'node1', 'node2', 'node3'}, 'filter_nodes': None},
        'node1\nnode2\nnode3',
    ),
    (
        ['pathway-organizer', '--fn=-'],
        {'top_level_nodes': None, 'filter_nodes': {'node1', 'node2', 'node3'}},
        'node1\nnode2\nnode3',
    ),
    (
        ['pathway-organizer', '--tln=node1,node2,node3'],
        {'top_level_nodes': {'node1', 'node2', 'node3'}, 'filter_nodes': None},
        None,
    ),
    (
        ['pathway-organizer', '--fn=node1,node2,node3'],
        {'top_level_nodes': None, 'filter_nodes': {'node1', 'node2', 'node3'}},
        None,
    ),
    (
        ['pathway-organizer'],
        {'top_level_nodes': None, 'filter_nodes': None},
        None,
    ),
]
26+
27+
28+
@pt.mark.parametrize('args,kwargs,stdin_mock', test_data)
def test_print(mocker, args: list, kwargs: dict, stdin_mock: str):
    """Run one CLI case through ``u.test_print`` with the mocked organizer as the method's return value.

    :param mocker: The pytest-mock fixture.
    :param args: Command line arguments passed as ``argv_mock``.
    :param kwargs: Keyword arguments the patched method is expected to receive.
    :param stdin_mock: Text supplied as standard input, or ``None``.
    """
    organizer_and_output = _get_mock_pathway_org_and_expected_output(mocker=mocker)
    organizer_mock = organizer_and_output[0]
    json_text = organizer_and_output[1]
    u.test_print(
        mocker=mocker, argv_mock=args, stdin_mock=stdin_mock, method=method, method_return_value=organizer_mock,
        method_kwargs=kwargs, module=po_cli, expected_output=json_text)
34+
35+
36+
def _get_mock_pathway_org_and_expected_output(mocker):
    """Build a ``PathwayOrganizer`` holding one hand-written node, plus that mapping serialized as JSON.

    :param mocker: The pytest-mock fixture passed to ``u.mock_non_instantiable``.
    :return: A ``(pathway organizer, expected output)`` pair; the output is ``json.dumps`` of the nodes with ``indent=2``.
    """
    single_node = {'name': 'b', 'level': 1, 'parent': 'c', 'children': ['a'], 'entry_id': 'd'}
    nodes: po.HierarchyNodes = {'a': single_node}
    # Presumably lifts the "cannot be instantiated outside of its module" restriction so the class can be built here.
    u.mock_non_instantiable(mocker=mocker)
    organizer = po.PathwayOrganizer()
    organizer.hierarchy_nodes = nodes
    return organizer, json.dumps(nodes, indent=2)
43+
44+
45+
@pt.mark.parametrize('args,kwargs,stdin_mock', test_data)
def test_file(mocker, args: list, kwargs: dict, stdin_mock: str, output_file: str):
    """Run one CLI case through ``u.test_file`` with the mocked organizer as the method's return value.

    :param mocker: The pytest-mock fixture.
    :param args: Command line arguments passed as ``argv_mock``.
    :param kwargs: Keyword arguments the patched method is expected to receive.
    :param stdin_mock: Text supplied as standard input, or ``None``.
    :param output_file: Output file value forwarded to ``u.test_file``.
    """
    organizer_and_output = _get_mock_pathway_org_and_expected_output(mocker=mocker)
    organizer_mock = organizer_and_output[0]
    json_text = organizer_and_output[1]
    u.test_file(
        mocker=mocker, argv_mock=args, output_file=output_file, stdin_mock=stdin_mock, method=method,
        method_return_value=organizer_mock, method_kwargs=kwargs, module=po_cli, expected_output=json_text)
51+
52+
53+
@pt.mark.parametrize('args,kwargs,stdin_mock', test_data)
def test_zip_archive(mocker, args: list, kwargs: dict, stdin_mock: str, zip_archive_data: tuple):
    """Run one CLI case through ``u.test_zip_archive`` with the mocked organizer as the method's return value.

    :param mocker: The pytest-mock fixture.
    :param args: Command line arguments passed as ``argv_mock``.
    :param kwargs: Keyword arguments the patched method is expected to receive.
    :param stdin_mock: Text supplied as standard input, or ``None``.
    :param zip_archive_data: Value forwarded unchanged to ``u.test_zip_archive``.
    """
    organizer_and_output = _get_mock_pathway_org_and_expected_output(mocker=mocker)
    organizer_mock = organizer_and_output[0]
    json_text = organizer_and_output[1]
    u.test_zip_archive(
        mocker=mocker, argv_mock=args, zip_archive_data=zip_archive_data, stdin_mock=stdin_mock, method=method,
        method_return_value=organizer_mock, method_kwargs=kwargs, module=po_cli, expected_output=json_text)

dev/test_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import dev.utils as u
66
import kegg_pull.pull as p
77
import kegg_pull.rest as r
8+
import kegg_pull.pathway_organizer as po
89

910

1011
@pt.mark.parametrize('comma_separated_list', [',,', ',', ''])
@@ -33,7 +34,7 @@ def test_get_range_values_exception():
3334

3435

3536
@pt.mark.parametrize(
36-
'NonInstantiable,kwargs', [(p.PullResult, {}), (r.KEGGresponse, {'status': None, 'kegg_url': None})])
37+
'NonInstantiable,kwargs', [(p.PullResult, {}), (r.KEGGresponse, {'status': None, 'kegg_url': None}), (po.PathwayOrganizer, {})])
3738
def test_non_instantiable(NonInstantiable: type, kwargs: dict):
3839
expected_error_message = f'The class "{NonInstantiable.__name__}" cannot be instantiated outside of its module.'
3940
with pt.raises(RuntimeError) as error:

docs/api.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ API
1919
:members:
2020
:undoc-members:
2121

22+
.. automodule:: kegg_pull.pathway_organizer
23+
:members:
24+
:undoc-members:
25+
2226
.. automodule:: kegg_pull.rest
2327
:members:
2428
:undoc-members:

docs/cli.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,15 @@ If the ``--unsuccessful-threshold`` option is set and surpassed, an ``aborted-pu
7979
:end-before: """
8080
:language: none
8181

82+
.. include:: ../src/kegg_pull/pathway_organizer.py
83+
:start-after: """
84+
:end-before: """
85+
86+
.. literalinclude:: ../src/kegg_pull/pathway_organizer_cli.py
87+
:start-at: Usage:
88+
:end-before: """
89+
:language: none
90+
8291
.. include:: ../src/kegg_pull/rest.py
8392
:start-after: """
8493
:end-before: """

docs/tutorial.rst

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,24 @@ operation or “conv” operation into dictionaries usable in python code.
320320
{'cpd:C00001': {'pubchem:3303'}, 'cpd:C00002': {'pubchem:3304'}}
321321
322322
323+
Pathway Organizer
324+
~~~~~~~~~~~~~~~~~
325+
326+
The ``pathway_organizer`` module flattens a Brite hierarchy into a
327+
mapping of the IDs of its nodes to information about those nodes.
328+
329+
.. code:: python3
330+
331+
import kegg_pull.pathway_organizer as po
332+
pathway_org = po.PathwayOrganizer.load_from_kegg()
333+
print(pathway_org.hierarchy_nodes['Metabolism'])
334+
335+
336+
.. parsed-literal::
337+
338+
{'name': 'Metabolism', 'level': 1, 'parent': None, 'children': ['Amino acid metabolism', 'Biosynthesis of other secondary metabolites', 'Carbohydrate metabolism', 'Chemical structure transformation maps', 'Energy metabolism', 'Global and overview maps', 'Glycan biosynthesis and metabolism', 'Lipid metabolism', 'Metabolism of cofactors and vitamins', 'Metabolism of other amino acids', 'Metabolism of terpenoids and polyketides', 'Nucleotide metabolism', 'Xenobiotics biodegradation and metabolism'], 'entry_id': None}
339+
340+
323341
Rest API
324342
~~~~~~~~
325343

@@ -690,6 +708,29 @@ map
690708
"cpd:C00084",
691709
692710
711+
pathway-organizer
712+
~~~~~~~~~~~~~~~~~
713+
714+
.. code:: none
715+
716+
% kegg_pull pathway-organizer --tln=Metabolism --fn="Global and overview maps,Carbohydrate metabolism" --output=hierarchy-nodes.json
717+
% head hierarchy-nodes.json
718+
719+
720+
.. parsed-literal::
721+
722+
{
723+
"path:map00190": {
724+
"name": "00190 Oxidative phosphorylation",
725+
"level": 3,
726+
"parent": "Energy metabolism",
727+
"children": null,
728+
"entry_id": "path:map00190"
729+
},
730+
"path:map00195": {
731+
"name": "00195 Photosynthesis",
732+
733+
693734
rest
694735
~~~~
695736

src/kegg_pull/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
88
``map``
99
10+
``pathway_organizer``
11+
1012
``rest``
1113
1214
``kegg_url``

src/kegg_pull/__main__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
kegg_pull pull ... Pull, separate, and store an arbitrary number of KEGG entries to the local file system.
77
kegg_pull entry-ids ... Obtain a list of KEGG entry IDs.
88
kegg_pull map ... Obtain a mapping of entry IDs (KEGG or outside databases) to the IDs of related entries.
9+
kegg_pull pathway-organizer ... Creates a flattened version of a pathways Brite hierarchy.
910
kegg_pull rest ... Executes one of the KEGG REST API operations.
1011
"""
1112
import sys
1213
from . import __version__
1314
from . import pull_cli as p_cli
1415
from . import entry_ids_cli as ei_cli
1516
from . import map_cli as map_cli
17+
from . import pathway_organizer_cli as po_cli
1618
from . import rest_cli as r_cli
1719

1820

@@ -24,6 +26,8 @@ def main() -> None:
2426
ei_cli.main()
2527
elif first_arg == 'map':
2628
map_cli.main()
29+
elif first_arg == 'pathway-organizer':
30+
po_cli.main()
2731
elif first_arg == 'rest':
2832
r_cli.main()
2933
elif first_arg == '--full-help':
@@ -36,6 +40,8 @@ def main() -> None:
3640
print(separator)
3741
print(map_cli.__doc__)
3842
print(separator)
43+
print(po_cli.__doc__)
44+
print(separator)
3945
print(r_cli.__doc__)
4046
elif first_arg == '--version' or first_arg == '-v':
4147
print(__version__)

0 commit comments

Comments
 (0)