Skip to content

Commit c5de352

Browse files
committed
Adds an add_glycans and add_drugs option to conv operations
1 parent a4e016e commit c5de352

1 file changed

Lines changed: 46 additions & 20 deletions

File tree

src/kegg_pull/map.py

Lines changed: 46 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ def database_link(
3737
target_database=target_database)
3838
mapping = _add_glycans_or_drugs(
3939
mapping=mapping, source_database=source_database, target_database=target_database,
40-
add_glycans=add_glycans, add_drugs=add_drugs, kegg_rest=kegg_rest)
40+
add_glycans=add_glycans, add_drugs=add_drugs, kegg_rest=kegg_rest, source_op='link', target_op='link')
4141
return mapping
4242

4343

@@ -78,7 +78,7 @@ def _deduplicate_pathway_ids(mapping: KEGGmapping, deduplicate: bool, source_dat
7878
""" If requested, removes entry IDs corresponding to duplicate pathway map entries (different ID, same entry).
7979
8080
:param mapping: The mapping to deduplicate.
81-
:param deduplicate: Whether or not to deduplicate.
81+
:param deduplicate: Whether to deduplicate.
8282
:param source_database: The name of the source database of the mapping to validate.
8383
:param target_database: The name of the target database of the mapping to validate.
8484
:raises ValueError: Raised if deduplicate is True but neither source_database nor target_database is "pathway".
@@ -123,15 +123,17 @@ def _process_mapping(
123123

124124

125125
def _add_glycans_or_drugs(
126-
mapping: KEGGmapping, source_database: str, target_database: str, add_glycans: bool, add_drugs: bool,
127-
kegg_rest: r.KEGGrest | None = None) -> KEGGmapping:
126+
mapping: KEGGmapping, source_database: str, target_database: str, add_glycans: bool, add_drugs: bool, source_op: t.Literal['link', 'conv'],
127+
target_op: t.Literal['link', 'conv'], kegg_rest: r.KEGGrest | None = None) -> KEGGmapping:
128128
""" If requested, adds the corresponding compound IDs of equivalent glycan and/or drug entries to a mapping (assuming mapping from "compound" to some target database).
129129
130130
:param mapping: The mapping to add the IDs of compound-equivalents which cross-reference the target database.
131131
:param source_database: Logs a warning if not equal to "compound" and if the target database name is also not equal to "compound".
132132
:param target_database: The database with IDs to which compound IDs are mapped.
133133
:param add_glycans: Whether to add the corresponding compound IDs of KEGG glycan entries.
134134
:param add_drugs: Whether to add the corresponding compound IDs of KEGG drug entries.
135+
:param source_op: The REST operation mapping the compound database to glycan/drug.
136+
:param target_op: The REST operation mapping the glycan/drug database to the target database.
135137
:param kegg_rest: The KEGGrest object to perform the "link" or "conv" operation(s). If None, one is created with the default parameters.
136138
:return: The dictionary.
137139
:raises RuntimeError: Raised if the request to the KEGG REST API fails or times out.
@@ -147,12 +149,12 @@ def _add_glycans_or_drugs(
147149
def add_glycans_or_drugs(mapping: KEGGmapping, target_database: str) -> KEGGmapping:
148150
if add_glycans:
149151
glycan_to_database = indirect_link(
150-
source_database='compound', intermediate_database='glycan', target_database=target_database,
152+
source_database='compound', intermediate_database='glycan', target_database=target_database, source_op=source_op, target_op=target_op,
151153
kegg_rest=kegg_rest)
152154
mapping = combine_mappings(mapping1=mapping, mapping2=glycan_to_database)
153155
if add_drugs:
154156
drug_to_database = indirect_link(
155-
source_database='compound', intermediate_database='drug', target_database=target_database,
157+
source_database='compound', intermediate_database='drug', target_database=target_database, source_op=source_op, target_op=target_op,
156158
kegg_rest=kegg_rest)
157159
mapping = combine_mappings(mapping1=mapping, mapping2=drug_to_database)
158160
return mapping
@@ -164,19 +166,28 @@ def add_glycans_or_drugs(mapping: KEGGmapping, target_database: str) -> KEGGmapp
164166

165167
# noinspection PyShadowingNames
166168
def database_conv(
167-
kegg_database: str, outside_database: str, reverse: bool = False, kegg_rest: r.KEGGrest | None = None) -> KEGGmapping:
168-
""" Converts the output of the KEGG "conv" operation (of the form that maps the entry IDs of one database to the entry IDs of another) into a dictionary.
169+
kegg_database: str, outside_database: str, reverse: bool = False, add_glycans: bool = False, add_drugs: bool = False,
170+
kegg_rest: r.KEGGrest | None = None) -> KEGGmapping:
171+
""" Converts the output of the KEGG "conv" operation (of the form that maps the entry IDs of a KEGG database to the entry IDs of an outside database) into a dictionary.
169172
170173
:param kegg_database: The name of the KEGG database with entry IDs mapped to the outside database.
171174
:param outside_database: The name of the outside database with entry IDs mapped from the KEGG database.
172175
:param reverse: Reverses the mapping with the target becoming the source and the source becoming the target. Equivalent to calling the reverse() function of this module.
176+
:param add_glycans: Whether to add the corresponding compound IDs of equivalent glycan entries. Logs a warning if neither the source nor the target database is "compound".
177+
:param add_drugs: Whether to add the corresponding compound IDs of equivalent drug entries. Logs a warning if neither the source nor the target database is "compound".
173178
:param kegg_rest: The KEGGrest object to perform the "conv" operation. If None, one is created with the default parameters.
174179
:return: The dictionary.
175180
:raises RuntimeError: Raised if the request to the KEGG REST API fails or times out.
176181
"""
177-
return _map_and_reverse(
178-
reverse=reverse, kegg_rest=kegg_rest, KEGGurl=ku.DatabaseConvKEGGurl, kegg_database=kegg_database,
182+
mapping = _map_and_reverse(
183+
reverse=True, kegg_rest=kegg_rest, KEGGurl=ku.DatabaseConvKEGGurl, kegg_database=kegg_database,
179184
outside_database=outside_database)
185+
mapping = _add_glycans_or_drugs(
186+
mapping=mapping, source_database=kegg_database, target_database=outside_database,
187+
add_glycans=add_glycans, add_drugs=add_drugs, source_op='link', target_op='conv', kegg_rest=kegg_rest)
188+
if reverse:
189+
return _reverse(mapping)
190+
return mapping
180191

181192

182193
# noinspection PyShadowingNames
@@ -228,16 +239,19 @@ def entries_conv(
228239

229240
def indirect_link(
230241
source_database: str, intermediate_database: str, target_database: str, deduplicate: bool = False,
231-
add_glycans: bool = False, add_drugs: bool = False, kegg_rest: r.KEGGrest | None = None) -> KEGGmapping:
232-
""" Creates a dictionary that maps the entry IDs of a source database to those of a target database using an intermediate database ("link" operation) e.g. ko-to-compound where the intermediate is reaction (connecting cross-references of ko-to-reaction and reaction-to-compound).
242+
add_glycans: bool = False, add_drugs: bool = False, source_op: t.Literal['link', 'conv'] = 'link', target_op: t.Literal['link', 'conv'] = 'link',
243+
kegg_rest: r.KEGGrest | None = None) -> KEGGmapping:
244+
""" Creates a dictionary that maps the entry IDs of a source database to those of a target database using an intermediate database e.g. ko-to-compound where the intermediate is reaction (connecting cross-references of ko-to-reaction and reaction-to-compound). The three databases are connected via a combination of either "link" or "conv" operations.
233245
234246
:param source_database: The name of the database with entry IDs to map to the target database.
235247
:param intermediate_database: The name of the database with which two mappings are made i.e. source-to-intermediate and intermediate-to-target, both of which are merged to create source-to-target.
236248
:param target_database: The name of the database with entry IDs to which those of the source database are mapped.
237249
:param deduplicate: Some mappings including "pathway" entry IDs result in half beginning with the normal "path:map" prefix but the other half with a different prefix. If True, removes the IDs corresponding to entries that are identical but with a different prefix.
238250
:param add_glycans: Whether to add the corresponding compound IDs of equivalent glycan entries. Logs a warning if neither the source nor the target database is "compound".
239251
:param add_drugs: Whether to add the corresponding compound IDs of equivalent drug entries. Logs a warning if neither the source nor the target database is "compound".
240-
:param kegg_rest: The KEGGrest object to perform the "link" operations. If None, one is created with the default parameters.
252+
:param source_op: The REST operation mapping the source database to the intermediate database.
253+
:param target_op: The REST operation mapping the intermediate database to the target database.
254+
:param kegg_rest: The KEGGrest object to perform the "link" or "conv" operations. If None, one is created with the default parameters.
241255
:return: The dictionary.
242256
:raises RuntimeError: Raised if the request to the KEGG REST API fails or times out.
243257
:raises ValueError: Raised if deduplicate is True but neither source_database nor target_database is "pathway".
@@ -246,13 +260,25 @@ def indirect_link(
246260
raise ValueError(
247261
f'The source, intermediate, and target database must all be unique. Databases specified: {source_database}, '
248262
f'{intermediate_database}, {target_database}.')
263+
if source_op == 'link':
264+
source_to_intermediate = _to_dict(
265+
kegg_rest=kegg_rest, KEGGurl=ku.DatabaseLinkKEGGurl, source_database=source_database,
266+
target_database=intermediate_database)
267+
elif source_op == 'conv':
268+
source_to_intermediate = _map_and_reverse(
269+
reverse=True, kegg_rest=kegg_rest, KEGGurl=ku.DatabaseConvKEGGurl, kegg_database=source_database, outside_database=intermediate_database)
270+
else:
271+
raise ValueError('source_op must be one of "link" or "conv".')
272+
if target_op == 'link':
273+
intermediate_to_target = _to_dict(
274+
kegg_rest=kegg_rest, KEGGurl=ku.DatabaseLinkKEGGurl, source_database=intermediate_database,
275+
target_database=target_database)
276+
elif target_op == 'conv':
277+
intermediate_to_target = _map_and_reverse(
278+
reverse=True, kegg_rest=kegg_rest, KEGGurl=ku.DatabaseConvKEGGurl, kegg_database=intermediate_database, outside_database=target_database)
279+
else:
280+
raise ValueError('target_op must be one of "link" or "conv".')
249281
source_to_target = KEGGmapping()
250-
source_to_intermediate = _to_dict(
251-
kegg_rest=kegg_rest, KEGGurl=ku.DatabaseLinkKEGGurl, source_database=source_database,
252-
target_database=intermediate_database)
253-
intermediate_to_target = _to_dict(
254-
kegg_rest=kegg_rest, KEGGurl=ku.DatabaseLinkKEGGurl, source_database=intermediate_database,
255-
target_database=target_database)
256282
for source_id, intermediate_ids in source_to_intermediate.items():
257283
for intermediate_id in intermediate_ids:
258284
if intermediate_id in intermediate_to_target.keys():
@@ -262,7 +288,7 @@ def indirect_link(
262288
mapping=source_to_target, deduplicate=deduplicate, source_database=source_database,
263289
target_database=target_database)
264290
source_to_target = _add_glycans_or_drugs(
265-
mapping=source_to_target, source_database=source_database, target_database=target_database,
291+
mapping=source_to_target, source_database=source_database, target_database=target_database, source_op=source_op, target_op=target_op,
266292
add_glycans=add_glycans, add_drugs=add_drugs, kegg_rest=kegg_rest)
267293
return source_to_target
268294

0 commit comments

Comments
 (0)