Skip to content

Commit 96df105

Browse files
author
Nolan Woods
committed
Implement split() and let() JMESPath functions
1 parent c76318d commit 96df105

8 files changed

Lines changed: 37221 additions & 28 deletions

File tree

.idea/csv-plugin.xml

Lines changed: 51 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.rst

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ A web based tool is available to experiment with constructing queries in real ti
5151
dataset to JSON and load it into the `JMESPath playground`_ to begin composing your query. It supports loading JSON files
5252
directly rather than trying to copy/paste the data.
5353

54+
The `split()`_ and `let()`_ functions are available in addition to the standard JMESPath functions.
55+
5456
Examples:
5557
Append a new record::
5658

@@ -74,12 +76,26 @@ Examples:
7476

7577
Convert dataset to PTT format using text output::
7678

77-
[0].[join(' - 1..', [description, to_string(length(seq))]), join(' ', [to_string(length(features[?type=='CDS' && qualifiers.translation])), 'proteins']), join(`"\t"`, ['Location', 'Strand', 'Length', 'PID', 'Gene', 'Synonym', 'Code', 'COG', 'Product']), (features[?type=='CDS' && qualifiers.translation].[join('..', [to_string(sum([location.start, `1`])), to_string(location.end)]), [location.strand][?@==`1`] && '+' || '-', length(qualifiers.translation[0]), qualifiers.db_xref[?starts_with(@, 'GI')][0] || '-', qualifiers.gene[0] || '-', qualifiers.locus_tag[0] || '-', '-', '-', qualifiers.product[0] ] | [*].join(`"\t"`, [*].to_string(@)) )] | []
79+
[0].[join(' - 1..', [description, to_string(length(seq))]), join(' ', [to_string(length(features[?type=='CDS' && qualifiers.translation])), 'proteins']), join(`"\t"`, ['Location', 'Strand', 'Length', 'PID', 'Gene', 'Synonym', 'Code', 'COG', 'Product']), (features[?type=='CDS' && qualifiers.translation].[join('..', [to_string(sum([location.start, `1`])), to_string(location.end)]), [location.strand][?@==`1`] && '+' || '-', length(qualifiers.translation[0]), (qualifiers.db_xref[?starts_with(@, 'GI')].split(':', @)[1])[0] || '-', qualifiers.gene[0] || '-', qualifiers.locus_tag[0] || '-', '-', '-', qualifiers.product[0] ] | [*].join(`"\t"`, [*].to_string(@)) )] | []
80+
81+
Convert dataset to faa format using fasta output::
82+
83+
[0].let({org: (annotations.organism || annotations.source)}, &(features[?type=='CDS' && qualifiers.translation].{id:
84+
join('|', [
85+
(qualifiers.db_xref[?starts_with(@, 'GI')].['gi', split(':', @)[1]]),
86+
(qualifiers.protein_id[*].['ref', @]),
87+
(qualifiers.locus_tag[*].['locus', @]),
88+
join('', [':', [location][?strand==`-1`] && 'c' || '', to_string(sum([location.start, `1`])), '..', to_string(location.end)])
89+
][][]),
90+
seq: qualifiers.translation[0],
91+
description: (org && join('', [qualifiers.product[0], ' [', org, ']']) || qualifiers.product[0])}))
7892

7993
See CONTRIBUTING.rst_ for information on contributing to this repo.
8094

8195
.. _CONTRIBUTING.rst: CONTRIBUTING.rst
8296
.. _JMESPath: http://jmespath.org/
8397
.. _SeqRecord: https://biopython.org/DIST/docs/api/Bio.SeqRecord.SeqRecord-class.html
8498
.. _constructor parameters: https://biopython.org/DIST/docs/api/Bio.SeqRecord.SeqRecord-class.html#__init__
85-
.. _JMESPath playground: https://glenveegee.github.io/jmespath-edit/
99+
.. _JMESPath playground: https://glenveegee.github.io/jmespath-edit/
100+
.. _split(): https://github.com/jmespath/jmespath.py/issues/159
101+
.. _let(): https://github.com/jmespath/jmespath.site/pull/6

biopython_convert/JMESPathGen.py

Lines changed: 151 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,25 @@
11
import jmespath.parser
22
import jmespath.visitor
33
import jmespath.functions
4+
import jmespath.exceptions
45
import itertools
56
import types
67

8+
from collections import deque
9+
710
# Teach jmespath's type tables about extra Python type names: generators are
# treated as arrays, and the Biopython ``Seq`` / ``ExactPosition`` type names
# are treated as string / number respectively.
for _python_name, _jmespath_name in (
    ('generator', 'array'),
    ('Seq', 'string'),
    ('ExactPosition', 'number'),
):
    jmespath.functions.TYPES_MAP[_python_name] = _jmespath_name
    jmespath.functions.REVERSE_TYPES_MAP[_jmespath_name] += (_python_name,)
del _python_name, _jmespath_name  # do not leak loop variables into the module
19+
20+
# this implementation includes https://github.com/jmespath/jmespath.site/pull/6
21+
# and https://github.com/jmespath/jmespath.py/issues/159
22+
1123

1224
def compile(expression):
    """Parse *expression* with this module's ``Parser`` and return the result."""
    parser = Parser()
    return parser.parse(expression)
@@ -30,9 +42,43 @@ def search(self, value, options=None):
3042
return result
3143

3244

45+
class ExtendedFunctions(jmespath.functions.Functions):
    """JMESPath function table extended with ``let()`` and ``split()``."""

    def call_function(self, function_name, resolved_args, **kwargs):
        """Validate the arguments of a registered function and invoke it.

        :param function_name: function name as written in the expression.
        :param resolved_args: list of already evaluated positional arguments.
        :param kwargs: interpreter context (for example ``scope``). It is
            forwarded only to functions that declare ``**kwargs``; functions
            with a fixed signature (``split()`` and the inherited built-ins)
            would otherwise fail with ``TypeError``.
        :raises jmespath.exceptions.UnknownFunctionError: if the name is not
            in ``FUNCTION_TABLE``.
        """
        try:
            spec = self.FUNCTION_TABLE[function_name]
        except KeyError:
            raise jmespath.exceptions.UnknownFunctionError(
                "Unknown function: %s()" % function_name)
        function = spec['function']
        signature = spec['signature']
        self._validate_arguments(resolved_args, signature, function_name)
        if kwargs:
            # Function-scope import: only needed on this uncommon path.
            import inspect
            code = getattr(function, '__code__', None)
            if code is None or not code.co_flags & inspect.CO_VARKEYWORDS:
                # Target cannot receive context keywords; drop them.
                kwargs = {}
        return function(self, *resolved_args, **kwargs)

    @jmespath.functions.signature({'types': ['object']}, {'types': ['expref']})
    def _func_let(self, lexical_scope, expref, **kwargs):
        """Evaluate *expref* with the keys of *lexical_scope* added to the scope.

        :param lexical_scope: object whose entries become scope variables.
        :param expref: expression reference; it is evaluated against its own
            ``context`` attribute.
        :param kwargs: may carry an enclosing ``scope``; entries of
            *lexical_scope* take precedence over it.
        """
        if 'scope' in kwargs:
            # Copy so the enclosing scope is not mutated by the inner one.
            scope = dict(kwargs['scope'])
            scope.update(lexical_scope)
        else:
            scope = dict(lexical_scope)
        kwargs['scope'] = scope
        return expref.visit(expref.expression, expref.context, **kwargs)

    @jmespath.functions.signature({'types': ['string']}, {'types': ['string']})
    def _func_split(self, on, val):
        """Return ``val.split(on)``; note the separator is the first argument."""
        return val.split(on)
70+
71+
72+
class _Expression(jmespath.visitor._Expression):
    """Expression reference that remembers where it was created.

    In addition to the base class' ``expression`` and ``interpreter`` it
    stores ``context`` (the value the ``&expr`` was created against) and the
    interpreter keyword context (for example ``scope``) that was active at
    creation time. The interpreter's ``visit_expref`` passes that keyword
    context to the constructor, so it must be accepted here.
    """

    def __init__(self, expression, interpreter, context, **kwargs):
        """
        :param expression: AST node of the referenced expression.
        :param interpreter: interpreter used to evaluate the expression.
        :param context: value that was current when the expref was created.
        :param kwargs: interpreter keyword context captured at creation.
        """
        super().__init__(expression, interpreter)
        self.context = context
        self.kwargs = kwargs

    def visit(self, node, *args, **kwargs):
        """Evaluate *node*, re-applying the keyword context captured at creation.

        Keywords given by the caller win over captured ones. ``scope`` is
        merged rather than replaced so variables of an enclosing ``let()``
        stay visible, with the caller's entries taking precedence.
        """
        merged = dict(self.kwargs)
        merged.update(kwargs)
        captured_scope = self.kwargs.get('scope')
        if captured_scope:
            scope = dict(captured_scope)
            scope.update(kwargs.get('scope') or {})
            merged['scope'] = scope
        return super().visit(node, *args, **merged)
76+
77+
3378
class TreeInterpreterGenerator(jmespath.visitor.TreeInterpreter):
34-
def __init__(self, options=None, *args, **kwargs):
    """Create the interpreter.

    When *options* is falsy, options carrying an ``ExtendedFunctions``
    instance as ``custom_functions`` are used. Remaining arguments are passed
    to the base class. ``_generators`` starts as an empty dict; ``visit``
    consults it to substitute generator arguments.
    """
    if not options:
        options = jmespath.visitor.Options(custom_functions=ExtendedFunctions())
    super().__init__(*args, options=options, **kwargs)
    self._generators = {}
3783

3884
def _gen_to_list(self, gen, recurse=False):
@@ -57,49 +103,52 @@ def _gen_to_list(self, gen, recurse=False):
57103
def visit(self, node, *args, **kwargs):
    """Dispatch to the base ``visit`` after swapping in a recorded replacement.

    The first positional argument is assumed to be the current value. If it
    is a generator with an entry in ``self._generators`` (presumably the list
    produced by an earlier conversion), that entry is used instead.
    """
    if args and isinstance(args[0], types.GeneratorType):
        first, rest = args[0], args[1:]
        args = (self._generators.get(first, first),) + rest
    return super().visit(node, *args, **kwargs)
63109

64-
def visit_field(self, node, value, scope=None, **kwargs):
    """Resolve the field named ``node['value']`` on *value*.

    Lookup order: ``value.get(name)``; if *value* has no ``get``, attribute
    access ``getattr(value, name)``; if that fails too, the ``let()`` *scope*.

    :param scope: mapping of scope variables, or ``None`` when no ``let()``
        is active. A ``None`` scope yields ``None`` instead of raising.
    :returns: the resolved value, or ``None`` when nothing matches.
    """
    name = node['value']
    try:
        return value.get(name)
    except AttributeError:
        # Allow accessing object fields.
        # TODO push this change upstream, possibly with config flag
        try:
            return getattr(value, name)
        except AttributeError:
            # The field is not defined on the current object: fall back to
            # the scope chain, if one has been created.
            # NOTE(review): a mapping that merely lacks the key returns None
            # above and never reaches this fallback -- confirm that is intended.
            if scope is None:
                return None
            return scope.get(name, None)
73122

74-
def visit_function_expression(self, node, value, **kwargs):
    """Evaluate every argument node, then call the function ``node['value']``.

    Each argument is visited with the keyword context and passed through
    ``_gen_to_list(..., True)`` before the call. The keyword context itself
    is not forwarded to ``call_function``.
    """
    resolved_args = [
        self._gen_to_list(self.visit(arg_node, value, **kwargs), True)
        for arg_node in node['children']
    ]
    return self._functions.call_function(node['value'], resolved_args)
80129

81-
def visit_not_expression(self, node, value, **kwargs):
    """Evaluate ``!expr``.

    :returns: ``False`` for a numeric zero operand (0 is not falsy in
        JMESPath), otherwise whether the operand is JMESPath-false.
    """
    original_result = self.visit(node['children'][0], value, **kwargs)
    # Booleans are excluded explicitly: ``False == 0`` holds in Python, which
    # would otherwise make ``!false`` evaluate to false.
    if not isinstance(original_result, bool) and original_result == 0:
        # Special case for 0, !0 should be false, not true.
        # 0 is not a special cased integer in jmespath.
        return False
    return self._is_false(original_result)  # TODO bugfix, push this change upstream
88137

89-
def visit_filter_projection(self, node, value, **kwargs):
    """Lazily yield projected elements that satisfy the filter.

    ``node['children']`` is ``[base, projection, comparator]``. Being a
    generator function, this always returns a generator; it is empty when
    the base is not a list, generator, ``map`` or ``filter``. ``None``
    projection results are skipped.
    """
    base = self.visit(node['children'][0], value, **kwargs)
    if not isinstance(base, (list, types.GeneratorType, map, filter)):
        return
    comparator_node = node['children'][2]
    for element in base:
        if not self._is_true(self.visit(comparator_node, element, **kwargs)):
            continue
        projected = self.visit(node['children'][1], element, **kwargs)
        if projected is not None:
            yield projected
100149

101-
def visit_flatten(self, node, value):
102-
base = self.visit(node['children'][0], value)
150+
def visit_flatten(self, node, value, **kwargs):
151+
base = self.visit(node['children'][0], value, **kwargs)
103152
if not isinstance(base, (list, types.GeneratorType, map, filter)):
104153
# Can't flatten the object if it's not a list.
105154
return None
@@ -110,40 +159,40 @@ def visit_flatten(self, node, value):
110159
else:
111160
yield element
112161

113-
def visit_index(self, node, value, **kwargs):
    """Index into *value* after passing it through ``_gen_to_list``.

    The lookup itself is delegated to the base ``visit_index``; the keyword
    context is accepted and ignored.
    """
    return super().visit_index(node, self._gen_to_list(value))
116165

117-
def visit_slice(self, node, value, **kwargs):
    """Apply the slice ``node['children']`` (start, stop, step) to *value*.

    :returns: ``None`` when *value* is not iterable (e.g. null); a lazy
        ``itertools.islice`` for non-negative bounds; a list when any bound
        or the step is negative, since ``islice`` cannot express those and
        the input must be materialized.

    NOTE(review): the projection visitors in this class only accept
    list/generator/map/filter bases, so an ``islice`` result may not be
    projected -- confirm how slice results are consumed downstream.
    """
    try:
        iterator = iter(value)
    except TypeError:
        # Slicing something that is not a sequence yields null.
        return None
    bounds = node['children']
    if any(isinstance(bound, int) and bound < 0 for bound in bounds):
        return list(iterator)[slice(*bounds)]
    return itertools.islice(iterator, *bounds)
119168

120-
def visit_multi_select_list(self, node, value, **kwargs):
    """Lazily yield the result of each child expression evaluated on *value*.

    Being a generator function, this always returns a generator; it is empty
    when *value* is ``None``.
    """
    if value is None:
        return
    yield from (self.visit(child, value, **kwargs) for child in node['children'])
125174

126-
def visit_projection(self, node, value, **kwargs):
    """Lazily project ``node['children'][1]`` over the evaluated base.

    Being a generator function, this always returns a generator; it is empty
    when the base is not a list, generator, ``map`` or ``filter``. ``None``
    results are skipped.

    :param kwargs: interpreter keyword context (e.g. the ``let()`` scope). It
        is forwarded to both the base and the per-element evaluation, as the
        filter and value projections do.
    """
    base = self.visit(node['children'][0], value, **kwargs)
    if not isinstance(base, (list, types.GeneratorType, map, filter)):
        return None
    for element in base:
        current = self.visit(node['children'][1], element, **kwargs)
        if current is not None:
            yield current
134183

135-
def visit_value_projection(self, node, value, **kwargs):
    """Lazily project ``node['children'][1]`` over ``base.values()``.

    Being a generator function, this always returns a generator; it is empty
    when the evaluated base has no ``values`` attribute. ``None`` results
    are skipped.
    """
    base = self.visit(node['children'][0], value, **kwargs)
    try:
        elements = base.values()
    except AttributeError:
        return
    for element in elements:
        projected = self.visit(node['children'][1], element, **kwargs)
        if projected is not None:
            yield projected
145194

146-
def _is_false(self, value):
195+
def _is_false(self, value, **kwargs):
147196
if isinstance(value, types.GeneratorType):
148197
# peek generator instead of _gen_to_list()
149198
try:
@@ -155,3 +204,80 @@ def _is_false(self, value):
155204
except StopIteration:
156205
return True
157206
return super()._is_false(value)
207+
208+
def visit_expref(self, node, value, **kwargs):
    """Wrap the referenced expression in an ``_Expression``.

    The current *value* is passed as the expression's context and the
    keyword context is handed to the constructor.
    """
    referenced = node['children'][0]
    return _Expression(referenced, self, value, **kwargs)
210+
211+
def visit_subexpression(self, node, value, **kwargs):
    """Evaluate the children in order, feeding each result to the next child."""
    current = value
    for child in node['children']:
        current = self.visit(child, current, **kwargs)
    return current
216+
217+
def visit_comparator(self, node, value, **kwargs):
    """Evaluate a comparison between the two child expressions.

    :returns: the comparison result; ``None`` for an ordering operator when
        either operand is rejected by ``jmespath.visitor._is_comparable``.
    """
    comparator_func = self.COMPARATOR_FUNC[node['value']]
    left = self.visit(node['children'][0], value, **kwargs)
    right = self.visit(node['children'][1], value, **kwargs)
    # Common case: comparator is == or !=
    if node['value'] in self._EQUALITY_OPS:
        return comparator_func(left, right)
    # Ordering operators are only valid for comparable operands; any other
    # type yields a None value. The helper lives in ``jmespath.visitor``,
    # not in the top-level ``jmespath`` package.
    if not (jmespath.visitor._is_comparable(left) and
            jmespath.visitor._is_comparable(right)):
        return None
    return comparator_func(left, right)
236+
237+
def visit_current(self, node, value, **kwargs):
    """Delegate to the base ``visit_current``; keyword context is ignored."""
    result = super().visit_current(node, value)
    return result
239+
240+
def visit_identity(self, node, value, **kwargs):
    """Delegate to the base ``visit_identity``; keyword context is ignored."""
    result = super().visit_identity(node, value)
    return result
242+
243+
def visit_index_expression(self, node, value, **kwargs):
    """Evaluate the children in order, feeding each result to the next child."""
    current = value
    for child in node['children']:
        current = self.visit(child, current, **kwargs)
    return current
248+
249+
def visit_key_val_pair(self, node, value, **kwargs):
    """Evaluate the pair's value expression (its first child) against *value*."""
    value_node = node['children'][0]
    return self.visit(value_node, value, **kwargs)
251+
252+
def visit_literal(self, node, value, **kwargs):
    """Delegate to the base ``visit_literal``; keyword context is ignored."""
    result = super().visit_literal(node, value)
    return result
254+
255+
def visit_multi_select_dict(self, node, value, **kwargs):
    """Build a ``self._dict_cls`` mapping each child's key to its evaluation.

    :returns: ``None`` when *value* is ``None``, otherwise the mapping.
    """
    if value is None:
        return None
    result = self._dict_cls()
    for pair in node['children']:
        evaluated = self.visit(pair, value, **kwargs)
        result[pair['value']] = evaluated
    return result
262+
263+
def visit_or_expression(self, node, value, **kwargs):
    """Return the left operand unless it is false, else the right operand.

    The right operand is evaluated only when needed.
    """
    left = self.visit(node['children'][0], value, **kwargs)
    if not self._is_false(left):
        return left
    return self.visit(node['children'][1], value, **kwargs)
268+
269+
def visit_and_expression(self, node, value, **kwargs):
    """Return the left operand if it is false, else the right operand.

    The right operand is evaluated only when needed.
    """
    left = self.visit(node['children'][0], value, **kwargs)
    return left if self._is_false(left) else self.visit(
        node['children'][1], value, **kwargs)
274+
275+
def visit_pipe(self, node, value, **kwargs):
    """Evaluate the children in order, feeding each result to the next child."""
    current = value
    for child in node['children']:
        current = self.visit(child, current, **kwargs)
    return current
280+
281+
def _is_true(self, value, **kwargs):
    """Delegate to the base ``_is_true``; keyword context is ignored."""
    verdict = super()._is_true(value)
    return verdict
283+

0 commit comments

Comments
 (0)