Skip to content

Commit f4561ca

Browse files
committed
refactor iteration points
1 parent 8c91d96 commit f4561ca

2 files changed

Lines changed: 152 additions & 157 deletions

File tree

datapath/_base.py

Lines changed: 28 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,16 @@
1313
Collection,
1414
CollectionKey,
1515
NO_DEFAULT,
16-
ITERATION_POINT,
1716
DatapathError,
1817
ValidationError,
1918
TypeValidationError,
2019
TypeMismatchValidationError,
2120
InvalidIterationError,
2221
PathLookupError,
22+
_IterationPoint,
23+
_ListIterationPoint,
24+
_StarIterationPoint,
25+
_RangeIterationPoint,
2326
)
2427

2528
_key_pattern = '(?P<part>[^[.]+)'
@@ -74,32 +77,6 @@ def validate_path(path: str, iterable: bool = True) -> None:
7477
_split_match(match, iterable=False)
7578

7679

77-
def _parse_range(range_part: str) -> range:
78-
parts = range_part.split(':')
79-
num_parts = len(parts)
80-
if num_parts == 2:
81-
start, stop = parts
82-
step = ''
83-
elif num_parts == 3:
84-
start, stop, step = parts
85-
else:
86-
raise ValueError(f'bug: unhandled number of delimiters ({num_parts-1}) in range syntax')
87-
if start:
88-
start = int(start)
89-
else:
90-
start = 0
91-
if stop:
92-
stop = int(stop)
93-
else:
94-
stop = sys.maxsize
95-
if step:
96-
step = int(step)
97-
else:
98-
step = 1
99-
return range(start, stop, step)
100-
101-
102-
10380
def split(path: str, iterable: bool = False) -> SplitPath:
10481
"""inverse of join() -- split the path string to it's component keys/indexes in order"""
10582
if path == '':
@@ -114,43 +91,21 @@ def _split_match(match: re.Match, iterable: bool) -> SplitPath:
11491
if part[0] == '[' and part[-1] == ']':
11592
index = part[1:-1]
11693
if ':' in index:
117-
if not iterable:
118-
raise InvalidIterationError('iterable range syntax is not allowed here')
119-
split_path.append(_parse_range(index))
94+
path_part = _RangeIterationPoint(part)
12095
elif index:
121-
split_path.append(int(index))
122-
elif iterable:
123-
split_path.append(ITERATION_POINT)
96+
path_part = int(index)
12497
else:
125-
raise InvalidIterationError('iterable empty square brackets is not allowed here')
98+
path_part = _ListIterationPoint(part)
12699
elif '*' in part:
127-
if iterable:
128-
split_path.append(part)
129-
else:
130-
raise InvalidIterationError('iterable *-key is not allowed here')
100+
path_part = _StarIterationPoint(part)
131101
else:
132-
split_path.append(part)
102+
path_part = part
103+
if not iterable and isinstance(path_part, _IterationPoint):
104+
raise InvalidIterationError(f'iterable {path_part.name} {path_part} not allowed here')
105+
split_path.append(path_part)
133106
return tuple(split_path)
134107

135108

136-
def _format_range(range_obj: range) -> str:
137-
start = range_obj.start
138-
stop = range_obj.stop
139-
step = range_obj.step
140-
if range_obj.start == 0:
141-
start = ''
142-
if range_obj.stop == sys.maxsize:
143-
stop = ''
144-
if range_obj.step == 1:
145-
step = ''
146-
if not any((start, stop, step)):
147-
return '[]'
148-
slice_str = f'{start}:{stop}'
149-
if step:
150-
slice_str += f':{step}'
151-
return f'[{slice_str}]'
152-
153-
154109
def join(split_path: Iterable[Key]) -> str:
155110
"""inverse of split() -- combine an iterable of keys/indexes into a dotted-path format
156111
@@ -173,18 +128,10 @@ def join(split_path: Iterable[Key]) -> str:
173128
path = f'{path}[{part}]'
174129
else:
175130
path = f'[{part}]'
176-
elif part is ITERATION_POINT:
177-
if path:
178-
path = f'{path}[]'
179-
else:
180-
path = '[]'
181-
elif isinstance(part, range):
182-
if path:
183-
path = f'{path}{_format_range(part)}'
184-
else:
185-
path = _format_range(part)
131+
elif isinstance(part, _IterationPoint):
132+
path = part.append_path(path)
186133
else:
187-
raise ValidationError(f'index {i} is invalid, must be str/int/range/ITERATION_POINT, '
134+
raise ValidationError(f'index {i} is invalid, must be str/int or iteration point, '
188135
f'got {type(part).__name__}')
189136
return path
190137

@@ -194,7 +141,7 @@ def _validate_key_collection_type(obj: Collection, key: Key) -> None:
194141
validate a collection object and key are valid and corresponding types
195142
raise a ValidationError if they are not
196143
"""
197-
if key is ITERATION_POINT or isinstance(key, range):
144+
if isinstance(key, _IterationPoint):
198145
raise TypeError('bug: iteration not supported here')
199146
if not isinstance(obj, _collection_types):
200147
raise TypeValidationError('object must be list/dict')
@@ -302,44 +249,19 @@ def _iterate(obj: Collection,
302249
if not isinstance(obj, _collection_types):
303250
raise ValidationError(f'{join(base_path + split_path)}: must be list/dict')
304251

305-
star_index = _star_part_index(split_path)
306-
range_index = _range_part_index(split_path)
307-
try:
308-
iter_index = split_path.index(ITERATION_POINT)
309-
except ValueError:
310-
iter_index = sys.maxsize
311-
312-
min_iter_point = min((iter_index, star_index, range_index))
252+
# find first iteration point
253+
iter_index = None
254+
iter_point = None
255+
for index, part in enumerate(split_path):
256+
if isinstance(part, _IterationPoint):
257+
iter_index = index
258+
iter_point = part
259+
break
313260

314-
if min_iter_point == sys.maxsize:
261+
if iter_index is None:
315262
# no iteration points found, just need to get()
316263
yield join(base_path + split_path), _get(obj, split_path, default)
317264
return
318-
elif min_iter_point == star_index:
319-
# first iteration point is a *-key, we need a dict and need to filter for wildcard matches
320-
iter_index = star_index
321-
check = _check_dict_iter
322-
def iter_collection(collection):
323-
for key, value in collection.items():
324-
if not _wildcard_match(split_path[star_index], key):
325-
continue
326-
yield key, value
327-
elif min_iter_point == range_index:
328-
# first iteration point is a [x:y:z] range, we need a list and the original indicies
329-
iter_index = range_index
330-
check = _check_list_iter
331-
def iter_collection(collection):
332-
for index in split_path[range_index]:
333-
try:
334-
yield index, collection[index]
335-
except IndexError:
336-
break
337-
elif min_iter_point == iter_index:
338-
# first iteration point is a [] list iterator, just need to enumerate a list
339-
check = _check_list_iter
340-
iter_collection = enumerate
341-
else:
342-
raise RuntimeError('bug: unhandled min iter point')
343265

344266
# find the collection referred to by the portion of the path before the first iteration point
345267
before_split_path = split_path[:iter_index]
@@ -353,11 +275,11 @@ def iter_collection(collection):
353275
path = '<root>'
354276
key = before_split_path[-1]
355277
raise PathLookupError(f'{path}: could not find collection at key/index {key!r} to iterate') from None
356-
check(collection)
357-
after_split_path = split_path[iter_index+1:]
358278

359279
# iterate the collection
360-
for key, element in iter_collection(collection):
280+
iter_point.check(collection)
281+
after_split_path = split_path[iter_index+1:]
282+
for key, element in iter_point.iter(collection):
361283
key_split_path = base_path + before_split_path + (key,)
362284
if after_split_path:
363285
# if there is a path after the iteration point, element must be a Collection
@@ -367,38 +289,6 @@ def iter_collection(collection):
367289
yield join(key_split_path), element
368290

369291

370-
def _star_part_index(split_path: SplitPath) -> int:
371-
for index, part in enumerate(split_path):
372-
if isinstance(part, str) and '*' in part:
373-
return index
374-
return sys.maxsize
375-
376-
377-
def _range_part_index(split_path: SplitPath) -> int:
378-
for index, part in enumerate(split_path):
379-
if isinstance(part, range):
380-
return index
381-
return sys.maxsize
382-
383-
384-
def _wildcard_match(star_part: str, key: str) -> bool:
385-
if star_part == '*':
386-
return True
387-
substrings = map(re.escape, star_part.split('*'))
388-
pattern = '^' + '.*?'.join(substrings) + '$'
389-
return bool(re.match(pattern, key))
390-
391-
392-
def _check_dict_iter(collection: Collection):
393-
if not isinstance(collection, dict):
394-
raise InvalidIterationError('*-keys must be preceeded by a dict')
395-
396-
397-
def _check_list_iter(collection: Collection):
398-
if not isinstance(collection, list):
399-
raise InvalidIterationError('[] must be preceeded by a list')
400-
401-
402292
def put(obj: Collection, path: str, value: Any) -> None:
403293
"""set the value at the path
404294

datapath/types.py

Lines changed: 124 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,9 @@
1-
from typing import Any, NewType
2-
3-
4-
class _IterationPoint:
5-
def __str__(self) -> str:
6-
return 'ITERATION_POINT'
7-
8-
def __repr__(self) -> str:
9-
return 'ITERATION_POINT'
10-
11-
12-
class _NoDefault:
13-
def __str__(self) -> str:
14-
return 'NO_DEFAULT'
15-
16-
def __repr__(self) -> str:
17-
return 'NO_DEFAULT'
1+
import sys
2+
from typing import Any, NewType, Generator
183

4+
import regex as re
195

206
Key = NewType('Key', int|str)
21-
SplitPath = NewType('SplitPath', tuple[Key|_IterationPoint|range, ...])
227
Map = NewType('Map', dict[str, Any])
238
Collection = NewType('Collection', list|Map)
249
CollectionKey = NewType('CollectionKey', tuple[list, int]|tuple[Map, str])
@@ -28,8 +13,128 @@ def __repr__(self) -> str:
2813
PathDict = NewType('PathDict', dict[str, Any])
2914
RootPathDict = NewType('RootPathDict', dict[str, Collection])
3015

16+
17+
class _IterationPoint:
18+
name: str = '<none>'
19+
20+
def __init__(self, path_part: str):
21+
self.path_part = path_part
22+
23+
def __str__(self) -> str:
24+
return repr(self.path_part)
25+
26+
def check(self, collection: Collection) -> None:
27+
raise NotImplementedError()
28+
29+
def iter(self, collection: Collection) -> Generator[tuple[Key, Any], None, None]:
30+
raise NotImplementedError()
31+
32+
def append_path(self, base_path: str) -> str:
33+
raise NotImplementedError()
34+
35+
36+
class _StarIterationPoint(_IterationPoint):
    """Iteration point for a dict key containing one or more ``*`` wildcards.

    Each ``*`` stands for any run of characters (including none); every other
    character of the key has to match literally.
    """

    name: str = '*-key'

    def __init__(self, path_part: str):
        super().__init__(path_part)
        if path_part == '*':
            # a lone star accepts every key, so no pattern is needed
            self._re = None
        else:
            literals = map(re.escape, path_part.split('*'))
            # DOTALL lets a star span newlines, the same way the lone '*' case
            # accepts any key; anchoring is done by fullmatch() in _match()
            self._re = re.compile('.*'.join(literals), re.DOTALL)

    def check(self, collection: Collection) -> None:
        """Raise InvalidIterationError unless ``collection`` is a dict."""
        if not isinstance(collection, dict):
            raise InvalidIterationError('*-keys must be preceeded by a dict')

    def _match(self, key: str) -> bool:
        """Return True when ``key`` matches the wildcard pattern in full."""
        if self._re is None:
            return True
        # fullmatch() rather than '^...$' with match(): '$' also matches just
        # before a trailing newline, which let 'a*b' accept the key 'ab\n'
        return self._re.fullmatch(key) is not None

    def iter(self, collection: Collection) -> Generator[tuple[Key, Any], None, None]:
        """Yield ``(key, value)`` for every dict entry whose key matches."""
        for key, value in collection.items():
            if self._match(key):
                yield key, value

    def append_path(self, base_path: str) -> str:
        """Return ``base_path`` with this key appended after a dot.

        When ``base_path`` is empty the key text is returned on its own.
        """
        if base_path:
            return f'{base_path}.{self.path_part}'
        return self.path_part
68+
69+
70+
class _BaseListIterationPoint(_IterationPoint):
    """Behaviour shared by the iteration points written in square brackets."""

    def check(self, collection: Collection) -> None:
        """Raise InvalidIterationError unless ``collection`` is a list."""
        if isinstance(collection, list):
            return
        raise InvalidIterationError('[] must be preceeded by a list')

    def append_path(self, base_path: str) -> str:
        """Return ``base_path`` directly followed by the bracket text.

        No separator is inserted; an empty ``base_path`` gives back the
        bracket text alone.
        """
        if not base_path:
            return self.path_part
        return f'{base_path}{self.path_part}'
80+
81+
82+
class _ListIterationPoint(_BaseListIterationPoint):
    """Iteration point that visits every element of a list."""

    name: str = 'empty square brackets'

    def iter(self, collection: Collection) -> Generator[tuple[Key, Any], None, None]:
        """Yield ``(index, element)`` for each element, in list order."""
        for position, element in enumerate(collection):
            yield position, element
87+
88+
89+
class _RangeIterationPoint(_BaseListIterationPoint):
    """Iteration point for ``[start:stop]`` / ``[start:stop:step]`` syntax."""

    name: str = 'slice syntax'

    def __init__(self, path_part: str):
        super().__init__(path_part)
        # indexes to visit, computed once from the bracket text
        self._range = self._parse_slice(path_part)

    @staticmethod
    def _parse_slice(path_part: str) -> range:
        """Convert bracketed slice text into a ``range``.

        Omitted fields default to start ``0``, stop ``sys.maxsize`` and
        step ``1``.

        Raises:
            ValueError: if the text does not hold two or three
                colon-separated fields, or a field is not an integer.
        """
        fields = path_part.strip('[]').split(':')
        num_parts = len(fields)
        if num_parts not in (2, 3):
            raise ValueError(f'bug: unhandled number of delimiters ({num_parts-1}) in range syntax')
        if num_parts == 2:
            # no step given, treat it like an empty third field
            fields.append('')
        first, last, stride = fields
        return range(
            int(first) if first else 0,
            int(last) if last else sys.maxsize,
            int(stride) if stride else 1,
        )

    def iter(self, collection: Collection) -> Generator[tuple[Key, Any], None, None]:
        """Yield ``(index, element)`` for each index of the range.

        Iteration ends at the first index that the collection rejects with
        IndexError, which is what bounds an open-ended stop.
        """
        for index in self._range:
            try:
                element = collection[index]
            except IndexError:
                return
            yield index, element
127+
128+
129+
SplitPath = NewType('SplitPath', tuple[Key|_IterationPoint, ...])
130+
131+
132+
class _NoDefault:
133+
def __repr__(self) -> str:
134+
return 'NO_DEFAULT'
135+
136+
31137
NO_DEFAULT = _NoDefault()
32-
ITERATION_POINT = _IterationPoint()
33138

34139

35140
class DatapathError(Exception):

0 commit comments

Comments
 (0)