Skip to content

Commit f6025e4

Browse files
committed
add format() and format_iterate()
1 parent f6f3a25 commit f6025e4

9 files changed

Lines changed: 266 additions & 6 deletions

File tree

REFERENCE.md

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,19 @@ obtain the value at the path
3030
* if default is passed, return it if the leaf value was not found
3131
* if default is not passed and the leaf value is not found, propagate the LookupError
3232

33+
### function `format()`
34+
35+
```
36+
format(obj: datapath.types.Collection, format_string: str) -> str
37+
```
38+
39+
Given a standard Python format string with {} notation, interpret the identifiers
40+
as a datapath within `obj`, and apply standard formatting language to the result.
41+
3342
### function `iterate()`
3443

3544
```
36-
iterate(obj: datapath.types.Collection, path: str, default: Any = NO_DEFAULT) -> Generator[tuple[str, Any], NoneType, NoneType]
45+
iterate(obj: datapath.types.Collection, path: str, default: Any = NO_DEFAULT) -> Generator[datapath._base.iterate_result[str, Any], NoneType, NoneType]
3746
```
3847

3948
yield entries from a collection using an iterable path -- that is, one containing one or more
@@ -60,6 +69,65 @@ Examples:
6069
* `test1.*test*` # "test1" in a root dict must be a dict, yield each key that contains "test"
6170
* `test1[].*` # combining dict and list iteration works
6271

72+
### function `format_iterate()`
73+
74+
```
75+
format_iterate(obj: datapath.types.Collection, format_string: str, default: Any = NO_DEFAULT, iter_func: Callable = <class 'zip'>) -> Generator[str, NoneType, NoneType]
76+
```
77+
78+
Given a standard Python format string with {} notation, interpret the identifiers as iterable datapaths within `obj`.
79+
One value will be consumed from each iterable path and formatted using the standard language.
80+
81+
`default` is passed through to all `iterate()` calls, which in turn passes it through to the leaf `get()` calls.
82+
There is no way to use a different default value for different iterable datapaths in replacement fields.
83+
84+
By default, the values from the iterators will be obtained with the
85+
[`zip()` builtin](https://docs.python.org/3/library/functions.html#zip) with `strict=False`, meaning if the different
86+
iterable format strings produce a different number of results, iteration will stop when the shortest one stops, and
87+
the values will all correspond to the same index from each `iterate()` result.
88+
89+
Example:
90+
91+
```
92+
>>> test_obj = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}, {'a': 5, 'b': 6}]
93+
>>> for text in format_iterate(test_obj, 'a {[].a} b {[].b}'):
94+
... print(text)
95+
...
96+
a 1 b 2
97+
a 3 b 4
98+
a 5 b 6
99+
100+
```
101+
102+
If different behavior is desired, a different function can be passed:
103+
104+
`iter_func` must have approximately the same basic signature as `builtins.zip()`,
105+
[`itertools.product()`](https://docs.python.org/3/library/itertools.html#itertools.product),
106+
and [`itertools.zip_longest()`](https://docs.python.org/3/library/itertools.html#itertools.zip_longest).
107+
108+
More specifically, it must accept an arbitrary number of Iterables (specifically the Generator
109+
returned by `datapath.iterate()`), and yield a Sequence with a value from each one in order when the return
110+
value is iterated.
111+
112+
You can supply extra keyword arguments to any function with this signature by utilizing
113+
[`functools.partial()`](https://docs.python.org/3/library/functools.html#functools.partial). Passing positional
114+
arguments to a partial will probably not work as expected, and is not recommended.
115+
116+
Example with a partial and `itertools.zip_longest()`:
117+
118+
```
119+
>>> import functools, itertools
120+
>>> test_obj = {'a': list('123'), 'b': list('4567')}
121+
>>> for text in format_iterate(test_obj, 'a {a[]} b {b[]}',
122+
... iter_func=functools.partial(itertools.zip_longest, fillvalue='X')):
123+
... print(text)
124+
a 1 b 4
125+
a 2 b 5
126+
a 3 b 6
127+
a X b 7
128+
129+
```
130+
63131
### function `put()`
64132

65133
```
@@ -144,6 +212,7 @@ Example:
144212
```
145213
>>> join(['a', 'b', 5])
146214
'a.b[5]'
215+
147216
```
148217

149218
### function `leaf()`

build.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ python3 -m unittest -v
1515
pylint -E datapath setup.py docs.py
1616
python3 docs.py
1717

18+
rm -rf dist/*
19+
1820
python3 setup.py sdist
1921
python3 setup.py bdist_wheel
2022

datapath/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
complete_collection,
2020
UnfoldProcessor,
2121
)
22+
from .format import format, format_iterate
2223
from .types import (
2324
DatapathError,
2425
ValidationError,
@@ -28,7 +29,9 @@
2829

2930
__all__ = [
3031
'get',
32+
'format',
3133
'iterate',
34+
'format_iterate',
3235
'put',
3336
'delete',
3437
'discard',

datapath/_base.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ def join(split_path: Iterable[Key]) -> str:
123123
```
124124
>>> join(['a', 'b', 5])
125125
'a.b[5]'
126+
126127
```
127128
"""
128129
path = ''
@@ -187,9 +188,13 @@ def _get(obj: Collection, split_path: SplitPath, default: Any = NO_DEFAULT) -> A
187188
return default
188189

189190

191+
class iterate_result(tuple):
    """
    `(path, value)` pair as yielded by `iterate()`.

    Behaves exactly like a plain tuple; the dedicated subclass only exists so that
    consumers can recognise these pairs with `isinstance()` and tell them apart
    from any other value.
    """
193+
194+
190195
def iterate(obj: Collection,
191196
path: str,
192-
default: Any = NO_DEFAULT) -> Generator[tuple[str, Any], None, None]:
197+
default: Any = NO_DEFAULT) -> Generator[iterate_result[str, Any], None, None]:
193198
"""
194199
yield entries from a collection using an iterable path -- that is, one containing one or more
195200
sets of empty square brackets (`[]`) or a key with a `*` (`*`/`wild*cards*`/etc.)
@@ -222,7 +227,7 @@ def iterate(obj: Collection,
222227
def _iterate(obj: Collection,
223228
split_path: SplitPath,
224229
base_path: SplitPath,
225-
default: Any) -> Generator[tuple[str, Any], None, None]:
230+
default: Any) -> Generator[iterate_result[str, Any], None, None]:
226231
"""recursive core of iterate()"""
227232
if not isinstance(obj, _collection_types):
228233
raise ValidationError(f'{join(base_path + split_path)}: must be list/dict')
@@ -238,7 +243,7 @@ def _iterate(obj: Collection,
238243

239244
if iter_index is None:
240245
# no iteration points found, just need to get()
241-
yield join(base_path + split_path), _get(obj, split_path, default)
246+
yield iterate_result((join(base_path + split_path), _get(obj, split_path, default)))
242247
return
243248

244249
# find the collection referred to by the portion of the path before the first iteration point
@@ -264,7 +269,7 @@ def _iterate(obj: Collection,
264269
yield from _iterate(element, after_split_path, key_split_path, default)
265270
else:
266271
# if there is no path after, then this element is what we're after
267-
yield join(key_split_path), element
272+
yield iterate_result((join(key_split_path), element))
268273

269274

270275
def put(obj: Collection, path: str, value: Any) -> None:

datapath/format.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
import builtins
2+
import string
3+
from typing import Any, Callable, Generator
4+
5+
from ._base import get, iterate, iterate_result
6+
from .types import Collection, NO_DEFAULT
7+
8+
9+
class _Format(string.Formatter):
    """`string.Formatter` whose replacement fields are datapaths into one object; backs `format()`"""

    def __init__(self, obj: Collection):
        super().__init__()
        # the collection every replacement field is resolved against
        self._datapath_obj = obj

    def get_field(self, field_name, args, kwargs):
        # args/kwargs are deliberately unused: the whole field name is handed to get()
        # as a datapath, and None fills the "used key" slot of the returned pair
        value = get(self._datapath_obj, field_name)
        return value, None
18+
19+
20+
def format(obj: Collection, format_string: str) -> str:
    """
    Render a standard Python format string ({} notation) against `obj`: each replacement
    field's identifier is interpreted as a datapath within `obj`, and the standard
    formatting language is applied to the value found there.
    """
    formatter = _Format(obj)
    return formatter.format(format_string)
26+
27+
28+
def _do_format(value: Any, format_spec: str, conversion: str) -> str:
    """
    apply the optional !r / !s / !a conversion to `value`, then render it with builtins.format

    a falsy `conversion` (empty string or None) leaves the value as-is;
    any flag other than r / s / a raises ValueError
    """
    if conversion:
        converters = {'r': repr, 's': str, 'a': ascii}
        if conversion not in converters:
            raise ValueError(f'unhandled conversion flag {conversion!r}')
        value = converters[conversion](value)
    return builtins.format(value, format_spec)
41+
42+
43+
def format_iterate(obj: Collection,
                   format_string: str,
                   default: Any = NO_DEFAULT,
                   iter_func: Callable = zip) -> Generator[str, None, None]:
    """
    Given a standard Python format string with {} notation, interpret the identifiers as iterable datapaths within `obj`.
    One value will be consumed from each iterable path and formatted using the standard language.

    Literal braces written as `{{` or `}}` in `format_string` come out as single `{` / `}` characters
    in every yielded string, as with `str.format()`.

    `default` is passed through to all `iterate()` calls, which in turn passes it through to the leaf `get()` calls.
    There is no way to use a different default value for different iterable datapaths in replacement fields.

    By default, the values from the iterators will be obtained with the
    [`zip()` builtin](https://docs.python.org/3/library/functions.html#zip) with `strict=False`, meaning if the different
    iterable format strings produce a different number of results, iteration will stop when the shortest one stops, and
    the values will all correspond to the same index from each `iterate()` result.

    Example:

    ```
    >>> test_obj = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}, {'a': 5, 'b': 6}]
    >>> for text in format_iterate(test_obj, 'a {[].a} b {[].b}'):
    ...     print(text)
    ...
    a 1 b 2
    a 3 b 4
    a 5 b 6

    ```

    If different behavior is desired, a different function can be passed:

    `iter_func` must have approximately the same basic signature as `builtins.zip()`,
    [`itertools.product()`](https://docs.python.org/3/library/itertools.html#itertools.product),
    and [`itertools.zip_longest()`](https://docs.python.org/3/library/itertools.html#itertools.zip_longest).

    More specifically, it must accept an arbitrary number of Iterables (specifically the Generator
    returned by `datapath.iterate()`), and yield a Sequence with a value from each one in order when the return
    value is iterated.

    You can supply extra keyword arguments to any function with this signature by utilizing
    [`functools.partial()`](https://docs.python.org/3/library/functools.html#functools.partial). Passing positional
    arguments to a partial will probably not work as expected, and is not recommended.

    Example with a partial and `itertools.zip_longest()`:

    ```
    >>> import functools, itertools
    >>> test_obj = {'a': list('123'), 'b': list('4567')}
    >>> for text in format_iterate(test_obj, 'a {a[]} b {b[]}',
    ...         iter_func=functools.partial(itertools.zip_longest, fillvalue='X')):
    ...     print(text)
    a 1 b 4
    a 2 b 5
    a 3 b 6
    a X b 7

    ```
    """
    iterators = []
    path_formats = []
    template_parts = []
    for literal_text, field_name, format_spec, conversion in string.Formatter().parse(format_string):
        # parse() has already collapsed `{{` / `}}` into single braces; escape them again so the
        # str.format() call below emits them literally instead of treating them as fields
        template_parts.append(literal_text.replace('{', '{{').replace('}', '}}'))
        if not field_name:
            # no field follows this literal (end of string), or the field name is empty:
            # there is no datapath to iterate
            continue
        template_parts.append('{}')
        iterators.append(iterate(obj, field_name, default))
        path_formats.append((format_spec, conversion))
    # the original literals with one anonymous `{}` placeholder per iterated datapath
    plain_format_string = ''.join(template_parts)

    for results in iter_func(*iterators):
        values = []
        for index, result in enumerate(results):
            if isinstance(result, iterate_result):
                # (path, value) pair produced by iterate(); only the value is rendered
                _, value = result
            else:
                # anything else was injected by iter_func (e.g. a zip_longest fillvalue); use it as-is
                value = result
            format_spec, conversion = path_formats[index]
            values.append(_do_format(value, format_spec, conversion))
        yield plain_format_string.format(*values)

pre_deploy.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ set -xeo pipefail
55
rm -rf build dist *.egg-info
66

77
build_version='3.10'
8+
docker pull "python:$build_version"
89
docker run -it --rm -v "$PWD:/repo" -w /repo "python:$build_version" '/repo/build.sh'
910

1011
for version in '3.10' '3.11' '3.12'; do
12+
docker pull "python:$version"
1113
docker run -it --rm -v "$PWD/dist:/dist" -v "$PWD/test:/repo/test" -w /repo "python:$version" '/repo/test/docker_test.sh'
1214
done

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name='python-datapath',
8-
version='0.1.2',
8+
version='0.2.0',
99

1010
author='Alex Shafer',
1111
author_email='ashafer@pm.me',

test/test_datapath.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import doctest
12
import unittest
23

34
import datapath
@@ -42,6 +43,11 @@
4243
)
4344

4445

46+
def load_tests(loader, tests, ignore):
    """unittest `load_tests` hook: extend the suite with the doctests found in `datapath._base`"""
    base_doctests = doctest.DocTestSuite(datapath._base)
    tests.addTests(base_doctests)
    return tests
49+
50+
4551
class TestDatapath(unittest.TestCase):
4652
def test_validate_path_valid_cases_iterable_false(self):
4753
for valid_path in valid_paths:

test/test_format.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import doctest
2+
import unittest
3+
from importlib import import_module
4+
5+
import datapath
6+
7+
8+
def load_tests(loader, tests, ignore):
    """unittest `load_tests` hook: extend the suite with the doctests found in the `datapath.format` module"""
    # the module is fetched by dotted name -- presumably because the attribute `datapath.format`
    # refers to the `format()` function rather than to the submodule
    format_module = import_module('datapath.format')
    tests.addTests(doctest.DocTestSuite(format_module))
    return tests
11+
12+
13+
class TestFormat(unittest.TestCase):
    """compares `datapath.format()` output with plain `str.format()` output"""

    def test_format_simple(self):
        """
        every template, once filled with datapath fields, must render through datapath.format()
        exactly like the same template filled directly with the values those paths refer to
        """
        test_obj = {
            'a': list('123'),
            'b': [{'c': list('456')}, 7],
        }
        # (replacement field containing a datapath, value that datapath refers to)
        paths = (
            ('{a[0]}', test_obj['a'][0]),
            ('{b[1]}', test_obj['b'][1]),
            ('{b[0].c[2]}', test_obj['b'][0]['c'][2]),
        )
        # (template with anonymous placeholders, number of placeholders in it)
        format_strings = (
            ('one {} two {} three', 2),
            ('{} one', 1),
            ('one {}', 1),
            ('{}{}{}', 3),
            ('{}', 1),
        )
        for index, (format_string, num_paths) in enumerate(format_strings):
            with self.subTest(msg=f'index {index}'):
                selected = paths[:num_paths]
                path_fields = [field for field, _ in selected]
                path_values = [value for _, value in selected]
                real_format_string = format_string.format(*path_fields)
                expected = format_string.format(*path_values)
                actual = datapath.format(test_obj, real_format_string)
                self.assertEqual(expected, actual)
39+
40+
41+
class TestFormatIterate(unittest.TestCase):
    """tests for `datapath.format_iterate()`"""

    def test_format_iterate_no_literal(self):
        """a format string made of a single iterable field yields each element in order"""
        test_obj = {'a': list('1234')}
        # compare complete result lists: asserting inside a loop over the generator would
        # pass without checking anything if it yielded too few (or no) items
        actual = list(datapath.format_iterate(test_obj, '{a[]}'))
        self.assertEqual(actual, list('1234'))

    def test_format_iterate_trailing_literal(self):
        """literal text after the last field is appended to every yielded string"""
        test_obj = {'a': list('1234')}
        actual = list(datapath.format_iterate(test_obj, '{a[]} x'))
        self.assertEqual(actual, [char + ' x' for char in '1234'])

0 commit comments

Comments
 (0)