Skip to content

Commit 1912367

Browse files
bbengfort authored and looselycoupled committed
Annotations JSON (#60)
1 parent e136360 commit 1912367

6 files changed

Lines changed: 194 additions & 53 deletions

File tree

btrdb/stream.py

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,12 @@
2121
from copy import deepcopy
2222
from collections.abc import Sequence
2323

24+
from btrdb.utils.buffer import PointBuffer
2425
from btrdb.point import RawPoint, StatPoint
2526
from btrdb.transformers import StreamSetTransformer
26-
from btrdb.utils.buffer import PointBuffer
27-
from btrdb.utils.timez import currently_as_ns, to_nanoseconds
28-
from btrdb.utils.conversion import AnnotationEncoder
2927
from btrdb.exceptions import BTrDBError, InvalidOperation
28+
from btrdb.utils.timez import currently_as_ns, to_nanoseconds
29+
from btrdb.utils.conversion import AnnotationEncoder, AnnotationDecoder
3030

3131

3232
##########################################################################
@@ -39,7 +39,7 @@
3939

4040
try:
4141
RE_PATTERN = re._pattern_type
42-
except:
42+
except Exception:
4343
RE_PATTERN = re.Pattern
4444

4545

@@ -74,29 +74,23 @@ def __init__(self, btrdb, uuid, **db_values):
7474
self._btrdb = btrdb
7575
self._uuid = uuid
7676

77-
7877
def refresh_metadata(self):
7978
"""
8079
Refreshes the locally cached metadata for a stream
8180
8281
Queries the BTrDB server for all stream metadata including collection,
8382
annotation, and tags. This method requires a round trip to the server.
84-
8583
"""
8684

8785
ep = self._btrdb.ep
8886
self._collection, self._property_version, self._tags, self._annotations, _ = ep.streamInfo(self._uuid, False, True)
8987
self._known_to_exist = True
9088

9189
# deserialize annotation values
92-
parts = []
93-
for k, v in self._annotations.items():
94-
try:
95-
parts.append([k, json.loads(v)])
96-
except json.decoder.JSONDecodeError:
97-
parts.append([k, v])
98-
99-
self._annotations = dict(parts)
90+
self._annotations = {
91+
key: json.loads(val, cls=AnnotationDecoder)
92+
for key, val in self._annotations.items()
93+
}
10094

10195
def exists(self):
10296
"""
@@ -432,9 +426,14 @@ def _update_tags_collection(self, tags, collection):
432426
)
433427

434428
def _update_annotations(self, annotations, encoder):
435-
serialized = dict(
436-
[[k, json.dumps(v, cls=encoder)] for k, v in annotations.items()]
437-
)
429+
# make a copy of the annotations to prevent accidental mutable object mutation
430+
serialized = deepcopy(annotations)
431+
if encoder is not None:
432+
serialized = {
433+
k: json.dumps(v, cls=encoder, indent=None, allow_nan=True)
434+
for k, v in serialized.items()
435+
}
436+
438437
self._btrdb.ep.setStreamAnnotations(
439438
uu=self.uuid,
440439
expected=self._property_version,
@@ -453,8 +452,9 @@ def update(self, tags=None, annotations=None, collection=None, encoder=Annotatio
453452
dict of annotation information for the stream.
454453
collection: str
455454
The collection prefix for a stream
456-
encoder: json.JSONEncoder
457-
JSON encoder to class to use for annotation serializations
455+
encoder: json.JSONEncoder or None
456+
JSON encoder class to use for annotation serialization; set to
457+
None to prevent JSON encoding of the annotations.
458458
459459
Returns
460460
-------

btrdb/utils/conversion.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,18 @@
1919

2020
import uuid
2121
import json
22+
import pytz
23+
2224
from datetime import datetime
2325

26+
try:
27+
import numpy as np
28+
except ImportError:
29+
np = None
30+
31+
32+
RFC3339 = "%Y-%m-%d %H:%M:%S.%f%z"
33+
2434

2535
##########################################################################
2636
## Classes
@@ -30,23 +40,44 @@ class AnnotationEncoder(json.JSONEncoder):
3040
"""Default JSON encoder class for saving stream annotations"""
3141

3242
def default(self, obj):
33-
RFC3339 = "%Y-%m-%d %H:%M:%S.%f%z"
43+
"""Handle complex and user-specific types"""
44+
# handle UUID objects
45+
if isinstance(obj, uuid.UUID):
46+
return str(obj)
3447

3548
# handle Python datetime
49+
# TODO: better handling for timezone naive datetimes
3650
if isinstance(obj, datetime):
3751
return obj.strftime(RFC3339)
3852

3953
# handle numpy datetime64
40-
try:
41-
import numpy as np
42-
if isinstance(obj, np.datetime64):
43-
return obj.astype(datetime).strftime(RFC3339)
44-
except ImportError:
45-
pass
54+
if np is not None and isinstance(obj, np.datetime64):
55+
# We assume that np.datetime64 is UTC timezone because the datetime
56+
# will always be timezone naive -- an unfortunate limitation of numpy; see
57+
# https://numpy.org/devdocs/reference/arrays.datetime.html#changes-with-numpy-1-11
58+
return pytz.utc.localize(obj.astype(datetime)).strftime(RFC3339)
4659

4760
# Let the base class default method raise the TypeError
4861
return json.JSONEncoder.default(self, obj)
4962

63+
def encode(self, obj):
64+
"""Do not serialize simple string values with quotes"""
65+
serialized = super(AnnotationEncoder, self).encode(obj)
66+
if serialized.startswith('"') and serialized.endswith('"'):
67+
serialized = serialized.strip('"')
68+
return serialized
69+
70+
71+
class AnnotationDecoder(json.JSONDecoder):
72+
"""Default JSON decoder class for deserializing stream annotations"""
73+
74+
def decode(self, s):
75+
"""Do not raise JSONDecodeError, just return the raw string"""
76+
try:
77+
return super(AnnotationDecoder, self).decode(s)
78+
except json.JSONDecodeError:
79+
return s
80+
5081

5182
##########################################################################
5283
## Functions

btrdb/utils/timez.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,18 @@
3030
##########################################################################
3131

3232
DATETIME_FORMATS = (
33-
"%Y-%m-%d %H:%M:%S.%f%z", # most common RFC3339 nanoseconds
34-
"%Y-%m-%d %H:%M:%S.%f", # expects UTC default timezone
35-
"%Y-%m-%dT%H:%M:%S.%fZ", # JSON encoding, UTC timezone
36-
"%Y-%m-%dT%H:%M:%SZ", # JSON encoding, UTC timezone
37-
"%Y-%m-%dT%H:%M:%S.%f%z", # less common JSON-ish encoding
38-
"%Y-%m-%dT%H:%M:%S.%f", # for completeness, UTC default timezone
39-
"%Y-%m-%d %H:%M:%S%z", # human readable date time with TZ
40-
"%Y-%m-%d %H:%M:%S", # human readable date time UTC default
33+
"%Y-%m-%d %H:%M:%S.%f%z", # most common RFC3339 nanoseconds
34+
"%Y-%m-%d %H:%M:%S.%f", # expects UTC default timezone
35+
"%Y-%m-%dT%H:%M:%S.%fZ", # JSON encoding, UTC timezone
36+
"%Y-%m-%dT%H:%M:%SZ", # JSON encoding, UTC timezone
37+
"%Y-%m-%dT%H:%M:%S.%f%z", # less common JSON-ish encoding
38+
"%Y-%m-%dT%H:%M:%S.%f", # for completeness, UTC default timezone
39+
"%Y-%m-%d %H:%M:%S%z", # human readable date time with TZ
40+
"%Y-%m-%d %H:%M:%S", # human readable date time UTC default
41+
"%Y-%m-%d", # helper to get midnight on a particular date
4142
)
4243

44+
4345
##########################################################################
4446
## Functions
4547
##########################################################################
@@ -92,6 +94,7 @@ def datetime_to_ns(dt):
9294
dt_utc = aware.astimezone(pytz.utc)
9395
return int(dt_utc.timestamp() * 1e9)
9496

97+
9598
def to_nanoseconds(val):
9699
"""
97100
Converts datetime, datetime64, float, str (RFC 3339) to nanoseconds. If a
@@ -129,6 +132,8 @@ def to_nanoseconds(val):
129132
+--------------------------------+------------------------------------------+
130133
| %Y-%m-%d %H:%M:%S | human readable date time UTC default |
131134
+--------------------------------+------------------------------------------+
135+
| %Y-%m-%d | midnight at a particular date |
136+
+--------------------------------+------------------------------------------+
132137
133138
"""
134139
if val is None or isinstance(val, int):

tests/btrdb/test_stream.py

Lines changed: 71 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
import json
2121
import uuid
2222
import pytz
23-
import datetime
2423
import pytest
24+
import datetime
2525
from unittest.mock import Mock, PropertyMock, patch, call
2626

2727
from btrdb.conn import BTrDB
@@ -34,6 +34,7 @@
3434

3535
RawPointProto = btrdb_pb2.RawPoint
3636
StatPointProto = btrdb_pb2.StatPoint
37+
EST = pytz.timezone('America/New_York')
3738

3839

3940
##########################################################################
@@ -116,8 +117,31 @@ def test_refresh_metadata_deserializes_annotations(self):
116117
Assert refresh_metadata deserializes annotation values
117118
"""
118119
uu = uuid.uuid4()
119-
serialized = {"parent": '{"child": 42}', "sentence": "the quick brown fox"}
120-
expected = {"parent": {"child": 42}, "sentence": "the quick brown fox"}
120+
serialized = {
121+
'acronym': 'VPHM',
122+
'description': 'El Segundo PMU 42 Ean',
123+
'devacronym': 'PMU!EL_SEG_PMU_42',
124+
'enabled': 'true',
125+
'id': '76932ae4-09bc-472c-8dc6-64fea68d2797',
126+
'phase': 'A',
127+
'label': 'null',
128+
'frequency': '30',
129+
'control': '2019-11-07 13:21:23.000000-0500',
130+
"calibrate": '{"racf": 1.8, "pacf": 0.005}',
131+
}
132+
expected = {
133+
'acronym': 'VPHM',
134+
'description': 'El Segundo PMU 42 Ean',
135+
'devacronym': 'PMU!EL_SEG_PMU_42',
136+
'enabled': True,
137+
'id': '76932ae4-09bc-472c-8dc6-64fea68d2797',
138+
'phase': 'A',
139+
'label': None,
140+
'frequency': 30,
141+
'control': '2019-11-07 13:21:23.000000-0500',
142+
"calibrate": {"racf": 1.8, "pacf": 0.005},
143+
}
144+
121145
endpoint = Mock(Endpoint)
122146
endpoint.streamInfo = Mock(return_value=("koala", 42, {}, serialized, None))
123147
stream = Stream(btrdb=BTrDB(endpoint), uuid=uu)
@@ -221,19 +245,43 @@ def test_update_annotations(self):
221245
endpoint = Mock(Endpoint)
222246
endpoint.streamInfo = Mock(return_value=("koala", 42, {}, {}, None))
223247
stream = Stream(btrdb=BTrDB(endpoint), uuid=uu)
224-
annotations = {"owner": "rabbit"}
248+
249+
# Test realistic annotations with multiple types
250+
annotations = {
251+
"acronym": "VPHM",
252+
"description": "El Segundo PMU 42 Ean",
253+
"devacronym": "PMU!EL_SEG_PMU_42",
254+
"enabled": True,
255+
"id": uuid.UUID('76932ae4-09bc-472c-8dc6-64fea68d2797'),
256+
"phase": "A",
257+
"label": None,
258+
"frequency": 30,
259+
"control": EST.localize(datetime.datetime(2019, 11, 7, 13, 21, 23)),
260+
"calibrate": {"racf": 1.8, "pacf": 0.005},
261+
}
225262

226263
stream.refresh_metadata()
227264
stream.update(annotations=annotations)
228265
stream._btrdb.ep.setStreamAnnotations.assert_called_once_with(
229266
uu=uu,
230267
expected=42,
231-
changes={"owner": '"rabbit"'}
268+
changes={
269+
'acronym': 'VPHM',
270+
'description': 'El Segundo PMU 42 Ean',
271+
'devacronym': 'PMU!EL_SEG_PMU_42',
272+
'enabled': 'true',
273+
'id': '76932ae4-09bc-472c-8dc6-64fea68d2797',
274+
'phase': 'A',
275+
'label': 'null',
276+
'frequency': '30',
277+
'control': '2019-11-07 13:21:23.000000-0500',
278+
"calibrate": '{"racf": 1.8, "pacf": 0.005}',
279+
}
232280
)
233281
stream._btrdb.ep.setStreamTags.assert_not_called()
234282

235283

236-
def test_nested_conversions(self):
284+
def test_update_annotations_nested_conversions(self):
237285
"""
238286
Assert update correctly encodes nested annotation data
239287
"""
@@ -278,7 +326,24 @@ def test_nested_conversions(self):
278326
}
279327
)
280328

329+
def test_update_annotations_no_encoder(self):
330+
uu = uuid.UUID('0d22a53b-e2ef-4e0a-ab89-b2d48fb2592a')
331+
endpoint = Mock(Endpoint)
332+
endpoint.streamInfo = Mock(return_value=("koala", 42, {}, {}, None))
333+
stream = Stream(btrdb=BTrDB(endpoint), uuid=uu)
334+
335+
annotations = {"foo": "this is a string", "bar": "3.14"}
336+
337+
stream.refresh_metadata()
338+
stream.update(annotations=annotations, encoder=None)
339+
stream._btrdb.ep.setStreamAnnotations.assert_called_once_with(
340+
uu=uu,
341+
expected=42,
342+
changes=annotations,
343+
)
281344

345+
# TODO: mock json.dumps
346+
# assert mock_dumps.assert_not_called()
282347

283348
##########################################################################
284349
## exists tests

0 commit comments

Comments
 (0)