Skip to content

Commit 0e5061e

Browse files
Merge branch 'release-v5.5.3'
2 parents c4a8c72 + 2657527 commit 0e5061e

14 files changed

Lines changed: 484 additions & 63 deletions

File tree

btrdb/stream.py

Lines changed: 93 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@
2121
from copy import deepcopy
2222
from collections.abc import Sequence
2323

24+
from btrdb.utils.buffer import PointBuffer
2425
from btrdb.point import RawPoint, StatPoint
2526
from btrdb.transformers import StreamSetTransformer
26-
from btrdb.utils.buffer import PointBuffer
27-
from btrdb.utils.timez import currently_as_ns, to_nanoseconds
28-
from btrdb.utils.conversion import AnnotationEncoder
2927
from btrdb.exceptions import BTrDBError, InvalidOperation
28+
from btrdb.utils.timez import currently_as_ns, to_nanoseconds
29+
from btrdb.utils.conversion import AnnotationEncoder, AnnotationDecoder
3030

3131

3232
##########################################################################
@@ -39,7 +39,7 @@
3939

4040
try:
4141
RE_PATTERN = re._pattern_type
42-
except:
42+
except Exception:
4343
RE_PATTERN = re.Pattern
4444

4545

@@ -74,29 +74,23 @@ def __init__(self, btrdb, uuid, **db_values):
7474
self._btrdb = btrdb
7575
self._uuid = uuid
7676

77-
7877
def refresh_metadata(self):
7978
"""
8079
Refreshes the locally cached metadata for a stream
8180
8281
Queries the BTrDB server for all stream metadata including collection,
8382
annotation, and tags. This method requires a round trip to the server.
84-
8583
"""
8684

8785
ep = self._btrdb.ep
8886
self._collection, self._property_version, self._tags, self._annotations, _ = ep.streamInfo(self._uuid, False, True)
8987
self._known_to_exist = True
9088

9189
# deserialize annotation values
92-
parts = []
93-
for k, v in self._annotations.items():
94-
try:
95-
parts.append([k, json.loads(v)])
96-
except json.decoder.JSONDecodeError:
97-
parts.append([k, v])
98-
99-
self._annotations = dict(parts)
90+
self._annotations = {
91+
key: json.loads(val, cls=AnnotationDecoder)
92+
for key, val in self._annotations.items()
93+
}
10094

10195
def exists(self):
10296
"""
@@ -128,6 +122,42 @@ def exists(self):
128122
return False
129123
raise bte
130124

125+
def count(self, start=MINIMUM_TIME, end=MAXIMUM_TIME, pointwidth=62, version=0):
126+
"""
127+
Compute the total number of points in the stream
128+
129+
Counts the number of points in the specified window and version. By
130+
default returns the latest total count of points in the stream. This
131+
helper method sums the counts of all StatPoints returned by
132+
``aligned_windows``. Because of this, note that the start and end
133+
timestamps may be adjusted if they are not powers of 2. For smaller
134+
windows of time, you may also need to adjust the pointwidth to ensure
135+
that the count granularity is captured appropriately.
136+
137+
Parameters
138+
----------
139+
start : int or datetime like object, default: MINIMUM_TIME
140+
The start time in nanoseconds for the range to be queried. (see
141+
:func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
142+
143+
end : int or datetime like object, default: MAXIMUM_TIME
144+
The end time in nanoseconds for the range to be queried. (see
145+
:func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
146+
147+
pointwidth : int, default: 62
148+
Specify the number of ns between data points (2**pointwidth)
149+
150+
version : int, default: 0
151+
Version of the stream to query
152+
153+
Returns
154+
-------
155+
int
156+
The total number of points in the stream for the specified window.
157+
"""
158+
points = self.aligned_windows(start, end, pointwidth, version)
159+
return sum([point.count for point, _ in points])
160+
131161
@property
132162
def btrdb(self):
133163
"""
@@ -396,9 +426,14 @@ def _update_tags_collection(self, tags, collection):
396426
)
397427

398428
def _update_annotations(self, annotations, encoder):
399-
serialized = dict(
400-
[[k, json.dumps(v, cls=encoder)] for k, v in annotations.items()]
401-
)
429+
# make a copy of the annotations to prevent accidental mutable object mutation
430+
serialized = deepcopy(annotations)
431+
if encoder is not None:
432+
serialized = {
433+
k: json.dumps(v, cls=encoder, indent=None, allow_nan=True)
434+
for k, v in serialized.items()
435+
}
436+
402437
self._btrdb.ep.setStreamAnnotations(
403438
uu=self.uuid,
404439
expected=self._property_version,
@@ -417,8 +452,9 @@ def update(self, tags=None, annotations=None, collection=None, encoder=Annotatio
417452
dict of annotation information for the stream.
418453
collection: str
419454
The collection prefix for a stream
420-
encoder: json.JSONEncoder
421-
JSON encoder to class to use for annotation serializations
455+
encoder: json.JSONEncoder or None
456+
JSON encoder class to use for annotation serialization; set to
457+
None to prevent JSON encoding of the annotations.
422458
423459
Returns
424460
-------
@@ -772,6 +808,44 @@ def versions(self):
772808
"""
773809
return self._pinned_versions if self._pinned_versions else self._latest_versions()
774810

811+
def count(self):
812+
"""
813+
Compute the total number of points in the streams using filters.
814+
815+
Computes the total number of points across all streams using the
816+
specified filters. By default, this returns the latest total count of
817+
all points in the streams. The count is modified by start and end
818+
filters or by pinning versions.
819+
820+
Note that this helper method sums the counts of all StatPoints returned
821+
by ``aligned_windows``. Because of this the start and end timestamps
822+
may be adjusted if they are not powers of 2. You can also set the
823+
pointwidth property for smaller windows of time to ensure that the
824+
count granularity is captured appropriately.
825+
826+
Parameters
827+
----------
828+
None
829+
830+
Returns
831+
-------
832+
int
833+
The total number of points in all streams for the specified filters.
834+
"""
835+
params = self._params_from_filters()
836+
start = params.get("start", MINIMUM_TIME)
837+
end = params.get("end", MAXIMUM_TIME)
838+
839+
pointwidth = self.pointwidth if self.pointwidth is not None else 62
840+
versions = self._pinned_versions if self._pinned_versions else {}
841+
842+
count = 0
843+
for s in self._streams:
844+
version = versions.get(s.uuid, 0)
845+
count += s.count(start, end, pointwidth, version)
846+
847+
return count
848+
775849
def earliest(self):
776850
"""
777851
Returns earliest points of data in streams using available filters.

btrdb/utils/conversion.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,18 @@
1919

2020
import uuid
2121
import json
22+
import pytz
23+
2224
from datetime import datetime
2325

26+
try:
27+
import numpy as np
28+
except ImportError:
29+
np = None
30+
31+
32+
RFC3339 = "%Y-%m-%d %H:%M:%S.%f%z"
33+
2434

2535
##########################################################################
2636
## Classes
@@ -30,23 +40,44 @@ class AnnotationEncoder(json.JSONEncoder):
3040
"""Default JSON encoder class for saving stream annotations"""
3141

3242
def default(self, obj):
33-
RFC3339 = "%Y-%m-%d %H:%M:%S.%f%z"
43+
"""Handle complex and user-specific types"""
44+
# handle UUID objects
45+
if isinstance(obj, uuid.UUID):
46+
return str(obj)
3447

3548
# handle Python datetime
49+
# TODO: better handling for timezone naive datetimes
3650
if isinstance(obj, datetime):
3751
return obj.strftime(RFC3339)
3852

3953
# handle numpy datetime64
40-
try:
41-
import numpy as np
42-
if isinstance(obj, np.datetime64):
43-
return obj.astype(datetime).strftime(RFC3339)
44-
except ImportError:
45-
pass
54+
if np is not None and isinstance(obj, np.datetime64):
55+
# We assume that np.datetime64 is UTC timezone because the datetime
56+
# will always be timezone naive -- this is an unfortunate limitation
57+
# https://numpy.org/devdocs/reference/arrays.datetime.html#changes-with-numpy-1-11
58+
return pytz.utc.localize(obj.astype(datetime)).strftime(RFC3339)
4659

4760
# Let the base class default method raise the TypeError
4861
return json.JSONEncoder.default(self, obj)
4962

63+
def encode(self, obj):
64+
"""Do not serialize simple string values with quotes"""
65+
serialized = super(AnnotationEncoder, self).encode(obj)
66+
if serialized.startswith('"') and serialized.endswith('"'):
67+
serialized = serialized.strip('"')
68+
return serialized
69+
70+
71+
class AnnotationDecoder(json.JSONDecoder):
72+
"""Default JSON decoder class for deserializing stream annotations"""
73+
74+
def decode(self, s):
75+
"""Do not raise JSONDecodeError, just return the raw string"""
76+
try:
77+
return super(AnnotationDecoder, self).decode(s)
78+
except json.JSONDecodeError:
79+
return s
80+
5081

5182
##########################################################################
5283
## Functions

btrdb/utils/timez.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,18 @@
3030
##########################################################################
3131

3232
DATETIME_FORMATS = (
33-
"%Y-%m-%d %H:%M:%S.%f%z", # most common RFC3339 nanoseconds
34-
"%Y-%m-%d %H:%M:%S.%f", # expects UTC default timezone
35-
"%Y-%m-%dT%H:%M:%S.%fZ", # JSON encoding, UTC timezone
36-
"%Y-%m-%dT%H:%M:%SZ", # JSON encoding, UTC timezone
37-
"%Y-%m-%dT%H:%M:%S.%f%z", # less common JSON-ish encoding
38-
"%Y-%m-%dT%H:%M:%S.%f", # for completeness, UTC default timezone
39-
"%Y-%m-%d %H:%M:%S%z", # human readable date time with TZ
40-
"%Y-%m-%d %H:%M:%S", # human readable date time UTC default
33+
"%Y-%m-%d %H:%M:%S.%f%z", # most common RFC3339 nanoseconds
34+
"%Y-%m-%d %H:%M:%S.%f", # expects UTC default timezone
35+
"%Y-%m-%dT%H:%M:%S.%fZ", # JSON encoding, UTC timezone
36+
"%Y-%m-%dT%H:%M:%SZ", # JSON encoding, UTC timezone
37+
"%Y-%m-%dT%H:%M:%S.%f%z", # less common JSON-ish encoding
38+
"%Y-%m-%dT%H:%M:%S.%f", # for completeness, UTC default timezone
39+
"%Y-%m-%d %H:%M:%S%z", # human readable date time with TZ
40+
"%Y-%m-%d %H:%M:%S", # human readable date time UTC default
41+
"%Y-%m-%d", # helper to get midnight on a particular date
4142
)
4243

44+
4345
##########################################################################
4446
## Functions
4547
##########################################################################
@@ -92,6 +94,7 @@ def datetime_to_ns(dt):
9294
dt_utc = aware.astimezone(pytz.utc)
9395
return int(dt_utc.timestamp() * 1e9)
9496

97+
9598
def to_nanoseconds(val):
9699
"""
97100
Converts datetime, datetime64, float, str (RFC 3339) to nanoseconds. If a
@@ -129,6 +132,8 @@ def to_nanoseconds(val):
129132
+--------------------------------+------------------------------------------+
130133
| %Y-%m-%d %H:%M:%S | human readable date time UTC default |
131134
+--------------------------------+------------------------------------------+
135+
| %Y-%m-%d | midnight at a particular date |
136+
+--------------------------------+------------------------------------------+
132137
133138
"""
134139
if val is None or isinstance(val, int):

btrdb/version.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
__version_info__ = {
1919
'major': 5,
2020
'minor': 5,
21-
'micro': 2,
21+
'micro': 3,
2222
'releaselevel': 'final',
23-
'serial': 10,
23+
'serial': 11,
2424
}
2525

2626
##########################################################################

docs/source/working/images/multiprocessing_architecture.png renamed to docs/source/_static/figures/multiprocessing_architecture.png

File renamed without changes.
5.58 MB
Loading

docs/source/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,3 +209,5 @@
209209

210210
# If true, `todo` and `todoList` produce output, else they produce nothing.
211211
todo_include_todos = True
212+
213+
numfig = True

0 commit comments

Comments
 (0)