Skip to content

Commit 7b75d7f

Browse files
committed
Convert floats representing integers to int in sample set serialization
1 parent e5a8e3b commit 7b75d7f

4 files changed

Lines changed: 153 additions & 1 deletion

File tree

dimod/sampleset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1767,7 +1767,7 @@ def to_serializable(self, use_bytes=False, bytes_type=bytes,
17671767
:meth:`~.SampleSet.from_serializable`
17681768
17691769
"""
1770-
schema_version = "3.1.0"
1770+
schema_version = "3.2.0"
17711771

17721772
# developer note: numpy's record array stores the samples, energies,
17731773
# num_occ. etc as a struct array. If we dumped that array directly to

dimod/serialization/utils.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,47 @@
1313
# limitations under the License.
1414

1515
import collections.abc as abc
16+
1617
from numbers import Integral, Number
18+
from typing import List, Union
1719

1820
import numpy as np
1921

2022

23+
__all__ = ["serialize_ndarray", "deserialize_ndarray",
24+
"serialize_ndarrays", "deserialize_ndarrays",
25+
"pack_samples", "unpack_samples",
26+
]
27+
28+
29+
def _replace_float_with_int(arr: Union[List[float], List[List]]):
30+
"""Replace floats representing integers with ints in a list representing an array.
31+
32+
Take a list of floats, as produced by :meth:`numpy.ndarray.tolist` from an array
33+
of floating types, and convert any ``float`` representing an integer value into
34+
``int``.
35+
36+
This function assumes some uniformity of the list structure. For instance giving it
37+
a list like ``[0.0, 0]`` or ``[0.0, [0.0]`` will cause it to fail.
38+
39+
Acts on the list(s) in-place.
40+
"""
41+
if not len(arr):
42+
# nothing to do when the list is empty
43+
pass
44+
45+
elif isinstance(arr[0], List):
46+
for subarr in arr:
47+
_replace_float_with_int(subarr)
48+
49+
elif hasattr(arr[0], "is_integer"):
50+
arr[:] = (int(a) if a.is_integer() else a for a in arr)
51+
52+
else:
53+
raise ValueError("expected a (possibly nested) list of floats, "
54+
f"recieved a (possible nested) list of {type(arr[0])}")
55+
56+
2157
def serialize_ndarray(arr, use_bytes=False, bytes_type=bytes):
2258
"""Serialize a NumPy array.
2359
@@ -43,6 +79,10 @@ def serialize_ndarray(arr, use_bytes=False, bytes_type=bytes):
4379
data = bytes_type(arr.tobytes(order='C'))
4480
else:
4581
data = arr.tolist()
82+
83+
if np.issubdtype(arr.dtype, np.floating):
84+
_replace_float_with_int(data)
85+
4686
return dict(type='array',
4787
data=data,
4888
data_type=arr.dtype.name,
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
features:
3+
- |
4+
Implement ``SampleSet`` serialization schema version 3.2.0.
5+
6+
Version 3.2.0 replaces ``float`` values that represent integers with ``int``
7+
in the ``"data"`` field of any arrays returned by ``SampleSet.to_serializable()``.
8+
In some pathological cases this can result in a much smaller representation
9+
when the data dictionaries are JSON-serialized, because it avoids the redundant
10+
``.0`` appended to every value.
11+
12+
This is a backwards-compatible change.

tests/test_serialization_utils.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import numbers
1516
import unittest
1617

1718
import numpy as np
@@ -47,3 +48,102 @@ def test_functional_3x3triu(self):
4748
new = deserialize_ndarray(obj)
4849
np.testing.assert_array_equal(arr, new)
4950
self.assertEqual(arr.dtype, new.dtype)
51+
52+
def test_replacing_floats_with_ints(self):
    """Check int replacement in serialized ``data`` and dtype-preserving round trips."""

    floating_dtypes = [np.float16, np.float32, np.float64]

    # Compare (major, minor) rather than the minor component alone; otherwise
    # NumPy 2.x, whose minor version restarts at 0, would be treated as < 1.22.
    numpy_version = tuple(int(part) for part in np.__version__.split(".")[:2])
    if numpy_version >= (1, 22):
        # Numpy<1.22.0 didn't support `is_integer()` on floating types
        # so float128 etc don't work out-of-the-box because `tolist()`
        # doesn't convert those to Python float.
        floating_dtypes.append(np.longdouble)

    for dtype in floating_dtypes:
        with self.subTest(f"{dtype}, all integer"):
            arr = np.ones(3, dtype=dtype)
            arr[0] = 2
            arr[1] = -0.0

            obj = serialize_ndarray(arr)

            # test the round trip
            new = deserialize_ndarray(obj)
            np.testing.assert_array_equal(arr, new)
            self.assertEqual(arr.dtype, new.dtype)  # original dtype is restored

            # every value is integer-valued, so each one is stored as an int
            self.assertIsInstance(obj["data"][0], int)
            self.assertIsInstance(obj["data"][1], int)
            self.assertIsInstance(obj["data"][2], int)

        with self.subTest(f"{dtype}, all float"):
            arr = np.empty(3, dtype=dtype)
            arr[0] = 1.5
            arr[1] = float("inf")
            arr[2] = float("nan")

            obj = serialize_ndarray(arr)

            # test the round trip
            new = deserialize_ndarray(obj)
            np.testing.assert_array_equal(arr, new)
            self.assertEqual(arr.dtype, new.dtype)  # original dtype is restored

            # none of these values is an integer; they must still be real numbers
            self.assertIsInstance(obj["data"][0], numbers.Real)
            self.assertIsInstance(obj["data"][1], numbers.Real)
            self.assertIsInstance(obj["data"][2], numbers.Real)

        with self.subTest(f"{dtype}, mixed"):
            arr = np.ones(3, dtype=dtype)
            arr[0] = 1.5
            arr[1] = -0.0

            obj = serialize_ndarray(arr)

            # test the round trip
            new = deserialize_ndarray(obj)
            np.testing.assert_array_equal(arr, new)
            self.assertEqual(arr.dtype, new.dtype)  # original dtype is restored

            # only the integer-valued entries are mapped to int
            self.assertIsInstance(obj["data"][0], numbers.Real)
            self.assertIsInstance(obj["data"][1], int)
            self.assertIsInstance(obj["data"][2], int)

    # NOTE(review): placed after the dtype loop because it does not use `dtype`;
    # confirm against the original file's indentation.
    with self.subTest("complex, mixed"):
        arr = np.ones(3, dtype=complex)
        arr[0] = 1.5
        arr[1] = -0.0

        obj = serialize_ndarray(arr)

        # test the round trip
        new = deserialize_ndarray(obj)
        np.testing.assert_array_equal(arr, new)
        self.assertEqual(arr.dtype, new.dtype)

        # in this case everything is kept as a complex number
        self.assertIsInstance(obj["data"][0], complex)
        self.assertIsInstance(obj["data"][1], complex)
        self.assertIsInstance(obj["data"][2], complex)

    for dtype in [np.int8, np.int16, np.int32, np.int64]:
        with self.subTest(dtype):
            arr = np.empty(3, dtype=dtype)
            arr[0] = 2
            arr[1] = 0
            arr[2] = -1

            obj = serialize_ndarray(arr)

            # test the round trip
            new = deserialize_ndarray(obj)
            np.testing.assert_array_equal(arr, new)
            self.assertEqual(arr.dtype, new.dtype)  # original dtype is restored

            # integer arrays already serialize to Python ints
            self.assertIsInstance(obj["data"][0], int)
            self.assertIsInstance(obj["data"][1], int)
            self.assertIsInstance(obj["data"][2], int)

0 commit comments

Comments
 (0)