Skip to content

Commit 8935276

Browse files
authored
Make map encoding deterministic by default (#5)
To ensure predictable and stable output, map encoding is now deterministic by default. This is critical for use cases like generating content hashes, digital signatures, and reliable caching. The new default behavior guarantees that encoding the same map will always produce the exact same binary output, regardless of the map's internal key order or the underlying Elixir/OTP version. For performance-critical applications where byte-for-byte determinism is not required, the previous non-deterministic behavior can be restored by passing the `deterministic: false` option. BREAKING CHANGE: The binary output for all maps now uses sorted keys and may differ from the output generated in v1.x of this library. To migrate and retain the old behavior, explicitly pass the `deterministic: false` option to the `encode/2` function.
1 parent ee4cee4 commit 8935276

6 files changed

Lines changed: 116 additions & 5 deletions

File tree

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [v2.0.0] - 2025-08-10
9+
10+
### Changed
11+
12+
- **BREAKING:** Map encoding is now deterministic by default
13+
- `Msgpack.encode/2` sorts map keys according to Elixir's standard term
14+
ordering before serialization
15+
- This guarantees that identical maps produce identical binary output, but it
16+
may alter the output compared to previous versions of this library
17+
18+
### Added
19+
20+
- Added a `:deterministic` option to `Msgpack.encode/2`
21+
- You can set this to `false` to disable key sorting for higher performance in
22+
contexts where deterministic output is not required.
23+
824
## [v1.1.1] - 2025-08-09
925

1026
### Fixed

README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,36 @@ iex> encoded_stream |> Stream.take(3) |> Enum.to_list()
7474
]
7575
```
7676

77+
### Map Encoding
78+
79+
By default, `Msgpack.encode/2` serializes Elixir maps in a **deterministic**
80+
manner.
81+
82+
It achieves this by sorting the map keys according to Elixir's standard term
83+
ordering before encoding. This ensures that encoding the same map will always
84+
produce the exact same binary output, which is critical for tasks like
85+
generating signatures or comparing hashes.
86+
87+
```elixir
88+
iex> map1 = %{a: 1, b: 2}
89+
iex> map2 = %{b: 2, a: 1}
90+
91+
# Both produce the same output because their keys are sorted to [:a, :b] before encoding
92+
iex> Msgpack.encode!(map1) == Msgpack.encode!(map2)
93+
true
94+
```
95+
96+
#### Performance Opt-Out
97+
98+
Sorting keys has a performance cost: O(N log N) for each encoded map, where N is the number of keys in that map.
99+
100+
If you are working in a performance-critical context where byte-for-byte
101+
determinism is not required, you can disable it:
102+
103+
```elixir
104+
Msgpack.encode(map, deterministic: false)
105+
```
106+
77107
## Full Documentation
78108

79109
For detailed information on all features, options, and functions, see the [full

lib/msgpack.ex

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,12 @@ defmodule Msgpack do
8787
payload with non-UTF-8 strings, which may be incompatible with other
8888
MessagePack decoders.
8989
90+
* `:deterministic` - Controls whether map keys are sorted before encoding.
91+
* `true` (default) - Enables key sorting, which ensures that encoding the
92+
same map always produces the same binary.
93+
* `false` - Disables key sorting, which can provide a performance gain in
94+
cases where determinism is not required.
95+
9096
## Examples
9197
9298
### Standard Encoding

lib/msgpack/encoder.ex

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ defmodule Msgpack.Encoder do
1515
def default_opts() do
1616
[
1717
atoms: :string,
18-
string_validation: true
18+
string_validation: true,
19+
deterministic: true
1920
]
2021
end
2122

@@ -160,6 +161,15 @@ defmodule Msgpack.Encoder do
160161

161162
# ==== Maps ====
162163
defp do_encode(map, opts) when is_map(map) do
164+
enumerable =
165+
if Keyword.get(opts, :deterministic, true) == false do
166+
map
167+
else
168+
map
169+
|> Map.to_list()
170+
|> Enum.sort_by(fn {key, _value} -> key end)
171+
end
172+
163173
acc = {:ok, []}
164174

165175
reducer = fn {key, value}, {:ok, acc_list} ->
@@ -172,7 +182,7 @@ defmodule Msgpack.Encoder do
172182
end
173183
end
174184

175-
case Enum.reduce(map, acc, reducer) do
185+
case Enum.reduce(enumerable, acc, reducer) do
176186
{:ok, encoded_pairs} ->
177187
size = map_size(map)
178188
{:ok, [encode_map_header(size), Enum.reverse(encoded_pairs)]}

mix.exs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
defmodule MsgpackElixir.MixProject do
22
use Mix.Project
33

4-
@version "1.1.1"
4+
@version "2.0.0"
55
@source_url "https://github.com/nrednav/msgpack_elixir"
66

77
def project do

test/msgpack_test.exs

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,55 @@ defmodule MsgpackTest do
7676
string_32 = String.duplicate("a", 32)
7777
assert_encode(string_32, <<0xD9, 32, string_32::binary>>)
7878
end
79+
80+
test "produces identical output for maps with different key orders, by default" do
81+
map1 = %{c: 3, b: 2, a: 1}
82+
map2 = %{a: 1, c: 3, b: 2}
83+
84+
expected_binary = <<0x83, 0xA1, "a", 1, 0xA1, "b", 2, 0xA1, "c", 3>>
85+
86+
assert_encode(map1, expected_binary)
87+
assert_encode(map2, expected_binary)
88+
end
89+
90+
test "correctly sorts maps with mixed key types, by default" do
91+
map = %{"a" => 1, 100 => 2, :z => 3, nil => 4}
92+
expected_binary = <<0x84, 100, 2, 0xC0, 4, 0xA1, "z", 3, 0xA1, "a", 1>>
93+
94+
assert_encode(map, expected_binary)
95+
end
96+
97+
test "applies sorting to nested maps, by default" do
98+
map1 = %{b: %{y: 2, x: 1}, a: 10}
99+
map2 = %{a: 10, b: %{x: 1, y: 2}}
100+
101+
{:ok, expected_binary} = Msgpack.encode(map2)
102+
103+
assert_encode(map1, expected_binary)
104+
end
105+
106+
test "with `deterministic: false` opts out of sorted key encoding" do
107+
# Per the Erlang docs:
108+
# https://www.erlang.org/doc/system/maps.html#how-large-maps-are-implemented,
109+
# maps with 32 or fewer elements are internally stored with sorted keys.
110+
# To reliably test the non-deterministic path, a large map (33+ elements)
111+
# must be used, which uses a HAMT implementation and does not iterate in
112+
# key-sorted order.
113+
large_map =
114+
Enum.into(1..33, %{}, fn i ->
115+
key = String.to_atom(<<123 - i>> <> "_#{i}")
116+
{key, i}
117+
end)
118+
119+
assert map_size(large_map) == 33
120+
121+
{:ok, sorted_binary} = Msgpack.encode(large_map)
122+
{:ok, unsorted_binary} = Msgpack.encode(large_map, deterministic: false)
123+
124+
refute unsorted_binary == sorted_binary,
125+
"Expected binaries to be different, but both were identical. The
126+
non-deterministic path may be producing sorted output."
127+
end
79128
end
80129

81130
describe "decode/2" do
@@ -357,8 +406,8 @@ defmodule MsgpackTest do
357406

358407
# ==== Helpers ====
359408

360-
defp assert_encode(input, expected_binary) do
361-
assert Msgpack.encode(input) == {:ok, expected_binary}
409+
defp assert_encode(input, expected_binary, opts \\ []) do
410+
assert Msgpack.encode(input, opts) == {:ok, expected_binary}
362411
end
363412

364413
defp assert_encode_error(input, expected_reason, opts \\ []) do

0 commit comments

Comments
 (0)