leanSpec/src/lean_spec/subspecs/xmss/poseidon.py at 3b820abe3ebfe0f4f1800fa9bee64a070fd6e0e0 · tcoratger/leanSpec · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
"""
Defines the Poseidon1 hash functions for the Generalized XMSS scheme.

### The Cryptographic Engine: Why Poseidon1?

This module provides the low-level cryptographic engine for all internal hashing
operations. It is built on **Poseidon1** hash function.

The choice of Poseidon1 is deliberate and critical for the scheme's ultimate goal.
Unlike traditional hashes like SHA-3, Poseidon1 is an **arithmetization-friendly**
(or **SNARK-friendly**) hash function. Its algebraic structure is simple, making it
exponentially faster to prove and verify inside a zero-knowledge proof system,
which is essential for aggregating many signatures into a single, compact proof.

This file provides wrappers for the two primary ways Poseidon1 is used:

1.  **Compression Mode**: A fast, fixed-input-size mode for hashing small,
    predictable data structures like a single hash digest or a pair of them.
2.  **Sponge Mode**: A flexible, variable-input-size mode for hashing large
    amounts of data, like the many digests that form a Merkle tree leaf.
"""

from __future__ import annotations

from pydantic import PrivateAttr, model_validator

from lean_spec.types import StrictBaseModel

from ..koalabear import Fp
from ..poseidon1.permutation import (
    PARAMS_16,
    PARAMS_24,
    Poseidon1,
    Poseidon1Params,
)
from ._validation import enforce_strict_types
from .utils import int_to_base_p


class PoseidonXmss(StrictBaseModel):
    """An instance of the Poseidon1 hash engine for the XMSS scheme."""

    params16: Poseidon1Params
    """Poseidon1 parameters for 16-width permutation."""

    params24: Poseidon1Params
    """Poseidon1 parameters for 24-width permutation."""

    _engine16: Poseidon1 | None = PrivateAttr(default=None)
    _engine24: Poseidon1 | None = PrivateAttr(default=None)

    @model_validator(mode="after")
    def _validate_strict_types(self) -> PoseidonXmss:
        """Reject subclasses to prevent type confusion attacks."""
        enforce_strict_types(self, params16=Poseidon1Params, params24=Poseidon1Params)
        return self

    def _get_engine(self, width: int) -> Poseidon1:
        """Return a cached Poseidon1 engine for the given width."""
        if width == 16:
            if self._engine16 is None:
                self._engine16 = Poseidon1(self.params16)
            return self._engine16
        if self._engine24 is None:
            self._engine24 = Poseidon1(self.params24)
        return self._engine24

    def compress(self, input_vec: list[Fp], width: int, output_len: int) -> list[Fp]:
        """
        Implements the Poseidon1 hash in **compression mode**.

        This mode is used for hashing fixed-size inputs and is the most efficient
        way to use Poseidon1. It is used for traversing hash chains and building
        the internal nodes of the Merkle tree.

        ### Compression Algorithm

        The function computes: `Truncate(Permute(padded_input) + padded_input)`.
        1.  **Padding**: The `input_vec` is padded with zeros to match the full state `width`.
        2.  **Permutation**: The core cryptographic permutation is applied to the padded state.
        3.  **Feed-Forward**: The original padded input is added element-wise to the
            permuted state. This is a key feature of the Poseidon1 design that
            provides security against certain attacks.
        4.  **Truncation**: The result is truncated to the desired `output_len`.

        Args:
            input_vec: The list of field elements to be hashed.
            width: The state width of the Poseidon1 permutation (16 or 24).
            output_len: The number of field elements in the output digest.

        Returns:
            A hash digest of `output_len` field elements.
        """
        # Check that the input vector is long enough to produce the output.
        if len(input_vec) < output_len:
            raise ValueError("Input vector is too short for requested output length.")

        # Select the correct permutation parameters based on the state width.
        if width not in (16, 24):
            raise ValueError(f"Width must be 16 or 24, got {width}")
        engine = self._get_engine(width)

        # Create a padded input by extending with zeros to match the state width.
        padded_input = list(input_vec) + [Fp(value=0)] * (width - len(input_vec))

        # Apply the Poseidon1 permutation.
        permuted_state = engine.permute(padded_input)

        # Apply the feed-forward step, adding the input back element-wise.
        final_state = [p + i for p, i in zip(permuted_state, padded_input, strict=True)]

        # Truncate the state to the desired output length and return.
        return final_state[:output_len]

    def safe_domain_separator(self, lengths: list[int], capacity_len: int) -> list[Fp]:
        """
        Computes a unique domain separator for the sponge construction (SAFE API).

        A sponge's security relies on its initial state being unique for each distinct
        hashing task. This function creates a unique "configuration" or
        "initialization vector" (`capacity_value`) by hashing the high-level
        parameters of the sponge's usage (e.g., the dimensions of the data
        being hashed). This prevents multi-user or cross-context attacks.

        Args:
            lengths: A list of integer parameters that define the hash context.
            capacity_len: The desired length of the output capacity value.

        Returns:
            A list of `capacity_len` field elements for initializing the sponge.
        """
        # Pack all the length parameters into a single, large, unambiguous integer.
        acc = 0
        for length in lengths:
            acc = (acc << 32) | length

        # Decompose this integer into a fixed-size list of field elements.
        #
        # This list serves as the input to a one-off compression hash.
        # NOTE: we always use this mode with a 24 width.
        input_vec = int_to_base_p(acc, 24)

        # Compress the decomposed vector to produce the capacity value.
        return self.compress(input_vec, 24, capacity_len)

    def sponge(
        self,
        input_vec: list[Fp],
        capacity_value: list[Fp],
        output_len: int,
        width: int,
    ) -> list[Fp]:
        """
        Implements the Poseidon1 hash using the **sponge construction**.

        This mode is used for hashing large or variable-length inputs. In this scheme,
        it is specifically used to hash the Merkle tree leaves, which consist of many
        concatenated hash digests.

        ### Sponge Algorithm

        1.  **Initialization**: The internal state is divided into a `rate` (for data)
            and a `capacity` (for security). The `capacity` part is initialized
            with the domain-separating `capacity_value`.

        2.  **Absorbing**: The input data is processed in `rate`-sized chunks. In each
            step, a chunk is added to the `rate` part of the state, and then the
            entire state is scrambled by the `permute` function.

        3.  **Squeezing**: Once all input is absorbed, the `rate` part of the state is
            extracted as output. If more output is needed, the state is permuted again,
            and more is extracted, repeating until `output_len` elements are generated.

        Args:
            input_vec: The input data of arbitrary length.
            capacity_value: The domain-separating value from `safe_domain_separator`.
            output_len: The number of field elements in the final output digest.
            width: The width of the Poseidon1 permutation.

        Returns:
            A hash digest of `output_len` field elements.
        """
        # Ensure that the capacity value is not too long.
        if len(capacity_value) >= width:
            raise ValueError("Capacity length must be smaller than the state width.")

        # Determine the permutation parameters and the size of the rate.
        if width not in (16, 24):
            raise ValueError(f"Width must be 16 or 24, got {width}")
        engine = self._get_engine(width)
        rate = width - len(capacity_value)

        # Pad the input vector with zeros to be an exact multiple of the rate size.
        num_extra = (rate - (len(input_vec) % rate)) % rate
        padded_input = input_vec + [Fp(value=0)] * num_extra

        # Initialize the state:
        # - capacity part (domain separator) at the beginning,
        # - rate part (zero) follows.
        cap_len = len(capacity_value)
        state = [Fp(value=0)] * width
        state[:cap_len] = capacity_value

        # Absorb the input in rate-sized chunks via replacement.
        for i in range(0, len(padded_input), rate):
            chunk = padded_input[i : i + rate]
            # Replace the rate part of the state with the chunk.
            for j in range(rate):
                state[cap_len + j] = chunk[j]
            # Apply the cryptographic permutation to mix the state.
            state = engine.permute(state)

        # Squeeze the output until enough elements have been generated.
        output: list[Fp] = []
        while len(output) < output_len:
            # Extract the rate part of the state (after capacity) as output.
            output.extend(state[cap_len : cap_len + rate])
            # Permute the state.
            state = engine.permute(state)

        # Truncate to the final output length and return.
        return output[:output_len]


PROD_POSEIDON = PoseidonXmss(params16=PARAMS_16, params24=PARAMS_24)
"""An instance configured for production-level parameters."""

TEST_POSEIDON = PROD_POSEIDON
"""Test and production use the same Poseidon1 parameters; only XmssConfig differs."""