Skip to content

Commit aad104f

Browse files
authored
Merge pull request #161 from cipherstash/jsonb-contains-variants-fix
feat(ste_vec): add jsonb parameter variants for containment functions
2 parents bf6e01f + 6ed67f5 commit aad104f

7 files changed

Lines changed: 1052 additions & 140 deletions

File tree

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,11 @@ tests/ste_vec_*M.sql*
221221

222222
# Rust build artifacts (using sccache)
223223
tests/sqlx/target/
224+
225+
# Work files (agent-generated, not for version control)
226+
.work/
227+
.serena/
228+
229+
# Build variants - protect variant deps
230+
src/deps-protect.txt
231+
src/deps-ordered-protect.txt

src/ste_vec/functions.sql

Lines changed: 129 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -241,18 +241,16 @@ $$ LANGUAGE plpgsql;
241241

242242

243243

244-
--! @brief Extract encrypted JSONB as array for GIN indexing
244+
--! @brief Extract full encrypted JSONB elements as array
245245
--!
246-
--! Extracts the encrypted JSONB data and returns it as a native jsonb[]
247-
--! array. This enables efficient GIN indexing using PostgreSQL's built-in array_ops
248-
--! which has native hash support for jsonb elements.
246+
--! Extracts all JSONB elements from the STE vector including non-deterministic fields.
247+
--! Use jsonb_array() instead for GIN indexing and containment queries.
249248
--!
250249
--! @param val jsonb containing encrypted EQL payload
251-
--! @return jsonb[] Array of JSONB elements for indexing
250+
--! @return jsonb[] Array of full JSONB elements
252251
--!
253-
--! @note Preferred for GIN indexes as jsonb has native hash support
254-
--! @see eql_v2.jsonb_array(eql_v2_encrypted)
255-
CREATE FUNCTION eql_v2.jsonb_array(val jsonb)
252+
--! @see eql_v2.jsonb_array
253+
CREATE FUNCTION eql_v2.jsonb_array_from_array_elements(val jsonb)
256254
RETURNS jsonb[]
257255
IMMUTABLE STRICT PARALLEL SAFE
258256
LANGUAGE SQL
@@ -266,21 +264,53 @@ AS $$
266264
$$;
267265

268266

269-
--! @brief Extract encrypted JSONB as array from encrypted column value
270-
--!
271-
--! Extracts the encrypted JSONB data from an encrypted column value and returns it as a
272-
--! native jsonb[] array for GIN indexing.
267+
--! @brief Extract full encrypted JSONB elements as array from encrypted column
273268
--!
274269
--! @param val eql_v2_encrypted Encrypted column value
275-
--! @return jsonb[] Array of JSONB elements for indexing
270+
--! @return jsonb[] Array of full JSONB elements
276271
--!
277-
--! @example
278-
--! -- Create GIN index for containment queries
279-
--! CREATE INDEX idx_jsonb ON mytable USING GIN (eql_v2.jsonb_array(encrypted_col));
272+
--! @see eql_v2.jsonb_array_from_array_elements(jsonb)
273+
CREATE FUNCTION eql_v2.jsonb_array_from_array_elements(val eql_v2_encrypted)
274+
RETURNS jsonb[]
275+
-- STRICT: a NULL argument yields NULL without evaluating the body.
IMMUTABLE STRICT PARALLEL SAFE
276+
LANGUAGE SQL
277+
-- Thin overload: unwraps the composite's `data` field and delegates to the
-- jsonb variant of this function.
AS $$
278+
SELECT eql_v2.jsonb_array_from_array_elements(val.data);
279+
$$;
280+
281+
282+
--! @brief Extract deterministic fields as array for GIN indexing
283+
--!
284+
--! Extracts only deterministic search term fields (s, b3, hm, ocv, ocf) from each
285+
--! STE vector element. Excludes non-deterministic ciphertext for correct containment
286+
--! comparison using PostgreSQL's native @> operator.
--!
--! If the payload has a top-level 'sv' key, the elements of that array are processed;
--! otherwise the payload itself is treated as a single element. Elements that carry
--! none of the listed fields contribute no array entry, and identical elements
--! collapse into one entry because rows are grouped by element. No ORDER BY is
--! applied, so the order of the resulting array is not guaranteed.
287+
--!
288+
--! @param val jsonb containing encrypted EQL payload
289+
--! @return jsonb[] Array of JSONB elements with only deterministic fields
--!         (NULL when val is NULL, because the function is STRICT)
290+
--!
291+
--! @note Use this for GIN indexes and containment queries
292+
--! @see eql_v2.jsonb_contains
293+
CREATE FUNCTION eql_v2.jsonb_array(val jsonb)
294+
RETURNS jsonb[]
295+
IMMUTABLE STRICT PARALLEL SAFE
296+
LANGUAGE SQL
297+
AS $$
298+
SELECT ARRAY(
299+
SELECT jsonb_object_agg(kv.key, kv.value)
300+
FROM jsonb_array_elements(
301+
CASE WHEN val ? 'sv' THEN val->'sv' ELSE jsonb_build_array(val) END
302+
) AS elem,
303+
LATERAL jsonb_each(elem) AS kv(key, value)
304+
WHERE kv.key IN ('s', 'b3', 'hm', 'ocv', 'ocf')
305+
GROUP BY elem
306+
);
307+
$$;
308+
309+
310+
--! @brief Extract deterministic fields as array from encrypted column
280311
--!
281-
--! -- Query using containment
282-
--! SELECT * FROM mytable
283-
--! WHERE eql_v2.jsonb_array(encrypted_col) @> eql_v2.jsonb_array(search_value);
312+
--! @param val eql_v2_encrypted Encrypted column value
313+
--! @return jsonb[] Array of JSONB elements with only deterministic fields
284314
--!
285315
--! @see eql_v2.jsonb_array(jsonb)
286316
CREATE FUNCTION eql_v2.jsonb_array(val eql_v2_encrypted)
@@ -321,6 +351,46 @@ AS $$
321351
$$;
322352

323353

354+
--! @brief GIN-indexable JSONB containment check (encrypted, jsonb)
355+
--!
356+
--! Checks if encrypted value 'a' contains all JSONB elements from jsonb value 'b'.
357+
--! Uses jsonb[] arrays internally for native PostgreSQL GIN index support.
--! Both arguments are converted with eql_v2.jsonb_array and the resulting arrays
--! are compared with the array containment operator @>.
358+
--!
359+
--! @param a eql_v2_encrypted Container value (typically a table column)
360+
--! @param b jsonb JSONB value to search for
361+
--! @return Boolean True if a contains all elements of b; NULL if either argument
--!         is NULL (the function is STRICT)
362+
--!
363+
--! @see eql_v2.jsonb_array
364+
--! @see eql_v2.jsonb_contains(eql_v2_encrypted, eql_v2_encrypted)
365+
CREATE FUNCTION eql_v2.jsonb_contains(a eql_v2_encrypted, b jsonb)
366+
RETURNS boolean
367+
IMMUTABLE STRICT PARALLEL SAFE
368+
LANGUAGE SQL
369+
AS $$
370+
SELECT eql_v2.jsonb_array(a) @> eql_v2.jsonb_array(b);
371+
$$;
372+
373+
374+
--! @brief GIN-indexable JSONB containment check (jsonb, encrypted)
375+
--!
376+
--! Checks if jsonb value 'a' contains all JSONB elements from encrypted value 'b'.
377+
--! Uses jsonb[] arrays internally for native PostgreSQL GIN index support.
--! Both arguments are converted with eql_v2.jsonb_array and the resulting arrays
--! are compared with the array containment operator @>.
378+
--!
379+
--! @param a jsonb Container JSONB value
380+
--! @param b eql_v2_encrypted Encrypted value to search for
381+
--! @return Boolean True if a contains all elements of b; NULL if either argument
--!         is NULL (the function is STRICT)
382+
--!
383+
--! @see eql_v2.jsonb_array
384+
--! @see eql_v2.jsonb_contains(eql_v2_encrypted, eql_v2_encrypted)
385+
CREATE FUNCTION eql_v2.jsonb_contains(a jsonb, b eql_v2_encrypted)
386+
RETURNS boolean
387+
IMMUTABLE STRICT PARALLEL SAFE
388+
LANGUAGE SQL
389+
AS $$
390+
SELECT eql_v2.jsonb_array(a) @> eql_v2.jsonb_array(b);
391+
$$;
392+
393+
324394
--! @brief GIN-indexable JSONB "is contained by" check
325395
--!
326396
--! Checks if all JSONB elements from 'a' are contained in 'b'.
@@ -341,6 +411,46 @@ AS $$
341411
$$;
342412

343413

414+
--! @brief GIN-indexable JSONB "is contained by" check (encrypted, jsonb)
415+
--!
416+
--! Checks if all JSONB elements from encrypted value 'a' are contained in jsonb value 'b'.
417+
--! Uses jsonb[] arrays internally for native PostgreSQL GIN index support.
--! Both arguments are converted with eql_v2.jsonb_array and the resulting arrays
--! are compared with the array "is contained by" operator <@.
418+
--!
419+
--! @param a eql_v2_encrypted Value to check (typically a table column)
420+
--! @param b jsonb Container JSONB value
421+
--! @return Boolean True if all elements of a are contained in b; NULL if either
--!         argument is NULL (the function is STRICT)
422+
--!
423+
--! @see eql_v2.jsonb_array
424+
--! @see eql_v2.jsonb_contained_by(eql_v2_encrypted, eql_v2_encrypted)
425+
CREATE FUNCTION eql_v2.jsonb_contained_by(a eql_v2_encrypted, b jsonb)
426+
RETURNS boolean
427+
IMMUTABLE STRICT PARALLEL SAFE
428+
LANGUAGE SQL
429+
AS $$
430+
SELECT eql_v2.jsonb_array(a) <@ eql_v2.jsonb_array(b);
431+
$$;
432+
433+
434+
--! @brief GIN-indexable JSONB "is contained by" check (jsonb, encrypted)
435+
--!
436+
--! Checks if all JSONB elements from jsonb value 'a' are contained in encrypted value 'b'.
437+
--! Uses jsonb[] arrays internally for native PostgreSQL GIN index support.
--! Both arguments are converted with eql_v2.jsonb_array and the resulting arrays
--! are compared with the array "is contained by" operator <@.
438+
--!
439+
--! @param a jsonb Value to check
440+
--! @param b eql_v2_encrypted Container encrypted value
441+
--! @return Boolean True if all elements of a are contained in b; NULL if either
--!         argument is NULL (the function is STRICT)
442+
--!
443+
--! @see eql_v2.jsonb_array
444+
--! @see eql_v2.jsonb_contained_by(eql_v2_encrypted, eql_v2_encrypted)
445+
CREATE FUNCTION eql_v2.jsonb_contained_by(a jsonb, b eql_v2_encrypted)
446+
RETURNS boolean
447+
IMMUTABLE STRICT PARALLEL SAFE
448+
LANGUAGE SQL
449+
AS $$
450+
SELECT eql_v2.jsonb_array(a) <@ eql_v2.jsonb_array(b);
451+
$$;
452+
453+
344454
--! @brief Check if STE vector array contains a specific encrypted element
345455
--!
346456
--! Tests whether any element in the STE vector array 'a' contains the encrypted value 'b'.

tests/sqlx/src/helpers.rs

Lines changed: 80 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
//! Common utilities for working with encrypted data in tests.
44
55
use anyhow::{Context, Result};
6+
use serde_json;
67
use sqlx::{PgPool, Row};
78

89
/// Fetch ORE encrypted value from pre-seeded ore table
@@ -83,7 +84,7 @@ pub async fn get_ore_encrypted_as_jsonb(pool: &PgPool, id: i32) -> Result<String
8384
result.with_context(|| format!("ore table returned NULL for id={}", id))
8485
}
8586

86-
/// Fetch STE vec encrypted value from a specified table
87+
/// Fetch STE vec encrypted value from a specified table as serde_json::Value
8788
///
8889
/// Default tables:
8990
/// - `ste_vec`: Created by migration `003_install_ste_vec_data.sql`, 10 records (ids 1-10)
@@ -93,22 +94,93 @@ pub async fn get_ore_encrypted_as_jsonb(pool: &PgPool, id: i32) -> Result<String
9394
/// - Records have selectors for $.hello (a7cea93975ed8c01f861ccb6bd082784) with ore_cllw_var_8
9495
/// - Records have selectors for $.n (2517068c0d1f9d4d41d2c666211f785e) with ore_cllw_u64_8
9596
///
97+
/// Returns the encrypted value as parsed JSON, allowing callers to:
98+
/// - Inspect structure programmatically
99+
/// - Use .to_string() when a literal string is needed
100+
/// - Avoid quoting and escaping problems caused by embedded apostrophes
101+
///
96102
/// # Arguments
97103
/// * `pool` - Database connection pool
98104
/// * `table` - Table name to query (e.g., "ste_vec" or "ste_vec_vast")
99105
/// * `id` - Row id to fetch
100-
pub async fn get_ste_vec_encrypted(pool: &PgPool, table: &str, id: i32) -> Result<String> {
101-
let sql = format!("SELECT e::text FROM {} WHERE id = {}", table, id);
102-
let row = sqlx::query(&sql)
106+
pub async fn get_ste_vec_encrypted(
107+
pool: &PgPool,
108+
table: &str,
109+
id: i32,
110+
) -> Result<serde_json::Value> {
111+
// `table` and `id` are formatted straight into the SQL text (no bind
// parameters), so callers must pass trusted values. `(e).data` selects the
// `data` field of column `e`.
let sql = format!("SELECT (e).data::jsonb FROM {} WHERE id = {}", table, id);
112+
// Decode the single selected column directly as JSON; any failure of the
// query or the decode is reported with the table name and id attached.
let result: serde_json::Value = sqlx::query_scalar(&sql)
103113
.fetch_one(pool)
104114
.await
105115
.with_context(|| format!("fetching {} encrypted value for id={}", table, id))?;
106116

107-
let result: Option<String> = row
108-
.try_get(0)
109-
.with_context(|| format!("extracting text column for id={}", id))?;
117+
Ok(result)
118+
}
119+
120+
/// Fetch two STE vec encrypted values from the same table
121+
///
122+
/// Useful for encrypted-to-encrypted containment tests where we need
123+
/// two distinct encrypted values from the same table.
/// The ids are not checked for distinctness; passing the same id twice
/// returns the same value twice. The rows are fetched one after the other
/// with [`get_ste_vec_encrypted`].
124+
///
125+
/// # Arguments
126+
/// * `pool` - Database connection pool
127+
/// * `table` - Table name to query
128+
/// * `id1` - First row id
129+
/// * `id2` - Second row id
130+
///
131+
/// # Returns
132+
/// Tuple of (enc1, enc2) as serde_json::Value
///
/// # Errors
/// Propagates the first error returned by [`get_ste_vec_encrypted`]; the
/// second row is not fetched if the first lookup fails.
133+
pub async fn get_ste_vec_encrypted_pair(
134+
pool: &PgPool,
135+
table: &str,
136+
id1: i32,
137+
id2: i32,
138+
) -> Result<(serde_json::Value, serde_json::Value)> {
139+
let enc1 = get_ste_vec_encrypted(pool, table, id1).await?;
140+
let enc2 = get_ste_vec_encrypted(pool, table, id2).await?;
141+
Ok((enc1, enc2))
142+
}
143+
144+
/// Extract a single SV element from an encrypted value as serde_json::Value
145+
///
146+
/// Fetches an encrypted value from the specified table and extracts
147+
/// a specific element from its sv array by index.
148+
///
149+
/// # Arguments
150+
/// * `pool` - Database connection pool
151+
/// * `table` - Table name to query (e.g., "ste_vec" or "ste_vec_vast")
152+
/// * `id` - Row id to fetch
153+
/// * `sv_index` - Index into the sv array (0-based)
154+
///
155+
/// # Returns
156+
/// The sv element as serde_json::Value, suitable for use in containment queries.
157+
/// Use .to_string() when a literal string is needed for SQL interpolation.
///
/// # Errors
/// Returns an error if the query fails, or if the selected expression is
/// NULL (reported as "sv element extraction returned NULL").
158+
pub async fn get_ste_vec_sv_element(
159+
pool: &PgPool,
160+
table: &str,
161+
id: i32,
162+
sv_index: i32,
163+
) -> Result<serde_json::Value> {
164+
// `sv_index`, `table` and `id` are formatted straight into the SQL text
// (no bind parameters), so callers must pass trusted values.
let sql = format!(
165+
"SELECT ((e).data->'sv'->{})::jsonb FROM {} WHERE id = {}",
166+
sv_index, table, id
167+
);
168+
// Decoded as Option so that a NULL result can be turned into a
// descriptive error below instead of a generic decode failure.
let result: Option<serde_json::Value> = sqlx::query_scalar(&sql)
169+
.fetch_one(pool)
170+
.await
171+
.with_context(|| {
172+
format!(
173+
"extracting sv element {} from {} id={}",
174+
sv_index, table, id
175+
)
176+
})?;
110177

111-
result.with_context(|| format!("{} table returned NULL for id={}", table, id))
178+
result.with_context(|| {
179+
format!(
180+
"{} sv element extraction returned NULL for id={}, index={}",
181+
table, id, sv_index
182+
)
183+
})
112184
}
113185

114186
/// Extract selector term using SQL helper functions

tests/sqlx/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ pub use assertions::QueryAssertion;
1313
pub use helpers::{
1414
analyze_table, assert_uses_index, assert_uses_seq_scan, create_jsonb_gin_index, explain_query,
1515
get_encrypted_term, get_ore_encrypted, get_ore_encrypted_as_jsonb, get_ste_vec_encrypted,
16-
get_ste_vec_selector_term, get_ste_vec_term_by_id,
16+
get_ste_vec_encrypted_pair, get_ste_vec_selector_term, get_ste_vec_sv_element,
17+
get_ste_vec_term_by_id,
1718
};
1819
pub use index_types as IndexTypes;
1920
pub use selectors::Selectors;

0 commit comments

Comments
 (0)