diff --git a/README.md b/README.md index 08af24d..7d45e50 100644 --- a/README.md +++ b/README.md @@ -181,7 +181,7 @@ WHERE eql_v2.hmac_256(email) = eql_v2.hmac_256( -- Using search terms (encrypted ahead of time, plaintext not loggable): SELECT * FROM patient_records WHERE eql_v2.hmac_256(email) = eql_v2.hmac_256( - '{"hm":"0f4f3b99671e74c0f8b5a1d2e3f4...","ob":null,"bf":null,"i":{"t":"patient_records","c":"email"}}' + '{"hm":"0f4f3b99671e74c0f8b5a1d2e3f4a5b6c7d8...","ob":null,"bf":null,"i":{"t":"patient_records","c":"email"}}' ); ``` @@ -225,7 +225,7 @@ WHERE eql_v2.ore_block_u64_8_256(systolic_bp) = eql_v2.ore_block_u64_8_256( -- Using search terms (encrypted ahead of time, plaintext not loggable): SELECT * FROM patient_records WHERE eql_v2.ore_block_u64_8_256(systolic_bp) = eql_v2.ore_block_u64_8_256( - '{"hm":null,"ob":["0x1a2b3c..."],"bf":null,"i":{"t":"patient_records","c":"systolic_bp"}}' + '{"hm":null,"ob":["99f7adadadadadadc68b2822197a849e..."],"bf":null,"i":{"t":"patient_records","c":"systolic_bp"}}' ); -- Find patients with blood pressure above specified threshold @@ -238,7 +238,7 @@ WHERE eql_v2.ore_block_u64_8_256(systolic_bp) >= eql_v2.ore_block_u64_8_256( -- Using search terms (encrypted ahead of time, plaintext not loggable): SELECT * FROM patient_records WHERE eql_v2.ore_block_u64_8_256(systolic_bp) >= eql_v2.ore_block_u64_8_256( - '{"hm":null,"ob":["0x1a2b3c..."],"bf":null,"i":{"t":"patient_records","c":"systolic_bp"}}' + '{"hm":null,"ob":["99f7adadadadadadc68b2822197a849e..."],"bf":null,"i":{"t":"patient_records","c":"systolic_bp"}}' ); -- Find patients with blood pressure in specified range @@ -251,8 +251,8 @@ WHERE eql_v2.ore_block_u64_8_256(systolic_bp) BETWEEN -- Using search terms (encrypted ahead of time, plaintext not loggable): SELECT * FROM patient_records WHERE eql_v2.ore_block_u64_8_256(systolic_bp) BETWEEN - eql_v2.ore_block_u64_8_256('{"hm":null,"ob":["0x1f5e2d..."],"bf":null,"i":{"t":"patient_records","c":"systolic_bp"}}') - AND 
eql_v2.ore_block_u64_8_256('{"hm":null,"ob":["0x9c8b7a..."],"bf":null,"i":{"t":"patient_records","c":"systolic_bp"}}'); + eql_v2.ore_block_u64_8_256('{"hm":null,"ob":["99f7adadadadadadc68b2822197a849e..."],"bf":null,"i":{"t":"patient_records","c":"systolic_bp"}}') + AND eql_v2.ore_block_u64_8_256('{"hm":null,"ob":["99f7adadadadadadc68b2822197a849e..."],"bf":null,"i":{"t":"patient_records","c":"systolic_bp"}}'); -- Order patients by blood pressure from lowest to highest SELECT * FROM patient_records @@ -330,7 +330,7 @@ WHERE eql_v2.bloom_filter(medical_notes) @> eql_v2.bloom_filter( -- Using search terms (encrypted ahead of time, plaintext not loggable): SELECT * FROM patient_records WHERE eql_v2.bloom_filter(medical_notes) @> eql_v2.bloom_filter( - '{"hm":null,"ob":null,"bf":[142,891,1337,1847,2001],"i":{"t":"patient_records","c":"medical_notes"}}' + '{"hm":null,"ob":null,"bf":[1397,378,1463,1673,1474,1226],"i":{"t":"patient_records","c":"medical_notes"}}' ); ``` @@ -346,7 +346,7 @@ Basic usage: 'cast_as' => 'jsonb', 'indexes' => [ 'ste_vec' => [ - 'prefix' => 'patient_records/health_assessment', + 'prefix' => 'patient_records.health_assessment', ], ], ], @@ -357,7 +357,7 @@ Configuration parameters: | Parameter | Type | Required | Default | Description | |-----------|------|----------|---------|-------------| -| `prefix` | `string` | ✓ | - | Unique identifier prefix for the encryption context (recommended format is `table_name/column_name`) | +| `prefix` | `string` | ✓ | - | Domain separator for cryptographic hashing that must be unique per column (recommended format is `table_name.column_name`) | Example SQL queries: @@ -369,12 +369,12 @@ WHERE cs_ste_vec_v2(health_assessment, '{"conditions":["diabetes","hypertension" -- Using search terms (encrypted ahead of time, plaintext not loggable): SELECT * FROM patient_records -WHERE health_assessment @> 
'{"sv":[{"tokenized_selector":"dd4659b9c279af040dd05ce21b2a22f7","term":"00a6343301fae638379a8b1f9147eda082","record":"mBbLOIqSF%n4>5ajY+w?-+!*eKqJt(|G8c0rEaxnXm!MLTLGT0tuse(H;lHjz!hWQpW&^_vF3;xdm^M%l{vX7mB05%=#-7DSapsQ$y(uxxWphCxN}>hI__Q00^;tc;bvpcK_`<{cx)595mX{~O#Z^4zy","parent_is_array":false}],"i":{"t":"patient_records","c":"health_assessment"}}'; +WHERE health_assessment @> '{"sv":[{"s":"dd4659b9c279af040dd05ce21b2a22f7...","t":"22303061363334333330316661653633...","r":"mBbL}QHJ&a(@rwS5n)u^G+Fb+t}Soo-h...","pa":false}],"i":{"t":"patient_records","c":"health_assessment"}}'; -- Find records where encrypted data is contained by specified values -- Using search terms (encrypted ahead of time, plaintext not loggable): SELECT * FROM patient_records -WHERE health_assessment <@ '{"sv":[{"tokenized_selector":"cff40d3394bcb913237661f679280999","term":"022d3a7feb298b2d93b9f3a2cd0c0bebf8c524b8a991f9eedfbfe52477fe7b3817de6ae2fec499e5b3e7b0a5daefc88ea45923e2cc5c6658c18f477f7eb6542106","record":"mBbLOIqSF%n4>5ajY+w?-+!*e9B5XGJ%B#Hphr%zC1ge&;+sJ+zW5p~UC^A%;KU#qxN}>hI__Q00^;tc;bvpcK_`<{cx)595mX{~O#Z^4zy","parent_is_array":false}],"i":{"t":"patient_records","c":"health_assessment"}}'; +WHERE health_assessment <@ '{"sv":[{"s":"df08a4c4157bdb5bf6fa9be89cf18d10...","t":"22303063343133306135646334356130...","r":"mBbL}QHJ&a(@rwS5n)u^G+Fb+Ex8ofB!...","pa":false}],"i":{"t":"patient_records","c":"health_assessment"}}'; ``` This index differs from other indexes in its query patterns. Plaintext queries use `cs_ste_vec_v2()` with JSON data and only support the PostgreSQL `@>` operator, while search term queries can use both PostgreSQL `@>` and `<@` operators with pre-computed vectors from the `sv` response field in the search terms response. 
@@ -414,7 +414,7 @@ $config = [ 'cast_as' => 'jsonb', 'indexes' => [ 'ste_vec' => [ - 'prefix' => 'patient_records/health_assessment', + 'prefix' => 'patient_records.health_assessment', ], ], ], @@ -472,7 +472,7 @@ try { $ciphertext = $result['c']; echo $ciphertext; - // mBbM8rvts7^sycKCI!-Y9x2kL8vN... + // mBbKlk}G7QdaGiNj$dL7#+AOrA^}*VJx... } finally { $client->freeClient($clientPtr); } @@ -492,9 +492,9 @@ For columns configured with the `unique`, `ore`, and/or `match` indexes: ```json { "k": "ct", - "c": "mBbM8rvts7^sycKCI!-Y9x2kL8vN...", + "c": "mBbKlk}G7QdaGiNj$dL7#+AOrA^}*VJx...", "dt": "text", - "hm": "0f4f3b99671e74c0f8b5a1d2e3f4...", + "hm": "f3ca71fd39ae9d3d1d1fc25141bcb6da...", "ob": null, "bf": null, "i": { @@ -525,14 +525,14 @@ For columns configured with the `ste_vec` index: ```json { "k": "sv", - "c": "mBbKND$(wyS}0*#KjqS!Is$dX...", + "c": "mBbLQ2^Io|1eh_K2*n^LSCVVQuGhkL>w...", "dt": "jsonb", "sv": [ { - "tokenized_selector": "dd4659b9c279af040dd05ce21b2a22f7", - "term": "00a6343301fae638379a8b1f9147eda082", - "record": "mBbKND$(wyS}0*#KjqS!Is$dX...", - "parent_is_array": false + "s": "dd4659b9c279af040dd05ce21b2a22f7...", + "t": "22303061363334333330316661653633...", + "r": "mBbLQ2^Io|1eh_K2*n^LSCVVQuGhkL>w...", + "pa": false } ], "i": { @@ -551,10 +551,10 @@ Response parameters: | `c` | `string` | Always | Base85-encoded ciphertext containing the encrypted data | | `dt` | `string` | Always | Data type for casting (from `cast_as` configuration parameter) | | `sv` | `array` | `ste_vec` | Structured text encryption vector for JSONB containment queries | -| `sv[].tokenized_selector` | `string` | `ste_vec` | Encrypted selector for the JSON path | -| `sv[].term` | `string` | `ste_vec` | Encrypted term value | -| `sv[].record` | `string` | `ste_vec` | Base85-encoded encrypted record data | -| `sv[].parent_is_array` | `boolean` | `ste_vec` | Whether the parent JSON element is an array | +| `sv[].s` | `string` | `ste_vec` | Tokenized selector representing 
the encrypted JSON path to the value | +| `sv[].t` | `string` | `ste_vec` | Encrypted term value for equality and order-preserving queries | +| `sv[].r` | `string` | `ste_vec` | Base85-encoded ciphertext containing the encrypted record data | +| `sv[].pa` | `boolean` | `ste_vec` | Whether the parent JSON element is an array | | `i` | `object` | Always | Table and column identifier for this encrypted value: `{"t":"table_name","c":"column_name"}` | | `v` | `int` | Always | Schema version for backward compatibility | @@ -792,7 +792,7 @@ try { $ciphertext = $encryptedData['c']; echo $ciphertext; - // mBbM8rvts7^sycKCI!-Y9x2kL8vN... + // mBbKuXT|+vBh~K2WV-!n5_W3DBFd4`Mp... } } finally { $client->freeClient($clientPtr); @@ -859,7 +859,7 @@ try { $ciphertextItemsJson = json_encode($ciphertextItems, JSON_THROW_ON_ERROR); echo $ciphertextItemsJson; - // [{"ciphertext":"mBbM8rvts7^sycKCI!-Y9x2kL8vN..."},{"ciphertext":"nCcN9swus8^tzdLDJ!-Z0y3lM9wO..."}] + // [{"ciphertext":"mBbK>BcAYctW$Gy)vK2)Y$&nBBKz{oL1..."},{"ciphertext":"mBbJ<8tOEI+Z`KFUV`q&kmdWtO#DKxW|..."}] $decryptedResultJson = $client->decryptBulk($clientPtr, $ciphertextItemsJson); @@ -926,7 +926,7 @@ try { foreach ($result as $searchTerms) { echo json_encode($searchTerms); - // {"hm":"0f4f3b99671e74c0f8b5a1d2e3f4...","ob":null,"bf":null,"i":{"t":"patient_records","c":"email"}} + // {"hm":"f3ca71fd39ae9d3d1d1fc25141bcb6da...","ob":null,"bf":null,"i":{"t":"patient_records","c":"email"}} } } finally { $client->freeClient($clientPtr); @@ -945,7 +945,7 @@ For columns configured with `unique`, `ore`, and/or `match` indexes: ```json { - "hm": "0f4f3b99671e74c0f8b5a1d2e3f4...", + "hm": "f3ca71fd39ae9d3d1d1fc25141bcb6da...", "ob": null, "bf": null, "i": { @@ -972,16 +972,16 @@ For columns configured with `ste_vec` indexes: { "sv": [ { - "tokenized_selector": "dd4659b9c279af040dd05ce21b2a22f7", - "term": "00a6343301fae638379a8b1f9147eda082", - "record": "mBbM0GYe4Wa7OJ<2HG_ZQ42Z5KmmLn7{+K)z~e9h*+$l...", - "parent_is_array": 
false + "s": "dd4659b9c279af040dd05ce21b2a22f7...", + "t": "22303061363334333330316661653633...", + "r": "mBbLkCZcaJ2U|G333rRC>f;r}uFEp7Tg...", + "pa": false }, { - "tokenized_selector": "cff40d3394bcb913237661f679280999", - "term": "022d3a7feb298b2d93b9f3a2cd0c0bebf8c524b8a991...", - "record": "mBbM0GYe4Wa7OJ<2HG_ZQ42Z59Mj-WD;uRkcn7ZHj&a4...", - "parent_is_array": false + "s": "df08a4c4157bdb5bf6fa9be89cf18d10...", + "t": "22303063343133306135646334356130...", + "r": "mBbLkCZcaJ2U|G333rRC>f;r}E&d@?`;...", + "pa": false } ], "i": { @@ -996,10 +996,10 @@ Response parameters: | Parameter | Type | Source | Description | |-----------|------|--------|-------------| | `sv` | `array` | `ste_vec` | Structured text encryption vector for JSONB containment queries | -| `sv[].tokenized_selector` | `string` | `ste_vec` | Encrypted selector for the JSON path | -| `sv[].term` | `string` | `ste_vec` | Encrypted term value | -| `sv[].record` | `string` | `ste_vec` | Base85-encoded encrypted record data | -| `sv[].parent_is_array` | `boolean` | `ste_vec` | Whether the parent JSON element is an array | +| `sv[].s` | `string` | `ste_vec` | Tokenized selector representing the encrypted JSON path to the value | +| `sv[].t` | `string` | `ste_vec` | Encrypted term value for equality and order-preserving queries | +| `sv[].r` | `string` | `ste_vec` | Base85-encoded ciphertext containing the encrypted record data | +| `sv[].pa` | `boolean` | `ste_vec` | Whether the parent JSON element is an array | | `i` | `object` | Always | Table and column identifier for this encrypted value: `{"t":"table_name","c":"column_name"}` | ### Error Handling @@ -1045,7 +1045,7 @@ try { $ciphertext = $result['c']; echo $ciphertext; - // mBbM8rvts7^sycKCI!-Y9x2kL8vN... + // mBbKlk}G7QdaGiNj$dL7#+AOrA^}*VJx... 
} catch (FFIException $e) { error_log($e->getMessage()); diff --git a/crates/protect-ffi/src/encrypt_config.rs b/crates/protect-ffi/src/encrypt_config.rs index 0a5de83..a4547a9 100644 --- a/crates/protect-ffi/src/encrypt_config.rs +++ b/crates/protect-ffi/src/encrypt_config.rs @@ -494,7 +494,7 @@ mod tests { "cast_as": "jsonb", "indexes": { "ste_vec": { - "prefix": "documents/content" + "prefix": "documents.content" } } } @@ -509,7 +509,7 @@ mod tests { assert_eq!( column.indexes[0].index_type, IndexType::SteVec { - prefix: "documents/content".into() + prefix: "documents.content".into() } ); diff --git a/crates/protect-ffi/src/lib.rs b/crates/protect-ffi/src/lib.rs index 2f89229..6e382fa 100644 --- a/crates/protect-ffi/src/lib.rs +++ b/crates/protect-ffi/src/lib.rs @@ -14,7 +14,7 @@ use cipherstash_client::{ credentials::{ServiceCredentials, ServiceToken}, encryption::{ self, EncryptionError, IndexTerm, Plaintext, PlaintextTarget, ReferencedPendingPipeline, - ScopedCipher, SteVec, TypeParseError, + ScopedCipher, TypeParseError, }, schema::ColumnConfig, zerokms::{self, EncryptedRecord, WithContext, ZeroKMSWithClientKey}, @@ -50,6 +50,23 @@ pub struct Client { encrypt_config: Arc>, } +/// A structured text encryption vector entry. +#[derive(Debug, Deserialize, Serialize)] +pub struct SteVecEntry { + /// Tokenized selector representing the encrypted JSON path to the value. + #[serde(rename = "s")] + tokenized_selector: String, + /// Encrypted term value for equality and order-preserving queries. + #[serde(rename = "t")] + term: String, + /// Base85-encoded ciphertext containing the encrypted record data. + #[serde(rename = "r")] + record: String, + /// Whether the parent JSON element is an array. + #[serde(rename = "pa")] + parent_is_array: bool, +} + /// An encrypted value with associated encryption indexes or structured text encryption vectors. 
#[derive(Debug, Deserialize, Serialize)] #[serde(tag = "k")] @@ -91,7 +108,7 @@ pub enum Encrypted { data_type: String, /// Structured text encryption vector for JSONB containment queries. #[serde(rename = "sv")] - ste_vec_index: SteVec<16>, + ste_vec_index: Vec<SteVecEntry>, /// Table and column identifier for this encrypted value. #[serde(rename = "i")] identifier: Identifier, @@ -483,10 +500,31 @@ fn to_eql_encrypted( // Instead, we use `map_err`. .map_err(|err| Error::Base85(err.to_string()))?; + let ste_vec_entries: Result<Vec<SteVecEntry>, Error> = ste_vec_index + .into_iter() + .map(|entry| { + let record = entry + .record + .to_mp_base85() + // The error type from `to_mp_base85` isn't public, so we don't derive an error for this one. + // Instead, we use `map_err`. + .map_err(|err| Error::Base85(err.to_string()))?; + + Ok(SteVecEntry { + tokenized_selector: hex::encode(entry.tokenized_selector.as_bytes()), + term: hex::encode(&serde_json::to_vec(&entry.term).map_err(Error::Parse)?), + record, + parent_is_array: entry.parent_is_array, + }) + }) + .collect(); + + let ste_vec_entries = ste_vec_entries?; + Ok(Encrypted::SteVec { ciphertext, data_type: cast_as.to_string(), - ste_vec_index, + ste_vec_index: ste_vec_entries, identifier: identifier.to_owned(), version: 2, }) @@ -934,18 +972,27 @@ mod lib { #[test] fn test_encrypted_stevec_json_format() { - let json = r#"{"k":"sv","c":"test-ciphertext","dt":"jsonb","sv":[],"i":{"t":"docs","c":"content"},"v":2}"#; + let json = r#"{"k":"sv","c":"test-ciphertext","dt":"jsonb","sv":[{"s":"test-selector","t":"test-term","r":"test-record","pa":false}],"i":{"t":"test_table","c":"test_column"},"v":2}"#; let parsed: serde_json::Value = serde_json::from_str(json).unwrap(); assert_eq!(parsed["k"], "sv"); assert_eq!(parsed["c"], "test-ciphertext"); assert_eq!(parsed["dt"], "jsonb"); - assert_eq!(parsed["sv"], serde_json::Value::Array(vec![])); - assert_eq!(parsed["v"], 2); + + let sv_array = parsed["sv"].as_array().unwrap(); + assert_eq!(sv_array.len(), 1); 
+ + let sv_entry = &sv_array[0]; + assert_eq!(sv_entry["s"], "test-selector"); + assert_eq!(sv_entry["t"], "test-term"); + assert_eq!(sv_entry["r"], "test-record"); + assert_eq!(sv_entry["pa"], false); let identifier = &parsed["i"]; - assert_eq!(identifier["t"], "docs"); - assert_eq!(identifier["c"], "content"); + assert_eq!(identifier["t"], "test_table"); + assert_eq!(identifier["c"], "test_column"); + + assert_eq!(parsed["v"], 2); } #[test] diff --git a/include/protectphp.h b/include/protectphp.h index f7ff10d..3c8fd38 100644 --- a/include/protectphp.h +++ b/include/protectphp.h @@ -1,7 +1,7 @@ /** * C interface for the Protect.php FFI library. * - * This header provides the C interface for the CipherStash Client, + * This header provides the C interface for the CipherStash Client SDK, * enabling integration through PHP's Foreign Function Interface (FFI). * All functions declared here are exposed by the underlying Rust library. */ diff --git a/platforms/darwin-arm64/libprotect_ffi.dylib b/platforms/darwin-arm64/libprotect_ffi.dylib index ddb4612..b8ee7b1 100644 Binary files a/platforms/darwin-arm64/libprotect_ffi.dylib and b/platforms/darwin-arm64/libprotect_ffi.dylib differ diff --git a/platforms/darwin-x64/libprotect_ffi.dylib b/platforms/darwin-x64/libprotect_ffi.dylib index 2f9f33e..09ded9d 100644 Binary files a/platforms/darwin-x64/libprotect_ffi.dylib and b/platforms/darwin-x64/libprotect_ffi.dylib differ diff --git a/platforms/linux-arm64-gnu/libprotect_ffi.so b/platforms/linux-arm64-gnu/libprotect_ffi.so index 9366501..98853b3 100644 Binary files a/platforms/linux-arm64-gnu/libprotect_ffi.so and b/platforms/linux-arm64-gnu/libprotect_ffi.so differ diff --git a/platforms/linux-x64-gnu/libprotect_ffi.so b/platforms/linux-x64-gnu/libprotect_ffi.so index 270a4e1..b2c7384 100644 Binary files a/platforms/linux-x64-gnu/libprotect_ffi.so and b/platforms/linux-x64-gnu/libprotect_ffi.so differ diff --git a/platforms/win32-x64-msvc/protect_ffi.dll 
b/platforms/win32-x64-msvc/protect_ffi.dll index ead9a01..69da079 100644 Binary files a/platforms/win32-x64-msvc/protect_ffi.dll and b/platforms/win32-x64-msvc/protect_ffi.dll differ diff --git a/tests/Integration/ClientTest.php b/tests/Integration/ClientTest.php index 02606e4..68d25e8 100644 --- a/tests/Integration/ClientTest.php +++ b/tests/Integration/ClientTest.php @@ -41,7 +41,7 @@ public static function setUpBeforeClass(): void 'cast_as' => 'jsonb', 'indexes' => [ 'ste_vec' => [ - 'prefix' => 'users/metadata', + 'prefix' => 'users.metadata', ], ], ], @@ -647,17 +647,17 @@ public function test_encrypt_decrypt_bulk_roundtrip(): void foreach ($metadataResult['sv'] as $svEntry) { $this->assertIsArray($svEntry); - $this->assertArrayHasKey('tokenized_selector', $svEntry); - $this->assertArrayHasKey('term', $svEntry); - $this->assertArrayHasKey('record', $svEntry); - $this->assertArrayHasKey('parent_is_array', $svEntry); - $this->assertIsString($svEntry['tokenized_selector']); - $this->assertIsString($svEntry['term']); - $this->assertIsString($svEntry['record']); - $this->assertIsBool($svEntry['parent_is_array']); - $this->assertNotEmpty($svEntry['tokenized_selector']); - $this->assertNotEmpty($svEntry['term']); - $this->assertNotEmpty($svEntry['record']); + $this->assertArrayHasKey('s', $svEntry); + $this->assertArrayHasKey('t', $svEntry); + $this->assertArrayHasKey('r', $svEntry); + $this->assertArrayHasKey('pa', $svEntry); + $this->assertIsString($svEntry['s']); + $this->assertIsString($svEntry['t']); + $this->assertIsString($svEntry['r']); + $this->assertIsBool($svEntry['pa']); + $this->assertNotEmpty($svEntry['s']); + $this->assertNotEmpty($svEntry['t']); + $this->assertNotEmpty($svEntry['r']); } $this->assertArrayHasKey('i', $metadataResult);