From ed123de984f50db560f8367b0b941dc8afcc804d Mon Sep 17 00:00:00 2001 From: Alex Newman Date: Sat, 30 May 2026 23:06:46 +0000 Subject: [PATCH 1/3] Reapply "Serve noise.ita_token so clients verify attestation without minting (#267)" This reverts commit 79fa0c909728ba265f62db733bca8dcd212e20b7. --- src/agent.rs | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/src/agent.rs b/src/agent.rs index b49ff90..c6bcb0a 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -64,8 +64,14 @@ struct St { /// CP keys off this to look up the tunnel_id. agent_id: String, started: Instant, - /// Current Intel-signed JWT. Refreshed by a background task. + /// Current Intel-signed JWT minted over a freshness nonce. Used by the CP's + /// scrape-and-verify loop. Refreshed by a background task. ita_token: Arc>, + /// Intel-signed JWT minted over the *Noise* quote (binds the Noise pubkey in + /// `report_data`). Served as `noise.ita_token` so clients can verify the + /// agent's attestation against Intel's public JWKS without an ITA account of + /// their own — they verify, they don't mint. Refreshed alongside `ita_token`. + noise_ita_token: Arc>, /// Live set of per-workload ingress rules this agent has asked /// the CP to publish. Seeded from boot `cfg.extra_ingress`; /// appended each time a POSTed workload declares `expose`. The @@ -174,6 +180,34 @@ pub async fn run() -> Result<()> { .map_err(|e| Error::Internal(format!("noise keypair: {e}")))?, ); eprintln!("agent: noise_pubkey={}", hex::encode(attestor.public_key())); + + // Mint an ITA token over the Noise quote so clients can verify the agent's + // attestation against Intel's public JWKS without minting (and thus without + // an Intel account). Stable per boot; refreshed before expiry like the + // registration token. + let noise_ita_token = Arc::new(RwLock::new( + mint_noise_ita(&cfg, &attestor).await.unwrap_or_else(|e| { + eprintln!("agent: initial noise ITA mint failed ({e}); serving empty until refresh"); + String::new() + }), + )); + { + let cfg = cfg.clone(); + let attestor = attestor.clone(); + let token = noise_ita_token.clone(); + tokio::spawn(async move { + loop { + tokio::time::sleep(ITA_REFRESH).await; + match mint_noise_ita(&cfg, &attestor).await { + Ok(t) => *token.write().await = t, + Err(e) => { + eprintln!("agent: noise ITA refresh failed (keeping stale token): {e}") + } + } + } + }); + } + let ee_token = std::env::var("EE_TOKEN").ok(); let upstream = Arc::new(noise_gateway::upstream::EeAgent::new( std::path::PathBuf::from(noise_gateway::upstream::DEFAULT_EE_AGENT_SOCK), @@ -212,6 +246,7 @@ pub async fn run() -> Result<()> { agent_id: b.agent_id, started: Instant::now(), ita_token, + noise_ita_token, extras: Arc::new(RwLock::new(cfg.extra_ingress.clone())), gh, attest: attestor, @@ -487,6 +522,18 @@ async fn mint_ita(cfg: &Cfg, ee: &Ee) -> Result { ita::mint(&cfg.ita.base_url, &cfg.ita.api_key, "e_b64).await } +/// Mint an Intel-signed appraisal of the *Noise* quote (the one binding the +/// Noise pubkey into `report_data`). Served on `/health` as `noise.ita_token` +/// so clients can verify it against Intel's public JWKS without an account. +async fn mint_noise_ita(cfg: &Cfg, attest: &noise_gateway::attest::Attestor) -> Result { + if cfg.ita.mode == ItaMode::Local { + return ita::mint_local(&cfg.ita.issuer, &cfg.ita.api_key, &cfg.common.vm_name); + } + use base64::Engine; + let quote_b64 = base64::engine::general_purpose::STANDARD.encode(attest.quote()); + ita::mint(&cfg.ita.base_url, &cfg.ita.api_key, "e_b64).await +} + fn spawn_cloudflared(token: String) { tokio::spawn(async move { eprintln!("agent: spawning cloudflared"); @@ -546,6 +593,7 @@ async fn health(State(s): State) -> Json { .unwrap_or_default(); let m = metrics::collect().await; let ita_token = s.ita_token.read().await.clone(); + let noise_ita_token = s.noise_ita_token.read().await.clone(); let agent_owner = s.agent_owner.read().await.clone(); let oracles = s.oracles.read().await.clone(); let taint_reasons = s.taint.snapshot().await; @@ -630,6 +678,10 @@ async fn health(State(s): State) -> Json { "noise": { "quote_b64": base64::engine::general_purpose::STANDARD.encode(s.attest.quote()), "pubkey_hex": hex::encode(s.attest.public_key()), + // Intel-signed appraisal of the Noise quote. Clients verify this + // against Intel's public JWKS (no account) instead of minting their + // own. Empty if minting failed at boot / in local ITA mode. + "ita_token": noise_ita_token, }, })) } From 5275d25e69d65da06e6807b7ca0cbfa26d6fef5b Mon Sep 17 00:00:00 2001 From: Alex Newman Date: Sat, 30 May 2026 23:06:46 +0000 Subject: [PATCH 2/3] =?UTF-8?q?Reapply=20"Pin=20enclave=20measurement=20(M?= =?UTF-8?q?RTD/TCB)=20=E2=80=94=20attestation=20that=20proves=20our=20code?= =?UTF-8?q?=20[L1]=20(#269)"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 055c5b6f878389188e4956644ea2466c892d33b9. --- src/collector.rs | 10 +++++ src/cp.rs | 27 ++++++++++++ src/ita.rs | 110 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 146 insertions(+), 1 deletion(-) diff --git a/src/collector.rs b/src/collector.rs index eed32db..d858a14 100644 --- a/src/collector.rs +++ b/src/collector.rs @@ -227,6 +227,7 @@ pub async fn run( cp_hostname: String, ee: Arc, verifier: Arc, + expected: Arc, wake: Arc, scrape_interval: Duration, discovery_interval: Duration, @@ -270,6 +271,7 @@ pub async fn run( &prefix, &ee, &verifier, + &expected, &env_label, &cp_hostname, discover, @@ -289,6 +291,7 @@ async fn tick( prefix: &str, ee: &Arc, verifier: &Arc, + expected: &Arc, env_label: &str, cp_hostname: &str, discover: bool, @@ -375,6 +378,13 @@ async fn tick( continue; } }; + if let Err(reason) = expected.check(&claims) { + if expected.enforce { + eprintln!("cp: collector: {name} measurement mismatch — dropping: {reason}"); + continue; + } + eprintln!("cp: collector: {name} measurement mismatch (not enforced): {reason}"); + } // Store key is the tunnel name (authoritative on the CP side), // NOT the agent's self-reported agent_id. let mut s = store.lock().await; diff --git a/src/cp.rs b/src/cp.rs index f659cc6..87e164c 100644 --- a/src/cp.rs +++ b/src/cp.rs @@ -48,6 +48,9 @@ struct St { collector_wake: Arc, started: Instant, verifier: Arc, + /// Expected enclave measurement allowlist (MRTD/TCB). Pins the fleet to + /// known-good code; unpinned = observe-only. + expected: Arc, /// The CP's own ITA token. Refreshed by a background task. cp_ita_token: Arc>, /// GH OIDC verifier for `/api/agents` callers (CI, humans). Same @@ -289,6 +292,18 @@ pub async fn run() -> Result<()> { // Start the collector with the verifier. It re-verifies each // scraped agent's ita_token, so expired / revoked / unsigned // agents drop off the dashboard automatically. + let expected = Arc::new(ita::ExpectedMeasurements::from_env()); + if expected.is_pinned() { + eprintln!( + "cp: measurement pinning ON ({} mrtd(s), tcb={}, enforce={})", + expected.mrtds.len(), + expected.tcb_status.as_deref().unwrap_or("any"), + expected.enforce + ); + } else { + eprintln!("cp: measurement pinning OFF (DD_EXPECTED_MRTD unset) — observe only"); + } + let collector_wake = Arc::new(Notify::new()); tokio::spawn(collector::run( store.clone(), @@ -297,6 +312,7 @@ pub async fn run() -> Result<()> { cfg.hostname.clone(), ee.clone(), verifier.clone(), + expected.clone(), collector_wake.clone(), Duration::from_secs(cfg.scrape_interval_secs), Duration::from_secs(cfg.discovery_interval_secs), @@ -313,6 +329,7 @@ pub async fn run() -> Result<()> { collector_wake, started: Instant::now(), verifier, + expected, cp_ita_token, gh, }; @@ -495,6 +512,16 @@ async fn register( ita_claims.mrtd.as_deref().unwrap_or("?"), ita_claims.tcb_status.as_deref().unwrap_or("?") ); + if let Err(reason) = s.expected.check(&ita_claims) { + if s.expected.enforce { + eprintln!("cp: REJECT register {}: {reason}", req.vm_name); + return Err(Error::Unauthorized); + } + eprintln!( + "cp: WARN register {} measurement mismatch (not enforced): {reason}", + req.vm_name + ); + } let http = cf::http_client(); let name = cf::agent_tunnel_name(&s.cfg.common.env_label); diff --git a/src/ita.rs b/src/ita.rs index 603ce33..b9c7f96 100644 --- a/src/ita.rs +++ b/src/ita.rs @@ -67,12 +67,83 @@ impl Claims { attester_type: get("attester_type"), mrtd: get("tdx_mrtd"), mrsigner: get("tdx_mrsigner"), - report_data: get("attester_held_data"), + // Real Intel tokens carry the quote's report_data as `tdx_report_data`; + // the local dev issuer (`mint_local`) puts it in `attester_held_data`. + report_data: get("tdx_report_data").or_else(|| get("attester_held_data")), extra: v, } } } +/// Expected enclave measurement allowlist — pins attestation to known-good code. +/// +/// Sourced from env (see [`Self::from_env`]). When no MRTD is configured the +/// fleet/measurement is *unpinned*: [`Self::check`] passes everything (the agent +/// MRTD is still logged at register/scrape). Once pinned, mismatches are rejected +/// (or, with enforcement off, logged as a warning for canary rollout). +#[derive(Debug, Clone, Default)] +pub struct ExpectedMeasurements { + /// Accepted MRTDs (lowercase hex), any-of. Empty = unpinned. + pub mrtds: Vec, + /// Required TCB status (e.g. "UpToDate"); defaults to "UpToDate" once pinned. + pub tcb_status: Option, + /// Reject on mismatch (true) vs. warn-and-allow (false, canary). + pub enforce: bool, +} + +impl ExpectedMeasurements { + /// `DD_EXPECTED_MRTD` = comma/space-separated hex MRTDs (empty = unpinned). + /// `DD_EXPECTED_TCB` = required TCB status (default "UpToDate" when pinned). + /// `DD_MEASUREMENT_ENFORCE` = "0"/"false" to warn instead of reject. + pub fn from_env() -> Self { + let mrtds: Vec = std::env::var("DD_EXPECTED_MRTD") + .unwrap_or_default() + .split([',', ' ', '\n', '\t']) + .map(|s| s.trim().to_lowercase()) + .filter(|s| !s.is_empty()) + .collect(); + let tcb_status = std::env::var("DD_EXPECTED_TCB") + .ok() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .or_else(|| (!mrtds.is_empty()).then(|| "UpToDate".to_string())); + let enforce = !matches!( + std::env::var("DD_MEASUREMENT_ENFORCE").as_deref(), + Ok("0") | Ok("false") | Ok("no") + ); + Self { + mrtds, + tcb_status, + enforce, + } + } + + pub fn is_pinned(&self) -> bool { + !self.mrtds.is_empty() + } + + /// `Ok(())` if unpinned or matching; `Err(reason)` if pinned and mismatched. + pub fn check(&self, claims: &Claims) -> std::result::Result<(), String> { + if self.mrtds.is_empty() { + return Ok(()); + } + let mrtd = claims.mrtd.as_deref().unwrap_or("").to_lowercase(); + if !self.mrtds.contains(&mrtd) { + return Err(format!( + "mrtd {} not in expected allowlist", + if mrtd.is_empty() { "" } else { &mrtd } + )); + } + if let Some(want) = &self.tcb_status { + let got = claims.tcb_status.as_deref().unwrap_or(""); + if got != want { + return Err(format!("tcb_status {got:?} != expected {want:?}")); + } + } + Ok(()) + } +} + // ── Minter ────────────────────────────────────────────────────────────── #[derive(Serialize)] @@ -315,4 +386,41 @@ mod tests { assert_eq!(c.report_data.as_deref(), Some("cc")); assert_eq!(c.extra, v); } + + fn claims_with(mrtd: &str, tcb: &str) -> Claims { + Claims { + mrtd: Some(mrtd.into()), + tcb_status: Some(tcb.into()), + ..Default::default() + } + } + + #[test] + fn unpinned_allows_anything() { + let exp = ExpectedMeasurements::default(); + assert!(!exp.is_pinned()); + assert!(exp.check(&claims_with("deadbeef", "OutOfDate")).is_ok()); + } + + #[test] + fn pinned_accepts_match_rejects_mismatch() { + let exp = ExpectedMeasurements { + mrtds: vec!["aa".into(), "bb".into()], + tcb_status: Some("UpToDate".into()), + enforce: true, + }; + assert!(exp.check(&claims_with("bb", "UpToDate")).is_ok()); + assert!(exp.check(&claims_with("cc", "UpToDate")).is_err()); // wrong mrtd + assert!(exp.check(&claims_with("aa", "OutOfDate")).is_err()); // bad tcb + } + + #[test] + fn mrtd_match_is_case_insensitive() { + let exp = ExpectedMeasurements { + mrtds: vec!["abcd".into()], + tcb_status: None, + enforce: true, + }; + assert!(exp.check(&claims_with("ABCD", "UpToDate")).is_ok()); + } } From 5e3230a733820fe15739e41244c785c2f62b6bc0 Mon Sep 17 00:00:00 2001 From: Alex Newman Date: Sat, 30 May 2026 23:06:46 +0000 Subject: [PATCH 3/3] Reapply "Encrypt session history end-to-end to paired device keys (#268)" This reverts commit 3da4805f6ed29ddcf135f69869a9898f7bb0491e. --- Cargo.lock | 10 ++ Cargo.toml | 1 + src/agent.rs | 37 +++++ src/devices.rs | 13 ++ src/sessiond.rs | 388 ++++++++++++++++++++++++++++++++++++++---------- 5 files changed, 368 insertions(+), 81 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 865fe94..d18f58e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -373,6 +373,7 @@ dependencies = [ "futures-util", "hex", "hickory-resolver", + "hkdf", "hmac", "jsonwebtoken", "libc", @@ -665,6 +666,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + [[package]] name = "hmac" version = "0.12.1" diff --git a/Cargo.toml b/Cargo.toml index cd7cf19..be21109 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ chacha20poly1305 = "0.10" chrono = { version = "0.4", features = ["serde"] } hex = "0.4" futures-util = "0.3" +hkdf = "0.12" hickory-resolver = { version = "0.25", default-features = false, features = ["tokio"] } hmac = "0.12" jsonwebtoken = "9" diff --git a/src/agent.rs b/src/agent.rs index c6bcb0a..99435b1 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -258,6 +258,23 @@ pub async fn run() -> Result<()> { http: crate::system_http_client(), devices, }; + // Seed sessiond's E2E history recipient set in the background, retrying + // until sessiond is up. Without this, sessiond starts with no recipients + // and (correctly) refuses to persist history until a device mutation pushes + // the set. Re-pushed on every later enroll/revoke from their handlers. + { + let seed_state = state.clone(); + tokio::spawn(async move { + for _ in 0..60 { + if push_history_recipients(&seed_state).await.is_ok() { + return; + } + tokio::time::sleep(Duration::from_secs(2)).await; + } + eprintln!("agent: gave up seeding sessiond history recipients after retries"); + }); + } + let api_state = state.clone(); let api_ng_state = ng_state.clone(); @@ -1022,6 +1039,9 @@ async fn create_device( .upsert(device.clone()) .await .map_err(|e| Error::Internal(format!("devices upsert: {e}")))?; + if let Err(e) = push_history_recipients(&s).await { + eprintln!("agent: failed to push history recipients after enroll: {e}"); + } Ok((axum::http::StatusCode::CREATED, Json(device))) } @@ -1043,6 +1063,9 @@ async fn revoke_device( if !ok { return Err(Error::NotFound); } + if let Err(e) = push_history_recipients(&s).await { + eprintln!("agent: failed to push history recipients after revoke: {e}"); + } Ok(Json(serde_json::json!({ "revoked": pubkey, "at_ms": now, @@ -1196,6 +1219,20 @@ async fn attach_to_sessiond( Ok(()) } +/// Push the current non-revoked device pubkey set to `dd-sessiond` so it seals +/// persisted history to exactly those recipients. Called at startup (with retry, +/// since sessiond may still be coming up) and after every device mutation. +async fn push_history_recipients(s: &St) -> Result<()> { + let pubkeys = s.devices.live_pubkeys().await; + sessiond_post_empty_json( + s, + "/api/recipients", + &serde_json::json!({ "pubkeys": pubkeys }), + ) + .await + .map(|_| ()) +} + async fn sessiond_get(s: &St, path: &str) -> Result { let url = format!("{}{}", s.sessiond_http_url.trim_end_matches('/'), path); let resp = s.http.get(url).send().await?; diff --git a/src/devices.rs b/src/devices.rs index 3b9110a..bfd34dd 100644 --- a/src/devices.rs +++ b/src/devices.rs @@ -75,6 +75,19 @@ impl Store { self.inner.read().await.values().cloned().collect() } + /// Hex pubkeys of all non-revoked devices — the recipient set that + /// `dd-sessiond` seals persisted history to. Mirrors the membership of the + /// noise-gateway [`TrustHandle`], but as hex strings ready to POST. + pub async fn live_pubkeys(&self) -> Vec { + self.inner + .read() + .await + .values() + .filter(|d| d.revoked_at_ms.is_none()) + .map(|d| d.pubkey.clone()) + .collect() + } + pub async fn upsert(&self, device: Device) -> anyhow::Result<()> { { let mut w = self.inner.write().await; diff --git a/src/sessiond.rs b/src/sessiond.rs index 51bbf9f..a76ae67 100644 --- a/src/sessiond.rs +++ b/src/sessiond.rs @@ -21,9 +21,10 @@ use axum::{Json, Router}; use base64::Engine as _; use chacha20poly1305::aead::{Aead, KeyInit}; use chacha20poly1305::{ChaCha20Poly1305, Key, Nonce}; +use hkdf::Hkdf; use rand::RngCore; use serde::{Deserialize, Serialize}; -use sha2::{Digest, Sha256}; +use sha2::Sha256; use tokio::fs::File as TokioFile; use tokio::fs::OpenOptions; use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader}; @@ -31,6 +32,7 @@ use tokio::net::{TcpListener, TcpStream}; use tokio::process::{Child, Command}; use tokio::sync::{broadcast, Mutex, RwLock}; use uuid::Uuid; +use x25519_dalek::{PublicKey, StaticSecret}; use crate::error::{Error, Result}; use crate::taint::IntegrityState; @@ -136,12 +138,19 @@ exec "$PODMAN" run --rm --replace -it --pull=missing \ /bin/sh "#; +/// Live set of paired device X25519 pubkeys that persisted history is +/// end-to-end-encrypted to. Pushed in by `dd-agent` (which owns the device +/// registry) via `POST /api/recipients`. The enclave can encrypt to these but, +/// holding no device private key, cannot decrypt the result. +type Recipients = Arc>>; + #[derive(Clone)] struct App { sessions: Arc>>>, store: TranscriptStore, recipes: Arc>, scratch_root: PathBuf, + recipients: Recipients, } struct Session { @@ -172,6 +181,20 @@ pub struct SessionMeta { pub updated_at: i64, pub status: SessionStatus, pub exit_code: Option, + /// Whether persisted scrollback is being captured (E2E) or dropped because + /// no device is paired to decrypt it. Reflects the recipient set at session + /// start. + pub history: HistoryState, +} + +#[derive(Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub enum HistoryState { + /// History is sealed to paired device pubkeys; the enclave cannot read it. + E2e, + /// No paired device at session start — history capture is disabled rather + /// than fall back to a server-readable key. + DisabledNoDevice, } #[derive(Clone, Deserialize, Serialize)] @@ -222,10 +245,19 @@ pub struct ResizeSession { pub rows: u16, } +/// Replay returns the raw end-to-end-encrypted records. The enclave does not +/// (and cannot) decrypt them — the paired device decrypts client-side. `records` +/// are the on-disk sealed lines in order; see [`SealedLine`] for the format. #[derive(Deserialize, Serialize)] pub struct ReplayResponse { pub id: String, - pub bytes_b64: String, + pub version: u32, + pub records: Vec, +} + +#[derive(Deserialize)] +struct SetRecipients { + pubkeys: Vec, } struct RecipeSeed { @@ -239,7 +271,10 @@ struct RecipeSeed { #[derive(Clone)] struct TranscriptStore { dir: PathBuf, - key: [u8; 32], + recipients: Recipients, + /// Set once we've logged the "no paired devices" warning, so it isn't + /// repeated on every PTY chunk. + warned_no_recipients: Arc, } #[derive(Serialize, Deserialize)] @@ -249,6 +284,26 @@ struct TranscriptRecord { data_b64: String, } +/// One sealed transcript line. `body` is the record encrypted under a random +/// content key (CEK); each `rcpts` stanza wraps that CEK to one device pubkey +/// via an ephemeral X25519 key + HKDF-SHA256. The enclave discards the CEK and +/// ephemeral secrets after sealing, so only a holder of a device private key can +/// recover the plaintext. +#[derive(Serialize, Deserialize)] +struct SealedLine { + v: u32, + rcpts: Vec, + bn: String, + body: String, +} + +#[derive(Serialize, Deserialize)] +struct RecipientStanza { + epk: String, + n: String, + wk: String, +} + pub async fn run() -> Result<()> { let http_addr = std::env::var("DD_SESSIOND_HTTP_ADDR").unwrap_or_else(|_| DEFAULT_HTTP_ADDR.to_string()); @@ -265,7 +320,8 @@ pub async fn run() -> Result<()> { let root = PathBuf::from(&dir); wait_for_required_mounts(&[root.as_path(), scratch_root.as_path()]).await?; - let store = TranscriptStore::new(root.clone()).await?; + let recipients: Recipients = Arc::new(RwLock::new(Vec::new())); + let store = TranscriptStore::new(root.clone(), recipients.clone()).await?; tokio::fs::create_dir_all(&scratch_root).await?; set_private_dir_permissions(&scratch_root).await?; let recipe_dir = root.join("recipes"); @@ -278,6 +334,7 @@ pub async fn run() -> Result<()> { store, recipes, scratch_root, + recipients, }; { @@ -296,6 +353,7 @@ pub async fn run() -> Result<()> { .route("/api/sessions/{id}/replay", get(replay_session)) .route("/api/sessions/{id}/resize", post(resize_session)) .route("/api/sessions/{id}/close", post(close_session)) + .route("/api/recipients", post(set_recipients)) .with_state(app_state); eprintln!("dd-sessiond: http listening on {http_addr}"); @@ -370,6 +428,12 @@ async fn create_session( let master_fd = input.as_raw_fd(); let (tx, _) = broadcast::channel(512); + let history = if app.recipients.read().await.is_empty() { + HistoryState::DisabledNoDevice + } else { + HistoryState::E2e + }; + let meta = SessionMeta { id: id.clone(), name, @@ -385,6 +449,7 @@ async fn create_session( updated_at: now, status: SessionStatus::Running, exit_code: None, + history, }; app.store.append_meta(&meta).await?; @@ -414,13 +479,38 @@ async fn replay_session( State(app): State, AxPath(id): AxPath, ) -> Result> { - let bytes = app.store.replay(&id).await?; + let records = app.store.replay(&id).await?; Ok(Json(ReplayResponse { id, - bytes_b64: base64::engine::general_purpose::STANDARD.encode(bytes), + version: 2, + records, })) } +/// Loopback-only: `dd-agent` pushes the current non-revoked device pubkey set +/// here whenever it changes. Subsequent transcript records are sealed to this +/// set. Replaces the set wholesale. +async fn set_recipients( + State(app): State, + Json(req): Json, +) -> Result { + let mut parsed = Vec::with_capacity(req.pubkeys.len()); + for pk in &req.pubkeys { + let bytes = hex::decode(pk) + .map_err(|_| Error::BadRequest("recipient pubkey must be hex".into()))?; + if bytes.len() != 32 { + return Err(Error::BadRequest( + "recipient pubkey must decode to 32 bytes".into(), + )); + } + let mut k = [0u8; 32]; + k.copy_from_slice(&bytes); + parsed.push(k); + } + *app.recipients.write().await = parsed; + Ok(StatusCode::NO_CONTENT) +} + async fn resize_session( State(app): State, AxPath(id): AxPath, @@ -913,10 +1003,13 @@ async fn push_ring(session: &Session, bytes: &[u8]) { } impl TranscriptStore { - async fn new(dir: PathBuf) -> Result { + async fn new(dir: PathBuf, recipients: Recipients) -> Result { tokio::fs::create_dir_all(&dir).await?; - let key = history_key(&dir).await?; - Ok(Self { dir, key }) + Ok(Self { + dir, + recipients, + warned_no_recipients: Arc::new(std::sync::atomic::AtomicBool::new(false)), + }) } async fn append_meta(&self, meta: &SessionMeta) -> Result<()> { @@ -925,13 +1018,28 @@ impl TranscriptStore { } async fn append_bytes(&self, id: &str, kind: &str, bytes: &[u8]) -> Result<()> { + let recipients = self.recipients.read().await.clone(); + if recipients.is_empty() { + // No paired device means nothing can ever decrypt this history. We + // refuse to persist rather than fall back to a server-readable key, + // which would defeat end-to-end encryption. Warn once. + use std::sync::atomic::Ordering; + if !self.warned_no_recipients.swap(true, Ordering::Relaxed) { + eprintln!( + "dd-sessiond: no paired devices; history capture disabled \ + (end-to-end encryption requires at least one recipient)" + ); + } + return Ok(()); + } + let record = TranscriptRecord { ts: unix_ts(), kind: kind.to_string(), data_b64: base64::engine::general_purpose::STANDARD.encode(bytes), }; let plain = serde_json::to_vec(&record).map_err(|e| Error::Internal(e.to_string()))?; - let line = self.encrypt_line(&plain)?; + let line = seal_record(&recipients, &plain)?; let path = self.path(id); let mut f = OpenOptions::new() .create(true) @@ -943,92 +1051,84 @@ impl TranscriptStore { Ok(()) } - async fn replay(&self, id: &str) -> Result> { + /// Returns the raw sealed records in order. The enclave does not decrypt + /// them — the caller's paired device does, client-side. + async fn replay(&self, id: &str) -> Result> { let path = self.path(id); if !Path::new(&path).exists() { return Err(Error::NotFound); } let text = tokio::fs::read_to_string(path).await?; - let mut out = Vec::new(); - for line in text.lines().filter(|l| !l.trim().is_empty()) { - let plain = self.decrypt_line(line)?; - let record: TranscriptRecord = - serde_json::from_slice(&plain).map_err(|e| Error::Internal(e.to_string()))?; - if record.kind == "pty" || record.kind == "stdout" || record.kind == "stderr" { - let bytes = base64::engine::general_purpose::STANDARD - .decode(record.data_b64) - .map_err(|e| Error::Internal(e.to_string()))?; - out.extend_from_slice(&bytes); - } - } - Ok(out) + Ok(text + .lines() + .filter(|l| !l.trim().is_empty()) + .map(|l| l.to_string()) + .collect()) } fn path(&self, id: &str) -> PathBuf { self.dir.join(format!("{id}.log.enc")) } - - fn encrypt_line(&self, plain: &[u8]) -> Result { - let cipher = ChaCha20Poly1305::new(Key::from_slice(&self.key)); - let mut nonce = [0u8; 12]; - rand::thread_rng().fill_bytes(&mut nonce); - let ciphertext = cipher - .encrypt(Nonce::from_slice(&nonce), plain) - .map_err(|e| Error::Internal(format!("encrypt transcript: {e}")))?; - let mut packed = nonce.to_vec(); - packed.extend_from_slice(&ciphertext); - Ok(base64::engine::general_purpose::STANDARD.encode(packed)) - } - - fn decrypt_line(&self, line: &str) -> Result> { - let packed = base64::engine::general_purpose::STANDARD - .decode(line) - .map_err(|e| Error::Internal(e.to_string()))?; - if packed.len() < 13 { - return Err(Error::Internal("truncated transcript record".into())); - } - let (nonce, ciphertext) = packed.split_at(12); - let cipher = ChaCha20Poly1305::new(Key::from_slice(&self.key)); - cipher - .decrypt(Nonce::from_slice(nonce), ciphertext) - .map_err(|e| Error::Internal(format!("decrypt transcript: {e}"))) - } } -async fn history_key(dir: &Path) -> Result<[u8; 32]> { - if let Ok(raw) = std::env::var("DD_SESSIOND_HISTORY_KEY") { - let bytes = base64::engine::general_purpose::STANDARD - .decode(raw.trim()) - .or_else(|_| hex::decode(raw.trim())) - .map_err(|_| { - Error::BadRequest("DD_SESSIOND_HISTORY_KEY must be base64 or hex".into()) - })?; - if bytes.len() != 32 { - return Err(Error::BadRequest( - "DD_SESSIOND_HISTORY_KEY must decode to 32 bytes".into(), - )); - } - let mut key = [0u8; 32]; - key.copy_from_slice(&bytes); - return Ok(key); - } +/// Domain-separated per-recipient key-wrapping key from an X25519 shared secret. +fn derive_wrap_key(shared: &[u8; 32], epk: &[u8; 32], rpk: &[u8; 32]) -> [u8; 32] { + let hk = Hkdf::::new(None, shared); + let mut info = Vec::with_capacity(b"dd-sessiond-e2e-v2".len() + 64); + info.extend_from_slice(b"dd-sessiond-e2e-v2"); + info.extend_from_slice(epk); + info.extend_from_slice(rpk); + let mut out = [0u8; 32]; + hk.expand(&info, &mut out) + .expect("hkdf expand of 32 bytes never fails"); + out +} - let key_path = dir.join("history.key"); - if let Ok(bytes) = tokio::fs::read(&key_path).await { - if bytes.len() == 32 { - let mut key = [0u8; 32]; - key.copy_from_slice(&bytes); - return Ok(key); - } +/// Seal `plain` to every recipient pubkey. A random content key encrypts the +/// body; that content key is wrapped to each recipient via an ephemeral X25519 +/// key + HKDF. The ephemeral secrets and content key are dropped on return, so +/// the enclave cannot recover the plaintext afterwards. +fn seal_record(recipients: &[[u8; 32]], plain: &[u8]) -> Result { + let mut rng = rand::thread_rng(); + + let mut cek = [0u8; 32]; + rng.fill_bytes(&mut cek); + + let mut bn = [0u8; 12]; + rng.fill_bytes(&mut bn); + let body = ChaCha20Poly1305::new(Key::from_slice(&cek)) + .encrypt(Nonce::from_slice(&bn), plain) + .map_err(|e| Error::Internal(format!("seal transcript body: {e}")))?; + + let mut rcpts = Vec::with_capacity(recipients.len()); + for r in recipients { + let mut e_bytes = [0u8; 32]; + rng.fill_bytes(&mut e_bytes); + let e_sk = StaticSecret::from(e_bytes); + let e_pk = PublicKey::from(&e_sk); + let shared = e_sk.diffie_hellman(&PublicKey::from(*r)); + let wrap_key = derive_wrap_key(shared.as_bytes(), e_pk.as_bytes(), r); + + let mut n = [0u8; 12]; + rng.fill_bytes(&mut n); + let wk = ChaCha20Poly1305::new(Key::from_slice(&wrap_key)) + .encrypt(Nonce::from_slice(&n), cek.as_ref()) + .map_err(|e| Error::Internal(format!("wrap content key: {e}")))?; + + rcpts.push(RecipientStanza { + epk: hex::encode(e_pk.as_bytes()), + n: hex::encode(n), + wk: base64::engine::general_purpose::STANDARD.encode(wk), + }); } - let mut material = [0u8; 32]; - rand::thread_rng().fill_bytes(&mut material); - tokio::fs::write(&key_path, material).await?; - let mut hasher = Sha256::new(); - hasher.update(material); - hasher.update(b"dd-sessiond-history-v1"); - Ok(hasher.finalize().into()) + let line = SealedLine { + v: 2, + rcpts, + bn: hex::encode(bn), + body: base64::engine::general_purpose::STANDARD.encode(body), + }; + serde_json::to_string(&line).map_err(|e| Error::Internal(e.to_string())) } fn unix_ts() -> i64 { @@ -1037,3 +1137,129 @@ fn unix_ts() -> i64 { .unwrap_or(Duration::ZERO) .as_secs() as i64 } + +#[cfg(test)] +mod tests { + use super::*; + + const B64: base64::engine::general_purpose::GeneralPurpose = + base64::engine::general_purpose::STANDARD; + + /// A test client device. Holds the X25519 private key the enclave never sees. + struct TestDevice { + sk: StaticSecret, + pk: [u8; 32], + } + + impl TestDevice { + fn new(seed: u8) -> Self { + let sk = StaticSecret::from([seed; 32]); + let pk = *PublicKey::from(&sk).as_bytes(); + Self { sk, pk } + } + } + + fn hex32(s: &str) -> [u8; 32] { + let v = hex::decode(s).unwrap(); + let mut k = [0u8; 32]; + k.copy_from_slice(&v); + k + } + fn hex12(s: &str) -> [u8; 12] { + let v = hex::decode(s).unwrap(); + let mut k = [0u8; 12]; + k.copy_from_slice(&v); + k + } + + /// Client-side decryption — the operation the enclave structurally cannot + /// perform (it holds no device private key). Mirrors what dd-client does. + fn open_record(device: &TestDevice, line: &str) -> Option> { + let parsed: SealedLine = serde_json::from_str(line).ok()?; + let bn = hex12(&parsed.bn); + let body = B64.decode(&parsed.body).ok()?; + for st in &parsed.rcpts { + let epk = hex32(&st.epk); + let shared = device.sk.diffie_hellman(&PublicKey::from(epk)); + let wrap_key = derive_wrap_key(shared.as_bytes(), &epk, &device.pk); + let n = hex12(&st.n); + let wk = B64.decode(&st.wk).ok()?; + if let Ok(cek) = ChaCha20Poly1305::new(Key::from_slice(&wrap_key)) + .decrypt(Nonce::from_slice(&n), wk.as_ref()) + { + let cek: [u8; 32] = cek.try_into().ok()?; + return ChaCha20Poly1305::new(Key::from_slice(&cek)) + .decrypt(Nonce::from_slice(&bn), body.as_ref()) + .ok(); + } + } + None + } + + #[test] + fn round_trip_single_recipient() { + let dev = TestDevice::new(1); + let plain = b"codex: hello world"; + let line = seal_record(&[dev.pk], plain).unwrap(); + // On-disk line is opaque: plaintext must not appear anywhere in it. + assert!(!line.as_bytes().windows(plain.len()).any(|w| w == plain)); + assert_eq!(open_record(&dev, &line).unwrap(), plain); + } + + #[test] + fn multi_recipient_each_opens_outsider_cannot() { + let a = TestDevice::new(1); + let b = TestDevice::new(2); + let c = TestDevice::new(3); // never a recipient + let plain = b"shared scrollback"; + let line = seal_record(&[a.pk, b.pk], plain).unwrap(); + assert_eq!(open_record(&a, &line).unwrap(), plain); + assert_eq!(open_record(&b, &line).unwrap(), plain); + assert!(open_record(&c, &line).is_none()); + } + + #[tokio::test] + async fn no_recipients_persists_nothing() { + let dir = tempfile::tempdir().unwrap(); + let recipients: Recipients = Arc::new(RwLock::new(Vec::new())); + let store = TranscriptStore::new(dir.path().to_path_buf(), recipients) + .await + .unwrap(); + store + .append_bytes("sess", "pty", b"secret bytes") + .await + .unwrap(); + // Nothing written, and replay reports no history rather than leaking. + assert!(!Path::new(&store.path("sess")).exists()); + assert!(matches!(store.replay("sess").await, Err(Error::NotFound))); + } + + #[tokio::test] + async fn recipient_set_changes_apply_to_subsequent_records() { + let a = TestDevice::new(1); + let b = TestDevice::new(2); + let dir = tempfile::tempdir().unwrap(); + let recipients: Recipients = Arc::new(RwLock::new(vec![a.pk])); + let store = TranscriptStore::new(dir.path().to_path_buf(), recipients.clone()) + .await + .unwrap(); + + store.append_bytes("s", "pty", b"first").await.unwrap(); + *recipients.write().await = vec![b.pk]; + store.append_bytes("s", "pty", b"second").await.unwrap(); + + let lines = store.replay("s").await.unwrap(); + assert_eq!(lines.len(), 2); + + // Record 1 is readable only by A, record 2 only by B. + let r1: TranscriptRecord = + serde_json::from_slice(&open_record(&a, &lines[0]).unwrap()).unwrap(); + assert_eq!(B64.decode(r1.data_b64).unwrap(), b"first"); + assert!(open_record(&b, &lines[0]).is_none()); + + let r2: TranscriptRecord = + serde_json::from_slice(&open_record(&b, &lines[1]).unwrap()).unwrap(); + assert_eq!(B64.decode(r2.data_b64).unwrap(), b"second"); + assert!(open_record(&a, &lines[1]).is_none()); + } +}