diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 433077e910..16557a7d91 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -103,3 +103,4 @@ tools/sidecar_mockgen/ @DataDog/libdatadog-php libdd-data-pipeline/src/otlp/ @DataDog/apm-sdk-capabilities-rust libdd-data-pipeline/tests/test_trace_exporter_otlp_export.rs @DataDog/apm-sdk-capabilities-rust libdd-trace-utils/src/otlp_encoder/ @DataDog/apm-sdk-capabilities-rust +datadog-sidecar/src/service/ffe_exposures_flusher.rs @DataDog/libdatadog-php @DataDog/libdatadog-apm @DataDog/feature-flagging-and-experimentation-sdk diff --git a/Cargo.lock b/Cargo.lock index ba5b18e3bb..5ce1bdcce2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1358,6 +1358,7 @@ dependencies = [ "faststr", "libdd-common", "log", + "lru", "md5", "pyo3", "semver", @@ -1523,6 +1524,7 @@ dependencies = [ "bincode", "chrono", "console-subscriber", + "datadog-ffe", "datadog-ipc", "datadog-ipc-macros", "datadog-live-debugger", diff --git a/datadog-ffe/Cargo.toml b/datadog-ffe/Cargo.toml index 358b4b4016..95b349ef86 100644 --- a/datadog-ffe/Cargo.toml +++ b/datadog-ffe/Cargo.toml @@ -24,7 +24,9 @@ serde-bool = { version = "0.1.3", default-features = false } serde_with = { version = "3.11.0", default-features = false, features = ["base64", "hex", "macros"] } thiserror = { version = "2.0.3", default-features = false } url = { version = "2.5.0", default-features = false, features = ["std"] } +lru = { version = "0.16.3", optional = true } pyo3 = { version = "0.28", optional = true, default-features = false, features = ["macros"] } [features] +exposure-events = ["dep:lru"] pyo3 = ["dep:pyo3"] diff --git a/datadog-ffe/src/lib.rs b/datadog-ffe/src/lib.rs index a32b8b757c..256d1a864e 100644 --- a/datadog-ffe/src/lib.rs +++ b/datadog-ffe/src/lib.rs @@ -4,5 +4,7 @@ mod flag_type; pub mod rules_based; +#[cfg(feature = "exposure-events")] +pub mod telemetry; pub use flag_type::{ExpectedFlagType, FlagType}; diff --git a/datadog-ffe/src/telemetry/exposures.rs b/datadog-ffe/src/telemetry/exposures.rs new file mode 100644 index 0000000000..20f6184378 --- /dev/null +++ b/datadog-ffe/src/telemetry/exposures.rs @@ -0,0 +1,318 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Reusable FFE exposure payload and deduplication primitives. + +use super::FfeTelemetryContext; +use lru::LruCache; +use serde::{Deserialize, Serialize}; +use std::num::NonZeroUsize; +use std::sync::{Arc, Mutex}; + +const DEFAULT_CACHE_LIMIT: usize = 65_536; + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct FfeExposureBatch { + pub context: FfeTelemetryContext, + pub exposures: Vec, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct FfeExposure { + pub timestamp_ms: u64, + pub flag_key: String, + pub subject_id: String, + /// JSON object encoded by the tracer. Invalid or non-object JSON is treated + /// as an empty object during EVP payload serialization. + pub subject_attributes_json: String, + pub allocation_key: String, + pub variant: String, +} + +#[derive(Clone)] +pub struct ExposureDeduplicator { + cache: Arc>>, +} + +impl Default for ExposureDeduplicator { + fn default() -> Self { + Self::new(DEFAULT_CACHE_LIMIT) + } +} + +impl ExposureDeduplicator { + pub fn new(limit: usize) -> Self { + let limit = NonZeroUsize::new(limit).unwrap_or(NonZeroUsize::MIN); + Self { + cache: Arc::new(Mutex::new(LruCache::new(limit))), + } + } + + pub fn should_send(&self, context: &FfeTelemetryContext, exposure: &FfeExposure) -> bool { + let key = ExposureCacheKey { + service: context.service.clone(), + env: context.env.clone(), + version: context.version.clone(), + flag_key: exposure.flag_key.clone(), + subject_id: exposure.subject_id.clone(), + }; + let value = ExposureCacheValue { + allocation_key: exposure.allocation_key.clone(), + variant: exposure.variant.clone(), + }; + + let mut cache = self.cache.lock().unwrap_or_else(|e| e.into_inner()); + if cache.get(&key).is_some_and(|cached| cached == &value) { + return false; + } + + cache.put(key, value); + true + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct ExposureCacheKey { + service: String, + env: String, + version: String, + flag_key: String, + subject_id: String, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct ExposureCacheValue { + allocation_key: String, + variant: String, +} + +pub fn encode_exposure_batch( + deduplicator: &ExposureDeduplicator, + batch: FfeExposureBatch, +) -> Result, serde_json::Error> { + let exposures = batch + .exposures + .into_iter() + .filter(is_complete) + .filter(|exposure| deduplicator.should_send(&batch.context, exposure)) + .map(ExposureEvent::from) + .collect::>(); + + if exposures.is_empty() { + return Ok(None); + } + + let payload = ExposurePayload { + context: ExposurePayloadContext::from(batch.context), + exposures, + }; + serde_json::to_string(&payload).map(Some) +} + +fn is_complete(exposure: &FfeExposure) -> bool { + !exposure.flag_key.is_empty() + && !exposure.allocation_key.is_empty() + && !exposure.variant.is_empty() +} + +#[derive(Serialize)] +struct ExposurePayload { + context: ExposurePayloadContext, + exposures: Vec, +} + +#[derive(Serialize)] +struct ExposurePayloadContext { + #[serde(skip_serializing_if = "String::is_empty")] + service: String, + #[serde(skip_serializing_if = "String::is_empty")] + env: String, + #[serde(skip_serializing_if = "String::is_empty")] + version: String, +} + +impl From for ExposurePayloadContext { + fn from(value: FfeTelemetryContext) -> Self { + Self { + service: value.service, + env: value.env, + version: value.version, + } + } +} + +#[derive(Serialize)] +struct ExposureEvent { + timestamp: u64, + allocation: Key, + flag: Key, + variant: Key, + subject: Subject, +} + +impl From for ExposureEvent { + fn from(value: FfeExposure) -> Self { + Self { + timestamp: value.timestamp_ms, + allocation: Key { + key: value.allocation_key, + }, + flag: Key { + key: value.flag_key, + }, + variant: Key { key: value.variant }, + subject: Subject { + id: value.subject_id, + attributes: subject_attributes(&value.subject_attributes_json), + }, + } + } +} + +#[derive(Serialize)] +struct Key { + key: String, +} + +#[derive(Serialize)] +struct Subject { + id: String, + attributes: serde_json::Map, +} + +fn subject_attributes(json: &str) -> serde_json::Map { + if json.is_empty() { + return serde_json::Map::new(); + } + + match serde_json::from_str::(json) { + Ok(serde_json::Value::Object(attrs)) => attrs, + Ok(_) | Err(_) => serde_json::Map::new(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::Value; + + fn context() -> FfeTelemetryContext { + FfeTelemetryContext { + service: "svc".to_owned(), + env: "prod".to_owned(), + version: "1".to_owned(), + } + } + + fn exposure(subject_id: &str, allocation_key: &str, variant: &str) -> FfeExposure { + FfeExposure { + timestamp_ms: 123, + flag_key: "flag".to_owned(), + subject_id: subject_id.to_owned(), + subject_attributes_json: r#"{"tier":"premium"}"#.to_owned(), + allocation_key: allocation_key.to_owned(), + variant: variant.to_owned(), + } + } + + #[test] + fn encodes_structured_batch_and_preserves_empty_subject() { + let deduplicator = ExposureDeduplicator::new(4); + let payload = encode_exposure_batch( + &deduplicator, + FfeExposureBatch { + context: context(), + exposures: vec![exposure("", "alloc", "variant")], + }, + ) + .unwrap() + .unwrap(); + let payload: Value = serde_json::from_str(&payload).unwrap(); + + assert_eq!(payload["context"]["service"], "svc"); + assert_eq!(payload["context"]["env"], "prod"); + assert_eq!(payload["context"]["version"], "1"); + assert_eq!(payload["exposures"][0]["subject"]["id"], ""); + assert_eq!( + payload["exposures"][0]["subject"]["attributes"]["tier"], + "premium" + ); + } + + #[test] + fn deduplicates_same_assignment_and_emits_changed_assignment() { + let deduplicator = ExposureDeduplicator::new(4); + let first = encode_exposure_batch( + &deduplicator, + FfeExposureBatch { + context: context(), + exposures: vec![exposure("user", "alloc-a", "a")], + }, + ) + .unwrap(); + let duplicate = encode_exposure_batch( + &deduplicator, + FfeExposureBatch { + context: context(), + exposures: vec![exposure("user", "alloc-a", "a")], + }, + ) + .unwrap(); + let changed = encode_exposure_batch( + &deduplicator, + FfeExposureBatch { + context: context(), + exposures: vec![exposure("user", "alloc-b", "b")], + }, + ) + .unwrap(); + + assert!(first.is_some()); + assert!(duplicate.is_none()); + assert!(changed.is_some()); + } + + #[test] + fn cache_key_includes_service_env_and_version() { + let deduplicator = ExposureDeduplicator::new(4); + let first = encode_exposure_batch( + &deduplicator, + FfeExposureBatch { + context: context(), + exposures: vec![exposure("user", "alloc", "variant")], + }, + ) + .unwrap(); + let other_service = encode_exposure_batch( + &deduplicator, + FfeExposureBatch { + context: FfeTelemetryContext { + service: "other".to_owned(), + ..context() + }, + exposures: vec![exposure("user", "alloc", "variant")], + }, + ) + .unwrap(); + + assert!(first.is_some()); + assert!(other_service.is_some()); + } + + #[test] + fn drops_incomplete_exposures() { + let deduplicator = ExposureDeduplicator::new(4); + let mut invalid = exposure("user", "alloc", "variant"); + invalid.allocation_key.clear(); + + assert!(encode_exposure_batch( + &deduplicator, + FfeExposureBatch { + context: context(), + exposures: vec![invalid], + }, + ) + .unwrap() + .is_none()); + } +} diff --git a/datadog-ffe/src/telemetry/mod.rs b/datadog-ffe/src/telemetry/mod.rs new file mode 100644 index 0000000000..0ad2259999 --- /dev/null +++ b/datadog-ffe/src/telemetry/mod.rs @@ -0,0 +1,13 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +pub mod exposures; + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Default, Deserialize, Eq, Hash, PartialEq, Serialize)] +pub struct FfeTelemetryContext { + pub service: String, + pub env: String, + pub version: String, +} diff --git a/datadog-sidecar-ffi/src/lib.rs b/datadog-sidecar-ffi/src/lib.rs index 7cc6a3267d..057c5db57d 100644 --- a/datadog-sidecar-ffi/src/lib.rs +++ b/datadog-sidecar-ffi/src/lib.rs @@ -27,15 +27,17 @@ use datadog_sidecar::service::agent_info::AgentInfoReader; use datadog_sidecar::service::telemetry::InternalTelemetryAction; use datadog_sidecar::service::{ blocking::{self, SidecarTransport}, - DynamicInstrumentationConfigState, InstanceId, QueueId, RuntimeMetadata, - SerializedTracerHeaderTags, SessionConfig, SidecarAction, SidecarFlushOptions, + DynamicInstrumentationConfigState, FfeExposure as SidecarFfeExposure, + FfeExposureBatch as SidecarFfeExposureBatch, FfeTelemetryContext as SidecarFfeTelemetryContext, + InstanceId, QueueId, RuntimeMetadata, SerializedTracerHeaderTags, SessionConfig, SidecarAction, + SidecarFlushOptions, }; use datadog_sidecar::service::{get_telemetry_action_sender, InternalTelemetryActions}; use datadog_sidecar::shm_remote_config::{path_for_remote_config, RemoteConfigReader}; use libc::c_char; use libdd_common::tag::Tag; use libdd_common::Endpoint; -use libdd_common_ffi::slice::{AsBytes, CharSlice}; +use libdd_common_ffi::slice::{AsBytes, CharSlice, Slice}; use libdd_common_ffi::{self as ffi, MaybeError}; #[cfg(windows)] use libdd_crashtracker_ffi::Metadata; @@ -1116,6 +1118,92 @@ pub unsafe extern "C" fn ddog_sidecar_send_debugger_datum( ddog_sidecar_send_debugger_data(transport, instance_id, queue_id, vec![*payload]) } +#[repr(C)] +pub struct FfeTelemetryContext<'a> { + pub service: CharSlice<'a>, + pub env: CharSlice<'a>, + pub version: CharSlice<'a>, +} + +#[repr(C)] +pub struct FfeExposure<'a> { + pub timestamp_ms: u64, + pub flag_key: CharSlice<'a>, + pub subject_id: CharSlice<'a>, + /// UTF-8 JSON object. Empty, invalid, or non-object JSON is serialized as + /// an empty subject attribute object. + pub subject_attributes_json: CharSlice<'a>, + pub allocation_key: CharSlice<'a>, + pub variant: CharSlice<'a>, +} + +/// Send structured FFE exposure events to the sidecar. The sidecar owns +/// deduplication, JSON serialization, and Agent EVP delivery. This function is +/// caller-driven; shared libdatadog evaluator calls do not log unless an SDK +/// explicitly sends this action. +/// +/// # Safety +/// `context` and every element in `exposures` must contain valid UTF-8 +/// `CharSlice` values. Empty `exposures` is a no-op. +#[no_mangle] +#[allow(clippy::missing_safety_doc)] +pub unsafe extern "C" fn ddog_sidecar_send_ffe_exposure_batch( + transport: &mut Box, + instance_id: &InstanceId, + queue_id: &QueueId, + context: &FfeTelemetryContext<'_>, + exposures: Slice>, +) -> MaybeError { + if exposures.is_empty() { + return MaybeError::None; + } + + let context = try_c!(ffe_context_from_ffi(context)); + let exposures = try_c!(exposures + .try_as_slice() + .map_err(|e| format!("Invalid exposure slice: {e}")) + .and_then(|exposures| exposures + .iter() + .map(ffe_exposure_from_ffi) + .collect::, _>>())); + + if exposures.is_empty() { + return MaybeError::None; + } + + try_c!(blocking::enqueue_actions( + transport, + instance_id, + queue_id, + vec![SidecarAction::FfeExposureBatch(SidecarFfeExposureBatch { + context, + exposures, + })], + )); + MaybeError::None +} + +fn ffe_context_from_ffi( + context: &FfeTelemetryContext<'_>, +) -> Result { + Ok(SidecarFfeTelemetryContext { + service: char_slice_to_string(context.service)?, + env: char_slice_to_string(context.env)?, + version: char_slice_to_string(context.version)?, + }) +} + +fn ffe_exposure_from_ffi(exposure: &FfeExposure<'_>) -> Result { + Ok(SidecarFfeExposure { + timestamp_ms: exposure.timestamp_ms, + flag_key: char_slice_to_string(exposure.flag_key)?, + subject_id: char_slice_to_string(exposure.subject_id)?, + subject_attributes_json: char_slice_to_string(exposure.subject_attributes_json)?, + allocation_key: char_slice_to_string(exposure.allocation_key)?, + variant: char_slice_to_string(exposure.variant)?, + }) +} + #[no_mangle] #[allow(clippy::missing_safety_doc)] #[allow(improper_ctypes_definitions)] // DebuggerPayload is just a pointer, we hide its internals diff --git a/datadog-sidecar/Cargo.toml b/datadog-sidecar/Cargo.toml index 8d04b2d28d..f50cd65bda 100644 --- a/datadog-sidecar/Cargo.toml +++ b/datadog-sidecar/Cargo.toml @@ -29,6 +29,7 @@ libdd-trace-utils = { path = "../libdd-trace-utils" } libdd-trace-stats = { path = "../libdd-trace-stats", default-features=false, features = ["https"] } datadog-remote-config = { path = "../datadog-remote-config" , features = ["live-debugger"]} datadog-live-debugger = { path = "../datadog-live-debugger" } +datadog-ffe = { path = "../datadog-ffe", features = ["exposure-events"] } libdd-crashtracker = { path = "../libdd-crashtracker" } libdd-dogstatsd-client = { path = "../libdd-dogstatsd-client" } libdd-tinybytes = { path = "../libdd-tinybytes" } diff --git a/datadog-sidecar/src/service/ffe_exposures_flusher.rs b/datadog-sidecar/src/service/ffe_exposures_flusher.rs new file mode 100644 index 0000000000..996802694d --- /dev/null +++ b/datadog-sidecar/src/service/ffe_exposures_flusher.rs @@ -0,0 +1,288 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Serializes and forwards FFE (Feature Flag Evaluation) exposure events to +//! the Datadog Agent's EVP proxy. +//! +//! Protocol matches dd-trace-go / dd-trace-rb / dd-trace-py / dd-trace-js / +//! dd-trace-dotnet: `POST /evp_proxy/v2/api/v2/exposures` with the header +//! `X-Datadog-EVP-Subdomain: event-platform-intake`. No agent capability gate. + +use crate::service::FfeExposureBatch; +use datadog_ffe::telemetry::exposures::encode_exposure_batch; +pub(crate) use datadog_ffe::telemetry::exposures::ExposureDeduplicator; +use http::uri::PathAndQuery; +use http::Method; +use libdd_capabilities::{Bytes, HttpClientCapability, SleepCapability}; +use libdd_common::Endpoint; +use std::time::Duration; +use tracing::{debug, warn}; + +/// EVP proxy path for FFE exposure intake. +pub(crate) const EVP_EXPOSURES_PATH: &str = "/evp_proxy/v2/api/v2/exposures"; + +/// EVP subdomain that routes requests to event-platform intake. +pub(crate) const EVP_SUBDOMAIN_HEADER: &str = "X-Datadog-EVP-Subdomain"; +pub(crate) const EVP_SUBDOMAIN_VALUE: &str = "event-platform-intake"; + +const USER_AGENT: &str = concat!("ddtrace-sidecar/", env!("CARGO_PKG_VERSION")); + +/// Build the FFE exposure endpoint from a session's agent base endpoint. +/// Overrides only the path (`/evp_proxy/v2/api/v2/exposures`), preserving +/// scheme, authority, timeout, and test_token. +/// Returns `None` for agentless mode because EVP proxy routing is agent-only. +pub(crate) fn exposure_endpoint(base: &Endpoint) -> Option { + if base.api_key.is_some() { + return None; + } + + let mut parts = base.url.clone().into_parts(); + parts.path_and_query = Some(PathAndQuery::from_static(EVP_EXPOSURES_PATH)); + let url = http::Uri::from_parts(parts).ok()?; + Some(Endpoint { + url, + ..base.clone() + }) +} + +/// POST a structured FFE exposure batch to the agent EVP proxy. +/// Fire-and-forget: non-2xx responses are logged at `warn`, network errors at +/// `debug`, and dropped (matches dd-trace-go behaviour). +pub(crate) async fn send_batch( + client: &C, + endpoint: &Endpoint, + deduplicator: &ExposureDeduplicator, + batch: FfeExposureBatch, +) { + let payload = match encode_exposure_batch(deduplicator, batch) { + Ok(Some(payload)) => payload, + Ok(None) => return, + Err(e) => { + debug!("ffe_exposures_flusher: failed to encode exposure payload: {e:?}"); + return; + } + }; + send_payload(client, endpoint, payload).await; +} + +async fn send_payload( + client: &C, + endpoint: &Endpoint, + payload: String, +) { + let builder = match endpoint.to_request_builder(USER_AGENT) { + Ok(b) => b, + Err(e) => { + debug!("ffe_exposures_flusher: failed to build request: {e:?}"); + return; + } + }; + + let req = match builder + .method(Method::POST) + .header("Content-Type", "application/json") + .header(EVP_SUBDOMAIN_HEADER, EVP_SUBDOMAIN_VALUE) + .body(Bytes::from(payload)) + { + Ok(r) => r, + Err(e) => { + debug!("ffe_exposures_flusher: failed to construct request body: {e:?}"); + return; + } + }; + + let timeout = Duration::from_millis(endpoint.timeout_ms); + let response = tokio::select! { + biased; + result = client.request(req) => result, + _ = client.sleep(timeout) => { + debug!("ffe_exposures_flusher: request timed out after {timeout:?}"); + return; + } + }; + + match response { + Ok(resp) => { + let status = resp.status(); + if !status.is_success() { + let body_preview = truncate(resp.body().as_ref(), 256); + warn!("ffe_exposures_flusher: non-2xx response {status}: {body_preview}"); + } else { + debug!("ffe_exposures_flusher: sent exposure batch, status={status}"); + } + } + Err(e) => { + debug!("ffe_exposures_flusher: request failed: {e:?}"); + } + } +} + +fn truncate(bytes: &[u8], cap: usize) -> String { + let take = bytes.len().min(cap); + String::from_utf8_lossy(&bytes[..take]).into_owned() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::service::{FfeExposure, FfeTelemetryContext}; + use httpmock::MockServer; + use libdd_capabilities::{HttpError, MaybeSend}; + use libdd_capabilities_impl::NativeCapabilities; + use std::future; + + fn endpoint_for(server: &MockServer) -> Endpoint { + Endpoint { + url: server.url("/").parse().unwrap(), + ..Endpoint::default() + } + } + + fn context() -> FfeTelemetryContext { + FfeTelemetryContext { + service: "svc".to_owned(), + env: "prod".to_owned(), + version: "1".to_owned(), + } + } + + fn exposure(subject_id: &str, allocation_key: &str, variant: &str) -> FfeExposure { + FfeExposure { + timestamp_ms: 123, + flag_key: "flag".to_owned(), + subject_id: subject_id.to_owned(), + subject_attributes_json: r#"{"tier":"premium"}"#.to_owned(), + allocation_key: allocation_key.to_owned(), + variant: variant.to_owned(), + } + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn posts_to_evp_proxy() { + let server = MockServer::start_async().await; + let mock = server + .mock_async(|when, then| { + when.method(httpmock::Method::POST) + .path(EVP_EXPOSURES_PATH) + .header(EVP_SUBDOMAIN_HEADER, EVP_SUBDOMAIN_VALUE) + .header("content-type", "application/json"); + then.status(202); + }) + .await; + + let base = endpoint_for(&server); + let ep = exposure_endpoint(&base).unwrap(); + let client = NativeCapabilities::new_client(); + + send_batch( + &client, + &ep, + &ExposureDeduplicator::new(4), + FfeExposureBatch { + context: context(), + exposures: vec![exposure("user", "alloc", "variant")], + }, + ) + .await; + + mock.assert_async().await; + assert_eq!(mock.calls_async().await, 1); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn non_2xx_does_not_panic() { + let server = MockServer::start_async().await; + let _mock = server + .mock_async(|when, then| { + when.method(httpmock::Method::POST).path(EVP_EXPOSURES_PATH); + then.status(500).body("intake overloaded"); + }) + .await; + + let base = endpoint_for(&server); + let ep = exposure_endpoint(&base).unwrap(); + let client = NativeCapabilities::new_client(); + send_batch( + &client, + &ep, + &ExposureDeduplicator::new(4), + FfeExposureBatch { + context: context(), + exposures: vec![exposure("user", "alloc", "variant")], + }, + ) + .await; + } + + #[tokio::test] + async fn timeout_returns_without_waiting_for_http_response() { + let ep = Endpoint { + url: "http://localhost:8126".parse().unwrap(), + timeout_ms: 1, + ..Endpoint::default() + }; + + send_batch( + &HangingCapabilities, + &ep, + &ExposureDeduplicator::new(4), + FfeExposureBatch { + context: context(), + exposures: vec![exposure("user", "alloc", "variant")], + }, + ) + .await; + } + + #[test] + fn endpoint_preserves_authority_overrides_path() { + let base = Endpoint { + url: "http://agent.internal:8126/v0.4/traces".parse().unwrap(), + ..Endpoint::default() + }; + let ep = exposure_endpoint(&base).unwrap(); + assert_eq!(ep.url.scheme_str(), Some("http")); + assert_eq!(ep.url.authority().unwrap().as_str(), "agent.internal:8126"); + assert_eq!(ep.url.path(), EVP_EXPOSURES_PATH); + } + + #[test] + fn endpoint_rejects_agentless() { + let base = Endpoint { + url: "https://trace.agent.datadoghq.com/v0.4/traces" + .parse() + .unwrap(), + api_key: Some("api-key".into()), + ..Endpoint::default() + }; + + assert!(exposure_endpoint(&base).is_none()); + } + + #[derive(Clone, Debug)] + struct HangingCapabilities; + + impl HttpClientCapability for HangingCapabilities { + fn new_client() -> Self { + Self + } + + fn request( + &self, + _req: http::Request, + ) -> impl future::Future, HttpError>> + MaybeSend + { + future::pending() + } + } + + impl SleepCapability for HangingCapabilities { + fn new() -> Self { + Self + } + + async fn sleep(&self, _duration: Duration) {} + } +} diff --git a/datadog-sidecar/src/service/mod.rs b/datadog-sidecar/src/service/mod.rs index bc5930fc78..6555ccf8a4 100644 --- a/datadog-sidecar/src/service/mod.rs +++ b/datadog-sidecar/src/service/mod.rs @@ -3,6 +3,8 @@ // imports for structs defined in this file use crate::config; +pub use datadog_ffe::telemetry::exposures::{FfeExposure, FfeExposureBatch}; +pub use datadog_ffe::telemetry::FfeTelemetryContext; use datadog_remote_config::{RemoteConfigCapabilities, RemoteConfigProduct}; use libdd_common::tag::Tag; use libdd_common::Endpoint; @@ -28,6 +30,7 @@ pub mod agent_info; pub mod blocking; mod debugger_diagnostics_bookkeeper; pub mod exception_hash_rate_limiter; +pub(crate) mod ffe_exposures_flusher; mod instance_id; mod queue_id; mod remote_configs; @@ -82,4 +85,7 @@ pub enum SidecarAction { Telemetry(TelemetryActions), AddTelemetryMetricPoint((String, f64, Vec)), PhpComposerTelemetryFile(PathBuf), + /// Structured FFE exposures. The sidecar owns JSON serialization, + /// cross-request deduplication, and EVP delivery. + FfeExposureBatch(FfeExposureBatch), } diff --git a/datadog-sidecar/src/service/sidecar_server.rs b/datadog-sidecar/src/service/sidecar_server.rs index e4841081a6..7f1a633193 100644 --- a/datadog-sidecar/src/service/sidecar_server.rs +++ b/datadog-sidecar/src/service/sidecar_server.rs @@ -35,6 +35,7 @@ use crate::service::debugger_diagnostics_bookkeeper::{ DebuggerDiagnosticsBookkeeper, DebuggerDiagnosticsBookkeeperStats, }; use crate::service::exception_hash_rate_limiter::EXCEPTION_HASH_LIMITER; +use crate::service::ffe_exposures_flusher; use crate::service::remote_configs::{RemoteConfigNotifyTarget, RemoteConfigs}; use crate::service::stats_flusher::{ flush_all_stats_now, get_or_create_concentrator, stats_endpoint, ConcentratorKey, @@ -44,6 +45,7 @@ use crate::service::tracing::trace_flusher::TraceFlusherStats; use crate::tokio_util::run_or_spawn_shared; use datadog_live_debugger::sender::{agent_info_supports_debugger_v2_endpoint, DebuggerType}; use datadog_remote_config::fetch::{ConfigInvariants, ConfigOptions, MultiTargetStats}; +use libdd_capabilities_impl::NativeCapabilities; use libdd_common::tag::Tag; use libdd_dogstatsd_client::{new, DogStatsDActionOwned}; use libdd_telemetry::config::Config; @@ -109,6 +111,10 @@ pub struct SidecarServer { debugger_diagnostics_bookkeeper: Arc, /// Per-env&version SHM span concentrators (global across all sessions). pub(crate) span_concentrators: Arc>>>, + /// HTTP client shared by FFE fire-and-forget forwarders for connection reuse. + pub(crate) ffe_http_client: NativeCapabilities, + /// Sidecar-owned exposure cache, shared across sessions/connections. + pub(crate) ffe_exposure_deduplicator: ffe_exposures_flusher::ExposureDeduplicator, } /// Per-connection handler wrapper that tracks sessions/instances for cleanup on disconnect. @@ -405,6 +411,49 @@ impl SidecarInterface for ConnectionSidecarHandler { trace_config.tracer_version.clone(), ); + // FFE exposure actions are session-scoped, not application-scoped: + // dispatch them before the `applications.entry(queue_id)` check so they + // are not silently dropped when the PHP runtime hasn't yet registered the + // application via remote-config metadata. The PHP exposure writer can + // fire as soon as evaluations begin, which is often earlier than the + // first RC config registration call. + let ffe_http_client = self.server.ffe_http_client.clone(); + let actions: Vec = actions + .into_iter() + .filter(|a| match a { + SidecarAction::FfeExposureBatch(batch) => { + if let Some(base) = trace_config.endpoint.as_ref() { + if let Some(ep) = ffe_exposures_flusher::exposure_endpoint(base) { + let batch = batch.clone(); + let client = ffe_http_client.clone(); + let deduplicator = self.server.ffe_exposure_deduplicator.clone(); + tokio::spawn(async move { + ffe_exposures_flusher::send_batch( + &client, + &ep, + &deduplicator, + batch, + ) + .await; + }); + } else { + debug!( + "ffe_exposures_flusher: could not derive endpoint, dropping batch" + ); + } + } else { + debug!("ffe_exposures_flusher: no session endpoint, dropping batch"); + } + false + } + _ => true, + }) + .collect(); + + if actions.is_empty() { + return; + } + let rt_info = self.server.get_runtime(&instance_id); let mut applications = rt_info.lock_applications(); @@ -1075,4 +1124,143 @@ impl SidecarInterface for ConnectionSidecarHandler { } } +#[cfg(test)] +mod tests { + use super::*; + use crate::service::{FfeExposure, FfeExposureBatch, FfeTelemetryContext}; + use httpmock::{Method::POST, MockServer}; + use tokio::time::{sleep, Duration as TokioDuration}; + + fn ffe_context() -> FfeTelemetryContext { + FfeTelemetryContext { + service: "svc".to_owned(), + env: "prod".to_owned(), + version: "1".to_owned(), + } + } + + fn ffe_exposure(subject_id: &str) -> FfeExposure { + FfeExposure { + timestamp_ms: 123, + flag_key: "flag".to_owned(), + subject_id: subject_id.to_owned(), + subject_attributes_json: "{}".to_owned(), + allocation_key: "alloc".to_owned(), + variant: "variant".to_owned(), + } + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn ffe_exposure_actions_dispatch_without_registered_application() { + let http_server = MockServer::start_async().await; + let exposures_mock = http_server + .mock_async(|when, then| { + when.method(POST) + .path(ffe_exposures_flusher::EVP_EXPOSURES_PATH); + then.status(202); + }) + .await; + let handler = ConnectionSidecarHandler::new(SidecarServer::default()); + let instance_id = InstanceId::new("session", "runtime"); + let queue_id = QueueId::from(42); + + handler + .server + .get_session(&instance_id.session_id) + .modify_trace_config(|cfg| { + let endpoint = Endpoint { + url: http_server.url("/").parse().unwrap(), + ..Endpoint::default() + }; + cfg.set_endpoint(endpoint).unwrap(); + }); + + assert!(!handler + .server + .get_runtime(&instance_id) + .lock_applications() + .contains_key(&queue_id)); + + handler + .enqueue_actions( + PeerCredentials::default(), + instance_id.clone(), + queue_id, + vec![SidecarAction::FfeExposureBatch(FfeExposureBatch { + context: ffe_context(), + exposures: vec![ffe_exposure("user")], + })], + ) + .await; + + for _ in 0..100 { + if exposures_mock.calls_async().await == 1 { + break; + } + sleep(TokioDuration::from_millis(10)).await; + } + + exposures_mock.assert_async().await; + assert!(!handler + .server + .get_runtime(&instance_id) + .lock_applications() + .contains_key(&queue_id)); + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn registered_sdk_without_ffe_actions_does_not_emit_ffe_telemetry() { + let http_server = MockServer::start_async().await; + let exposures_mock = http_server + .mock_async(|when, then| { + when.method(POST) + .path(ffe_exposures_flusher::EVP_EXPOSURES_PATH); + then.status(202); + }) + .await; + let handler = ConnectionSidecarHandler::new(SidecarServer::default()); + let instance_id = InstanceId::new("session", "runtime"); + let queue_id = QueueId::from(42); + + handler + .server + .get_session(&instance_id.session_id) + .modify_trace_config(|cfg| { + let endpoint = Endpoint { + url: http_server.url("/").parse().unwrap(), + ..Endpoint::default() + }; + cfg.set_endpoint(endpoint).unwrap(); + }); + + handler + .server + .get_runtime(&instance_id) + .lock_applications() + .entry(queue_id) + .or_default(); + + assert!(handler + .server + .get_runtime(&instance_id) + .lock_applications() + .contains_key(&queue_id)); + + handler + .enqueue_actions( + PeerCredentials::default(), + instance_id, + queue_id, + Vec::new(), + ) + .await; + + sleep(TokioDuration::from_millis(50)).await; + + assert_eq!(exposures_mock.calls_async().await, 0); + } +} + // TODO: APMSP-1079 - Unit tests are sparse for the sidecar server. We should add more. diff --git a/datadog-sidecar/src/service/telemetry.rs b/datadog-sidecar/src/service/telemetry.rs index 201f72fb33..526dd27319 100644 --- a/datadog-sidecar/src/service/telemetry.rs +++ b/datadog-sidecar/src/service/telemetry.rs @@ -454,6 +454,7 @@ impl TelemetryCachedClient { } } SidecarAction::PhpComposerTelemetryFile(_) => {} // handled separately + SidecarAction::FfeExposureBatch(_) => {} // handled in sidecar_server } } actions