From 722eaf3d7fbf59b77d5d21b74ff8a18a70533061 Mon Sep 17 00:00:00 2001 From: Christian Date: Wed, 8 Apr 2026 13:57:24 -0500 Subject: [PATCH 01/11] Add Sourcepoint first-party integration --- .../lib/src/integrations/sourcepoint/index.ts | 8 + .../integrations/sourcepoint/script_guard.ts | 65 +++ .../sourcepoint/script_guard.test.ts | 81 ++++ .../src/integrations/mod.rs | 2 + .../src/integrations/sourcepoint.rs | 454 ++++++++++++++++++ docs/guide/integrations-overview.md | 46 +- docs/guide/integrations/sourcepoint.md | 75 +++ trusted-server.toml | 8 +- 8 files changed, 732 insertions(+), 7 deletions(-) create mode 100644 crates/js/lib/src/integrations/sourcepoint/index.ts create mode 100644 crates/js/lib/src/integrations/sourcepoint/script_guard.ts create mode 100644 crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts create mode 100644 crates/trusted-server-core/src/integrations/sourcepoint.rs create mode 100644 docs/guide/integrations/sourcepoint.md diff --git a/crates/js/lib/src/integrations/sourcepoint/index.ts b/crates/js/lib/src/integrations/sourcepoint/index.ts new file mode 100644 index 00000000..017494d1 --- /dev/null +++ b/crates/js/lib/src/integrations/sourcepoint/index.ts @@ -0,0 +1,8 @@ +import { log } from '../../core/log'; + +import { installSourcepointGuard } from './script_guard'; + +if (typeof window !== 'undefined') { + installSourcepointGuard(); + log.info('Sourcepoint integration initialized'); +} diff --git a/crates/js/lib/src/integrations/sourcepoint/script_guard.ts b/crates/js/lib/src/integrations/sourcepoint/script_guard.ts new file mode 100644 index 00000000..789ec5c5 --- /dev/null +++ b/crates/js/lib/src/integrations/sourcepoint/script_guard.ts @@ -0,0 +1,65 @@ +import { createScriptGuard } from '../../shared/script_guard'; + +const SOURCEPOINT_CDN_HOST = 'cdn.privacy-mgmt.com'; +const SOURCEPOINT_GEO_HOST = 'geo.privacymanager.io'; + +function normalizeSourcepointUrl(url: string): string | null { + if (!url) return null; + + 
const trimmed = url.trim(); + if (!trimmed) return null; + + if (trimmed.startsWith('//')) { + return `https:${trimmed}`; + } + + if (trimmed.startsWith('http://') || trimmed.startsWith('https://')) { + return trimmed; + } + + if (trimmed.startsWith(SOURCEPOINT_CDN_HOST) || trimmed.startsWith(SOURCEPOINT_GEO_HOST)) { + return `https://${trimmed}`; + } + + return null; +} + +function parseSourcepointUrl(url: string): URL | null { + const normalized = normalizeSourcepointUrl(url); + if (!normalized) return null; + + try { + return new URL(normalized); + } catch { + return null; + } +} + +export function isSourcepointUrl(url: string): boolean { + const parsed = parseSourcepointUrl(url); + return parsed?.host === SOURCEPOINT_CDN_HOST || parsed?.host === SOURCEPOINT_GEO_HOST; +} + +export function rewriteSourcepointUrl(originalUrl: string): string { + const parsed = parseSourcepointUrl(originalUrl); + if (!parsed) return originalUrl; + + const prefix = + parsed.host === SOURCEPOINT_CDN_HOST + ? 
'/integrations/sourcepoint/cdn' + : '/integrations/sourcepoint/geo'; + const query = parsed.search || ''; + + return `${window.location.origin}${prefix}${parsed.pathname}${query}`; +} + +const guard = createScriptGuard({ + displayName: 'Sourcepoint', + id: 'sourcepoint', + isTargetUrl: isSourcepointUrl, + rewriteUrl: rewriteSourcepointUrl, +}); + +export const installSourcepointGuard = guard.install; +export const isGuardInstalled = guard.isInstalled; +export const resetGuardState = guard.reset; diff --git a/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts b/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts new file mode 100644 index 00000000..8257d369 --- /dev/null +++ b/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts @@ -0,0 +1,81 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { + installSourcepointGuard, + isGuardInstalled, + isSourcepointUrl, + resetGuardState, + rewriteSourcepointUrl, +} from '../../../src/integrations/sourcepoint/script_guard'; + +describe('Sourcepoint SDK Script Interception Guard', () => { + let originalAppendChild: typeof Element.prototype.appendChild; + let originalInsertBefore: typeof Element.prototype.insertBefore; + + beforeEach(() => { + resetGuardState(); + originalAppendChild = Element.prototype.appendChild; + originalInsertBefore = Element.prototype.insertBefore; + }); + + afterEach(() => { + resetGuardState(); + }); + + it('detects Sourcepoint CDN and geo URLs', () => { + expect(isSourcepointUrl('https://cdn.privacy-mgmt.com/wrapper/v2/messages')).toBe(true); + expect(isSourcepointUrl('https://geo.privacymanager.io/')).toBe(true); + expect(isSourcepointUrl('//cdn.privacy-mgmt.com/mms/v2/get_site_data')).toBe(true); + expect(isSourcepointUrl('https://example.com/script.js')).toBe(false); + }); + + it('rewrites CDN URLs to the first-party proxy path', () => { + expect( + 
rewriteSourcepointUrl('https://cdn.privacy-mgmt.com/wrapper/v2/messages?env=prod') + ).toBe( + `${window.location.origin}/integrations/sourcepoint/cdn/wrapper/v2/messages?env=prod` + ); + }); + + it('rewrites geo URLs to the first-party proxy path', () => { + expect(rewriteSourcepointUrl('https://geo.privacymanager.io/')).toBe( + `${window.location.origin}/integrations/sourcepoint/geo/` + ); + }); + + it('installs and resets the guard', () => { + expect(isGuardInstalled()).toBe(false); + installSourcepointGuard(); + expect(isGuardInstalled()).toBe(true); + expect(Element.prototype.appendChild).not.toBe(originalAppendChild); + expect(Element.prototype.insertBefore).not.toBe(originalInsertBefore); + resetGuardState(); + expect(Element.prototype.appendChild).toBe(originalAppendChild); + expect(Element.prototype.insertBefore).toBe(originalInsertBefore); + }); + + it('rewrites dynamically inserted Sourcepoint scripts', () => { + installSourcepointGuard(); + + const container = document.createElement('div'); + const script = document.createElement('script'); + script.src = 'https://cdn.privacy-mgmt.com/wrapperMessagingWithoutDetection.js'; + + container.appendChild(script); + + expect(script.src).toContain('/integrations/sourcepoint/cdn/wrapperMessagingWithoutDetection.js'); + expect(script.src).not.toContain('cdn.privacy-mgmt.com'); + }); + + it('does not rewrite unrelated scripts', () => { + installSourcepointGuard(); + + const container = document.createElement('div'); + const script = document.createElement('script'); + script.src = 'https://example.com/app.js'; + + container.appendChild(script); + + expect(script.src).toBe('https://example.com/app.js'); + }); +}); diff --git a/crates/trusted-server-core/src/integrations/mod.rs b/crates/trusted-server-core/src/integrations/mod.rs index 92f30219..29657166 100644 --- a/crates/trusted-server-core/src/integrations/mod.rs +++ b/crates/trusted-server-core/src/integrations/mod.rs @@ -16,6 +16,7 @@ pub mod nextjs; pub mod 
permutive; pub mod prebid; mod registry; +pub mod sourcepoint; pub mod testlight; pub use registry::{ @@ -37,6 +38,7 @@ pub(crate) fn builders() -> &'static [IntegrationBuilder] { permutive::register, lockr::register, didomi::register, + sourcepoint::register, google_tag_manager::register, datadome::register, gpt::register, diff --git a/crates/trusted-server-core/src/integrations/sourcepoint.rs b/crates/trusted-server-core/src/integrations/sourcepoint.rs new file mode 100644 index 00000000..59a8a7d0 --- /dev/null +++ b/crates/trusted-server-core/src/integrations/sourcepoint.rs @@ -0,0 +1,454 @@ +use std::sync::Arc; + +use async_trait::async_trait; +use error_stack::{Report, ResultExt}; +use fastly::http::{header, Method}; +use fastly::{Request, Response}; +use serde::Deserialize; +use url::Url; +use validator::Validate; + +use crate::backend::BackendConfig; +use crate::error::TrustedServerError; +use crate::integrations::{ + AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter, + IntegrationEndpoint, IntegrationProxy, IntegrationRegistration, +}; +use crate::settings::{IntegrationConfig, Settings}; + +const SOURCEPOINT_INTEGRATION_ID: &str = "sourcepoint"; +const SOURCEPOINT_CDN_HOST: &str = "cdn.privacy-mgmt.com"; +const SOURCEPOINT_GEO_HOST: &str = "geo.privacymanager.io"; +const SOURCEPOINT_CDN_PREFIX: &str = "/integrations/sourcepoint/cdn"; +const SOURCEPOINT_GEO_PREFIX: &str = "/integrations/sourcepoint/geo"; + +/// Configuration for the Sourcepoint first-party proxy. +#[derive(Debug, Clone, Deserialize, Validate)] +pub struct SourcepointConfig { + /// Whether the integration is enabled. + #[serde(default = "default_enabled")] + pub enabled: bool, + /// Whether Sourcepoint URLs should be rewritten in HTML. + #[serde(default = "default_rewrite_sdk")] + pub rewrite_sdk: bool, + /// Base URL for Sourcepoint CDN assets and API calls. 
+ #[serde(default = "default_cdn_origin")] + #[validate(url)] + pub cdn_origin: String, + /// Base URL for Sourcepoint geo requests. + #[serde(default = "default_geo_origin")] + #[validate(url)] + pub geo_origin: String, + /// Cache TTL for Sourcepoint static responses in seconds. + #[serde(default = "default_cache_ttl")] + #[validate(range(min = 60, max = 86400))] + pub cache_ttl_seconds: u32, +} + +impl IntegrationConfig for SourcepointConfig { + fn is_enabled(&self) -> bool { + self.enabled + } +} + +fn default_enabled() -> bool { + false +} + +fn default_rewrite_sdk() -> bool { + true +} + +fn default_cdn_origin() -> String { + format!("https://{SOURCEPOINT_CDN_HOST}") +} + +fn default_geo_origin() -> String { + format!("https://{SOURCEPOINT_GEO_HOST}") +} + +fn default_cache_ttl() -> u32 { + 3600 +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum SourcepointBackend { + Cdn, + Geo, +} + +pub struct SourcepointIntegration { + config: Arc<SourcepointConfig>, +} + +impl SourcepointIntegration { + fn new(config: Arc<SourcepointConfig>) -> Arc<Self> { + Arc::new(Self { config }) + } + + fn error(message: impl Into<String>) -> TrustedServerError { + TrustedServerError::Integration { + integration: SOURCEPOINT_INTEGRATION_ID.to_string(), + message: message.into(), + } + } + + fn backend_for_route(path: &str) -> Option<(SourcepointBackend, &str)> { + if let Some(target_path) = path.strip_prefix(SOURCEPOINT_CDN_PREFIX) { + return Some((SourcepointBackend::Cdn, normalize_target_path(target_path))); + } + + path.strip_prefix(SOURCEPOINT_GEO_PREFIX) + .map(|target_path| (SourcepointBackend::Geo, normalize_target_path(target_path))) + } + + fn build_target_url( + &self, + backend: SourcepointBackend, + target_path: &str, + query: Option<&str>, + ) -> Result<String, Report<TrustedServerError>> { + let base = match backend { + SourcepointBackend::Cdn => self.config.cdn_origin.as_str(), + SourcepointBackend::Geo => self.config.geo_origin.as_str(), + }; + + let mut target = + Url::parse(base).change_context(Self::error("Invalid Sourcepoint origin URL"))?; + 
target.set_path(target_path); + target.set_query(query); + Ok(target.to_string()) + } + + fn build_first_party_url( + &self, + backend: SourcepointBackend, + source_url: &str, + ctx: &IntegrationAttributeContext<'_>, + ) -> Option<String> { + let parsed = parse_sourcepoint_url(source_url)?; + let target_backend = match parsed.host_str()? { + SOURCEPOINT_CDN_HOST => SourcepointBackend::Cdn, + SOURCEPOINT_GEO_HOST => SourcepointBackend::Geo, + _ => return None, + }; + + if target_backend != backend { + return None; + } + + let prefix = match target_backend { + SourcepointBackend::Cdn => SOURCEPOINT_CDN_PREFIX, + SourcepointBackend::Geo => SOURCEPOINT_GEO_PREFIX, + }; + let path = parsed.path(); + let query = parsed + .query() + .map(|value| format!("?{value}")) + .unwrap_or_default(); + + Some(format!( + "{}://{}{}{}{}", + ctx.request_scheme, ctx.request_host, prefix, path, query + )) + } + + fn copy_headers(&self, original_req: &Request, proxy_req: &mut Request) { + if let Some(client_ip) = original_req.get_client_ip_addr() { + proxy_req.set_header("X-Forwarded-For", client_ip.to_string()); + } + + for header_name in [ + header::ACCEPT, + header::ACCEPT_LANGUAGE, + header::ACCEPT_ENCODING, + header::USER_AGENT, + header::REFERER, + header::ORIGIN, + header::AUTHORIZATION, + ] { + if let Some(value) = original_req.get_header(&header_name) { + proxy_req.set_header(&header_name, value); + } + } + } + + fn apply_cache_headers(&self, backend: SourcepointBackend, response: &mut Response) { + if backend == SourcepointBackend::Cdn + && response.get_header(header::CACHE_CONTROL).is_none() + && response.get_status().is_success() + { + response.set_header( + header::CACHE_CONTROL, + format!("public, max-age={}", self.config.cache_ttl_seconds), + ); + } + } +} + +fn normalize_target_path(target_path: &str) -> &str { + if target_path.is_empty() { + "/" + } else { + target_path + } +} + +fn parse_sourcepoint_url(url: &str) -> Option<Url> { + let trimmed = url.trim(); + if trimmed.is_empty() { 
+ return None; + } + + let normalized = if trimmed.starts_with("//") { + format!("https:{trimmed}") + } else if trimmed.starts_with("http://") || trimmed.starts_with("https://") { + trimmed.to_string() + } else if trimmed.starts_with(SOURCEPOINT_CDN_HOST) || trimmed.starts_with(SOURCEPOINT_GEO_HOST) + { + format!("https://{trimmed}") + } else { + return None; + }; + + Url::parse(&normalized).ok() +} + +fn build( + settings: &Settings, +) -> Result<Option<Arc<SourcepointIntegration>>, Report<TrustedServerError>> { + let Some(config) = + settings.integration_config::<SourcepointConfig>(SOURCEPOINT_INTEGRATION_ID)? + else { + return Ok(None); + }; + + Ok(Some(SourcepointIntegration::new(Arc::new(config)))) +} + +/// Register the Sourcepoint integration when enabled. +/// +/// # Errors +/// +/// Returns an error when the Sourcepoint integration is enabled with invalid +/// configuration. +pub fn register( + settings: &Settings, +) -> Result<Option<IntegrationRegistration>, Report<TrustedServerError>> { + let Some(integration) = build(settings)? else { + return Ok(None); + }; + + Ok(Some( + IntegrationRegistration::builder(SOURCEPOINT_INTEGRATION_ID) + .with_proxy(integration.clone()) + .with_attribute_rewriter(integration) + .build(), + )) +} + +#[async_trait(?Send)] +impl IntegrationProxy for SourcepointIntegration { + fn integration_name(&self) -> &'static str { + SOURCEPOINT_INTEGRATION_ID + } + + fn routes(&self) -> Vec<IntegrationEndpoint> { + vec![ + self.get("/cdn/*"), + self.post("/cdn/*"), + self.get("/geo"), + self.get("/geo/*"), + ] + } + + async fn handle( + &self, + _settings: &Settings, + req: Request, + ) -> Result<Response, Report<TrustedServerError>> { + let path = req.get_path().to_string(); + let (backend, target_path) = Self::backend_for_route(&path).ok_or_else(|| { + Report::new(Self::error(format!("Unknown Sourcepoint route: {path}"))) + })?; + + let target_url = self + .build_target_url(backend, target_path, req.get_query_str()) + .change_context(Self::error("Failed to build Sourcepoint target URL"))?; + let base_origin = match backend { + SourcepointBackend::Cdn => self.config.cdn_origin.as_str(), + SourcepointBackend::Geo => 
self.config.geo_origin.as_str(), + }; + let backend_name = BackendConfig::from_url(base_origin, true) + .change_context(Self::error("Failed to configure Sourcepoint backend"))?; + + let mut proxy_req = Request::new(req.get_method().clone(), &target_url); + self.copy_headers(&req, &mut proxy_req); + + if matches!( + req.get_method(), + &Method::POST | &Method::PUT | &Method::PATCH + ) { + if let Some(content_type) = req.get_header(header::CONTENT_TYPE) { + proxy_req.set_header(header::CONTENT_TYPE, content_type); + } + proxy_req.set_body(req.into_body()); + } + + let mut response = proxy_req + .send(&backend_name) + .change_context(Self::error("Sourcepoint upstream request failed"))?; + self.apply_cache_headers(backend, &mut response); + Ok(response) + } +} + +impl IntegrationAttributeRewriter for SourcepointIntegration { + fn integration_id(&self) -> &'static str { + SOURCEPOINT_INTEGRATION_ID + } + + fn handles_attribute(&self, attribute: &str) -> bool { + self.config.rewrite_sdk && matches!(attribute, "src" | "href") + } + + fn rewrite( + &self, + _attr_name: &str, + attr_value: &str, + ctx: &IntegrationAttributeContext<'_>, + ) -> AttributeRewriteAction { + if !self.config.rewrite_sdk { + return AttributeRewriteAction::keep(); + } + + if let Some(rewritten) = + self.build_first_party_url(SourcepointBackend::Cdn, attr_value, ctx) + { + return AttributeRewriteAction::replace(rewritten); + } + + if let Some(rewritten) = + self.build_first_party_url(SourcepointBackend::Geo, attr_value, ctx) + { + return AttributeRewriteAction::replace(rewritten); + } + + AttributeRewriteAction::keep() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::integrations::IntegrationRegistry; + use crate::test_support::tests::create_test_settings; + use fastly::http::Method; + use serde_json::json; + + fn config(enabled: bool) -> SourcepointConfig { + SourcepointConfig { + enabled, + rewrite_sdk: true, + cdn_origin: default_cdn_origin(), + geo_origin: default_geo_origin(), + 
cache_ttl_seconds: default_cache_ttl(), + } + } + + #[test] + fn selects_backend_for_cdn_and_geo_routes() { + assert_eq!( + SourcepointIntegration::backend_for_route( + "/integrations/sourcepoint/cdn/wrapper/v2/messages" + ), + Some((SourcepointBackend::Cdn, "/wrapper/v2/messages")) + ); + assert_eq!( + SourcepointIntegration::backend_for_route("/integrations/sourcepoint/geo/"), + Some((SourcepointBackend::Geo, "/")) + ); + } + + #[test] + fn rewrites_cdn_urls_to_first_party_paths() { + let integration = SourcepointIntegration::new(Arc::new(config(true))); + let ctx = IntegrationAttributeContext { + attribute_name: "src", + request_host: "edge.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + }; + + let rewritten = integration.rewrite( + "src", + "https://cdn.privacy-mgmt.com/mms/v2/get_site_data?account_id=821", + &ctx, + ); + + assert_eq!( + rewritten, + AttributeRewriteAction::replace( + "https://edge.example.com/integrations/sourcepoint/cdn/mms/v2/get_site_data?account_id=821", + ) + ); + } + + #[test] + fn rewrites_geo_urls_to_first_party_paths() { + let integration = SourcepointIntegration::new(Arc::new(config(true))); + let ctx = IntegrationAttributeContext { + attribute_name: "href", + request_host: "edge.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + }; + + let rewritten = integration.rewrite("href", "https://geo.privacymanager.io/", &ctx); + + assert_eq!( + rewritten, + AttributeRewriteAction::replace( + "https://edge.example.com/integrations/sourcepoint/geo/" + ) + ); + } + + #[test] + fn leaves_non_sourcepoint_urls_unchanged() { + let integration = SourcepointIntegration::new(Arc::new(config(true))); + let ctx = IntegrationAttributeContext { + attribute_name: "src", + request_host: "edge.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + }; + + assert_eq!( + integration.rewrite("src", "https://example.com/script.js", &ctx), + AttributeRewriteAction::keep() + ); + 
} + + #[test] + fn registers_sourcepoint_routes() { + let mut settings = create_test_settings(); + settings + .integrations + .insert_config(SOURCEPOINT_INTEGRATION_ID, &json!({ "enabled": true })) + .expect("should insert config"); + + let registry = IntegrationRegistry::new(&settings).expect("should create registry"); + assert!( + registry.has_route( + &Method::GET, + "/integrations/sourcepoint/cdn/wrapper/v2/messages" + ), + "should register CDN proxy route" + ); + assert!( + registry.has_route(&Method::GET, "/integrations/sourcepoint/geo/"), + "should register geo proxy route" + ); + } +} diff --git a/docs/guide/integrations-overview.md b/docs/guide/integrations-overview.md index 1c312584..b7f81562 100644 --- a/docs/guide/integrations-overview.md +++ b/docs/guide/integrations-overview.md @@ -4,12 +4,13 @@ Trusted Server provides built-in integrations with popular third-party services, ## Quick Comparison -| Integration | Type | Endpoints | HTML Rewriting | Primary Use Case | Status | -| ------------- | ---------------- | ---------- | ---------------------------- | --------------------------- | ----------- | -| **Prebid** | Proxy + Rewriter | 2-3 routes | Removes Prebid.js scripts | Server-side header bidding | Production | -| **Next.js** | Script Rewriter | None | Rewrites Next.js data | First-party Next.js routing | Production | -| **Permutive** | Proxy + Rewriter | 6 routes | Rewrites SDK URLs | First-party audience data | Production | -| **Testlight** | Proxy + Rewriter | 1 route | Rewrites integration scripts | Testing/development | Development | +| Integration | Type | Endpoints | HTML Rewriting | Primary Use Case | Status | +| --------------- | ---------------- | ---------- | ---------------------------- | --------------------------- | ----------- | +| **Prebid** | Proxy + Rewriter | 2-3 routes | Removes Prebid.js scripts | Server-side header bidding | Production | +| **Next.js** | Script Rewriter | None | Rewrites Next.js data | First-party Next.js 
routing | Production | +| **Permutive** | Proxy + Rewriter | 6 routes | Rewrites SDK URLs | First-party audience data | Production | +| **Sourcepoint** | Proxy + Rewriter | 2 routes | Rewrites CMP asset URLs | First-party CMP delivery | Development | +| **Testlight** | Proxy + Rewriter | 1 route | Rewrites integration scripts | Testing/development | Development | ## Integration Details @@ -119,6 +120,39 @@ rewrite_sdk = true --- +### Sourcepoint + +**What it does:** Proxies Sourcepoint CMP CDN and geo endpoints through Trusted Server and rewrites publisher references to those URLs. + +**Key Features:** + +- CDN proxy for `privacy-mgmt.com` +- Geo lookup proxy for `privacymanager.io` +- HTML attribute rewriting for Sourcepoint assets +- Client-side script guard for dynamic script insertion + +**Configuration:** + +```toml +[integrations.sourcepoint] +enabled = true +rewrite_sdk = true +cdn_origin = "https://cdn.privacy-mgmt.com" +geo_origin = "https://geo.privacymanager.io" +cache_ttl_seconds = 3600 +``` + +**Endpoints:** + +- `GET/POST /integrations/sourcepoint/cdn/*` - Sourcepoint CDN proxy +- `GET /integrations/sourcepoint/geo/*` - Sourcepoint geo proxy + +**When to use:** You load Sourcepoint CMP assets and want them to flow through first-party paths without introducing an open-ended proxy. + +**Learn more:** [Sourcepoint Integration](./integrations/sourcepoint.md) + +--- + ### Testlight **What it does:** Testing/development integration for validating the integration system with OpenRTB-like auctions. diff --git a/docs/guide/integrations/sourcepoint.md b/docs/guide/integrations/sourcepoint.md new file mode 100644 index 00000000..37d3ded3 --- /dev/null +++ b/docs/guide/integrations/sourcepoint.md @@ -0,0 +1,75 @@ +# Sourcepoint Integration + +Sourcepoint provides consent and privacy messaging for publishers. This integration proxies the Sourcepoint CDN and geo endpoints through Trusted Server so the browser loads them from first-party paths. 
+ +## Overview + +The Sourcepoint integration: + +- Proxies `cdn.privacy-mgmt.com` requests through `/integrations/sourcepoint/cdn/*` +- Proxies `geo.privacymanager.io` requests through `/integrations/sourcepoint/geo/*` +- Rewrites matching `src` and `href` attributes during HTML processing +- Installs a client-side script guard for dynamically inserted Sourcepoint assets + +## Configuration + +Add the following to `trusted-server.toml`: + +```toml +[integrations.sourcepoint] +enabled = true +rewrite_sdk = true +cdn_origin = "https://cdn.privacy-mgmt.com" +geo_origin = "https://geo.privacymanager.io" +cache_ttl_seconds = 3600 +``` + +### Configuration Options + +| Option | Type | Default | Description | +| ------------------- | ------- | ------------------------------- | --------------------------------------------------------------------------------- | +| `enabled` | boolean | `false` | Enable the Sourcepoint integration | +| `rewrite_sdk` | boolean | `true` | Rewrite matching Sourcepoint URLs in HTML | +| `cdn_origin` | string | `https://cdn.privacy-mgmt.com` | Sourcepoint CDN origin | +| `geo_origin` | string | `https://geo.privacymanager.io` | Sourcepoint geo origin | +| `cache_ttl_seconds` | integer | `3600` | Cache TTL applied to successful CDN responses when the origin omits cache headers | + +## Endpoints + +| Method | Path | Description | +| ---------- | --------------------------------------------------------------------- | --------------------------------------------- | +| `GET/POST` | `/integrations/sourcepoint/cdn/*` | Proxy Sourcepoint CDN assets and wrapper APIs | +| `GET` | `/integrations/sourcepoint/geo` and `/integrations/sourcepoint/geo/*` | Proxy Sourcepoint geo lookups | + +## HTML Rewriting + +When `rewrite_sdk = true`, Trusted Server rewrites matching Sourcepoint URLs in HTML responses: + +```html + + + + + +``` + +Geo lookups are rewritten the same way: + +```text +https://geo.privacymanager.io/ +-> 
https://publisher.example.com/integrations/sourcepoint/geo/ +``` + +## Client-Side Guard + +Single-page apps often insert CMP scripts after the initial HTML response. The `sourcepoint` tsjs module installs a DOM insertion guard so dynamically inserted Sourcepoint script and preload URLs are rewritten to first-party paths before the browser fetches them. + +## Notes + +- This first version intentionally scopes the integration to the Sourcepoint hosts observed on Autoblog: `cdn.privacy-mgmt.com` and `geo.privacymanager.io`. +- Adjacent privacy vendors and related endpoints can be added later without changing the integration shape. + +## See Also + +- [Integration Guide](/guide/integration-guide) +- [Integrations Overview](/guide/integrations-overview) diff --git a/trusted-server.toml b/trusted-server.toml index d9189aaa..464028ab 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -69,6 +69,13 @@ enabled = false sdk_origin = "https://sdk.privacy-center.org" api_origin = "https://api.privacy-center.org" +[integrations.sourcepoint] +enabled = false +rewrite_sdk = true +cdn_origin = "https://cdn.privacy-mgmt.com" +geo_origin = "https://geo.privacymanager.io" +cache_ttl_seconds = 3600 + [integrations.permutive] enabled = false organization_id = "" @@ -190,4 +197,3 @@ timeout_ms = 1000 [integrations.adserver_mock.context_query_params] permutive_segments = "permutive" - From 494e919de9d6934da69555714018e2f7a2949ac2 Mon Sep 17 00:00:00 2001 From: Christian Date: Wed, 8 Apr 2026 14:39:22 -0500 Subject: [PATCH 02/11] Rewrite Sourcepoint script body so webpack chunks and API calls route through first-party proxy --- .../src/integrations/sourcepoint.rs | 184 +++++++++++++++++- 1 file changed, 181 insertions(+), 3 deletions(-) diff --git a/crates/trusted-server-core/src/integrations/sourcepoint.rs b/crates/trusted-server-core/src/integrations/sourcepoint.rs index 59a8a7d0..4f1e69b7 100644 --- a/crates/trusted-server-core/src/integrations/sourcepoint.rs +++ 
b/crates/trusted-server-core/src/integrations/sourcepoint.rs @@ -1,9 +1,10 @@ -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use async_trait::async_trait; use error_stack::{Report, ResultExt}; -use fastly::http::{header, Method}; +use fastly::http::{header, Method, StatusCode}; use fastly::{Request, Response}; +use regex::Regex; use serde::Deserialize; use url::Url; use validator::Validate; @@ -22,6 +23,41 @@ const SOURCEPOINT_GEO_HOST: &str = "geo.privacymanager.io"; const SOURCEPOINT_CDN_PREFIX: &str = "/integrations/sourcepoint/cdn"; const SOURCEPOINT_GEO_PREFIX: &str = "/integrations/sourcepoint/geo"; +/// Matches quoted references to `cdn.privacy-mgmt.com` URLs in script content. +/// +/// Pattern breakdown: +/// - `(['"])` — opening quote +/// - `(https?:)?` — optional protocol +/// - `(//)?` — optional protocol-relative slashes +/// - `cdn\.privacy-mgmt\.com` — literal CDN hostname +/// - `(/[^'"]*)?` — optional path (everything until closing quote) +/// - `(['"])` — closing quote +/// +/// Handles all common URL styles: +/// - `"https://cdn.privacy-mgmt.com/consent/tcfv2"` +/// - `"//cdn.privacy-mgmt.com/mms/v2"` +/// - `"cdn.privacy-mgmt.com"` (bare domain) +static SP_CDN_URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| { + Regex::new(r#"(['"])(https?:)?(//)?cdn\.privacy-mgmt\.com(/[^'"]*)?(['"])"#) + .expect("Sourcepoint CDN URL regex should compile") +}); + +/// Matches the webpack chunk loading pattern where the script resolves its +/// own origin from `document.currentScript` and appends `/unified/…`. 
+/// +/// The Sourcepoint wrapper builds its public path as: +/// ```js +/// t.origin + "/unified/4.40.1/" +/// ``` +/// We rewrite this so chunks load through the first-party prefix: +/// ```js +/// t.origin + "/integrations/sourcepoint/cdn/unified/4.40.1/" +/// ``` +static SP_ORIGIN_UNIFIED_PATTERN: LazyLock<Regex> = LazyLock::new(|| { + Regex::new(r#"\.origin\s*\+\s*"/unified/"#) + .expect("Sourcepoint origin+unified regex should compile") +}); + /// Configuration for the Sourcepoint first-party proxy. #[derive(Debug, Clone, Deserialize, Validate)] pub struct SourcepointConfig { @@ -184,6 +220,52 @@ impl SourcepointIntegration { ); } } + + /// Rewrite Sourcepoint CDN URLs inside JavaScript response bodies so that + /// dynamically loaded chunks and API calls route through the first-party + /// proxy instead of hitting `cdn.privacy-mgmt.com` directly. + /// + /// Two patterns are rewritten: + /// + /// 1. **Quoted CDN URL references** — e.g. `"https://cdn.privacy-mgmt.com"` + /// becomes `"/integrations/sourcepoint/cdn"`, turning absolute third-party + /// URLs into root-relative first-party paths. + /// + /// 2. **Webpack `origin + "/unified/"` chunk loader** — the Sourcepoint + /// wrapper resolves `document.currentScript.src` and appends + /// `"/unified/…"`. We insert the CDN prefix so chunks load from + /// `/integrations/sourcepoint/cdn/unified/…`. + fn rewrite_script_content(content: &str) -> String { + // Step 1: rewrite quoted cdn.privacy-mgmt.com URLs to root-relative paths. + let after_cdn = SP_CDN_URL_PATTERN + .replace_all(content, |caps: &regex::Captures| { + let open_quote = &caps[1]; + let path = caps.get(4).map_or("", |m| m.as_str()); + let close_quote = &caps[5]; + format!( + "{}{}{}{close_quote}", + open_quote, SOURCEPOINT_CDN_PREFIX, path + ) + }) + .into_owned(); + + // Step 2: rewrite origin+"/unified/" to origin+"/integrations/sourcepoint/cdn/unified/". 
+ SP_ORIGIN_UNIFIED_PATTERN + .replace_all( + &after_cdn, + &format!(r#".origin+"{SOURCEPOINT_CDN_PREFIX}/unified/"#), + ) + .into_owned() + } + + /// Returns `true` when the response `Content-Type` looks like JavaScript. + fn is_javascript_response(response: &Response) -> bool { + response + .get_header_str(header::CONTENT_TYPE) + .is_some_and(|ct| { + ct.contains("javascript") || ct.contains("ecmascript") + }) + } } fn normalize_target_path(target_path: &str) -> &str { @@ -298,6 +380,31 @@ impl IntegrationProxy for SourcepointIntegration { let mut response = proxy_req .send(&backend_name) .change_context(Self::error("Sourcepoint upstream request failed"))?; + + // Rewrite CDN URLs inside JavaScript responses so that dynamically + // loaded chunks and API calls route through the first-party proxy. + if backend == SourcepointBackend::Cdn + && response.get_status() == StatusCode::OK + && self.config.rewrite_sdk + && Self::is_javascript_response(&response) + { + let body = response.take_body_str(); + let rewritten = Self::rewrite_script_content(&body); + + let mut new_response = Response::new(); + new_response.set_status(StatusCode::OK); + new_response.set_header( + header::CONTENT_TYPE, + "application/javascript; charset=utf-8", + ); + new_response.set_header( + header::CACHE_CONTROL, + format!("public, max-age={}", self.config.cache_ttl_seconds), + ); + new_response.set_body(rewritten); + return Ok(new_response); + } + self.apply_cache_headers(backend, &mut response); Ok(response) } @@ -430,6 +537,77 @@ mod tests { ); } + #[test] + fn rewrites_quoted_cdn_urls_to_root_relative_paths() { + let input = r#"var fallback="https://cdn.privacy-mgmt.com";var api="https://cdn.privacy-mgmt.com/consent/tcfv2";"#; + let output = SourcepointIntegration::rewrite_script_content(input); + + assert_eq!( + output, + r#"var fallback="/integrations/sourcepoint/cdn";var api="/integrations/sourcepoint/cdn/consent/tcfv2";"# + ); + } + + #[test] + fn rewrites_protocol_relative_cdn_urls() { 
+ let input = r#"url="//cdn.privacy-mgmt.com/mms/v2/get_site_data""#; + let output = SourcepointIntegration::rewrite_script_content(input); + + assert!( + output.contains("\"/integrations/sourcepoint/cdn/mms/v2/get_site_data\""), + "Should rewrite protocol-relative CDN URL. Got: {output}", + ); + } + + #[test] + fn rewrites_origin_plus_unified_chunk_pattern() { + let input = r#"return t.origin+"/unified/4.40.1/"}"#; + let output = SourcepointIntegration::rewrite_script_content(input); + + assert_eq!( + output, + r#"return t.origin+"/integrations/sourcepoint/cdn/unified/4.40.1/"}"# + ); + } + + #[test] + fn rewrites_both_patterns_in_realistic_snippet() { + // Mirrors the real Sourcepoint webpack public path resolution: + // try { ... return t.origin+"/unified/4.40.1/" } + // catch(e) {} return e+"/unified/4.40.1/" + // where e defaults to "https://cdn.privacy-mgmt.com" + let input = concat!( + r#"var e="https://cdn.privacy-mgmt.com";"#, + r#"try{var t=document.createElement("a");"#, + r#"t.href=document.currentScript.src;"#, + r#"return t.origin+"/unified/4.40.1/"}"#, + r#"catch(n){}return e+"/unified/4.40.1/""#, + ); + + let output = SourcepointIntegration::rewrite_script_content(input); + + assert!( + output.contains(r#"var e="/integrations/sourcepoint/cdn";"#), + "Fallback CDN default should be rewritten. Got: {output}", + ); + assert!( + output.contains(r#"t.origin+"/integrations/sourcepoint/cdn/unified/4.40.1/"}"#), + "Origin chunk path should be prefixed. Got: {output}", + ); + assert!( + output.contains(r#"e+"/unified/4.40.1/""#), + "Fallback concatenation should keep /unified/ since e is already rewritten. 
Got: {output}", + ); + } + + #[test] + fn preserves_non_sourcepoint_urls() { + let input = r#"var cdn="https://example.com/script.js";var x=t.origin+"/assets/app.js""#; + let output = SourcepointIntegration::rewrite_script_content(input); + + assert_eq!(output, input, "Non-Sourcepoint URLs should be untouched"); + } + #[test] fn registers_sourcepoint_routes() { let mut settings = create_test_settings(); @@ -447,7 +625,7 @@ mod tests { "should register CDN proxy route" ); assert!( - registry.has_route(&Method::GET, "/integrations/sourcepoint/geo/"), + registry.has_route(&Method::GET, "/integrations/sourcepoint/geo"), "should register geo proxy route" ); } From 16aa02dfe763ab22ff8194e08ea869fca1fc3eea Mon Sep 17 00:00:00 2001 From: Christian Date: Wed, 8 Apr 2026 15:06:00 -0500 Subject: [PATCH 03/11] Fix 500: request uncompressed content from Sourcepoint CDN before script rewriting --- .../src/integrations/sourcepoint.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/trusted-server-core/src/integrations/sourcepoint.rs b/crates/trusted-server-core/src/integrations/sourcepoint.rs index 4f1e69b7..21c05dee 100644 --- a/crates/trusted-server-core/src/integrations/sourcepoint.rs +++ b/crates/trusted-server-core/src/integrations/sourcepoint.rs @@ -262,9 +262,7 @@ impl SourcepointIntegration { fn is_javascript_response(response: &Response) -> bool { response .get_header_str(header::CONTENT_TYPE) - .is_some_and(|ct| { - ct.contains("javascript") || ct.contains("ecmascript") - }) + .is_some_and(|ct| ct.contains("javascript") || ct.contains("ecmascript")) } } @@ -367,6 +365,13 @@ impl IntegrationProxy for SourcepointIntegration { let mut proxy_req = Request::new(req.get_method().clone(), &target_url); self.copy_headers(&req, &mut proxy_req); + // Request uncompressed content for CDN routes so we can safely read + // and rewrite the JavaScript body. 
Geo routes don't need rewriting, + // so they keep the client's original Accept-Encoding for efficiency. + if backend == SourcepointBackend::Cdn && self.config.rewrite_sdk { + proxy_req.set_header(header::ACCEPT_ENCODING, "identity"); + } + if matches!( req.get_method(), &Method::POST | &Method::PUT | &Method::PATCH From f8e069d71794304bc53b0865e0feebd45b35708a Mon Sep 17 00:00:00 2001 From: Christian Date: Wed, 8 Apr 2026 15:45:33 -0500 Subject: [PATCH 04/11] Inject head script to trap window._sp_ and rewrite Sourcepoint config URLs to first-party paths --- .../src/integrations/sourcepoint.rs | 136 +++++++++++++++++- 1 file changed, 133 insertions(+), 3 deletions(-) diff --git a/crates/trusted-server-core/src/integrations/sourcepoint.rs b/crates/trusted-server-core/src/integrations/sourcepoint.rs index 21c05dee..eb8bfd39 100644 --- a/crates/trusted-server-core/src/integrations/sourcepoint.rs +++ b/crates/trusted-server-core/src/integrations/sourcepoint.rs @@ -13,7 +13,8 @@ use crate::backend::BackendConfig; use crate::error::TrustedServerError; use crate::integrations::{ AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter, - IntegrationEndpoint, IntegrationProxy, IntegrationRegistration, + IntegrationEndpoint, IntegrationHeadInjector, IntegrationHtmlContext, IntegrationProxy, + IntegrationRegistration, }; use crate::settings::{IntegrationConfig, Settings}; @@ -322,7 +323,8 @@ pub fn register( Ok(Some( IntegrationRegistration::builder(SOURCEPOINT_INTEGRATION_ID) .with_proxy(integration.clone()) - .with_attribute_rewriter(integration) + .with_attribute_rewriter(integration.clone()) + .with_head_injector(integration) .build(), )) } @@ -450,10 +452,69 @@ impl IntegrationAttributeRewriter for SourcepointIntegration { } } +impl IntegrationHeadInjector for SourcepointIntegration { + fn integration_id(&self) -> &'static str { + SOURCEPOINT_INTEGRATION_ID + } + + fn head_inserts(&self, _ctx: &IntegrationHtmlContext<'_>) -> Vec { + if 
!self.config.rewrite_sdk { + return vec![]; + } + + // Install a property trap on `window._sp_` so that when the + // publisher's code (typically a Next.js hydration chunk) sets the + // Sourcepoint config object, we intercept it and rewrite any + // `cdn.privacy-mgmt.com` URLs to the first-party proxy prefix. + // + // The trap is transparent: the getter returns the (patched) value and + // the setter accepts any shape the SDK expects. We also handle the + // case where `window._sp_` is already set before our script runs. + vec![format!( + concat!( + "", + ), + cdn_host = SOURCEPOINT_CDN_HOST, + cdn_prefix = SOURCEPOINT_CDN_PREFIX, + geo_host = SOURCEPOINT_GEO_HOST, + geo_prefix = SOURCEPOINT_GEO_PREFIX, + )] + } +} + #[cfg(test)] mod tests { use super::*; - use crate::integrations::IntegrationRegistry; + use crate::integrations::{IntegrationDocumentState, IntegrationRegistry}; use crate::test_support::tests::create_test_settings; use fastly::http::Method; use serde_json::json; @@ -634,4 +695,73 @@ mod tests { "should register geo proxy route" ); } + + #[test] + fn head_injector_emits_sp_property_trap() { + let integration = SourcepointIntegration::new(Arc::new(config(true))); + let document_state = IntegrationDocumentState::default(); + let ctx = IntegrationHtmlContext { + request_host: "ts.autoblog.com", + request_scheme: "https", + origin_host: "origin.autoblog.com", + document_state: &document_state, + }; + + let inserts = integration.head_inserts(&ctx); + assert_eq!(inserts.len(), 1, "should produce exactly one head insert"); + + let script = &inserts[0]; + assert!( + script.starts_with(""), + "should be wrapped in script tags: {script}", + ); + assert!( + script.contains("cdn.privacy-mgmt.com"), + "should reference the CDN host to rewrite: {script}", + ); + assert!( + script.contains("/integrations/sourcepoint/cdn"), + "should contain the first-party CDN prefix: {script}", + ); + assert!( + script.contains("geo.privacymanager.io"), + "should reference the 
geo host to rewrite: {script}", + ); + assert!( + script.contains("/integrations/sourcepoint/geo"), + "should contain the first-party geo prefix: {script}", + ); + assert!( + script.contains("Object.defineProperty"), + "should install a property trap on window._sp_: {script}", + ); + assert!( + script.contains("baseEndpoint"), + "should patch baseEndpoint in the config: {script}", + ); + assert!( + script.contains("metricUrl"), + "should patch metricUrl: {script}", + ); + } + + #[test] + fn head_injector_returns_empty_when_rewrite_disabled() { + let mut cfg = config(true); + cfg.rewrite_sdk = false; + let integration = SourcepointIntegration::new(Arc::new(cfg)); + let document_state = IntegrationDocumentState::default(); + let ctx = IntegrationHtmlContext { + request_host: "ts.autoblog.com", + request_scheme: "https", + origin_host: "origin.autoblog.com", + document_state: &document_state, + }; + + let inserts = integration.head_inserts(&ctx); + assert!( + inserts.is_empty(), + "should not inject anything when rewrite_sdk is false" + ); + } } From c4b26cb20c1937bef70f7e61227c150aa6a63590 Mon Sep 17 00:00:00 2001 From: Christian Date: Wed, 8 Apr 2026 16:05:26 -0500 Subject: [PATCH 05/11] Address review findings: scope Accept-Encoding, add logging, tighten visibility, improve docs --- .../src/integrations/sourcepoint.rs | 112 ++++++++++++++++-- docs/guide/integrations-overview.md | 21 +++- 2 files changed, 119 insertions(+), 14 deletions(-) diff --git a/crates/trusted-server-core/src/integrations/sourcepoint.rs b/crates/trusted-server-core/src/integrations/sourcepoint.rs index eb8bfd39..29ebc4f6 100644 --- a/crates/trusted-server-core/src/integrations/sourcepoint.rs +++ b/crates/trusted-server-core/src/integrations/sourcepoint.rs @@ -1,3 +1,25 @@ +//! Sourcepoint integration for first-party CMP (Consent Management Platform) delivery. +//! +//! Proxies Sourcepoint's CDN (`cdn.privacy-mgmt.com`) and geo +//! 
(`geo.privacymanager.io`) endpoints through Trusted Server so the browser +//! loads consent management assets from first-party paths. +//! +//! ## Rewriting layers +//! +//! | Layer | Mechanism | What it catches | +//! |-------|-----------|-----------------| +//! | HTML attributes | `IntegrationAttributeRewriter` | Static ` ``` -Geo lookups are rewritten the same way: - -```text -https://geo.privacymanager.io/ --> https://publisher.example.com/integrations/sourcepoint/geo/ -``` - ## Client-Side Guard Single-page apps often insert CMP scripts after the initial HTML response. The `sourcepoint` tsjs module installs a DOM insertion guard so dynamically inserted Sourcepoint script and preload URLs are rewritten to first-party paths before the browser fetches them. ## Notes -- This first version intentionally scopes the integration to the Sourcepoint hosts observed on Autoblog: `cdn.privacy-mgmt.com` and `geo.privacymanager.io`. -- Adjacent privacy vendors and related endpoints can be added later without changing the integration shape. +- This version scopes the integration to `cdn.privacy-mgmt.com`. Additional Sourcepoint domains (e.g., `geo.privacymanager.io`) can be added later if publishers require them. ## See Also diff --git a/trusted-server.toml b/trusted-server.toml index 464028ab..f7de4630 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -73,7 +73,6 @@ api_origin = "https://api.privacy-center.org" enabled = false rewrite_sdk = true cdn_origin = "https://cdn.privacy-mgmt.com" -geo_origin = "https://geo.privacymanager.io" cache_ttl_seconds = 3600 [integrations.permutive] From 748fd3978d074da82168140e052adc0abbf33391 Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 10 Apr 2026 10:48:23 -0500 Subject: [PATCH 08/11] Address blocking review findings: SSRF guard, dead code, Prettier - Replace #[validate(url)] with a custom validator that restricts cdn_origin to *.privacy-mgmt.com hosts, preventing SSRF via arbitrary origins (e.g. 
cloud metadata endpoints). - Remove unreachable PUT/PATCH arms from the request body match since routes() only registers GET and POST. - Fix Prettier formatting in script_guard.test.ts. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../sourcepoint/script_guard.test.ts | 4 +-- .../src/integrations/sourcepoint.rs | 29 +++++++++++++++---- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts b/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts index 8aaffe9d..ea2c0aa4 100644 --- a/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts +++ b/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts @@ -30,9 +30,7 @@ describe('Sourcepoint SDK Script Interception Guard', () => { }); it('rewrites CDN URLs to the first-party proxy path', () => { - expect( - rewriteSourcepointUrl('https://cdn.privacy-mgmt.com/wrapper/v2/messages?env=prod') - ).toBe( + expect(rewriteSourcepointUrl('https://cdn.privacy-mgmt.com/wrapper/v2/messages?env=prod')).toBe( `${window.location.origin}/integrations/sourcepoint/cdn/wrapper/v2/messages?env=prod` ); }); diff --git a/crates/trusted-server-core/src/integrations/sourcepoint.rs b/crates/trusted-server-core/src/integrations/sourcepoint.rs index 2c0ee468..6153fd2a 100644 --- a/crates/trusted-server-core/src/integrations/sourcepoint.rs +++ b/crates/trusted-server-core/src/integrations/sourcepoint.rs @@ -27,7 +27,7 @@ use fastly::{Request, Response}; use regex::Regex; use serde::Deserialize; use url::Url; -use validator::Validate; +use validator::{Validate, ValidationError}; use crate::backend::BackendConfig; use crate::error::TrustedServerError; @@ -93,7 +93,7 @@ pub struct SourcepointConfig { pub rewrite_sdk: bool, /// Base URL for Sourcepoint CDN assets and API calls. 
#[serde(default = "default_cdn_origin")] - #[validate(url)] + #[validate(custom(function = "validate_cdn_origin"))] pub cdn_origin: String, /// Cache TTL for Sourcepoint static responses in seconds. #[serde(default = "default_cache_ttl")] @@ -123,6 +123,26 @@ fn default_cache_ttl() -> u32 { 3600 } +/// Validates that `cdn_origin` is a syntactically valid URL whose host ends +/// with `.privacy-mgmt.com`, preventing SSRF via arbitrary origins. +fn validate_cdn_origin(value: &str) -> Result<(), ValidationError> { + let url = Url::parse(value).map_err(|_| { + let mut err = ValidationError::new("invalid_url"); + err.message = Some("cdn_origin must be a valid URL".into()); + err + })?; + + let host = url.host_str().unwrap_or_default(); + if !host.ends_with(".privacy-mgmt.com") { + let mut err = ValidationError::new("disallowed_host"); + err.message = + Some("cdn_origin host must end with .privacy-mgmt.com".into()); + return Err(err); + } + + Ok(()) +} + struct SourcepointIntegration { config: Arc, } @@ -370,10 +390,7 @@ impl IntegrationProxy for SourcepointIntegration { proxy_req.set_header(header::ACCEPT_ENCODING, ae); } - if matches!( - req.get_method(), - &Method::POST | &Method::PUT | &Method::PATCH - ) { + if matches!(req.get_method(), &Method::POST) { if let Some(content_type) = req.get_header(header::CONTENT_TYPE) { proxy_req.set_header(header::CONTENT_TYPE, content_type); } From d7913d57aafe773bc08d5b3f411ce83127fe287c Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 10 Apr 2026 10:49:53 -0500 Subject: [PATCH 09/11] Harden Sourcepoint proxy: UTF-8 safety, redirects, CORS, quote handling - Replace take_body_str() with take_body_bytes() + String::from_utf8() to avoid panicking on non-UTF-8 upstream responses. - Rewrite Location headers on 3xx redirects that point to cdn.privacy-mgmt.com so browsers stay on the first-party proxy. - Preserve upstream CORS headers on the JS-rewrite path instead of discarding them when building a fresh Response. 
- Extend SP_ORIGIN_UNIFIED_PATTERN to match both single- and double-quoted "/unified/" chunk paths, preserving the original quote character in the replacement. - Normalise log prefixes from [sourcepoint] to Sourcepoint: for consistency with APS/Prebid style. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/integrations/sourcepoint.rs | 80 ++++++++++++++++--- 1 file changed, 70 insertions(+), 10 deletions(-) diff --git a/crates/trusted-server-core/src/integrations/sourcepoint.rs b/crates/trusted-server-core/src/integrations/sourcepoint.rs index 6153fd2a..5b8d0466 100644 --- a/crates/trusted-server-core/src/integrations/sourcepoint.rs +++ b/crates/trusted-server-core/src/integrations/sourcepoint.rs @@ -72,13 +72,15 @@ static SP_CDN_URL_PATTERN: LazyLock = LazyLock::new(|| { /// The Sourcepoint wrapper builds its public path as: /// ```js /// t.origin + "/unified/4.40.1/" +/// // or single-quoted: +/// t.origin + '/unified/4.40.1/' /// ``` /// We rewrite this so chunks load through the first-party prefix: /// ```js /// t.origin + "/integrations/sourcepoint/cdn/unified/4.40.1/" /// ``` static SP_ORIGIN_UNIFIED_PATTERN: LazyLock = LazyLock::new(|| { - Regex::new(r#"\.origin\s*\+\s*"/unified/"#) + Regex::new(r#"\.origin\s*\+\s*(['"])/unified/"#) .expect("Sourcepoint origin+unified regex should compile") }); @@ -268,10 +270,10 @@ impl SourcepointIntegration { // Step 2: rewrite origin+"/unified/" to origin+"/integrations/sourcepoint/cdn/unified/". 
SP_ORIGIN_UNIFIED_PATTERN - .replace_all( - &after_cdn, - &format!(r#".origin+"{SOURCEPOINT_CDN_PREFIX}/unified/"#), - ) + .replace_all(&after_cdn, |caps: ®ex::Captures| { + let quote = &caps[1]; + format!(".origin+{quote}{SOURCEPOINT_CDN_PREFIX}/unified/") + }) .into_owned() } @@ -372,7 +374,7 @@ impl IntegrationProxy for SourcepointIntegration { .build_target_url(target_path, req.get_query_str()) .change_context(Self::error("Failed to build Sourcepoint target URL"))?; - log::info!("[sourcepoint] Proxying {method} {path} → {target_url}"); + log::info!("Sourcepoint: proxying {method} {path} → {target_url}"); let backend_name = BackendConfig::from_url(&self.config.cdn_origin, true) .change_context(Self::error("Failed to configure Sourcepoint backend"))?; @@ -402,19 +404,63 @@ impl IntegrationProxy for SourcepointIntegration { .change_context(Self::error("Sourcepoint upstream request failed"))?; log::info!( - "[sourcepoint] Upstream responded with status {}", + "Sourcepoint: upstream responded with status {}", response.get_status() ); + // Rewrite Location headers on redirect responses so the browser + // follows the redirect through the first-party proxy instead of + // leaking the CDN origin to the client. + if response.get_status().is_redirection() { + if let Some(location) = response + .get_header(header::LOCATION) + .and_then(|h| h.to_str().ok()) + .filter(|loc| loc.contains(SOURCEPOINT_CDN_HOST)) + { + let rewritten_location = location + .replace( + &format!("https://{SOURCEPOINT_CDN_HOST}"), + SOURCEPOINT_CDN_PREFIX, + ) + .replace( + &format!("http://{SOURCEPOINT_CDN_HOST}"), + SOURCEPOINT_CDN_PREFIX, + ); + log::info!( + "Sourcepoint: rewrote redirect Location to {rewritten_location}" + ); + response.set_header(header::LOCATION, &rewritten_location); + } + self.apply_cache_headers(&mut response); + return Ok(response); + } + // Rewrite CDN URLs inside JavaScript responses so that dynamically // loaded chunks and API calls route through the first-party proxy. 
if response.get_status() == StatusCode::OK && self.config.rewrite_sdk && Self::is_javascript_response(&response) { - log::info!("[sourcepoint] Rewriting JavaScript response body for {path}"); - - let body = response.take_body_str(); + log::info!("Sourcepoint: rewriting JavaScript response body for {path}"); + + let body_bytes = response.take_body_bytes(); + let body = match String::from_utf8(body_bytes) { + Ok(text) => text, + Err(err) => { + log::warn!( + "Sourcepoint: upstream body for {path} is not valid UTF-8, \ + passing through unmodified" + ); + let mut passthrough = Response::new(); + passthrough.set_status(response.get_status()); + if let Some(ct) = response.get_header(header::CONTENT_TYPE) { + passthrough.set_header(header::CONTENT_TYPE, ct); + } + passthrough.set_body(err.into_bytes()); + self.apply_cache_headers(&mut passthrough); + return Ok(passthrough); + } + }; let rewritten = Self::rewrite_script_content(&body); let mut new_response = Response::new(); @@ -427,6 +473,20 @@ impl IntegrationProxy for SourcepointIntegration { header::CACHE_CONTROL, format!("public, max-age={}", self.config.cache_ttl_seconds), ); + + // Preserve CORS headers from upstream so cross-origin consumers + // continue to work through the first-party proxy. + for header_name in [ + header::ACCESS_CONTROL_ALLOW_ORIGIN, + header::ACCESS_CONTROL_ALLOW_METHODS, + header::ACCESS_CONTROL_ALLOW_HEADERS, + header::ACCESS_CONTROL_EXPOSE_HEADERS, + ] { + if let Some(value) = response.get_header(&header_name) { + new_response.set_header(&header_name, value); + } + } + new_response.set_body(rewritten); return Ok(new_response); } From 6d92a50a3ec5f8b0d1bcb5ec9b3d98aebbb31ebc Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 10 Apr 2026 10:52:47 -0500 Subject: [PATCH 10/11] Clean up rewrite guard, add docs and validation tests - Remove redundant rewrite_sdk check from rewrite() since handles_attribute() already gates on it; update test to verify the guard at the handles_attribute level. 
- Add # Examples section to register() per documentation standards. - Add tests for cdn_origin validation (rejects non-privacy-mgmt.com hosts, accepts valid origins). - Add test for single-quoted origin+'/unified/' rewrite pattern. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/integrations/sourcepoint.rs | 75 ++++++++++++++----- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/crates/trusted-server-core/src/integrations/sourcepoint.rs b/crates/trusted-server-core/src/integrations/sourcepoint.rs index 5b8d0466..52d5d9ea 100644 --- a/crates/trusted-server-core/src/integrations/sourcepoint.rs +++ b/crates/trusted-server-core/src/integrations/sourcepoint.rs @@ -137,8 +137,7 @@ fn validate_cdn_origin(value: &str) -> Result<(), ValidationError> { let host = url.host_str().unwrap_or_default(); if !host.ends_with(".privacy-mgmt.com") { let mut err = ValidationError::new("disallowed_host"); - err.message = - Some("cdn_origin host must end with .privacy-mgmt.com".into()); + err.message = Some("cdn_origin host must end with .privacy-mgmt.com".into()); return Err(err); } @@ -333,6 +332,12 @@ fn build( /// /// Returns an error when the Sourcepoint integration is enabled with invalid /// configuration. 
+/// +/// # Examples +/// +/// ```ignore +/// let registration = sourcepoint::register(&settings)?; +/// ``` pub fn register( settings: &Settings, ) -> Result, Report> { @@ -426,9 +431,7 @@ impl IntegrationProxy for SourcepointIntegration { &format!("http://{SOURCEPOINT_CDN_HOST}"), SOURCEPOINT_CDN_PREFIX, ); - log::info!( - "Sourcepoint: rewrote redirect Location to {rewritten_location}" - ); + log::info!("Sourcepoint: rewrote redirect Location to {rewritten_location}"); response.set_header(header::LOCATION, &rewritten_location); } self.apply_cache_headers(&mut response); @@ -511,10 +514,8 @@ impl IntegrationAttributeRewriter for SourcepointIntegration { attr_value: &str, ctx: &IntegrationAttributeContext<'_>, ) -> AttributeRewriteAction { - if !self.config.rewrite_sdk { - return AttributeRewriteAction::keep(); - } - + // `handles_attribute()` already gates on `rewrite_sdk`, so this + // method is only called when rewriting is enabled. if let Some(rewritten) = self.build_first_party_url(attr_value, ctx) { return AttributeRewriteAction::replace(rewritten); } @@ -746,17 +747,14 @@ mod tests { let mut cfg = config(true); cfg.rewrite_sdk = false; let integration = SourcepointIntegration::new(Arc::new(cfg)); - let ctx = IntegrationAttributeContext { - attribute_name: "src", - request_host: "edge.example.com", - request_scheme: "https", - origin_host: "origin.example.com", - }; - assert_eq!( - integration.rewrite("src", "https://cdn.privacy-mgmt.com/wrapper.js", &ctx,), - AttributeRewriteAction::keep(), - "should not rewrite when rewrite_sdk is false" + assert!( + !integration.handles_attribute("src"), + "should not handle src when rewrite_sdk is false" + ); + assert!( + !integration.handles_attribute("href"), + "should not handle href when rewrite_sdk is false" ); } @@ -839,4 +837,43 @@ mod tests { "should not inject anything when rewrite_sdk is false" ); } + + #[test] + fn rejects_cdn_origin_outside_privacy_mgmt_domain() { + let cfg = SourcepointConfig { + enabled: 
true, + rewrite_sdk: true, + cdn_origin: "http://169.254.169.254".to_string(), + cache_ttl_seconds: default_cache_ttl(), + }; + assert!( + cfg.validate().is_err(), + "should reject cdn_origin not on *.privacy-mgmt.com" + ); + } + + #[test] + fn accepts_valid_cdn_origin() { + let cfg = SourcepointConfig { + enabled: true, + rewrite_sdk: true, + cdn_origin: "https://cdn.privacy-mgmt.com".to_string(), + cache_ttl_seconds: default_cache_ttl(), + }; + assert!( + cfg.validate().is_ok(), + "should accept cdn_origin on *.privacy-mgmt.com" + ); + } + + #[test] + fn rewrites_single_quoted_origin_plus_unified_pattern() { + let input = r#"return t.origin+'/unified/4.40.1/'}"#; + let output = SourcepointIntegration::rewrite_script_content(input); + + assert_eq!( + output, r#"return t.origin+'/integrations/sourcepoint/cdn/unified/4.40.1/'}"#, + "should rewrite single-quoted unified path" + ); + } } From 205be6431bad89f3be1d1f8b0bc6b9842487bb30 Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 10 Apr 2026 12:04:59 -0500 Subject: [PATCH 11/11] Fix CodeQL URL sanitization alerts, add response body size guard Refactor normalizeSourcepointUrl to remove the bare-domain startsWith check that triggered CodeQL "Incomplete URL substring sanitization" alerts. The host === exact match was already the security boundary; now the normalization layer no longer references the CDN hostname at all, eliminating the static analysis finding. Add a Content-Length guard (5 MB) before reading upstream response bodies into memory for JavaScript rewriting, preventing unbounded memory consumption from unexpectedly large responses. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../integrations/sourcepoint/script_guard.ts | 17 ++++---------- .../sourcepoint/script_guard.test.ts | 7 ++++++ .../src/integrations/sourcepoint.rs | 23 +++++++++++++++++++ 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/crates/js/lib/src/integrations/sourcepoint/script_guard.ts b/crates/js/lib/src/integrations/sourcepoint/script_guard.ts index 08bc844e..21eaea12 100644 --- a/crates/js/lib/src/integrations/sourcepoint/script_guard.ts +++ b/crates/js/lib/src/integrations/sourcepoint/script_guard.ts @@ -8,19 +8,12 @@ function normalizeSourcepointUrl(url: string): string | null { const trimmed = url.trim(); if (!trimmed) return null; - if (trimmed.startsWith('//')) { - return `https:${trimmed}`; - } - - if (trimmed.startsWith('http://') || trimmed.startsWith('https://')) { - return trimmed; - } - - if (trimmed.startsWith(SOURCEPOINT_CDN_HOST)) { - return `https://${trimmed}`; - } + if (trimmed.startsWith('//')) return `https:${trimmed}`; + if (trimmed.startsWith('http://') || trimmed.startsWith('https://')) return trimmed; - return null; + // Bare domain or path — attempt to parse as https URL. + // The host === check in isSourcepointUrl rejects non-matching domains. 
+ return `https://${trimmed}`; } function parseSourcepointUrl(url: string): URL | null { diff --git a/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts b/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts index ea2c0aa4..c075600a 100644 --- a/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts +++ b/crates/js/lib/test/integrations/sourcepoint/script_guard.test.ts @@ -25,10 +25,17 @@ describe('Sourcepoint SDK Script Interception Guard', () => { it('detects Sourcepoint CDN URLs', () => { expect(isSourcepointUrl('https://cdn.privacy-mgmt.com/wrapper/v2/messages')).toBe(true); expect(isSourcepointUrl('//cdn.privacy-mgmt.com/mms/v2/get_site_data')).toBe(true); + expect(isSourcepointUrl('cdn.privacy-mgmt.com/consent/tcfv2')).toBe(true); expect(isSourcepointUrl('https://example.com/script.js')).toBe(false); expect(isSourcepointUrl('https://geo.privacymanager.io/')).toBe(false); }); + it('rejects subdomain-spoofing URLs', () => { + expect(isSourcepointUrl('cdn.privacy-mgmt.com.evil.com/script.js')).toBe(false); + expect(isSourcepointUrl('https://cdn.privacy-mgmt.com.evil.com/')).toBe(false); + expect(isSourcepointUrl('notcdn.privacy-mgmt.com/path')).toBe(false); + }); + it('rewrites CDN URLs to the first-party proxy path', () => { expect(rewriteSourcepointUrl('https://cdn.privacy-mgmt.com/wrapper/v2/messages?env=prod')).toBe( `${window.location.origin}/integrations/sourcepoint/cdn/wrapper/v2/messages?env=prod` diff --git a/crates/trusted-server-core/src/integrations/sourcepoint.rs b/crates/trusted-server-core/src/integrations/sourcepoint.rs index 52d5d9ea..f155c0a5 100644 --- a/crates/trusted-server-core/src/integrations/sourcepoint.rs +++ b/crates/trusted-server-core/src/integrations/sourcepoint.rs @@ -42,6 +42,11 @@ const SOURCEPOINT_INTEGRATION_ID: &str = "sourcepoint"; const SOURCEPOINT_CDN_HOST: &str = "cdn.privacy-mgmt.com"; const SOURCEPOINT_CDN_PREFIX: &str = "/integrations/sourcepoint/cdn"; +/// Maximum response body size 
(5 MB) that will be read into memory for +/// JavaScript rewriting. Responses larger than this are passed through +/// unmodified to avoid unbounded memory consumption. +const MAX_REWRITE_BODY_SIZE: u64 = 5 * 1024 * 1024; + /// Matches quoted references to `cdn.privacy-mgmt.com` URLs in script content. /// /// Pattern breakdown: @@ -446,6 +451,24 @@ impl IntegrationProxy for SourcepointIntegration { { log::info!("Sourcepoint: rewriting JavaScript response body for {path}"); + // Guard against unexpectedly large responses to avoid unbounded + // memory consumption during rewriting. + if let Some(content_length) = response + .get_header(header::CONTENT_LENGTH) + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse::().ok()) + { + if content_length > MAX_REWRITE_BODY_SIZE { + log::warn!( + "Sourcepoint: response body for {path} exceeds {} bytes \ + (Content-Length: {content_length}), skipping rewrite", + MAX_REWRITE_BODY_SIZE + ); + self.apply_cache_headers(&mut response); + return Ok(response); + } + } + let body_bytes = response.take_body_bytes(); let body = match String::from_utf8(body_bytes) { Ok(text) => text,