diff --git a/.changeset/frame-metadata-user-data-addition.md b/.changeset/frame-metadata-user-data-addition.md new file mode 100644 index 000000000..8fba9917b --- /dev/null +++ b/.changeset/frame-metadata-user-data-addition.md @@ -0,0 +1,7 @@ +--- +livekit: minor +livekit-ffi: minor +livekit-protocol: minor +--- + +Add `user_data` support to frame metadata, allowing arbitrary application-supplied bytes to be attached to a video frame via the `PTF_USER_DATA` packet trailer feature. diff --git a/examples/local_video/src/publisher.rs b/examples/local_video/src/publisher.rs index c9cbb93c2..5f85660df 100644 --- a/examples/local_video/src/publisher.rs +++ b/examples/local_video/src/publisher.rs @@ -33,6 +33,7 @@ use yuv_sys; mod argus; mod codec_display; mod test_pattern; +mod user_data; mod timestamp_burn; mod video_display; mod viewport_aspect; @@ -250,6 +251,13 @@ struct Args { #[arg(long, default_value_t = false)] attach_frame_id: bool, + /// Attach keyboard-controlled 6-channel data (6x int16 fixed-point, 12 bytes) + /// as the per-frame user_data trailer field. Control the channels from the + /// preview window: Q/A=CH1, W/S=CH2, E/D=CH3, R/F=CH4, T/G=CH5, Y/H=CH6. + /// Requires --display-video (the window provides keyboard focus). + #[arg(long, default_value_t = false, requires = "display_video")] + attach_user_data: bool, + /// Open a window that displays the video frames being published #[arg(long, default_value_t = false)] display_video: bool, @@ -1181,6 +1189,7 @@ async fn run(args: Args, ctrl_c_received: Arc) -> Result<()> { let mut frame_metadata_features = FrameMetadataFeatures::default(); frame_metadata_features.user_timestamp = args.attach_timestamp; frame_metadata_features.frame_id = args.attach_frame_id; + frame_metadata_features.user_data = args.attach_user_data; let publish_opts = |codec: VideoCodec| TrackPublishOptions { source: TrackSource::Camera, @@ -1222,6 +1231,12 @@ async fn run(args: Args, ctrl_c_received: Arc) -> Result<()> { display_timing: args.display_timing, }; + // Shared keyboard-controlled channel values, written by the preview window + // and read by the capture loop to fill the user_data trailer. + let user_data_channels = args + .attach_user_data + .then(|| Arc::new(Mutex::new([0.0f32; user_data::NUM_CHANNELS]))); + let publish_stats_task = tokio::spawn(update_publisher_video_stats(track.clone(), ctrl_c_received.clone())); @@ -1235,6 +1250,7 @@ async fn run(args: Args, ctrl_c_received: Arc) -> Result<()> { session, width, height, + user_data_channels.clone(), ) .await; let _ = publish_stats_task.await; @@ -1264,6 +1280,7 @@ async fn run(args: Args, ctrl_c_received: Arc) -> Result<()> { height, Some(shared.clone()), publish_timing_state.clone(), + user_data_channels.clone(), )); let display_result = video_display::run_display( @@ -1271,6 +1288,7 @@ async fn run(args: Args, ctrl_c_received: Arc) -> Result<()> { shared, ctrl_c_received.clone(), Some(width as f32 / height as f32), + user_data_channels.clone(), ); let capture_result = capture_task.await?; @@ -1289,6 +1307,7 @@ async fn run(args: Args, ctrl_c_received: Arc) -> Result<()> { height, None, publish_timing_state.clone(), + user_data_channels.clone(), ) .await; let _ = publish_stats_task.await; @@ -1310,6 +1329,7 @@ async fn run_capture_loop( height: u32, display_shared: Option>>, publish_timing_state: Option>>, + user_data_channels: Option>>, ) -> Result<()> { // Pace publishing at the requested FPS (not the camera-reported FPS) to hit desired cadence let pace_fps = config.fps as f64; @@ -1573,8 +1593,10 @@ async fn run_capture_loop( if burned_timestamp_us.is_some() { debug_assert_eq!(burned_timestamp_us, Some(capture_wall_time_us)); } - frame.frame_metadata = if user_ts.is_some() || fid.is_some() { - Some(FrameMetadata { user_timestamp: user_ts, frame_id: fid }) + let user_data = + user_data_channels.as_ref().map(|targets| user_data::encode(&targets.lock())); + frame.frame_metadata = if user_ts.is_some() || fid.is_some() || user_data.is_some() { + Some(FrameMetadata { user_timestamp: user_ts, frame_id: fid, user_data }) } else { None }; @@ -1702,6 +1724,7 @@ async fn run_argus_capture_loop( session: argus::ArgusCaptureSession, width: u32, height: u32, + user_data_channels: Option>>, ) -> Result<()> { let capture_handle = std::thread::Builder::new() .name("mipi-capture".into()) @@ -1818,8 +1841,10 @@ async fn run_argus_capture_loop( } else { None }; - let frame_metadata = if user_ts.is_some() || fid.is_some() { - Some(FrameMetadata { user_timestamp: user_ts, frame_id: fid }) + let user_data = + user_data_channels.as_ref().map(|targets| user_data::encode(&targets.lock())); + let frame_metadata = if user_ts.is_some() || fid.is_some() || user_data.is_some() { + Some(FrameMetadata { user_timestamp: user_ts, frame_id: fid, user_data }) } else { None }; diff --git a/examples/local_video/src/subscriber.rs b/examples/local_video/src/subscriber.rs index 54f4fff5e..cceb55d6b 100644 --- a/examples/local_video/src/subscriber.rs +++ b/examples/local_video/src/subscriber.rs @@ -13,7 +13,7 @@ use livekit_api::access_token; use log::{debug, info}; use parking_lot::Mutex; use std::{ - collections::HashMap, + collections::{HashMap, VecDeque}, env, sync::OnceLock, sync::{ @@ -24,6 +24,7 @@ use std::{ }; mod codec_display; +mod user_data; mod subscriber_timing; mod viewport_aspect; @@ -946,7 +947,7 @@ async fn handle_track_subscribed( if drained_frames > 0 { debug!("Dropped {drained_frames} stale decoded frames before render upload"); } - if let Some(metadata) = frame.frame_metadata { + if let Some(metadata) = &frame.frame_metadata { if let Some(capture_timestamp_us) = metadata.user_timestamp { subscriber_timing_sink.record_frame_received_by_sink( capture_timestamp_us, @@ -1120,6 +1121,81 @@ fn subscriber_overlay_lines( Some(lines) } +/// Render a live line graph of the six decoded channel values (top-right overlay). +/// Each trace is normalized so ±`VALUE_RANGE` spans the plot height. +fn paint_channel_graph(ctx: &egui::Context, history: &VecDeque<[f32; user_data::NUM_CHANNELS]>) { + if history.is_empty() { + return; + } + let latest = *history.back().unwrap(); + + egui::Area::new("channel_graph".into()) + .anchor(egui::Align2::RIGHT_TOP, egui::vec2(-10.0, 10.0)) + .interactable(false) + .show(ctx, |ui| { + egui::Frame::NONE + .fill(egui::Color32::from_black_alpha(180)) + .corner_radius(egui::CornerRadius::same(4)) + .inner_margin(egui::Margin::same(8)) + .show(ui, |ui| { + let plot_size = egui::vec2(360.0, 160.0); + // Pin the panel to the plot width so the legend wraps within it + // instead of stretching the frame wider than the graph. + ui.set_max_width(plot_size.x); + + ui.label( + egui::RichText::new("user_data channels") + .monospace() + .size(12.0) + .color(egui::Color32::WHITE), + ); + + let (rect, _) = ui.allocate_exact_size(plot_size, egui::Sense::hover()); + let painter = ui.painter_at(rect); + + // Zero axis. + painter.hline( + rect.x_range(), + rect.center().y, + egui::Stroke::new(1.0, egui::Color32::from_gray(90)), + ); + + let n = history.len(); + let denom = (n.saturating_sub(1)).max(1) as f32; + let half_h = rect.height() / 2.0 - 2.0; + for j in 0..user_data::NUM_CHANNELS { + let points: Vec = history + .iter() + .enumerate() + .map(|(i, sample)| { + let x = rect.left() + (i as f32 / denom) * rect.width(); + let norm = (sample[j] / user_data::VALUE_RANGE) + .clamp(-1.0, 1.0); + let y = rect.center().y - norm * half_h; + egui::pos2(x, y) + }) + .collect(); + painter.add(egui::Shape::line( + points, + egui::Stroke::new(1.5, CHANNEL_COLORS[j]), + )); + } + + // Legend: current value per channel. + ui.horizontal_wrapped(|ui| { + for (j, value) in latest.iter().enumerate() { + ui.label( + egui::RichText::new(format!("CH{}: {:>+6.2}", j + 1, value)) + .monospace() + .size(11.0) + .color(CHANNEL_COLORS[j]), + ); + } + }); + }); + }); +} + fn paint_subscriber_overlay(ctx: &egui::Context, lines: &[String]) { egui::Area::new("subscriber_overlay".into()) .anchor(egui::Align2::LEFT_TOP, egui::vec2(10.0, 10.0)) @@ -1182,6 +1258,19 @@ fn handle_track_unpublished( clear_hud_and_simulcast(shared, frame_slot, video_size, simulcast, subscriber_timing); } +/// Number of channel samples retained for the live graph (~10s at 30fps). +const CHANNEL_HISTORY_LEN: usize = 300; + +/// Distinct colors for the six channel traces. +const CHANNEL_COLORS: [egui::Color32; user_data::NUM_CHANNELS] = [ + egui::Color32::from_rgb(0xef, 0x53, 0x50), // red + egui::Color32::from_rgb(0xff, 0xa7, 0x26), // orange + egui::Color32::from_rgb(0xff, 0xee, 0x58), // yellow + egui::Color32::from_rgb(0x66, 0xbb, 0x6a), // green + egui::Color32::from_rgb(0x42, 0xa5, 0xf5), // blue + egui::Color32::from_rgb(0xab, 0x47, 0xbc), // purple +]; + struct VideoApp { shared: Arc>, frame_slot: Arc, @@ -1192,6 +1281,8 @@ struct VideoApp { ctrl_c_received: Arc, viewport: AspectConstrainedViewport, display_timestamp: bool, + /// Rolling history of decoded channel values from the user_data trailer. + channel_history: VecDeque<[f32; user_data::NUM_CHANNELS]>, } impl eframe::App for VideoApp { @@ -1208,7 +1299,7 @@ impl eframe::App for VideoApp { let render_frame = self.frame_slot.take(); if let Some(frame) = render_frame.as_ref() { - if let Some(metadata) = frame.frame_metadata { + if let Some(metadata) = &frame.frame_metadata { if let Some(capture_timestamp_us) = metadata.user_timestamp { self.subscriber_timing.record_frame_selected_for_render( capture_timestamp_us, @@ -1216,6 +1307,13 @@ impl eframe::App for VideoApp { current_timestamp_us(), ); } + // Decode the 6 user_data channel values for the live graph. + if let Some(values) = metadata.user_data.as_deref().and_then(user_data::decode) { + if self.channel_history.len() >= CHANNEL_HISTORY_LEN { + self.channel_history.pop_front(); + } + self.channel_history.push_back(values); + } } } @@ -1255,6 +1353,8 @@ impl eframe::App for VideoApp { paint_subscriber_overlay(ctx, lines); } + paint_channel_graph(ctx, &self.channel_history); + // Simulcast layer controls: bottom-left overlay egui::Area::new("simulcast_controls".into()) .anchor(egui::Align2::LEFT_BOTTOM, egui::vec2(10.0, -10.0)) @@ -1450,6 +1550,7 @@ async fn run(args: Args, ctrl_c_received: Arc) -> Result<()> { ctrl_c_received: ctrl_c_received.clone(), viewport, display_timestamp: args.display_timestamp, + channel_history: VecDeque::with_capacity(CHANNEL_HISTORY_LEN), }; let native_options = viewport_aspect::native_options(None); eframe::run_native( @@ -1797,8 +1898,8 @@ impl CallbackTrait for YuvPaintCallback { let frame_for_upload = self.render_frame.lock().take().map(|frame| { let prepare_timestamp_us = current_timestamp_us(); - let frame_id = frame.frame_metadata.and_then(|m| m.frame_id); - let sample = frame.frame_metadata.and_then(|metadata| { + let frame_id = frame.frame_metadata.as_ref().and_then(|m| m.frame_id); + let sample = frame.frame_metadata.as_ref().and_then(|metadata| { metadata.user_timestamp.map(|capture_timestamp_us| PendingPaintSample { frame_id, capture_timestamp_us, diff --git a/examples/local_video/src/user_data.rs b/examples/local_video/src/user_data.rs new file mode 100644 index 000000000..128004c46 --- /dev/null +++ b/examples/local_video/src/user_data.rs @@ -0,0 +1,81 @@ +//! Shared 6-channel codec for the `--attach-user-data` demo. +//! +//! Six channel values are encoded as little-endian `int16` fixed-point, 2 bytes +//! per channel = 12 bytes total, and shipped in the `user_data` frame-metadata +//! trailer field. The full `int16` range maps to `±VALUE_RANGE`, giving +//! ~1/32767 of the range in resolution — well within the ~232-byte trailer +//! budget. +//! +//! Both the `publisher` and `subscriber` binaries include this file via +//! `mod user_data;` so they agree on the wire format. + +/// Number of channels carried in the user_data payload. +pub const NUM_CHANNELS: usize = 6; + +/// Encoded payload size in bytes (2 bytes per channel). +pub const ENCODED_LEN: usize = NUM_CHANNELS * 2; + +/// Value that maps to `i16::MAX`. Channel values are normalized to +/// `±VALUE_RANGE` before quantization. +pub const VALUE_RANGE: f32 = 1.0; + +/// Value units per `int16` step. +fn scale() -> f32 { + VALUE_RANGE / i16::MAX as f32 +} + +/// Clamp a channel value to the encodable `±VALUE_RANGE` range. +pub fn clamp_value(value: f32) -> f32 { + value.clamp(-VALUE_RANGE, VALUE_RANGE) +} + +/// Encode 6 channel values into 12 little-endian `int16` bytes. +pub fn encode(values: &[f32; NUM_CHANNELS]) -> Vec { + let s = scale(); + let mut buf = Vec::with_capacity(ENCODED_LEN); + for &v in values { + let q = (v / s).round().clamp(i16::MIN as f32, i16::MAX as f32) as i16; + buf.extend_from_slice(&q.to_le_bytes()); + } + buf +} + +/// Decode 6 channel values from the `user_data` payload. Returns `None` if the +/// buffer is too short to hold all six values. +pub fn decode(buf: &[u8]) -> Option<[f32; NUM_CHANNELS]> { + if buf.len() < ENCODED_LEN { + return None; + } + let s = scale(); + let mut out = [0.0f32; NUM_CHANNELS]; + for (i, chunk) in buf.chunks_exact(2).take(NUM_CHANNELS).enumerate() { + out[i] = i16::from_le_bytes([chunk[0], chunk[1]]) as f32 * s; + } + Some(out) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn round_trips_within_quantization_error() { + let values = [0.0, 0.5, -0.75, 1.0, -0.1, 0.9]; + let decoded = decode(&encode(&values)).unwrap(); + for (v, d) in values.iter().zip(decoded.iter()) { + assert!((v - d).abs() <= scale(), "got {d}, expected ~{v}"); + } + } + + #[test] + fn clamp_keeps_within_range() { + assert_eq!(clamp_value(100.0), VALUE_RANGE); + assert_eq!(clamp_value(-100.0), -VALUE_RANGE); + assert_eq!(clamp_value(0.5), 0.5); + } + + #[test] + fn decode_rejects_short_buffer() { + assert!(decode(&[0u8; ENCODED_LEN - 1]).is_none()); + } +} diff --git a/examples/local_video/src/video_display.rs b/examples/local_video/src/video_display.rs index 9acb7fd1e..3d809d0fb 100644 --- a/examples/local_video/src/video_display.rs +++ b/examples/local_video/src/video_display.rs @@ -593,11 +593,54 @@ mod tests { } } +/// Per-channel (increment, decrement) keys: Q/A=CH1, W/S=CH2, E/D=CH3, R/F=CH4, +/// T/G=CH5, Y/H=CH6. +const CHANNEL_KEYS: [(egui::Key, egui::Key); crate::user_data::NUM_CHANNELS] = [ + (egui::Key::Q, egui::Key::A), + (egui::Key::W, egui::Key::S), + (egui::Key::E, egui::Key::D), + (egui::Key::R, egui::Key::F), + (egui::Key::T, egui::Key::G), + (egui::Key::Y, egui::Key::H), +]; + +/// How fast a held key drives a channel (value units/second). Full `±VALUE_RANGE` +/// span in ~2s. +const CHANNEL_RATE_PER_S: f32 = 1.0; + +type ChannelValues = Arc>; + struct VideoApp { shared: Arc>, ctrl_c_received: Arc, viewport: AspectConstrainedViewport, timing_overlay_state: PublisherTimingOverlayState, + /// Keyboard-controlled user_data channel values shared with the capture loop. + channels: Option, +} + +/// Apply held-key deltas to the shared channel values and return the current +/// values for display. Returns `None` when channel control is off. +fn drive_channels( + ctx: &egui::Context, + targets: &ChannelValues, +) -> [f32; crate::user_data::NUM_CHANNELS] { + let dt = ctx.input(|i| i.stable_dt).min(0.1); + let mut values = targets.lock(); + for (idx, (inc, dec)) in CHANNEL_KEYS.iter().enumerate() { + let (inc_down, dec_down) = ctx.input(|i| (i.key_down(*inc), i.key_down(*dec))); + let mut delta = 0.0; + if inc_down { + delta += CHANNEL_RATE_PER_S * dt; + } + if dec_down { + delta -= CHANNEL_RATE_PER_S * dt; + } + if delta != 0.0 { + values[idx] = crate::user_data::clamp_value(values[idx] + delta); + } + } + *values } impl eframe::App for VideoApp { @@ -613,6 +656,8 @@ impl eframe::App for VideoApp { self.viewport.set_video_size(ctx, width, height); } + let channel_values = self.channels.as_ref().map(|targets| drive_channels(ctx, targets)); + egui::CentralPanel::default().frame(egui::Frame::NONE).show(ctx, |ui| { ui.ctx().request_repaint(); @@ -662,21 +707,58 @@ impl eframe::App for VideoApp { }); }); + if let Some(values) = channel_values { + paint_channel_controls(ctx, &values); + } + ctx.request_repaint_after(viewport_aspect::VIDEO_REPAINT_INTERVAL); } } +/// Bottom-left HUD listing the channel key bindings and current values. +fn paint_channel_controls(ctx: &egui::Context, values: &[f32; crate::user_data::NUM_CHANNELS]) { + const KEY_LABELS: [&str; crate::user_data::NUM_CHANNELS] = + ["Q/A", "W/S", "E/D", "R/F", "T/G", "Y/H"]; + let mut lines = vec!["user_data channels".to_string()]; + for (idx, value) in values.iter().enumerate() { + lines.push(format!("CH{} [{}]: {:>+6.2}", idx + 1, KEY_LABELS[idx], value)); + } + + egui::Area::new("channel_controls".into()) + .anchor(egui::Align2::LEFT_BOTTOM, egui::vec2(10.0, -10.0)) + .interactable(false) + .show(ctx, |ui| { + egui::Frame::NONE + .fill(egui::Color32::from_black_alpha(160)) + .corner_radius(egui::CornerRadius::same(4)) + .inner_margin(egui::Margin::same(6)) + .show(ui, |ui| { + ui.add( + egui::Label::new( + egui::RichText::new(lines.join("\n")) + .monospace() + .size(12.0) + .color(egui::Color32::WHITE), + ) + .extend(), + ); + }); + }); +} + pub(crate) fn run_display( title: &str, shared: Arc>, ctrl_c_received: Arc, initial_aspect: Option, + channels: Option, ) -> Result<()> { let app = VideoApp { shared, ctrl_c_received: ctrl_c_received.clone(), viewport: AspectConstrainedViewport::new(initial_aspect), timing_overlay_state: PublisherTimingOverlayState::default(), + channels, }; let native_options = viewport_aspect::native_options(initial_aspect); let result = eframe::run_native(title, native_options, Box::new(|_| Ok(Box::new(app)))); diff --git a/libwebrtc/src/native/packet_trailer.rs b/libwebrtc/src/native/packet_trailer.rs index 6ba68a909..476b01a9f 100644 --- a/libwebrtc/src/native/packet_trailer.rs +++ b/libwebrtc/src/native/packet_trailer.rs @@ -159,13 +159,15 @@ impl PacketTrailerHandler { } /// Lookup the frame metadata for a given RTP timestamp (receiver side). - /// Returns `Some((user_timestamp, frame_id))` if found, `None` otherwise. - /// The entry is removed from the map after a successful lookup. - pub fn lookup_frame_metadata(&self, rtp_timestamp: u32) -> Option<(u64, u32)> { + /// Returns `Some((user_timestamp, frame_id, user_data))` if found, + /// `None` otherwise. The entry is removed from the map after a + /// successful lookup. + pub fn lookup_frame_metadata(&self, rtp_timestamp: u32) -> Option<(u64, u32, Vec)> { let ts = self.sys_handle.lookup_timestamp(rtp_timestamp); if ts != u64::MAX { let frame_id = self.sys_handle.last_lookup_frame_id(); - Some((ts, frame_id)) + let user_data = self.sys_handle.last_lookup_user_data(); + Some((ts, frame_id, user_data)) } else { None } @@ -188,8 +190,14 @@ impl PacketTrailerHandler { capture_timestamp_us: i64, user_timestamp: u64, frame_id: u32, + user_data: &[u8], ) { - self.sys_handle.store_frame_metadata(capture_timestamp_us, user_timestamp, frame_id); + self.sys_handle.store_frame_metadata( + capture_timestamp_us, + user_timestamp, + frame_id, + user_data, + ); } pub(crate) fn sys_handle(&self) -> SharedPtr { diff --git a/libwebrtc/src/native/video_source.rs b/libwebrtc/src/native/video_source.rs index 4bbe47fcf..323e61c88 100644 --- a/libwebrtc/src/native/video_source.rs +++ b/libwebrtc/src/native/video_source.rs @@ -89,6 +89,7 @@ impl NativeVideoSource { has_packet_trailer: false, user_timestamp: 0, frame_id: 0, + user_data: Vec::new(), }, ); } @@ -115,9 +116,14 @@ impl NativeVideoSource { }; builder.pin_mut().set_timestamp_us(capture_ts); - let (has_trailer, user_ts, fid) = match frame.frame_metadata { - Some(meta) => (true, meta.user_timestamp.unwrap_or(0), meta.frame_id.unwrap_or(0)), - None => (false, 0, 0), + let (has_trailer, user_ts, fid, user_data) = match &frame.frame_metadata { + Some(meta) => ( + true, + meta.user_timestamp.unwrap_or(0), + meta.frame_id.unwrap_or(0), + meta.user_data.clone().unwrap_or_default(), + ), + None => (false, 0, 0, Vec::new()), }; self.inner.lock().captured_frames += 1; @@ -128,6 +134,7 @@ impl NativeVideoSource { has_packet_trailer: has_trailer, user_timestamp: user_ts, frame_id: fid, + user_data, }, ); } @@ -167,9 +174,14 @@ impl NativeVideoSource { timestamp_us: i64, frame_metadata: Option, ) -> bool { - let (has_trailer, user_ts, fid) = match frame_metadata { - Some(meta) => (true, meta.user_timestamp.unwrap_or(0), meta.frame_id.unwrap_or(0)), - None => (false, 0, 0), + let (has_trailer, user_ts, fid, user_data) = match frame_metadata { + Some(meta) => ( + true, + meta.user_timestamp.unwrap_or(0), + meta.frame_id.unwrap_or(0), + meta.user_data.unwrap_or_default(), + ), + None => (false, 0, 0, Vec::new()), }; self.inner.lock().captured_frames += 1; @@ -183,6 +195,7 @@ impl NativeVideoSource { has_packet_trailer: has_trailer, user_timestamp: user_ts, frame_id: fid, + user_data, }, ) } diff --git a/libwebrtc/src/native/video_stream.rs b/libwebrtc/src/native/video_stream.rs index 9edcc0f2a..1828dcb0d 100644 --- a/libwebrtc/src/native/video_stream.rs +++ b/libwebrtc/src/native/video_stream.rs @@ -112,14 +112,15 @@ impl VideoTrackObserver { ) -> Option { handler .and_then(|handler| { - handler.lookup_frame_metadata(rtp_timestamp).map(|(ts, fid)| { + handler.lookup_frame_metadata(rtp_timestamp).map(|(ts, fid, user_data)| { handler.emit_subscribe_timing(SubscribeTimingStage::DecoderOutput, ts, fid); - (ts, fid) + (ts, fid, user_data) }) }) - .map(|(ts, fid)| FrameMetadata { + .map(|(ts, fid, user_data)| FrameMetadata { user_timestamp: Some(ts), frame_id: if fid != 0 { Some(fid) } else { None }, + user_data: if user_data.is_empty() { None } else { Some(user_data) }, }) } } diff --git a/libwebrtc/src/video_frame.rs b/libwebrtc/src/video_frame.rs index 3f90768a7..1c728c3e9 100644 --- a/libwebrtc/src/video_frame.rs +++ b/libwebrtc/src/video_frame.rs @@ -55,13 +55,20 @@ pub enum VideoBufferType { /// Metadata carried alongside a video frame via the packet trailer mechanism. /// /// Each field corresponds to an independently negotiable packet trailer feature -/// (`PTF_USER_TIMESTAMP`, `PTF_FRAME_ID`), so individual fields are `Option`. -#[derive(Debug, Clone, Copy)] +/// (`PTF_USER_TIMESTAMP`, `PTF_FRAME_ID`, `PTF_USER_DATA`), so individual fields +/// are `Option`. +#[derive(Debug, Clone)] pub struct FrameMetadata { /// Wall-clock capture time in microseconds, when `PTF_USER_TIMESTAMP` is enabled. pub user_timestamp: Option, /// Monotonically increasing frame identifier, when `PTF_FRAME_ID` is enabled. pub frame_id: Option, + /// Arbitrary application-supplied bytes, when `PTF_USER_DATA` is enabled. + /// + /// Bounded by the packet trailer size budget (~232 bytes when the other + /// features are also active); oversize payloads are dropped on the send + /// side rather than truncated. + pub user_data: Option>, } #[derive(Debug)] diff --git a/livekit-ffi-node-bindings/proto/track_pb.d.ts b/livekit-ffi-node-bindings/proto/track_pb.d.ts index 8aaf93650..03c2c8027 100644 --- a/livekit-ffi-node-bindings/proto/track_pb.d.ts +++ b/livekit-ffi-node-bindings/proto/track_pb.d.ts @@ -148,6 +148,11 @@ export declare enum FrameMetadataFeature { * @generated from enum value: FMF_FRAME_ID = 1; */ FMF_FRAME_ID = 1, + + /** + * @generated from enum value: FMF_USER_DATA = 2; + */ + FMF_USER_DATA = 2, } /** diff --git a/livekit-ffi-node-bindings/proto/track_pb.js b/livekit-ffi-node-bindings/proto/track_pb.js index 8ecc19d4d..55d07ff5c 100644 --- a/livekit-ffi-node-bindings/proto/track_pb.js +++ b/livekit-ffi-node-bindings/proto/track_pb.js @@ -87,6 +87,7 @@ const FrameMetadataFeature = /*@__PURE__*/ proto2.makeEnum( [ {no: 0, name: "FMF_USER_TIMESTAMP"}, {no: 1, name: "FMF_FRAME_ID"}, + {no: 2, name: "FMF_USER_DATA"}, ], ); diff --git a/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts b/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts index e0ec12f19..76a56bb4d 100644 --- a/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts +++ b/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts @@ -701,6 +701,11 @@ export declare class FrameMetadata extends Message { */ frameId?: number; + /** + * @generated from field: optional bytes user_data = 3; + */ + userData?: Uint8Array; + constructor(data?: PartialMessage); static readonly runtime: typeof proto2; diff --git a/livekit-ffi-node-bindings/proto/video_frame_pb.js b/livekit-ffi-node-bindings/proto/video_frame_pb.js index 331320682..8cffe1708 100644 --- a/livekit-ffi-node-bindings/proto/video_frame_pb.js +++ b/livekit-ffi-node-bindings/proto/video_frame_pb.js @@ -281,6 +281,7 @@ const FrameMetadata = /*@__PURE__*/ proto2.makeMessageType( () => [ { no: 1, name: "user_timestamp", kind: "scalar", T: 4 /* ScalarType.UINT64 */, opt: true }, { no: 2, name: "frame_id", kind: "scalar", T: 13 /* ScalarType.UINT32 */, opt: true }, + { no: 3, name: "user_data", kind: "scalar", T: 12 /* ScalarType.BYTES */, opt: true }, ], ); diff --git a/livekit-ffi/protocol/track.proto b/livekit-ffi/protocol/track.proto index b6eb77551..f952992c8 100644 --- a/livekit-ffi/protocol/track.proto +++ b/livekit-ffi/protocol/track.proto @@ -164,4 +164,5 @@ enum AudioTrackFeature { enum FrameMetadataFeature { FMF_USER_TIMESTAMP = 0; FMF_FRAME_ID = 1; + FMF_USER_DATA = 2; } diff --git a/livekit-ffi/protocol/video_frame.proto b/livekit-ffi/protocol/video_frame.proto index ff91fa3c6..a4b610e0e 100644 --- a/livekit-ffi/protocol/video_frame.proto +++ b/livekit-ffi/protocol/video_frame.proto @@ -157,6 +157,7 @@ message OwnedVideoBuffer { message FrameMetadata { optional uint64 user_timestamp = 1; optional uint32 frame_id = 2; + optional bytes user_data = 3; } // diff --git a/livekit-ffi/src/conversion/room.rs b/livekit-ffi/src/conversion/room.rs index d76313c24..9cdf873d8 100644 --- a/livekit-ffi/src/conversion/room.rs +++ b/livekit-ffi/src/conversion/room.rs @@ -44,6 +44,9 @@ fn frame_metadata_features_from_proto(features: Vec) -> FrameMetadataFeatur proto::FrameMetadataFeature::FmfFrameId => { frame_metadata_features.frame_id = true; } + proto::FrameMetadataFeature::FmfUserData => { + frame_metadata_features.user_data = true; + } } } diff --git a/livekit-ffi/src/conversion/track.rs b/livekit-ffi/src/conversion/track.rs index 9ff441b7e..53acc046a 100644 --- a/livekit-ffi/src/conversion/track.rs +++ b/livekit-ffi/src/conversion/track.rs @@ -174,7 +174,7 @@ impl From for proto::FrameMetadataFeatur proto::FrameMetadataFeature::FmfFrameId } livekit_protocol::PacketTrailerFeature::PtfUserData => { - unimplemented!("Not exposed yet") + proto::FrameMetadataFeature::FmfUserData } } } diff --git a/livekit-ffi/src/server/video_source.rs b/livekit-ffi/src/server/video_source.rs index 047443728..ee1410a72 100644 --- a/livekit-ffi/src/server/video_source.rs +++ b/livekit-ffi/src/server/video_source.rs @@ -29,11 +29,16 @@ impl FfiHandle for FfiVideoSource {} fn frame_metadata_from_proto(metadata: Option) -> Option { let metadata = metadata?; - let frame_metadata = - FrameMetadata { user_timestamp: metadata.user_timestamp, frame_id: metadata.frame_id }; + let frame_metadata = FrameMetadata { + user_timestamp: metadata.user_timestamp, + frame_id: metadata.frame_id, + user_data: metadata.user_data, + }; - (frame_metadata.user_timestamp.is_some() || frame_metadata.frame_id.is_some()) - .then_some(frame_metadata) + (frame_metadata.user_timestamp.is_some() + || frame_metadata.frame_id.is_some() + || frame_metadata.user_data.is_some()) + .then_some(frame_metadata) } impl FfiVideoSource { @@ -106,10 +111,12 @@ mod tests { let metadata = frame_metadata_from_proto(Some(proto::FrameMetadata { user_timestamp: Some(123), frame_id: Some(456), + user_data: Some(vec![7, 8, 9]), })) .unwrap(); assert_eq!(metadata.user_timestamp, Some(123)); assert_eq!(metadata.frame_id, Some(456)); + assert_eq!(metadata.user_data, Some(vec![7, 8, 9])); } } diff --git a/livekit-ffi/src/server/video_stream.rs b/livekit-ffi/src/server/video_stream.rs index 5b8453568..2cc69ae7f 100644 --- a/livekit-ffi/src/server/video_stream.rs +++ b/livekit-ffi/src/server/video_stream.rs @@ -41,6 +41,7 @@ fn frame_metadata_to_proto(metadata: Option) -> Option bool { features.iter().any(|f| { *f == PacketTrailerFeature::PtfUserTimestamp as i32 || *f == PacketTrailerFeature::PtfFrameId as i32 + || *f == PacketTrailerFeature::PtfUserData as i32 }) } @@ -375,4 +376,11 @@ mod tests { assert!(needs_video_receiver_packet_trailer_handler(&features)); } + + #[test] + fn receiver_packet_trailer_handler_is_needed_for_user_data() { + let features = [PacketTrailerFeature::PtfUserData as i32]; + + assert!(needs_video_receiver_packet_trailer_handler(&features)); + } } diff --git a/livekit/src/room/options.rs b/livekit/src/room/options.rs index 9cfb4ecda..c36ae1725 100644 --- a/livekit/src/room/options.rs +++ b/livekit/src/room/options.rs @@ -77,11 +77,12 @@ pub struct AudioPreset { pub struct FrameMetadataFeatures { pub user_timestamp: bool, pub frame_id: bool, + pub user_data: bool, } impl FrameMetadataFeatures { pub(crate) fn is_empty(&self) -> bool { - !self.user_timestamp && !self.frame_id + !self.user_timestamp && !self.frame_id && !self.user_data } pub(crate) fn to_proto(&self) -> Vec { @@ -95,6 +96,10 @@ impl FrameMetadataFeatures { features.push(proto::PacketTrailerFeature::PtfFrameId); } + if self.user_data { + features.push(proto::PacketTrailerFeature::PtfUserData); + } + features } } diff --git a/livekit/src/room/participant/local_participant.rs b/livekit/src/room/participant/local_participant.rs index 956bfc96c..12c4ddad9 100644 --- a/livekit/src/room/participant/local_participant.rs +++ b/livekit/src/room/participant/local_participant.rs @@ -1011,6 +1011,7 @@ mod tests { frame_metadata_features: FrameMetadataFeatures { user_timestamp: true, frame_id: false, + user_data: false, }, ..Default::default() }; diff --git a/livekit/tests/packet_trailer_test.rs b/livekit/tests/packet_trailer_test.rs index cb1b05c8c..eab9f7d77 100644 --- a/livekit/tests/packet_trailer_test.rs +++ b/livekit/tests/packet_trailer_test.rs @@ -14,9 +14,10 @@ //! Packet Trailer E2E Tests //! -//! These tests verify that user_timestamp and frame_id metadata survives the -//! full publish → SFU → subscribe WebRTC pipeline via the packet trailer -//! mechanism, both with and without E2EE. +//! These tests verify that user_timestamp, frame_id, and user_data metadata +//! survives the full publish → SFU → subscribe WebRTC pipeline via the packet +//! trailer mechanism, both with and without E2EE. They also verify that +//! oversize user_data is dropped (not truncated) on the send side. //! //! Run all tests (use --test-threads=1 to avoid local server flakiness): //! livekit-server --dev --node-ip 127.0.0.1 @@ -54,6 +55,13 @@ const TEST_HEIGHT: u32 = 480; struct PacketTrailerTestParams { attach_timestamp: bool, attach_frame_id: bool, + /// Bytes the publisher attaches to each frame. `None` means the + /// `user_data` feature is disabled and no bytes are sent. + user_data: Option>, + /// What the subscriber is expected to observe. `None` means the + /// receiver should see no `user_data` (e.g. the payload was dropped + /// because it exceeded the trailer budget). + expect_user_data: Option>, e2ee: bool, codec: VideoCodec, } @@ -65,6 +73,8 @@ async fn test_timestamp_only_vp8() -> Result<()> { run_packet_trailer_test(PacketTrailerTestParams { attach_timestamp: true, attach_frame_id: false, + user_data: None, + expect_user_data: None, e2ee: false, codec: VideoCodec::VP8, }) @@ -76,6 +86,8 @@ async fn test_frame_id_only_vp8() -> Result<()> { run_packet_trailer_test(PacketTrailerTestParams { attach_timestamp: false, attach_frame_id: true, + user_data: None, + expect_user_data: None, e2ee: false, codec: VideoCodec::VP8, }) @@ -87,6 +99,8 @@ async fn test_timestamp_and_frame_id_vp8() -> Result<()> { run_packet_trailer_test(PacketTrailerTestParams { attach_timestamp: true, attach_frame_id: true, + user_data: None, + expect_user_data: None, e2ee: false, codec: VideoCodec::VP8, }) @@ -98,12 +112,72 @@ async fn test_timestamp_and_frame_id_vp8_e2ee() -> Result<()> { run_packet_trailer_test(PacketTrailerTestParams { attach_timestamp: true, attach_frame_id: true, + user_data: None, + expect_user_data: None, e2ee: true, codec: VideoCodec::VP8, }) .await } +#[test_log::test(tokio::test)] +async fn test_user_data_only_vp8() -> Result<()> { + let payload = b"livekit-user-data".to_vec(); + run_packet_trailer_test(PacketTrailerTestParams { + attach_timestamp: false, + attach_frame_id: false, + user_data: Some(payload.clone()), + expect_user_data: Some(payload), + e2ee: false, + codec: VideoCodec::VP8, + }) + .await +} + +#[test_log::test(tokio::test)] +async fn test_user_data_with_timestamp_and_frame_id_vp8() -> Result<()> { + let payload = b"all-three-features".to_vec(); + run_packet_trailer_test(PacketTrailerTestParams { + attach_timestamp: true, + attach_frame_id: true, + user_data: Some(payload.clone()), + expect_user_data: Some(payload), + e2ee: false, + codec: VideoCodec::VP8, + }) + .await +} + +#[test_log::test(tokio::test)] +async fn test_user_data_vp8_e2ee() -> Result<()> { + let payload = b"encrypted-user-data".to_vec(); + run_packet_trailer_test(PacketTrailerTestParams { + attach_timestamp: true, + attach_frame_id: false, + user_data: Some(payload.clone()), + expect_user_data: Some(payload), + e2ee: true, + codec: VideoCodec::VP8, + }) + .await +} + +/// user_data that exceeds the remaining trailer budget must be dropped on the +/// send side (skip + warn), not truncated. The timestamp TLV is always present +/// so frames still carry metadata; user_data should simply be absent. +#[test_log::test(tokio::test)] +async fn test_user_data_oversize_dropped_vp8() -> Result<()> { + run_packet_trailer_test(PacketTrailerTestParams { + attach_timestamp: false, + attach_frame_id: false, + user_data: Some(vec![0xAB; 250]), + expect_user_data: None, + e2ee: false, + codec: VideoCodec::VP8, + }) + .await +} + // ==================== Implementation ==================== /// Publishes solid-color video frames with packet trailer metadata (user_timestamp @@ -142,6 +216,7 @@ async fn run_packet_trailer_test(params: PacketTrailerTestParams) -> Result<()> let mut frame_metadata_features = FrameMetadataFeatures::default(); frame_metadata_features.user_timestamp = params.attach_timestamp; frame_metadata_features.frame_id = params.attach_frame_id; + frame_metadata_features.user_data = params.user_data.is_some(); let rtc_source = NativeVideoSource::new(VideoResolution { width: TEST_WIDTH, height: TEST_HEIGHT }, false); @@ -168,8 +243,9 @@ async fn run_packet_trailer_test(params: PacketTrailerTestParams) -> Result<()> let rtc_source = rtc_source.clone(); let attach_ts = params.attach_timestamp; let attach_fid = params.attach_frame_id; + let user_data = params.user_data.clone(); async move { - publish_frames(stop_rx, rtc_source, attach_ts, attach_fid).await; + publish_frames(stop_rx, rtc_source, attach_ts, attach_fid, user_data).await; } }); @@ -191,6 +267,7 @@ async fn run_packet_trailer_test(params: PacketTrailerTestParams) -> Result<()> let mut stream = NativeVideoStream::new(remote_track.rtc_track()); let attach_ts = params.attach_timestamp; let attach_fid = params.attach_frame_id; + let expect_user_data = params.expect_user_data.clone(); let validate = async { let mut validated = 0; @@ -223,6 +300,21 @@ async fn run_packet_trailer_test(params: PacketTrailerTestParams) -> Result<()> seen_frame_ids.push(fid); } + match &expect_user_data { + Some(expected) => { + let got = + meta.user_data.as_ref().expect("Expected user_data in frame metadata"); + assert_eq!(got, expected, "user_data should round-trip unchanged"); + } + None => { + assert!( + meta.user_data.is_none(), + "Expected no user_data, got {:?}", + meta.user_data + ); + } + } + validated += 1; if validated >= FRAMES_TO_VALIDATE { break; @@ -276,6 +368,7 @@ async fn publish_frames( rtc_source: NativeVideoSource, attach_timestamp: bool, attach_frame_id: bool, + user_data: Option>, ) { use std::time::{SystemTime, UNIX_EPOCH}; @@ -307,8 +400,12 @@ async fn publish_frames( None }; - let frame_metadata = if user_ts.is_some() || fid.is_some() { - Some(FrameMetadata { user_timestamp: user_ts, frame_id: fid }) + let frame_metadata = if user_ts.is_some() || fid.is_some() || user_data.is_some() { + Some(FrameMetadata { + user_timestamp: user_ts, + frame_id: fid, + user_data: user_data.clone(), + }) } else { None }; diff --git a/webrtc-sys/include/livekit/packet_trailer.h b/webrtc-sys/include/livekit/packet_trailer.h index 5b30701eb..0515aee8d 100644 --- a/webrtc-sys/include/livekit/packet_trailer.h +++ b/webrtc-sys/include/livekit/packet_trailer.h @@ -62,11 +62,22 @@ constexpr size_t kTrailerEnvelopeSize = 5; // TLV tag IDs constexpr uint8_t kTagTimestampUs = 0x01; // value: 8 bytes big-endian uint64 constexpr uint8_t kTagFrameId = 0x02; // value: 4 bytes big-endian uint32 +constexpr uint8_t kTagUserData = 0x03; // value: arbitrary bytes, bounded + // by the remaining trailer budget + // (255 - fixed TLVs - envelope - 2) constexpr size_t kTimestampTlvSize = 10; // tag + len + 8-byte value constexpr size_t kFrameIdTlvSize = 6; // tag + len + 4-byte value +constexpr size_t kUserDataTlvHeaderSize = 2; // tag + len, before value bytes -// Trailer size varies because frame_id is omitted when it is unset (0). +// The trailer length is encoded in a single XORed byte, so the entire +// trailer (TLV region + envelope) can never exceed 255 bytes. user_data +// that would push the trailer past this budget is dropped (see +// AppendTrailer), never truncated. +constexpr size_t kPacketTrailerMaxTotal = 255; + +// Fixed-feature trailer size varies because frame_id is omitted when it is +// unset (0). user_data is variable-length and accounted for separately. constexpr size_t kPacketTrailerMinSize = kTimestampTlvSize + kTrailerEnvelopeSize; constexpr size_t kPacketTrailerMaxSize = @@ -76,6 +87,7 @@ struct PacketTrailerMetadata { uint64_t user_timestamp; uint32_t frame_id; uint32_t ssrc; // SSRC that produced this entry (for simulcast tracking) + std::vector user_data; // arbitrary app-supplied bytes (PTF_USER_DATA) }; /// Frame transformer that appends/extracts packet trailers. @@ -121,7 +133,8 @@ class PacketTrailerTransformer : public webrtc::FrameTransformerInterface { /// in the encoder pipeline. void store_frame_metadata(int64_t capture_timestamp_us, uint64_t user_timestamp, - uint32_t frame_id); + uint32_t frame_id, + rust::Slice user_data); /// Set the observer receiving sender-side publish timing events. void set_publish_timing_observer( @@ -168,7 +181,8 @@ class PacketTrailerTransformer : public webrtc::FrameTransformerInterface { std::vector AppendTrailer( webrtc::ArrayView data, uint64_t user_timestamp, - uint32_t frame_id); + uint32_t frame_id, + const std::vector& user_data); /// Extract and remove frame metadata trailer from frame data std::optional ExtractTrailer( @@ -237,10 +251,16 @@ class PacketTrailerHandler { /// lookup_timestamp() call. Returns 0 if no lookup succeeded. uint32_t last_lookup_frame_id() const; + /// Returns the user_data from the most recent successful + /// lookup_timestamp() call. Returns an empty vector if no lookup + /// succeeded or the frame carried no user_data. + rust::Vec last_lookup_user_data() const; + /// Store frame metadata for a given capture timestamp (sender side). void store_frame_metadata(int64_t capture_timestamp_us, uint64_t user_timestamp, - uint32_t frame_id) const; + uint32_t frame_id, + rust::Slice user_data) const; /// Set the observer receiving sender-side publish timing events. void set_publish_timing_observer( @@ -275,6 +295,7 @@ class PacketTrailerHandler { webrtc::scoped_refptr sender_; webrtc::scoped_refptr receiver_; mutable uint32_t last_frame_id_{0}; + mutable std::vector last_user_data_; }; // Factory functions for Rust FFI diff --git a/webrtc-sys/src/packet_trailer.cpp b/webrtc-sys/src/packet_trailer.cpp index 96ff6418f..e179ae308 100644 --- a/webrtc-sys/src/packet_trailer.cpp +++ b/webrtc-sys/src/packet_trailer.cpp @@ -97,7 +97,7 @@ void PacketTrailerTransformer::TransformSend( std::vector new_data; if (enabled_.load()) { new_data = AppendTrailer(data, meta_to_embed.user_timestamp, - meta_to_embed.frame_id); + meta_to_embed.frame_id, meta_to_embed.user_data); frame->SetData(webrtc::ArrayView(new_data)); } @@ -238,11 +238,37 @@ void PacketTrailerTransformer::TransformReceive( std::vector PacketTrailerTransformer::AppendTrailer( webrtc::ArrayView data, uint64_t user_timestamp, - uint32_t frame_id) { + uint32_t frame_id, + const std::vector& user_data) { const bool has_frame_id = frame_id != 0; - const size_t trailer_len = kTimestampTlvSize + - (has_frame_id ? kFrameIdTlvSize : 0) + - kTrailerEnvelopeSize; + const size_t fixed_len = kTimestampTlvSize + + (has_frame_id ? kFrameIdTlvSize : 0) + + kTrailerEnvelopeSize; + + // user_data is embedded only if it fits the remaining trailer budget. + // The whole trailer length is a single byte (255 max), so after the + // always-present timestamp TLV, the optional frame_id TLV, the envelope + // and this TLV's own 2-byte header, the value can never approach 255 -- + // the real cap is (255 - fixed_len - 2), at most ~238 bytes. Oversize + // user_data is dropped + logged rather than truncated, so the frame is + // never silently corrupted. (This bound is always < 256, so the 1-byte + // TLV length field below can't overflow.) + const size_t user_data_budget = + kPacketTrailerMaxTotal > fixed_len + kUserDataTlvHeaderSize + ? kPacketTrailerMaxTotal - fixed_len - kUserDataTlvHeaderSize + : 0; + const bool embed_user_data = + !user_data.empty() && user_data.size() <= user_data_budget; + if (!user_data.empty() && !embed_user_data) { + RTC_LOG(LS_WARNING) + << "PacketTrailerTransformer::AppendTrailer dropping user_data: " + << user_data.size() << " bytes exceeds remaining trailer budget (" + << user_data_budget << " bytes)"; + } + + const size_t trailer_len = + fixed_len + + (embed_user_data ? kUserDataTlvHeaderSize + user_data.size() : 0); std::vector result; result.reserve(data.size() + trailer_len); @@ -270,6 +296,15 @@ std::vector PacketTrailerTransformer::AppendTrailer( } } + if (embed_user_data) { + // TLV: user_data (tag=0x03, len=N, N arbitrary bytes) + result.push_back(kTagUserData ^ 0xFF); + result.push_back(static_cast(user_data.size()) ^ 0xFF); + for (uint8_t byte : user_data) { + result.push_back(static_cast(byte ^ 0xFF)); + } + } + // Envelope: trailer_len (1B, XORed) + magic (4B, NOT XORed) result.push_back(static_cast(trailer_len ^ 0xFF)); result.insert(result.end(), std::begin(kPacketTrailerMagic), @@ -333,6 +368,12 @@ std::optional PacketTrailerTransformer::ExtractTrailer( } meta.frame_id = fid; found_any = true; + } else if (tag == kTagUserData) { + meta.user_data.resize(len); + for (uint8_t i = 0; i < len; ++i) { + meta.user_data[i] = static_cast(val[i] ^ 0xFF); + } + found_any = true; } // Unknown tags are silently skipped. @@ -401,7 +442,8 @@ std::optional PacketTrailerTransformer::lookup_frame_meta void PacketTrailerTransformer::store_frame_metadata( int64_t capture_timestamp_us, uint64_t user_timestamp, - uint32_t frame_id) { + uint32_t frame_id, + rust::Slice user_data) { // Truncate to millisecond precision to match what WebRTC stores // internally. The encoder pipeline converts the VideoFrame's // timestamp_us to capture_time_ms_ = timestamp_us / 1000, and @@ -426,7 +468,9 @@ void PacketTrailerTransformer::store_frame_metadata( if (send_map_.find(key) == send_map_.end()) { send_map_order_.push_back(key); } - send_map_[key] = PacketTrailerMetadata{user_timestamp, frame_id, 0}; + send_map_[key] = PacketTrailerMetadata{ + user_timestamp, frame_id, 0, + std::vector(user_data.begin(), user_data.end())}; } void PacketTrailerTransformer::set_publish_timing_observer( @@ -554,6 +598,7 @@ uint64_t PacketTrailerHandler::lookup_timestamp(uint32_t rtp_timestamp) const { auto meta = transformer_->lookup_frame_metadata(rtp_timestamp); if (meta.has_value()) { last_frame_id_ = meta->frame_id; + last_user_data_ = meta->user_data; return meta->user_timestamp; } return UINT64_MAX; @@ -563,11 +608,22 @@ uint32_t PacketTrailerHandler::last_lookup_frame_id() const { return last_frame_id_; } +rust::Vec PacketTrailerHandler::last_lookup_user_data() const { + rust::Vec out; + out.reserve(last_user_data_.size()); + for (uint8_t byte : last_user_data_) { + out.push_back(byte); + } + return out; +} + void PacketTrailerHandler::store_frame_metadata( int64_t capture_timestamp_us, uint64_t user_timestamp, - uint32_t frame_id) const { - transformer_->store_frame_metadata(capture_timestamp_us, user_timestamp, frame_id); + uint32_t frame_id, + rust::Slice user_data) const { + transformer_->store_frame_metadata(capture_timestamp_us, user_timestamp, + frame_id, user_data); } void PacketTrailerHandler::set_publish_timing_observer( diff --git a/webrtc-sys/src/packet_trailer.rs b/webrtc-sys/src/packet_trailer.rs index 3a3394415..ceabb5fa8 100644 --- a/webrtc-sys/src/packet_trailer.rs +++ b/webrtc-sys/src/packet_trailer.rs @@ -82,12 +82,17 @@ pub mod ffi { /// lookup_timestamp() call. fn last_lookup_frame_id(self: &PacketTrailerHandler) -> u32; + /// Returns the user_data from the most recent successful + /// lookup_timestamp() call. Empty if none. + fn last_lookup_user_data(self: &PacketTrailerHandler) -> Vec; + /// Store frame metadata for a given capture timestamp (sender side). fn store_frame_metadata( self: &PacketTrailerHandler, capture_timestamp_us: i64, user_timestamp: u64, frame_id: u32, + user_data: &[u8], ); /// Set a callback for sender-side publish timing events. diff --git a/webrtc-sys/src/video_track.cpp b/webrtc-sys/src/video_track.cpp index 4a25ab1ef..44d1351fa 100644 --- a/webrtc-sys/src/video_track.cpp +++ b/webrtc-sys/src/video_track.cpp @@ -151,7 +151,9 @@ bool VideoTrackSource::InternalSource::on_captured_frame( if (frame_metadata.has_packet_trailer && packet_trailer_handler_) { packet_trailer_handler_->store_frame_metadata( aligned_timestamp_us, frame_metadata.user_timestamp, - frame_metadata.frame_id); + frame_metadata.frame_id, + rust::Slice(frame_metadata.user_data.data(), + frame_metadata.user_data.size())); } webrtc::scoped_refptr buffer = diff --git a/webrtc-sys/src/video_track.rs b/webrtc-sys/src/video_track.rs index 9453d5766..6c0a584e9 100644 --- a/webrtc-sys/src/video_track.rs +++ b/webrtc-sys/src/video_track.rs @@ -47,6 +47,7 @@ pub mod ffi { pub has_packet_trailer: bool, pub user_timestamp: u64, pub frame_id: u32, + pub user_data: Vec, } extern "C++" {