1+ //! Audio encoding pipeline: PCM samples -> resample -> Opus -> MoQ.
2+ //!
3+ //! The Game Boy APU outputs unsigned 8-bit stereo PCM at ~44.1kHz.
4+ //! This module resamples to 48kHz and encodes to Opus (20ms frames)
5+ //! using ffmpeg-next, then publishes via `moq_mux::import::Opus`.
6+ //!
7+ //! Audio timestamps are anchored to the same wall clock as video,
8+ //! ensuring A/V sync. The epoch is set on the first `push_samples()`
9+ //! call and reset on pause/resume.
10+
111use anyhow:: { Context , Result } ;
212
313/// Audio encoding pipeline: PCM samples -> Opus -> MoQ.
@@ -9,27 +19,40 @@ pub struct AudioEncoder {
919
1020 resampler : Option < ffmpeg_next:: software:: resampling:: Context > ,
1121
12- // Input samples at input_sample_rate, waiting to be resampled.
22+ /// Input samples at input_sample_rate, waiting to be resampled.
1323 input_buffer : Vec < i16 > ,
14- // Resampled samples at OPUS_SAMPLE_RATE, waiting to be encoded.
24+ /// Resampled samples at OPUS_SAMPLE_RATE, waiting to be encoded.
1525 encode_buffer : Vec < i16 > ,
1626
1727 frame_size : usize ,
1828 frame_count : u64 ,
1929 input_sample_rate : u32 ,
2030
21- // Set once on the first push_samples call.
22- // Audio timestamps are: epoch + frame_count * frame_duration.
23- // This ensures exactly contiguous frames with no gaps.
31+ /// Set once on the first `push_samples()` call.
32+ ///
33+ /// Audio timestamps are computed as: `epoch + frame_count * frame_duration`.
34+ /// This produces exactly contiguous frames with no gaps, regardless of when
35+ /// `push_samples()` is called. The epoch accounts for samples already
36+ /// buffered (not yet encoded) at the time of initialization:
37+ ///
38+ /// ```text
39+ /// epoch = wall_clock_elapsed - (buffered_samples / sample_rate)
40+ /// ```
41+ ///
42+ /// This ensures the first encoded frame's PTS matches where it would have
43+ /// been if encoding had started from the very beginning.
2444 epoch : Option < u64 > ,
2545}
2646
27- /// Target Opus sample rate.
47+ /// Target Opus sample rate (standard for Opus) .
2848const OPUS_SAMPLE_RATE : u32 = 48000 ;
2949/// Opus frame duration: 20ms at 48kHz = 960 samples per channel.
3050const OPUS_FRAME_SAMPLES : usize = 960 ;
31- /// GB APU outputs stereo.
51+ /// Game Boy APU outputs stereo audio .
3252const CHANNELS : u32 = 2 ;
53+ /// Opus encoding bitrate. 64kbps is reasonable for stereo Game Boy
54+ /// audio (simple waveforms, limited frequency range).
55+ const OPUS_BITRATE : usize = 64000 ;
3356
3457impl AudioEncoder {
3558 pub fn new (
@@ -57,7 +80,7 @@ impl AudioEncoder {
5780 ) ) ;
5881 enc. set_channel_layout ( ffmpeg_next:: ChannelLayout :: STEREO ) ;
5982 enc. set_time_base ( ffmpeg_next:: Rational :: new ( 1 , OPUS_SAMPLE_RATE as i32 ) ) ;
60- enc. set_bit_rate ( 64000 ) ;
83+ enc. set_bit_rate ( OPUS_BITRATE ) ;
6184
6285 let ffmpeg_encoder = enc. open ( ) ?;
6386 let frame_size = ffmpeg_encoder. frame_size ( ) as usize ;
@@ -121,7 +144,6 @@ impl AudioEncoder {
121144 // Convert u8 (unsigned, center=128) to i16 (signed, center=0).
122145 let i16_samples: Vec < i16 > = samples. iter ( ) . map ( |& s| ( ( s as i16 ) - 128 ) * 256 ) . collect ( ) ;
123146
124- // Count stereo samples from emulator (i16 values / 2 channels).
125147 self . input_buffer . extend_from_slice ( & i16_samples) ;
126148
127149 // Resample input to OPUS_SAMPLE_RATE first, then encode in frame_size chunks.
@@ -131,6 +153,8 @@ impl AudioEncoder {
131153 let frame_duration_us = self . frame_size as u64 * 1_000_000 / OPUS_SAMPLE_RATE as u64 ;
132154
133155 // Initialize epoch on first call so audio timestamps align with video.
156+ // Subtract buffered time so the first frame's PTS accounts for samples
157+ // that were accumulated before encoding begins.
134158 if self . epoch . is_none ( ) && self . encode_buffer . len ( ) >= samples_per_frame {
135159 let buffered_us = self . encode_buffer . len ( ) as u64 * 1_000_000 / ( OPUS_SAMPLE_RATE as u64 * CHANNELS as u64 ) ;
136160 self . epoch = Some ( ( elapsed. as_micros ( ) as u64 ) . saturating_sub ( buffered_us) ) ;
@@ -165,6 +189,10 @@ impl AudioEncoder {
165189 ) ;
166190 frame. set_rate ( self . input_sample_rate ) ;
167191
192+ // Copy i16 samples into the frame's byte buffer.
193+ // SAFETY: i16 is always 2 bytes, little-endian on all supported platforms.
194+ // The source and destination buffers are properly aligned (Vec<i16> guarantees
195+ // alignment, and we're reading as bytes which has no alignment requirement).
168196 let data = frame. data_mut ( 0 ) ;
169197 let bytes: & [ u8 ] =
170198 unsafe { std:: slice:: from_raw_parts ( self . input_buffer . as_ptr ( ) as * const u8 , self . input_buffer . len ( ) * 2 ) } ;
@@ -185,7 +213,10 @@ impl AudioEncoder {
185213 ) ;
186214 resampler. run ( & frame, & mut resampled) ?;
187215
188- // Extract resampled i16 samples.
216+ // Extract resampled i16 samples from the frame's byte buffer.
217+ // SAFETY: Same as above — reinterpreting the frame's u8 data as i16.
218+ // ffmpeg guarantees the audio data is in s16 packed format (set above),
219+ // so the byte layout is valid i16 values.
189220 let out_samples = resampled. samples ( ) * CHANNELS as usize ;
190221 let out_data = resampled. data ( 0 ) ;
191222 let out_i16: & [ i16 ] = unsafe { std:: slice:: from_raw_parts ( out_data. as_ptr ( ) as * const i16 , out_samples) } ;
@@ -204,7 +235,7 @@ impl AudioEncoder {
204235 frame. set_rate ( OPUS_SAMPLE_RATE ) ;
205236 frame. set_pts ( Some ( self . frame_count as i64 * self . frame_size as i64 ) ) ;
206237
207- // Copy sample data into the frame.
238+ // SAFETY: Same as resample() — copying i16 data as bytes into the frame.
208239 let data = frame. data_mut ( 0 ) ;
209240 let bytes: & [ u8 ] = unsafe { std:: slice:: from_raw_parts ( samples. as_ptr ( ) as * const u8 , samples. len ( ) * 2 ) } ;
210241 data[ ..bytes. len ( ) ] . copy_from_slice ( bytes) ;
0 commit comments