Skip to content

Commit 8a9bcc6

Browse files
committed
feat(audio): wire phase into voice — VoiceFrame (21B) ties all loose ends
VoiceArchetype::modulate_with_phase():
- Phase coherence → sharpen articulation channels (8-11).
- Phase gradient → boost prosody channels (12-15).
- Modulation is proportional (relative pressure within), not overwriting (no brute force).

VoiceFrame (21 bytes): RvqFrame (17B) + PhaseDescriptor (4B) = complete synthesis unit. is_voiced() / is_attack() are delegated to phase. Serialize/deserialize roundtrip.

This closes the loop:
- Analysis: PCM → AudioFrame (48B) + Phase (4B) = 52B
- Synthesis: VoiceFrame (21B) = RVQ + Phase
- Bridge: Qualia17D ↔ Mode ↔ band weights ↔ AudioFrame

3 new tests (48 audio tests total, all passing).

https://claude.ai/code/session_01NYGrxVopyszZYgLBxe4hgj
1 parent 809cd1c commit 8a9bcc6

1 file changed

Lines changed: 110 additions & 0 deletions

File tree

src/hpc/audio/voice.rs

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,38 @@ impl VoiceArchetype {
134134
pub fn prosody_energy(&self) -> u32 {
135135
(12..16).map(|i| self.channels[i].unsigned_abs() as u32).sum()
136136
}
137+
138+
/// Modulate archetype with phase dynamics.
139+
///
140+
/// Phase coherence sharpens articulation channels (8-11).
141+
/// Phase gradient boosts prosody channels (12-15).
142+
/// This is the bridge: amplitude identity (archetype) + temporal
143+
/// dynamics (phase) = complete voice characterization.
144+
///
145+
/// The phase descriptor IS relative pressure within — it modulates
146+
/// the archetype's channels proportionally, not by overwriting.
147+
pub fn modulate_with_phase(&self, phase: &super::phase::PhaseDescriptor) -> Self {
148+
let mut out = *self;
149+
150+
// Phase coherence → sharpen articulation (high coherence = crisp)
151+
let coherence = phase.bytes[0] as i16; // 0-255
152+
for i in 8..12 {
153+
// Scale articulation channels toward their sign direction
154+
let sign = if out.channels[i] >= 0 { 1i16 } else { -1 };
155+
let boost = sign * (coherence - 128) / 8; // ±16 max
156+
out.channels[i] = (out.channels[i] as i16 + boost).clamp(-127, 127) as i8;
157+
}
158+
159+
// Phase gradient → boost prosody dynamics (high gradient = dynamic)
160+
let gradient = phase.bytes[1] as i16;
161+
for i in 12..16 {
162+
let sign = if out.channels[i] >= 0 { 1i16 } else { -1 };
163+
let boost = sign * (gradient - 128) / 8;
164+
out.channels[i] = (out.channels[i] as i16 + boost).clamp(-127, 127) as i8;
165+
}
166+
167+
out
168+
}
137169
}
138170

139171
/// VoiceCodebook: collection of voice archetypes for HHTL routing.
@@ -249,6 +281,53 @@ impl RvqFrame {
249281
}
250282
}
251283

284+
/// Complete voice frame: RVQ codes + phase dynamics.
285+
///
286+
/// The full 21-byte nonverbal unit:
287+
/// RvqFrame (17B): WHAT the voice is doing (identity + spectral + detail)
288+
/// PhaseDescriptor (4B): HOW the harmonics relate in time
289+
///
290+
/// This is the minimum viable unit for lossless nonverbal transmission.
291+
/// AudioFrame (48B) + PhaseDescriptor (4B) = 52B is the analysis frame.
292+
/// VoiceFrame (21B) is the compressed synthesis frame.
293+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
294+
pub struct VoiceFrame {
295+
pub rvq: RvqFrame,
296+
pub phase: super::phase::PhaseDescriptor,
297+
}
298+
299+
impl VoiceFrame {
300+
pub const BYTE_SIZE: usize = RvqFrame::BYTE_SIZE + 4; // 21 bytes
301+
302+
pub fn to_bytes(&self) -> [u8; Self::BYTE_SIZE] {
303+
let mut bytes = [0u8; Self::BYTE_SIZE];
304+
bytes[..17].copy_from_slice(&self.rvq.to_bytes());
305+
bytes[17..21].copy_from_slice(&self.phase.bytes);
306+
bytes
307+
}
308+
309+
pub fn from_bytes(bytes: &[u8; Self::BYTE_SIZE]) -> Self {
310+
let mut rvq_bytes = [0u8; 17];
311+
rvq_bytes.copy_from_slice(&bytes[..17]);
312+
let mut phase_bytes = [0u8; 4];
313+
phase_bytes.copy_from_slice(&bytes[17..21]);
314+
VoiceFrame {
315+
rvq: RvqFrame::from_bytes(&rvq_bytes),
316+
phase: super::phase::PhaseDescriptor { bytes: phase_bytes },
317+
}
318+
}
319+
320+
/// Is this a voiced frame? (delegates to phase)
321+
pub fn is_voiced(&self) -> bool {
322+
self.phase.is_voiced()
323+
}
324+
325+
/// Is this an attack/plosive? (delegates to phase)
326+
pub fn is_attack(&self) -> bool {
327+
self.phase.is_attack()
328+
}
329+
}
330+
252331
#[cfg(test)]
253332
mod tests {
254333
use super::*;
@@ -312,6 +391,37 @@ mod tests {
312391
assert_eq!(frame, recovered);
313392
}
314393

394+
#[test]
fn phase_modulation_changes_articulation() {
    let base = VoiceArchetype {
        channels: [0, 0, 0, 0, 0, 0, 0, 0, 50, 50, 50, 50, 0, 0, 0, 0],
    };

    // Coherence 255 → offset (255 - 128) / 8 = +15; gradient 128 → offset 0.
    let high_coh = super::super::phase::PhaseDescriptor { bytes: [255, 128, 128, 128] };
    let boosted = base.modulate_with_phase(&high_coh);
    assert_eq!(
        boosted.channels[8..12],
        [65i8; 4],
        "High coherence should boost articulation channels by 15"
    );
    // Everything outside the articulation band must be left alone here:
    // channels 0-7 are never modulated and a gradient of 128 is neutral.
    assert_eq!(boosted.channels[..8], base.channels[..8]);
    assert_eq!(boosted.channels[12..], base.channels[12..]);

    // Coherence 0 → offset (0 - 128) / 8 = -16, pulling channels toward zero.
    let low_coh = super::super::phase::PhaseDescriptor { bytes: [0, 128, 128, 128] };
    let softened = base.modulate_with_phase(&low_coh);
    assert_eq!(
        softened.channels[8..12],
        [34i8; 4],
        "Low coherence should soften articulation channels by 16"
    );
}
407+
408+
#[test]
fn voice_frame_roundtrip() {
    let frame = VoiceFrame {
        rvq: RvqFrame { archetype: 7, coarse: [1; 8], fine: [2; 8] },
        phase: super::super::phase::PhaseDescriptor { bytes: [200, 50, 100, 30] },
    };

    let bytes = frame.to_bytes();
    assert_eq!(bytes.len(), VoiceFrame::BYTE_SIZE);

    // Pin the wire layout as well as the roundtrip: a symmetric mistake in
    // to_bytes/from_bytes would still roundtrip, so check that the RVQ bytes
    // come first and the phase bytes come last.
    assert_eq!(bytes[..RvqFrame::BYTE_SIZE], frame.rvq.to_bytes());
    assert_eq!(bytes[RvqFrame::BYTE_SIZE..], frame.phase.bytes);

    let recovered = VoiceFrame::from_bytes(&bytes);
    assert_eq!(frame, recovered);
}
420+
#[test]
fn voice_frame_size() {
    // Guards the serialized size: 17 RVQ bytes plus 4 phase bytes.
    let expected_len: usize = 21;
    assert_eq!(
        VoiceFrame::BYTE_SIZE,
        expected_len,
        "VoiceFrame should be 21 bytes (17 RVQ + 4 phase)"
    );
}
424+
315425
#[test]
316426
fn distance_table_symmetric() {
317427
let entries = vec![

0 commit comments

Comments
 (0)