Skip to content

Commit 3b9781d

Browse files
committed
feat(streaming): Add chord progression detection and improve key detection
- Add real-time chord detection using triad template correlation
- Track chord changes with a minimum duration threshold (0.2 s)
- Implement Krumhansl-Schmuckler key detection with profile correlation
- Export chord progression data through the WASM bindings
- Add a StreamAnalyzer TypeScript class with full type definitions
- Increase the WASM stack size to 8 MB for streaming workloads
- Add comprehensive streaming tests for chord/key detection
1 parent e3008a1 commit 3b9781d

8 files changed

Lines changed: 710 additions & 13 deletions

File tree

js/index.ts

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,62 @@ interface WasmPitchResult {
156156
meanF0: number;
157157
}
158158

159+
// Streaming types
160+
// Raw chord-change record, used as the element type of
// WasmProgressiveEstimate.chordProgression. StreamAnalyzer.stats() copies each
// field into a public ChordChange and casts root/quality to the public types.
interface WasmChordChange {
161+
root: number; // cast to PitchClass in StreamAnalyzer.stats()
162+
quality: number; // cast to ChordQuality in StreamAnalyzer.stats()
163+
startTime: number; // NOTE(review): presumably seconds from stream start — confirm
164+
confidence: number; // NOTE(review): value range is not visible in this file — confirm
165+
}
166+
167+
// Raw progressive estimate (BPM, key, chord) as exposed on
// WasmAnalyzerStats.estimate. StreamAnalyzer.stats() converts it field by field
// into the public ProgressiveEstimate.
interface WasmProgressiveEstimate {
168+
bpm: number;
169+
bpmConfidence: number;
170+
bpmCandidateCount: number;
171+
key: number; // cast to PitchClass in StreamAnalyzer.stats()
172+
keyMinor: boolean; // NOTE(review): presumably true when the detected key is minor — confirm
173+
keyConfidence: number;
174+
chordRoot: number; // cast to PitchClass in StreamAnalyzer.stats()
175+
chordQuality: number; // cast to ChordQuality in StreamAnalyzer.stats()
176+
chordConfidence: number;
177+
chordProgression: WasmChordChange[]; // mapped element-wise to ChordChange[] in stats()
178+
accumulatedSeconds: number;
179+
usedFrames: number;
180+
updated: boolean; // NOTE(review): meaning of this flag is not visible here — confirm
181+
}
182+
183+
// Raw return value of WasmStreamAnalyzer.stats(). StreamAnalyzer.stats() copies
// the three counters unchanged and converts `estimate` to the public type.
interface WasmAnalyzerStats {
184+
totalFrames: number;
185+
totalSamples: number;
186+
durationSeconds: number;
187+
estimate: WasmProgressiveEstimate; // converted to ProgressiveEstimate in stats()
188+
}
189+
190+
// Raw return value of WasmStreamAnalyzer.readFramesSoa(). StreamAnalyzer.readFrames()
// returns this object as-is, typed as the public FrameBuffer (same field list).
// NOTE(review): array lengths and memory layout are not visible in this file — confirm
// against the WASM bindings before indexing into mel/chroma.
interface WasmFrameBuffer {
191+
nFrames: number;
192+
timestamps: Float32Array;
193+
mel: Float32Array;
194+
chroma: Float32Array;
195+
onsetStrength: Float32Array;
196+
rmsEnergy: Float32Array;
197+
spectralCentroid: Float32Array;
198+
spectralFlatness: Float32Array;
199+
}
200+
201+
// Instance shape of the object produced by `new module.StreamAnalyzer(...)`.
// The exported StreamAnalyzer class holds one of these and forwards to it.
interface WasmStreamAnalyzer {
202+
process: (samples: Float32Array) => void; // forwarded from StreamAnalyzer.process()
203+
processWithOffset: (samples: Float32Array, sampleOffset: number) => void; // forwarded from StreamAnalyzer.processWithOffset()
204+
availableFrames: () => number; // forwarded from StreamAnalyzer.availableFrames()
205+
readFramesSoa: (maxFrames: number) => WasmFrameBuffer; // backs StreamAnalyzer.readFrames()
206+
readFramesU8: (maxFrames: number) => unknown; // result shape untyped; not wrapped by the StreamAnalyzer class
207+
readFramesI16: (maxFrames: number) => unknown; // result shape untyped; not wrapped by the StreamAnalyzer class
208+
reset: (baseSampleOffset?: number) => void; // StreamAnalyzer.reset() always passes a value (default 0)
209+
stats: () => WasmAnalyzerStats; // converted to AnalyzerStats by StreamAnalyzer.stats()
210+
frameCount: () => number; // forwarded from StreamAnalyzer.frameCount()
211+
currentTime: () => number; // forwarded from StreamAnalyzer.currentTime()
212+
delete: () => void; // called by StreamAnalyzer.dispose()
213+
}
214+
159215
// Types for Emscripten module with embind exports
160216
interface SonareModule {
161217
// Quick API
@@ -295,6 +351,18 @@ interface SonareModule {
295351

296352
// Core - Resample
297353
resample: (samples: Float32Array, srcSr: number, targetSr: number) => Float32Array;
354+
355+
// Streaming - StreamAnalyzer class
356+
StreamAnalyzer: new (
357+
sampleRate: number,
358+
nFft: number,
359+
hopLength: number,
360+
nMels: number,
361+
computeMel: boolean,
362+
computeChroma: boolean,
363+
computeOnset: boolean,
364+
emitEveryNFrames: number,
365+
) => WasmStreamAnalyzer;
298366
}
299367

300368
// ============================================================================
@@ -1283,6 +1351,227 @@ export function resample(samples: Float32Array, srcSr: number, targetSr: number)
12831351
return module.resample(samples, srcSr, targetSr);
12841352
}
12851353

1354+
// ============================================================================
1355+
// Streaming Types
1356+
// ============================================================================
1357+
1358+
/**
1359+
* A detected chord change in the progression
1360+
*/
1361+
export interface ChordChange {
1362+
root: PitchClass; // produced by casting WasmChordChange.root in StreamAnalyzer.stats()
1363+
quality: ChordQuality; // produced by casting WasmChordChange.quality in StreamAnalyzer.stats()
1364+
startTime: number; // NOTE(review): presumably seconds from stream start — confirm
1365+
confidence: number; // NOTE(review): value range is not visible in this file — confirm
1366+
}
1367+
1368+
/**
1369+
* Progressive estimation results for BPM, Key, and Chord
1370+
*/
1371+
export interface ProgressiveEstimate {
1372+
bpm: number;
1373+
bpmConfidence: number;
1374+
bpmCandidateCount: number;
1375+
key: PitchClass; // cast from the raw numeric key in StreamAnalyzer.stats()
1376+
keyMinor: boolean; // NOTE(review): presumably true when the detected key is minor — confirm
1377+
keyConfidence: number;
1378+
chordRoot: PitchClass; // cast from the raw numeric chordRoot in StreamAnalyzer.stats()
1379+
chordQuality: ChordQuality; // cast from the raw numeric chordQuality in StreamAnalyzer.stats()
1380+
chordConfidence: number;
1381+
chordProgression: ChordChange[]; // fresh array built by StreamAnalyzer.stats() on every call
1382+
accumulatedSeconds: number;
1383+
usedFrames: number;
1384+
updated: boolean; // NOTE(review): meaning of this flag is not visible here — confirm
1385+
}
1386+
1387+
/**
1388+
* Statistics and current state of the analyzer
1389+
*/
1390+
export interface AnalyzerStats {
1391+
totalFrames: number; // copied unchanged from the WASM stats object
1392+
totalSamples: number; // copied unchanged from the WASM stats object
1393+
durationSeconds: number; // copied unchanged from the WASM stats object
1394+
estimate: ProgressiveEstimate; // rebuilt field by field in StreamAnalyzer.stats()
1395+
}
1396+
1397+
/**
1398+
* Frame buffer with analysis results
1399+
*/
1400+
// Returned by StreamAnalyzer.readFrames(), which passes through the object
// produced by the WASM readFramesSoa() call without copying.
// NOTE(review): array lengths and memory layout are not visible in this file — confirm.
export interface FrameBuffer {
1401+
nFrames: number;
1402+
timestamps: Float32Array;
1403+
mel: Float32Array;
1404+
chroma: Float32Array;
1405+
onsetStrength: Float32Array;
1406+
rmsEnergy: Float32Array;
1407+
spectralCentroid: Float32Array;
1408+
spectralFlatness: Float32Array;
1409+
}
1410+
1411+
/**
1412+
* Configuration for StreamAnalyzer
1413+
*/
1414+
// Only sampleRate is required; the StreamAnalyzer constructor fills every
// omitted option with the default noted on each field.
export interface StreamConfig {
1415+
sampleRate: number; // required; passed through to the WASM constructor as-is
1416+
nFft?: number; // defaults to 2048 in the StreamAnalyzer constructor
1417+
hopLength?: number; // defaults to 512 in the StreamAnalyzer constructor
1418+
nMels?: number; // defaults to 128 in the StreamAnalyzer constructor
1419+
computeMel?: boolean; // defaults to true in the StreamAnalyzer constructor
1420+
computeChroma?: boolean; // defaults to true in the StreamAnalyzer constructor
1421+
computeOnset?: boolean; // defaults to true in the StreamAnalyzer constructor
1422+
emitEveryNFrames?: number; // defaults to 1 in the StreamAnalyzer constructor
1423+
}
1424+
1425+
// ============================================================================
1426+
// StreamAnalyzer Class
1427+
// ============================================================================
1428+
1429+
/**
 * Real-time streaming audio analyzer.
 *
 * Thin wrapper around the WASM-side `StreamAnalyzer` object. The wrapper owns
 * that object: call {@link StreamAnalyzer.dispose} when done with it. After
 * disposal every other method throws a descriptive `Error` instead of
 * forwarding to the deleted WASM handle.
 *
 * @example
 * ```typescript
 * import { init, StreamAnalyzer } from '@libraz/sonare';
 *
 * await init();
 *
 * const analyzer = new StreamAnalyzer({ sampleRate: 44100 });
 *
 * // In audio processing callback
 * analyzer.process(samples);
 *
 * // Get current analysis state
 * const stats = analyzer.stats();
 * console.log('BPM:', stats.estimate.bpm);
 * console.log('Key:', stats.estimate.key);
 * console.log('Chord progression:', stats.estimate.chordProgression);
 *
 * // Release the WASM object when finished
 * analyzer.dispose();
 * ```
 */
export class StreamAnalyzer {
  /** Underlying WASM object; set to `null` once `dispose()` has run. */
  private analyzer: WasmStreamAnalyzer | null;

  /**
   * Create a new StreamAnalyzer.
   *
   * Omitted options fall back to: `nFft` 2048, `hopLength` 512, `nMels` 128,
   * `computeMel` / `computeChroma` / `computeOnset` true, `emitEveryNFrames` 1.
   *
   * @param config - Configuration options
   * @throws Error if the WASM module has not been initialized via `init()`
   */
  constructor(config: StreamConfig) {
    if (!module) {
      throw new Error('Module not initialized. Call init() first.');
    }
    this.analyzer = new module.StreamAnalyzer(
      config.sampleRate,
      config.nFft ?? 2048,
      config.hopLength ?? 512,
      config.nMels ?? 128,
      config.computeMel ?? true,
      config.computeChroma ?? true,
      config.computeOnset ?? true,
      config.emitEveryNFrames ?? 1,
    );
  }

  /**
   * Return the live WASM object, or fail with a clear message if this
   * wrapper has already been disposed.
   */
  private requireAnalyzer(): WasmStreamAnalyzer {
    if (this.analyzer === null) {
      throw new Error('StreamAnalyzer has been disposed.');
    }
    return this.analyzer;
  }

  /**
   * Process audio samples.
   *
   * @param samples - Audio samples (mono, float32)
   * @throws Error if the analyzer has been disposed
   */
  process(samples: Float32Array): void {
    this.requireAnalyzer().process(samples);
  }

  /**
   * Process audio samples with explicit sample offset.
   *
   * @param samples - Audio samples (mono, float32)
   * @param sampleOffset - Cumulative sample count at start of this chunk
   * @throws Error if the analyzer has been disposed
   */
  processWithOffset(samples: Float32Array, sampleOffset: number): void {
    this.requireAnalyzer().processWithOffset(samples, sampleOffset);
  }

  /**
   * Get the number of frames available to read.
   *
   * @throws Error if the analyzer has been disposed
   */
  availableFrames(): number {
    return this.requireAnalyzer().availableFrames();
  }

  /**
   * Read processed frames as Structure of Arrays.
   *
   * The object returned by the WASM call is passed through unchanged.
   *
   * @param maxFrames - Maximum number of frames to read
   * @returns Frame buffer with analysis results
   * @throws Error if the analyzer has been disposed
   */
  readFrames(maxFrames: number): FrameBuffer {
    return this.requireAnalyzer().readFramesSoa(maxFrames);
  }

  /**
   * Reset the analyzer state.
   *
   * @param baseSampleOffset - Starting sample offset (default 0)
   * @throws Error if the analyzer has been disposed
   */
  reset(baseSampleOffset = 0): void {
    this.requireAnalyzer().reset(baseSampleOffset);
  }

  /**
   * Get current statistics and progressive estimates.
   *
   * Builds a fresh plain object on every call; the numeric key/chord fields
   * coming from WASM are cast to `PitchClass` / `ChordQuality`.
   *
   * @returns Analyzer statistics including BPM, key, and chord progression
   * @throws Error if the analyzer has been disposed
   */
  stats(): AnalyzerStats {
    const s = this.requireAnalyzer().stats();
    return {
      totalFrames: s.totalFrames,
      totalSamples: s.totalSamples,
      durationSeconds: s.durationSeconds,
      estimate: {
        bpm: s.estimate.bpm,
        bpmConfidence: s.estimate.bpmConfidence,
        bpmCandidateCount: s.estimate.bpmCandidateCount,
        key: s.estimate.key as PitchClass,
        keyMinor: s.estimate.keyMinor,
        keyConfidence: s.estimate.keyConfidence,
        chordRoot: s.estimate.chordRoot as PitchClass,
        chordQuality: s.estimate.chordQuality as ChordQuality,
        chordConfidence: s.estimate.chordConfidence,
        chordProgression: s.estimate.chordProgression.map((c) => ({
          root: c.root as PitchClass,
          quality: c.quality as ChordQuality,
          startTime: c.startTime,
          confidence: c.confidence,
        })),
        accumulatedSeconds: s.estimate.accumulatedSeconds,
        usedFrames: s.estimate.usedFrames,
        updated: s.estimate.updated,
      },
    };
  }

  /**
   * Get total frames processed.
   *
   * @throws Error if the analyzer has been disposed
   */
  frameCount(): number {
    return this.requireAnalyzer().frameCount();
  }

  /**
   * Get current time position in seconds.
   *
   * @throws Error if the analyzer has been disposed
   */
  currentTime(): number {
    return this.requireAnalyzer().currentTime();
  }

  /**
   * Release resources. Call when done using the analyzer.
   *
   * Safe to call more than once: only the first call deletes the WASM
   * object, later calls are no-ops.
   */
  dispose(): void {
    const handle = this.analyzer;
    if (handle === null) {
      return;
    }
    // Clear the field first so the wrapper is marked disposed even if
    // delete() itself throws.
    this.analyzer = null;
    handle.delete();
  }
}
1574+
12861575
// ============================================================================
12871576
// Re-exports
12881577
// ============================================================================

js/sonare.js.d.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,74 @@ interface SonareModule {
317317
Instrumental: { value: 5 };
318318
Outro: { value: 6 };
319319
};
320+
321+
// Streaming - StreamAnalyzer
322+
StreamAnalyzer: new (
323+
sampleRate: number,
324+
nFft: number,
325+
hopLength: number,
326+
nMels: number,
327+
computeMel: boolean,
328+
computeChroma: boolean,
329+
computeOnset: boolean,
330+
emitEveryNFrames: number,
331+
) => WasmStreamAnalyzer;
332+
}
333+
334+
// Streaming types for StreamAnalyzer
335+
// Raw chord-change record; element type of WasmProgressiveEstimate.chordProgression.
// The same shape is also declared in js/index.ts — keep both declarations in sync.
interface WasmChordChange {
336+
root: number;
337+
quality: number;
338+
startTime: number; // NOTE(review): presumably seconds — confirm
339+
confidence: number;
340+
}
341+
342+
// Raw progressive estimate (BPM, key, chord) carried on WasmAnalyzerStats.estimate.
// The same shape is also declared in js/index.ts — keep both declarations in sync.
interface WasmProgressiveEstimate {
343+
bpm: number;
344+
bpmConfidence: number;
345+
bpmCandidateCount: number;
346+
key: number;
347+
keyMinor: boolean;
348+
keyConfidence: number;
349+
chordRoot: number;
350+
chordQuality: number;
351+
chordConfidence: number;
352+
chordProgression: WasmChordChange[];
353+
accumulatedSeconds: number;
354+
usedFrames: number;
355+
updated: boolean;
356+
}
357+
358+
// Return type of WasmStreamAnalyzer.stats().
// The same shape is also declared in js/index.ts — keep both declarations in sync.
interface WasmAnalyzerStats {
359+
totalFrames: number;
360+
totalSamples: number;
361+
durationSeconds: number;
362+
estimate: WasmProgressiveEstimate;
363+
}
364+
365+
// Return type of WasmStreamAnalyzer.readFramesSoa().
// The same shape is also declared in js/index.ts — keep both declarations in sync.
// NOTE(review): array lengths and memory layout are not visible here — confirm.
interface WasmFrameBuffer {
366+
nFrames: number;
367+
timestamps: Float32Array;
368+
mel: Float32Array;
369+
chroma: Float32Array;
370+
onsetStrength: Float32Array;
371+
rmsEnergy: Float32Array;
372+
spectralCentroid: Float32Array;
373+
spectralFlatness: Float32Array;
374+
}
375+
376+
// Instance shape returned by the SonareModule.StreamAnalyzer constructor signature.
// The same shape is also declared in js/index.ts — keep both declarations in sync.
interface WasmStreamAnalyzer {
377+
process: (samples: Float32Array) => void;
378+
processWithOffset: (samples: Float32Array, sampleOffset: number) => void;
379+
availableFrames: () => number;
380+
readFramesSoa: (maxFrames: number) => WasmFrameBuffer;
381+
readFramesU8: (maxFrames: number) => unknown; // result shape is untyped
382+
readFramesI16: (maxFrames: number) => unknown; // result shape is untyped
383+
reset: (baseSampleOffset?: number) => void;
384+
stats: () => WasmAnalyzerStats;
385+
frameCount: () => number;
386+
currentTime: () => number;
387+
delete: () => void; // releases the underlying WASM object
320388
}
321389

322390
declare function createModule(options?: SonareModuleOptions): Promise<SonareModule>;

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ if(BUILD_WASM)
8080
-sMODULARIZE=1
8181
-sEXPORT_ES6=1
8282
-sALLOW_MEMORY_GROWTH=1
83+
-sSTACK_SIZE=8388608
8384
--bind
8485
)
8586

0 commit comments

Comments
 (0)