Skip to content

Commit 563e343

Browse files
committed
feat(streaming): Add bar-synchronized and per-frame chord detection
- Add bar-synchronized chord progression (requires stable BPM)
  - Track chord per bar using accumulated chroma average
  - Automatically start bar tracking when BPM confidence >= 0.3
  - Expose currentBar, barDuration, and barChordProgression
- Add per-frame chord detection in StreamFrame
  - Detect chord for each STFT frame using template correlation
  - Include chordRoot, chordQuality, chordConfidence per frame
- Update FrameBuffer to include chord arrays for bulk reads
- Export bar chord data through WASM bindings
- Add comprehensive tests for bar chord and per-frame detection
1 parent 3b9781d commit 563e343

7 files changed

Lines changed: 464 additions & 2 deletions

File tree

js/index.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,14 @@ interface WasmChordChange {
164164
confidence: number;
165165
}
166166

167+
/**
 * Raw per-bar chord record as it appears in
 * `WasmProgressiveEstimate.barChordProgression`.
 * `StreamAnalyzer` maps each record to the public `BarChord` shape, casting
 * `root` to `PitchClass` and `quality` to `ChordQuality`.
 */
interface WasmBarChord {
168+
/** Index of the bar this chord was detected for. */
barIndex: number;
169+
/** Chord root as a plain number (cast to `PitchClass` by the wrapper). */
root: number;
170+
/** Chord quality as a plain number (cast to `ChordQuality` by the wrapper). */
quality: number;
171+
/** Start time of the bar; copied through unchanged by the wrapper. */
startTime: number;
172+
/** Detection confidence; copied through unchanged by the wrapper. */
confidence: number;
173+
}
174+
167175
interface WasmProgressiveEstimate {
168176
bpm: number;
169177
bpmConfidence: number;
@@ -175,6 +183,9 @@ interface WasmProgressiveEstimate {
175183
chordQuality: number;
176184
chordConfidence: number;
177185
chordProgression: WasmChordChange[];
186+
barChordProgression: WasmBarChord[];
187+
currentBar: number;
188+
barDuration: number;
178189
accumulatedSeconds: number;
179190
usedFrames: number;
180191
updated: boolean;
@@ -196,6 +207,9 @@ interface WasmFrameBuffer {
196207
rmsEnergy: Float32Array;
197208
spectralCentroid: Float32Array;
198209
spectralFlatness: Float32Array;
210+
chordRoot: Int32Array;
211+
chordQuality: Int32Array;
212+
chordConfidence: Float32Array;
199213
}
200214

201215
interface WasmStreamAnalyzer {
@@ -1365,6 +1379,17 @@ export interface ChordChange {
13651379
confidence: number;
13661380
}
13671381

1382+
/**
1383+
* A chord detected for one bar (bar-synchronized); entries of `ProgressiveEstimate.barChordProgression`.
1384+
*/
1385+
export interface BarChord {
1386+
/** Index of the bar this chord was detected for. */
barIndex: number;
1387+
/** Chord root, typed as a pitch class. */
root: PitchClass;
1388+
/** Chord quality (e.g. major / minor), typed as `ChordQuality`. */
quality: ChordQuality;
1389+
/** Start time of the bar, passed through from the native estimate. */
startTime: number;
1390+
/** Detection confidence, passed through from the native estimate. */
confidence: number;
1391+
}
1392+
13681393
/**
13691394
* Progressive estimation results for BPM, Key, and Chord
13701395
*/
@@ -1379,6 +1404,9 @@ export interface ProgressiveEstimate {
13791404
chordQuality: ChordQuality;
13801405
chordConfidence: number;
13811406
chordProgression: ChordChange[];
1407+
barChordProgression: BarChord[];
1408+
currentBar: number;
1409+
barDuration: number;
13821410
accumulatedSeconds: number;
13831411
usedFrames: number;
13841412
updated: boolean;
@@ -1406,6 +1434,9 @@ export interface FrameBuffer {
14061434
rmsEnergy: Float32Array;
14071435
spectralCentroid: Float32Array;
14081436
spectralFlatness: Float32Array;
1437+
chordRoot: Int32Array;
1438+
chordQuality: Int32Array;
1439+
chordConfidence: Float32Array;
14091440
}
14101441

14111442
/**
@@ -1543,6 +1574,15 @@ export class StreamAnalyzer {
15431574
startTime: c.startTime,
15441575
confidence: c.confidence,
15451576
})),
1577+
barChordProgression: s.estimate.barChordProgression.map((c) => ({
1578+
barIndex: c.barIndex,
1579+
root: c.root as PitchClass,
1580+
quality: c.quality as ChordQuality,
1581+
startTime: c.startTime,
1582+
confidence: c.confidence,
1583+
})),
1584+
currentBar: s.estimate.currentBar,
1585+
barDuration: s.estimate.barDuration,
15461586
accumulatedSeconds: s.estimate.accumulatedSeconds,
15471587
usedFrames: s.estimate.usedFrames,
15481588
updated: s.estimate.updated,

js/sonare.js.d.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,14 @@ interface WasmChordChange {
339339
confidence: number;
340340
}
341341

342+
/**
 * Raw per-bar chord record as it appears in
 * `WasmProgressiveEstimate.barChordProgression`. All fields are plain numbers.
 */
interface WasmBarChord {
343+
/** Index of the bar this chord was detected for. */
barIndex: number;
344+
/** Chord root as a plain number. */
root: number;
345+
/** Chord quality as a plain number. */
quality: number;
346+
/** Start time of the bar. */
startTime: number;
347+
/** Detection confidence. */
confidence: number;
348+
}
349+
342350
interface WasmProgressiveEstimate {
343351
bpm: number;
344352
bpmConfidence: number;
@@ -350,6 +358,9 @@ interface WasmProgressiveEstimate {
350358
chordQuality: number;
351359
chordConfidence: number;
352360
chordProgression: WasmChordChange[];
361+
barChordProgression: WasmBarChord[];
362+
currentBar: number;
363+
barDuration: number;
353364
accumulatedSeconds: number;
354365
usedFrames: number;
355366
updated: boolean;
@@ -371,6 +382,9 @@ interface WasmFrameBuffer {
371382
rmsEnergy: Float32Array;
372383
spectralCentroid: Float32Array;
373384
spectralFlatness: Float32Array;
385+
chordRoot: Int32Array;
386+
chordQuality: Int32Array;
387+
chordConfidence: Float32Array;
374388
}
375389

376390
interface WasmStreamAnalyzer {

src/streaming/stream_analyzer.cpp

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ StreamAnalyzer::StreamAnalyzer(const StreamConfig& config) : config_(config) {
325325
if (config_.compute_chroma) {
326326
chroma_buffer_.resize(12);
327327
chroma_sum_.fill(0.0f);
328+
bar_chroma_sum_.fill(0.0f);
328329
/// Initialize chord templates for chord detection
329330
chord_templates_ = generate_triad_templates();
330331
}
@@ -423,6 +424,15 @@ StreamFrame StreamAnalyzer::process_single_frame(const float* frame_start, size_
423424
chroma_sum_[i] += chroma_buffer_[i];
424425
}
425426
++chroma_frame_count_;
427+
428+
/// Detect chord for this frame
429+
if (!chord_templates_.empty() && chroma_buffer_.size() == 12) {
430+
auto [best_chord, chord_corr] =
431+
find_best_chord(chroma_buffer_.data(), chord_templates_);
432+
frame.chord_root = static_cast<int>(best_chord.root);
433+
frame.chord_quality = static_cast<int>(best_chord.quality);
434+
frame.chord_confidence = std::max(0.0f, chord_corr);
435+
}
426436
}
427437

428438
/// Compute onset strength
@@ -678,6 +688,84 @@ void StreamAnalyzer::update_progressive_estimate(float current_time) {
678688
}
679689
}
680690
}
691+
692+
/// Update bar-synchronized chord tracking
693+
if (config_.compute_chroma) {
694+
update_bar_chord_tracking(current_time);
695+
}
696+
}
697+
698+
void StreamAnalyzer::update_bar_chord_tracking(float current_time) {
699+
/// Check if BPM is stable enough to start bar tracking
700+
if (!bar_tracking_active_) {
701+
if (current_estimate_.bpm_confidence >= kBpmConfidenceThreshold && current_estimate_.bpm > 0.0f) {
702+
/// Start bar tracking
703+
bar_tracking_active_ = true;
704+
bar_duration_ = static_cast<float>(kBeatsPerBar) * 60.0f / current_estimate_.bpm;
705+
current_bar_index_ = 0;
706+
bar_start_time_ = current_time;
707+
bar_chroma_sum_.fill(0.0f);
708+
bar_chroma_count_ = 0;
709+
710+
/// Update estimate with bar info
711+
current_estimate_.bar_duration = bar_duration_;
712+
current_estimate_.current_bar = 0;
713+
}
714+
return;
715+
}
716+
717+
/// Update bar duration if BPM changed significantly
718+
float new_bar_duration = static_cast<float>(kBeatsPerBar) * 60.0f / current_estimate_.bpm;
719+
if (std::abs(new_bar_duration - bar_duration_) > 0.1f) {
720+
bar_duration_ = new_bar_duration;
721+
current_estimate_.bar_duration = bar_duration_;
722+
}
723+
724+
/// Accumulate chroma for current bar
725+
for (int c = 0; c < 12; ++c) {
726+
bar_chroma_sum_[c] += chroma_buffer_[c];
727+
}
728+
++bar_chroma_count_;
729+
730+
/// Check if we've crossed a bar boundary
731+
if (current_time >= bar_start_time_ + bar_duration_) {
732+
/// Detect chord for completed bar using accumulated chroma
733+
if (bar_chroma_count_ > 0 && !chord_templates_.empty()) {
734+
/// Normalize accumulated chroma
735+
std::array<float, 12> bar_chroma;
736+
float sum = 0.0f;
737+
for (int c = 0; c < 12; ++c) {
738+
bar_chroma[c] = bar_chroma_sum_[c] / static_cast<float>(bar_chroma_count_);
739+
sum += bar_chroma[c];
740+
}
741+
if (sum > kEpsilon) {
742+
for (int c = 0; c < 12; ++c) {
743+
bar_chroma[c] /= sum;
744+
}
745+
}
746+
747+
/// Find best chord for this bar
748+
auto [best_chord, chord_corr] = find_best_chord(bar_chroma.data(), chord_templates_);
749+
750+
/// Add to bar chord progression
751+
BarChord bar_chord;
752+
bar_chord.bar_index = current_bar_index_;
753+
bar_chord.root = static_cast<int>(best_chord.root);
754+
bar_chord.quality = static_cast<int>(best_chord.quality);
755+
bar_chord.start_time = bar_start_time_;
756+
bar_chord.confidence = std::max(0.0f, chord_corr);
757+
current_estimate_.bar_chord_progression.push_back(bar_chord);
758+
}
759+
760+
/// Move to next bar
761+
++current_bar_index_;
762+
bar_start_time_ = current_time;
763+
bar_chroma_sum_.fill(0.0f);
764+
bar_chroma_count_ = 0;
765+
766+
/// Update estimate
767+
current_estimate_.current_bar = current_bar_index_;
768+
}
681769
}
682770

683771
size_t StreamAnalyzer::available_frames() const { return output_buffer_.size(); }
@@ -715,6 +803,9 @@ void StreamAnalyzer::read_frames_soa(size_t max_frames, FrameBuffer& buffer) {
715803
buffer.rms_energy.push_back(frame.rms_energy);
716804
buffer.spectral_centroid.push_back(frame.spectral_centroid);
717805
buffer.spectral_flatness.push_back(frame.spectral_flatness);
806+
buffer.chord_root.push_back(frame.chord_root);
807+
buffer.chord_quality.push_back(frame.chord_quality);
808+
buffer.chord_confidence.push_back(frame.chord_confidence);
718809

719810
// Append mel (row-major)
720811
buffer.mel.insert(buffer.mel.end(), frame.mel.begin(), frame.mel.end());
@@ -832,6 +923,14 @@ void StreamAnalyzer::reset(size_t base_sample_offset) {
832923
prev_chord_root_ = -1;
833924
prev_chord_quality_ = -1;
834925
chord_stable_time_ = 0.0f;
926+
927+
/// Reset bar tracking state
928+
bar_tracking_active_ = false;
929+
bar_duration_ = 0.0f;
930+
current_bar_index_ = -1;
931+
bar_start_time_ = 0.0f;
932+
bar_chroma_sum_.fill(0.0f);
933+
bar_chroma_count_ = 0;
835934
}
836935

837936
AnalyzerStats StreamAnalyzer::stats() const {

src/streaming/stream_analyzer.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,16 @@ class StreamAnalyzer {
170170
float chord_stable_time_ = 0.0f; ///< Time chord has been stable
171171
static constexpr float kChordMinDuration = 0.2f; ///< Min duration to register change
172172

173+
// Bar-synchronized chord tracking (requires stable BPM)
174+
static constexpr float kBpmConfidenceThreshold = 0.3f; ///< Min BPM confidence for bar sync
175+
static constexpr int kBeatsPerBar = 4; ///< Beats per bar (4/4 time signature)
176+
bool bar_tracking_active_ = false; ///< True when BPM is stable enough
177+
float bar_duration_ = 0.0f; ///< Duration of one bar in seconds
178+
int current_bar_index_ = -1; ///< Current bar index (0-based)
179+
float bar_start_time_ = 0.0f; ///< Start time of current bar
180+
std::array<float, 12> bar_chroma_sum_; ///< Accumulated chroma within current bar
181+
int bar_chroma_count_ = 0; ///< Number of frames accumulated in current bar
182+
173183
// Internal methods
174184
void process_internal(const float* samples, size_t n_samples);
175185
StreamFrame process_single_frame(const float* frame_start, size_t sample_offset);
@@ -179,6 +189,7 @@ class StreamAnalyzer {
179189
float compute_onset();
180190
void compute_spectral_features(StreamFrame& frame);
181191
void update_progressive_estimate(float current_time);
192+
void update_bar_chord_tracking(float current_time);
182193
};
183194

184195
} // namespace sonare

src/streaming/stream_frame.h

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,15 @@ struct ChordChange {
1818
float confidence = 0.0f; ///< Detection confidence (0-1)
1919
};
2020

21+
/// @brief A chord detected for one bar (bar-synchronized).
/// @details Entries are appended to ProgressiveEstimate::bar_chord_progression
///          when a bar boundary is crossed. Default values describe an
///          unset/invalid entry.
22+
struct BarChord {
23+
int bar_index = -1; ///< Bar number (0-based, -1 = invalid)
24+
int root = -1; ///< Chord root (0-11 for C-B, -1 = unknown)
25+
int quality = 0; ///< Chord quality (0=Maj, 1=Min, 2=Dim, etc.)
26+
float start_time = 0.0f; ///< Start time of the bar in seconds
27+
float confidence = 0.0f; ///< Detection confidence (0-1); producer clamps negatives to 0
28+
};
29+
2130
/// @brief A single frame of analysis results.
2231
/// @details Contains all computed features for one STFT frame.
2332
/// The timestamp represents stream time (input sample position),
@@ -44,6 +53,11 @@ struct StreamFrame {
4453
// Onset detection (1-frame lag)
4554
float onset_strength = 0.0f; ///< Onset strength value
4655
bool onset_valid = false; ///< False for frame_index == 0 (no previous frame)
56+
57+
// Chord detection (per-frame)
58+
int chord_root = -1; ///< Detected chord root (0-11 for C-B, -1 = unknown)
59+
int chord_quality = 0; ///< Chord quality (0=Maj, 1=Min, 2=Dim, etc.)
60+
float chord_confidence = 0.0f; ///< Chord detection confidence (0-1)
4761
};
4862

4963
/// @brief Progressive estimation results for BPM, Key, and Chord.
@@ -64,10 +78,16 @@ struct ProgressiveEstimate {
6478
int chord_root = -1; ///< Current chord root (0-11 for C-B, -1 = unknown)
6579
int chord_quality = 0; ///< Chord quality (0=Maj, 1=Min, 2=Dim, etc.)
6680
float chord_confidence = 0.0f; ///< Chord detection confidence (0-1)
81+
float chord_start_time = 0.0f; ///< Start time of current chord
6782

68-
// Chord progression (accumulated over time)
83+
// Chord progression (accumulated over time, includes current chord)
6984
std::vector<ChordChange> chord_progression; ///< Detected chord changes
7085

86+
// Bar-synchronized chord progression (updated when BPM is stable)
87+
std::vector<BarChord> bar_chord_progression; ///< Chord per bar (beat-synced)
88+
int current_bar = -1; ///< Current bar index (-1 if BPM not stable)
89+
float bar_duration = 0.0f; ///< Duration of one bar in seconds (0 if BPM not stable)
90+
7191
// Objective statistics (for UI display)
7292
float accumulated_seconds = 0.0f; ///< Total audio processed
7393
int used_frames = 0; ///< Number of frames used for estimation
@@ -94,6 +114,9 @@ struct FrameBuffer {
94114
std::vector<float> rms_energy; ///< [n_frames]
95115
std::vector<float> spectral_centroid; ///< [n_frames]
96116
std::vector<float> spectral_flatness; ///< [n_frames]
117+
std::vector<int> chord_root; ///< [n_frames] chord root per frame
118+
std::vector<int> chord_quality; ///< [n_frames] chord quality per frame
119+
std::vector<float> chord_confidence; ///< [n_frames] chord confidence per frame
97120

98121
/// @brief Clears all data.
99122
void clear() {
@@ -105,6 +128,9 @@ struct FrameBuffer {
105128
rms_energy.clear();
106129
spectral_centroid.clear();
107130
spectral_flatness.clear();
131+
chord_root.clear();
132+
chord_quality.clear();
133+
chord_confidence.clear();
108134
}
109135

110136
/// @brief Reserves capacity for n frames.
@@ -116,6 +142,9 @@ struct FrameBuffer {
116142
rms_energy.reserve(n);
117143
spectral_centroid.reserve(n);
118144
spectral_flatness.reserve(n);
145+
chord_root.reserve(n);
146+
chord_quality.reserve(n);
147+
chord_confidence.reserve(n);
119148
}
120149
};
121150

0 commit comments

Comments
 (0)