Skip to content

Commit 591ce91

Browse files
committed
feat(streaming): Improve chord detection accuracy with smoothing and better correlation
Enhance streaming chord detection to reduce flickering and improve accuracy:

Chord template matching improvements:
- Replace Pearson correlation with cosine similarity for more robust matching
- Add root note emphasis bonus when root is prominent (>=50% of max chroma)
- Add slight penalty for less common diminished/augmented chord qualities
- Use L2 norm instead of max normalization for chroma vectors

Chord smoothing in stream analyzer:
- Add sliding window chroma history (12 frames, ~0.25s)
- Average chroma over history before chord matching
- Add confidence threshold (0.5) to filter unreliable detections
- Keep previous chord when confidence is low for stability

Batch-style chord analysis integration:
- Accumulate chroma frames during streaming
- Run ChordAnalyzer periodically (every 2s) for chord progression
- Use same smoothing settings as batch analysis for consistency

Configuration changes:
- Increase minimum chord duration from 0.2s to 0.3s
- Add kChordSmoothingFrames (12) and kChordConfidenceThreshold (0.5) constants
1 parent 563e343 commit 591ce91

3 files changed

Lines changed: 206 additions & 63 deletions

File tree

src/analysis/chord_templates.cpp

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -71,34 +71,56 @@ std::string ChordTemplate::to_string() const {
7171
}
7272

7373
float ChordTemplate::correlate(const float* chroma) const {
74-
// Compute Pearson correlation between chroma and pattern
75-
float chroma_mean = 0.0f;
76-
float pattern_mean = 0.0f;
74+
// Compute weighted correlation between chroma and pattern
75+
// Using cosine similarity with root emphasis
76+
77+
// First compute basic dot product and norms
78+
float dot = 0.0f;
79+
float chroma_norm_sq = 0.0f;
80+
float pattern_norm_sq = 0.0f;
81+
7782
for (int i = 0; i < 12; ++i) {
78-
chroma_mean += chroma[i];
79-
pattern_mean += pattern[i];
83+
dot += chroma[i] * pattern[i];
84+
chroma_norm_sq += chroma[i] * chroma[i];
85+
pattern_norm_sq += pattern[i] * pattern[i];
86+
}
87+
88+
float denom = std::sqrt(chroma_norm_sq * pattern_norm_sq);
89+
if (denom < 1e-10f) {
90+
return 0.0f;
8091
}
81-
chroma_mean /= 12.0f;
82-
pattern_mean /= 12.0f;
8392

84-
float numerator = 0.0f;
85-
float chroma_var = 0.0f;
86-
float pattern_var = 0.0f;
93+
float cosine_sim = dot / denom;
94+
95+
// Add root note emphasis bonus
96+
// If the root note has high energy in chroma, boost the score
97+
int root_idx = static_cast<int>(root);
98+
float root_weight = chroma[root_idx];
8799

100+
// Find max chroma value for normalization
101+
float max_chroma = 0.0f;
88102
for (int i = 0; i < 12; ++i) {
89-
float cd = chroma[i] - chroma_mean;
90-
float pd = pattern[i] - pattern_mean;
91-
numerator += cd * pd;
92-
chroma_var += cd * cd;
93-
pattern_var += pd * pd;
103+
if (chroma[i] > max_chroma) {
104+
max_chroma = chroma[i];
105+
}
94106
}
95107

96-
float denominator = std::sqrt(chroma_var * pattern_var);
97-
if (denominator < 1e-10f) {
98-
return 0.0f;
108+
// Root emphasis: if root is prominent (>= 50% of max), add a bonus proportional to the ratio
109+
float root_bonus = 0.0f;
110+
if (max_chroma > 1e-10f) {
111+
float root_ratio = root_weight / max_chroma;
112+
if (root_ratio >= 0.5f) {
113+
root_bonus = 0.1f * root_ratio; // Up to 0.1 bonus
114+
}
115+
}
116+
117+
// Penalize diminished/augmented chords slightly (they're less common)
118+
float quality_penalty = 0.0f;
119+
if (quality == ChordQuality::Diminished || quality == ChordQuality::Augmented) {
120+
quality_penalty = 0.05f;
99121
}
100122

101-
return numerator / denominator;
123+
return cosine_sim + root_bonus - quality_penalty;
102124
}
103125

104126
float ChordTemplate::correlate(const std::array<float, 12>& chroma) const {

src/streaming/stream_analyzer.cpp

Lines changed: 156 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
#include <cmath>
55
#include <numeric>
66

7+
#include "analysis/chord_analyzer.h"
78
#include "analysis/chord_templates.h"
89
#include "analysis/key_profiles.h"
910
#include "core/fft.h"
1011
#include "core/window.h"
12+
#include "feature/chroma.h"
1113
#include "filters/chroma.h"
1214
#include "filters/mel.h"
1315
#include "util/math_utils.h"
@@ -425,13 +427,51 @@ StreamFrame StreamAnalyzer::process_single_frame(const float* frame_start, size_
425427
}
426428
++chroma_frame_count_;
427429

428-
/// Detect chord for this frame
430+
/// Accumulate chroma frame for batch-style chord analysis
431+
/// Stored frame-major: [frame][chroma_bin] (12 values appended per frame); transposed to [chroma_bin][frame] before building a Chroma object
432+
for (int c = 0; c < 12; ++c) {
433+
accumulated_chroma_.push_back(chroma_buffer_[c]);
434+
}
435+
436+
/// Detect chord for this frame using smoothed chroma
429437
if (!chord_templates_.empty() && chroma_buffer_.size() == 12) {
438+
/// Add current chroma to history
439+
std::array<float, 12> current_chroma;
440+
std::copy(chroma_buffer_.begin(), chroma_buffer_.end(), current_chroma.begin());
441+
chroma_history_.push_back(current_chroma);
442+
443+
/// Keep history limited to smoothing window
444+
while (chroma_history_.size() > static_cast<size_t>(kChordSmoothingFrames)) {
445+
chroma_history_.pop_front();
446+
}
447+
448+
/// Compute smoothed chroma (average over history)
449+
std::array<float, 12> smoothed_chroma = {};
450+
for (const auto& hist : chroma_history_) {
451+
for (int c = 0; c < 12; ++c) {
452+
smoothed_chroma[c] += hist[c];
453+
}
454+
}
455+
float inv_count = 1.0f / static_cast<float>(chroma_history_.size());
456+
for (int c = 0; c < 12; ++c) {
457+
smoothed_chroma[c] *= inv_count;
458+
}
459+
460+
/// Find best chord using smoothed chroma
430461
auto [best_chord, chord_corr] =
431-
find_best_chord(chroma_buffer_.data(), chord_templates_);
432-
frame.chord_root = static_cast<int>(best_chord.root);
433-
frame.chord_quality = static_cast<int>(best_chord.quality);
434-
frame.chord_confidence = std::max(0.0f, chord_corr);
462+
find_best_chord(smoothed_chroma.data(), chord_templates_);
463+
464+
/// Only report chord if confidence is above threshold
465+
if (chord_corr >= kChordConfidenceThreshold) {
466+
frame.chord_root = static_cast<int>(best_chord.root);
467+
frame.chord_quality = static_cast<int>(best_chord.quality);
468+
frame.chord_confidence = chord_corr;
469+
} else {
470+
/// Low confidence: keep previous chord or default to C major
471+
frame.chord_root = (prev_chord_root_ >= 0) ? prev_chord_root_ : 0;
472+
frame.chord_quality = (prev_chord_quality_ >= 0) ? prev_chord_quality_ : 0;
473+
frame.chord_confidence = std::max(0.0f, chord_corr);
474+
}
435475
}
436476
}
437477

@@ -507,11 +547,15 @@ void StreamAnalyzer::compute_chroma() {
507547
chroma_buffer_[c] = sum;
508548
}
509549

510-
/// Normalize chroma
511-
float max_val = *std::max_element(chroma_buffer_.begin(), chroma_buffer_.end());
512-
if (max_val > kEpsilon) {
550+
/// Normalize chroma using L2 norm (more robust than max)
551+
float l2_norm = 0.0f;
552+
for (int c = 0; c < 12; ++c) {
553+
l2_norm += chroma_buffer_[c] * chroma_buffer_[c];
554+
}
555+
l2_norm = std::sqrt(l2_norm);
556+
if (l2_norm > kEpsilon) {
513557
for (int c = 0; c < 12; ++c) {
514-
chroma_buffer_[c] /= max_val;
558+
chroma_buffer_[c] /= l2_norm;
515559
}
516560
}
517561
}
@@ -611,48 +655,68 @@ void StreamAnalyzer::update_progressive_estimate(float current_time) {
611655
current_estimate_.updated = true;
612656
}
613657

614-
/// Update chord estimate (every frame, using current chroma)
615-
if (!chord_templates_.empty() && chroma_buffer_.size() == 12) {
658+
/// Update chord estimate (every frame, using smoothed chroma)
659+
if (!chord_templates_.empty() && !chroma_history_.empty()) {
660+
/// Compute smoothed chroma (average over history)
661+
std::array<float, 12> smoothed_chroma = {};
662+
for (const auto& hist : chroma_history_) {
663+
for (int c = 0; c < 12; ++c) {
664+
smoothed_chroma[c] += hist[c];
665+
}
666+
}
667+
float inv_count = 1.0f / static_cast<float>(chroma_history_.size());
668+
for (int c = 0; c < 12; ++c) {
669+
smoothed_chroma[c] *= inv_count;
670+
}
671+
616672
auto [best_chord, chord_corr] =
617-
find_best_chord(chroma_buffer_.data(), chord_templates_);
673+
find_best_chord(smoothed_chroma.data(), chord_templates_);
618674
int new_root = static_cast<int>(best_chord.root);
619675
int new_quality = static_cast<int>(best_chord.quality);
620676
float new_confidence = std::max(0.0f, chord_corr);
621677

622-
current_estimate_.chord_root = new_root;
623-
current_estimate_.chord_quality = new_quality;
624-
current_estimate_.chord_confidence = new_confidence;
625-
626-
/// Track chord progression
627-
float frame_duration =
628-
static_cast<float>(config_.hop_length) / static_cast<float>(config_.sample_rate);
629-
630-
if (new_root == prev_chord_root_ && new_quality == prev_chord_quality_) {
631-
/// Same chord - accumulate stable time
632-
chord_stable_time_ += frame_duration;
678+
/// Only update if confidence is above threshold
679+
if (new_confidence >= kChordConfidenceThreshold) {
680+
current_estimate_.chord_root = new_root;
681+
current_estimate_.chord_quality = new_quality;
682+
current_estimate_.chord_confidence = new_confidence;
633683
} else {
634-
/// Chord changed - check if previous chord was stable long enough
635-
if (prev_chord_root_ >= 0 && chord_stable_time_ >= kChordMinDuration) {
636-
/// Find the start time of the previous chord
637-
float chord_start = current_time - chord_stable_time_;
638-
639-
/// Only add if it's a new chord or first chord
640-
if (current_estimate_.chord_progression.empty() ||
641-
current_estimate_.chord_progression.back().root != prev_chord_root_ ||
642-
current_estimate_.chord_progression.back().quality != prev_chord_quality_) {
643-
ChordChange change;
644-
change.root = prev_chord_root_;
645-
change.quality = prev_chord_quality_;
646-
change.start_time = chord_start;
647-
change.confidence = new_confidence;
648-
current_estimate_.chord_progression.push_back(change);
684+
/// Keep current estimate but update confidence
685+
current_estimate_.chord_confidence = new_confidence;
686+
}
687+
688+
/// Track chord progression (only when confidence is high enough)
689+
if (new_confidence >= kChordConfidenceThreshold) {
690+
float frame_duration =
691+
static_cast<float>(config_.hop_length) / static_cast<float>(config_.sample_rate);
692+
693+
if (new_root == prev_chord_root_ && new_quality == prev_chord_quality_) {
694+
/// Same chord - accumulate stable time
695+
chord_stable_time_ += frame_duration;
696+
} else {
697+
/// Chord changed - check if previous chord was stable long enough
698+
if (prev_chord_root_ >= 0 && chord_stable_time_ >= kChordMinDuration) {
699+
/// Find the start time of the previous chord
700+
float chord_start = current_time - chord_stable_time_;
701+
702+
/// Only add if it's a new chord or first chord
703+
if (current_estimate_.chord_progression.empty() ||
704+
current_estimate_.chord_progression.back().root != prev_chord_root_ ||
705+
current_estimate_.chord_progression.back().quality != prev_chord_quality_) {
706+
ChordChange change;
707+
change.root = prev_chord_root_;
708+
change.quality = prev_chord_quality_;
709+
change.start_time = chord_start;
710+
change.confidence = new_confidence;
711+
current_estimate_.chord_progression.push_back(change);
712+
}
649713
}
650-
}
651714

652-
/// Reset for new chord
653-
prev_chord_root_ = new_root;
654-
prev_chord_quality_ = new_quality;
655-
chord_stable_time_ = frame_duration;
715+
/// Reset for new chord
716+
prev_chord_root_ = new_root;
717+
prev_chord_quality_ = new_quality;
718+
chord_stable_time_ = frame_duration;
719+
}
656720
}
657721
}
658722
}
@@ -689,6 +753,52 @@ void StreamAnalyzer::update_progressive_estimate(float current_time) {
689753
}
690754
}
691755

756+
/// Update chord progression using batch-style analysis (same as ChordAnalyzer)
757+
if (config_.compute_chroma && chroma_frame_count_ > 0) {
758+
float time_since_chord_analysis = current_time - last_chord_analysis_time_;
759+
constexpr float kChordAnalysisInterval = 2.0f; // Update every 2 seconds
760+
constexpr int kMinFramesForAnalysis = 50; // ~1 second of audio
761+
762+
if (time_since_chord_analysis >= kChordAnalysisInterval &&
763+
chroma_frame_count_ >= kMinFramesForAnalysis) {
764+
/// Transpose accumulated chroma from [frame][chroma] to [chroma][frame]
765+
int n_frames = chroma_frame_count_;
766+
std::vector<float> transposed_chroma(12 * n_frames);
767+
for (int f = 0; f < n_frames; ++f) {
768+
for (int c = 0; c < 12; ++c) {
769+
transposed_chroma[c * n_frames + f] = accumulated_chroma_[f * 12 + c];
770+
}
771+
}
772+
773+
/// Create Chroma object from accumulated data
774+
Chroma chroma_obj(std::move(transposed_chroma), 12, n_frames, config_.sample_rate,
775+
config_.hop_length);
776+
777+
/// Run ChordAnalyzer with same settings as batch analysis
778+
ChordConfig chord_config;
779+
chord_config.smoothing_window = 2.0f; // Same as batch
780+
chord_config.min_duration = 0.3f;
781+
chord_config.use_triads_only = true;
782+
chord_config.use_beat_sync = false; // No beat sync in streaming
783+
784+
ChordAnalyzer chord_analyzer(chroma_obj, chord_config);
785+
786+
/// Update chord progression from ChordAnalyzer results
787+
current_estimate_.chord_progression.clear();
788+
for (const auto& chord : chord_analyzer.chords()) {
789+
ChordChange change;
790+
change.root = static_cast<int>(chord.root);
791+
change.quality = static_cast<int>(chord.quality);
792+
change.start_time = chord.start;
793+
change.confidence = chord.confidence;
794+
current_estimate_.chord_progression.push_back(change);
795+
}
796+
797+
last_chord_analysis_time_ = current_time;
798+
current_estimate_.updated = true;
799+
}
800+
}
801+
692802
/// Update bar-synchronized chord tracking
693803
if (config_.compute_chroma) {
694804
update_bar_chord_tracking(current_time);
@@ -915,14 +1025,17 @@ void StreamAnalyzer::reset(size_t base_sample_offset) {
9151025
onset_accumulator_.clear();
9161026
chroma_sum_.fill(0.0f);
9171027
chroma_frame_count_ = 0;
1028+
accumulated_chroma_.clear();
9181029
last_key_update_time_ = 0.0f;
9191030
last_bpm_update_time_ = 0.0f;
1031+
last_chord_analysis_time_ = 0.0f;
9201032
current_estimate_ = ProgressiveEstimate();
9211033

9221034
/// Reset chord progression tracking
9231035
prev_chord_root_ = -1;
9241036
prev_chord_quality_ = -1;
9251037
chord_stable_time_ = 0.0f;
1038+
chroma_history_.clear();
9261039

9271040
/// Reset bar tracking state
9281041
bar_tracking_active_ = false;

src/streaming/stream_analyzer.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,13 +162,21 @@ class StreamAnalyzer {
162162
int chroma_frame_count_ = 0;
163163
float last_key_update_time_ = 0.0f;
164164
float last_bpm_update_time_ = 0.0f;
165+
float last_chord_analysis_time_ = 0.0f;
165166
ProgressiveEstimate current_estimate_;
166167

168+
// Accumulated chroma frames for batch-style chord analysis
169+
// Stored as a flat [n_frames * 12] array, frame-major: index = frame * 12 + chroma_bin
170+
std::vector<float> accumulated_chroma_;
171+
167172
// Chord progression tracking
168173
int prev_chord_root_ = -1;
169174
int prev_chord_quality_ = -1;
170175
float chord_stable_time_ = 0.0f; ///< Time chord has been stable
171-
static constexpr float kChordMinDuration = 0.2f; ///< Min duration to register change
176+
static constexpr float kChordMinDuration = 0.3f; ///< Min duration to register change
177+
static constexpr int kChordSmoothingFrames = 12; ///< Number of frames to smooth (~0.25s at default settings)
178+
static constexpr float kChordConfidenceThreshold = 0.5f; ///< Min correlation for chord detection
179+
std::deque<std::array<float, 12>> chroma_history_; ///< History for chord smoothing
172180

173181
// Bar-synchronized chord tracking (requires stable BPM)
174182
static constexpr float kBpmConfidenceThreshold = 0.3f; ///< Min BPM confidence for bar sync

0 commit comments

Comments
 (0)