|
4 | 4 | #include <cmath> |
5 | 5 | #include <numeric> |
6 | 6 |
|
| 7 | +#include "analysis/chord_analyzer.h" |
7 | 8 | #include "analysis/chord_templates.h" |
8 | 9 | #include "analysis/key_profiles.h" |
9 | 10 | #include "core/fft.h" |
10 | 11 | #include "core/window.h" |
| 12 | +#include "feature/chroma.h" |
11 | 13 | #include "filters/chroma.h" |
12 | 14 | #include "filters/mel.h" |
13 | 15 | #include "util/math_utils.h" |
@@ -425,13 +427,51 @@ StreamFrame StreamAnalyzer::process_single_frame(const float* frame_start, size_ |
425 | 427 | } |
426 | 428 | ++chroma_frame_count_; |
427 | 429 |
|
428 | | - /// Detect chord for this frame |
| 430 | + /// Accumulate chroma frame for batch-style chord analysis |
| 431 | + /// Store frame-major (interleaved): 12 chroma bins appended per frame, i.e. [frame][chroma_bin]; transposed to [chroma_bin][frame] before constructing a Chroma object |
| 432 | + for (int c = 0; c < 12; ++c) { |
| 433 | + accumulated_chroma_.push_back(chroma_buffer_[c]); |
| 434 | + } |
| 435 | + |
| 436 | + /// Detect chord for this frame using smoothed chroma |
429 | 437 | if (!chord_templates_.empty() && chroma_buffer_.size() == 12) { |
| 438 | + /// Add current chroma to history |
| 439 | + std::array<float, 12> current_chroma; |
| 440 | + std::copy(chroma_buffer_.begin(), chroma_buffer_.end(), current_chroma.begin()); |
| 441 | + chroma_history_.push_back(current_chroma); |
| 442 | + |
| 443 | + /// Keep history limited to smoothing window |
| 444 | + while (chroma_history_.size() > static_cast<size_t>(kChordSmoothingFrames)) { |
| 445 | + chroma_history_.pop_front(); |
| 446 | + } |
| 447 | + |
| 448 | + /// Compute smoothed chroma (average over history) |
| 449 | + std::array<float, 12> smoothed_chroma = {}; |
| 450 | + for (const auto& hist : chroma_history_) { |
| 451 | + for (int c = 0; c < 12; ++c) { |
| 452 | + smoothed_chroma[c] += hist[c]; |
| 453 | + } |
| 454 | + } |
| 455 | + float inv_count = 1.0f / static_cast<float>(chroma_history_.size()); |
| 456 | + for (int c = 0; c < 12; ++c) { |
| 457 | + smoothed_chroma[c] *= inv_count; |
| 458 | + } |
| 459 | + |
| 460 | + /// Find best chord using smoothed chroma |
430 | 461 | auto [best_chord, chord_corr] = |
431 | | - find_best_chord(chroma_buffer_.data(), chord_templates_); |
432 | | - frame.chord_root = static_cast<int>(best_chord.root); |
433 | | - frame.chord_quality = static_cast<int>(best_chord.quality); |
434 | | - frame.chord_confidence = std::max(0.0f, chord_corr); |
| 462 | + find_best_chord(smoothed_chroma.data(), chord_templates_); |
| 463 | + |
| 464 | + /// Only report chord if confidence is above threshold |
| 465 | + if (chord_corr >= kChordConfidenceThreshold) { |
| 466 | + frame.chord_root = static_cast<int>(best_chord.root); |
| 467 | + frame.chord_quality = static_cast<int>(best_chord.quality); |
| 468 | + frame.chord_confidence = chord_corr; |
| 469 | + } else { |
| 470 | + /// Low confidence: keep previous chord or default to C major |
| 471 | + frame.chord_root = (prev_chord_root_ >= 0) ? prev_chord_root_ : 0; |
| 472 | + frame.chord_quality = (prev_chord_quality_ >= 0) ? prev_chord_quality_ : 0; |
| 473 | + frame.chord_confidence = std::max(0.0f, chord_corr); |
| 474 | + } |
435 | 475 | } |
436 | 476 | } |
437 | 477 |
|
@@ -507,11 +547,15 @@ void StreamAnalyzer::compute_chroma() { |
507 | 547 | chroma_buffer_[c] = sum; |
508 | 548 | } |
509 | 549 |
|
510 | | - /// Normalize chroma |
511 | | - float max_val = *std::max_element(chroma_buffer_.begin(), chroma_buffer_.end()); |
512 | | - if (max_val > kEpsilon) { |
| 550 | + /// Normalize chroma using L2 norm (more robust than max) |
| 551 | + float l2_norm = 0.0f; |
| 552 | + for (int c = 0; c < 12; ++c) { |
| 553 | + l2_norm += chroma_buffer_[c] * chroma_buffer_[c]; |
| 554 | + } |
| 555 | + l2_norm = std::sqrt(l2_norm); |
| 556 | + if (l2_norm > kEpsilon) { |
513 | 557 | for (int c = 0; c < 12; ++c) { |
514 | | - chroma_buffer_[c] /= max_val; |
| 558 | + chroma_buffer_[c] /= l2_norm; |
515 | 559 | } |
516 | 560 | } |
517 | 561 | } |
@@ -611,48 +655,68 @@ void StreamAnalyzer::update_progressive_estimate(float current_time) { |
611 | 655 | current_estimate_.updated = true; |
612 | 656 | } |
613 | 657 |
|
614 | | - /// Update chord estimate (every frame, using current chroma) |
615 | | - if (!chord_templates_.empty() && chroma_buffer_.size() == 12) { |
| 658 | + /// Update chord estimate (every frame, using smoothed chroma) |
| 659 | + if (!chord_templates_.empty() && !chroma_history_.empty()) { |
| 660 | + /// Compute smoothed chroma (average over history) |
| 661 | + std::array<float, 12> smoothed_chroma = {}; |
| 662 | + for (const auto& hist : chroma_history_) { |
| 663 | + for (int c = 0; c < 12; ++c) { |
| 664 | + smoothed_chroma[c] += hist[c]; |
| 665 | + } |
| 666 | + } |
| 667 | + float inv_count = 1.0f / static_cast<float>(chroma_history_.size()); |
| 668 | + for (int c = 0; c < 12; ++c) { |
| 669 | + smoothed_chroma[c] *= inv_count; |
| 670 | + } |
| 671 | + |
616 | 672 | auto [best_chord, chord_corr] = |
617 | | - find_best_chord(chroma_buffer_.data(), chord_templates_); |
| 673 | + find_best_chord(smoothed_chroma.data(), chord_templates_); |
618 | 674 | int new_root = static_cast<int>(best_chord.root); |
619 | 675 | int new_quality = static_cast<int>(best_chord.quality); |
620 | 676 | float new_confidence = std::max(0.0f, chord_corr); |
621 | 677 |
|
622 | | - current_estimate_.chord_root = new_root; |
623 | | - current_estimate_.chord_quality = new_quality; |
624 | | - current_estimate_.chord_confidence = new_confidence; |
625 | | - |
626 | | - /// Track chord progression |
627 | | - float frame_duration = |
628 | | - static_cast<float>(config_.hop_length) / static_cast<float>(config_.sample_rate); |
629 | | - |
630 | | - if (new_root == prev_chord_root_ && new_quality == prev_chord_quality_) { |
631 | | - /// Same chord - accumulate stable time |
632 | | - chord_stable_time_ += frame_duration; |
| 678 | + /// Only update if confidence is above threshold |
| 679 | + if (new_confidence >= kChordConfidenceThreshold) { |
| 680 | + current_estimate_.chord_root = new_root; |
| 681 | + current_estimate_.chord_quality = new_quality; |
| 682 | + current_estimate_.chord_confidence = new_confidence; |
633 | 683 | } else { |
634 | | - /// Chord changed - check if previous chord was stable long enough |
635 | | - if (prev_chord_root_ >= 0 && chord_stable_time_ >= kChordMinDuration) { |
636 | | - /// Find the start time of the previous chord |
637 | | - float chord_start = current_time - chord_stable_time_; |
638 | | - |
639 | | - /// Only add if it's a new chord or first chord |
640 | | - if (current_estimate_.chord_progression.empty() || |
641 | | - current_estimate_.chord_progression.back().root != prev_chord_root_ || |
642 | | - current_estimate_.chord_progression.back().quality != prev_chord_quality_) { |
643 | | - ChordChange change; |
644 | | - change.root = prev_chord_root_; |
645 | | - change.quality = prev_chord_quality_; |
646 | | - change.start_time = chord_start; |
647 | | - change.confidence = new_confidence; |
648 | | - current_estimate_.chord_progression.push_back(change); |
| 684 | + /// Keep current estimate but update confidence |
| 685 | + current_estimate_.chord_confidence = new_confidence; |
| 686 | + } |
| 687 | + |
| 688 | + /// Track chord progression (only when confidence is high enough) |
| 689 | + if (new_confidence >= kChordConfidenceThreshold) { |
| 690 | + float frame_duration = |
| 691 | + static_cast<float>(config_.hop_length) / static_cast<float>(config_.sample_rate); |
| 692 | + |
| 693 | + if (new_root == prev_chord_root_ && new_quality == prev_chord_quality_) { |
| 694 | + /// Same chord - accumulate stable time |
| 695 | + chord_stable_time_ += frame_duration; |
| 696 | + } else { |
| 697 | + /// Chord changed - check if previous chord was stable long enough |
| 698 | + if (prev_chord_root_ >= 0 && chord_stable_time_ >= kChordMinDuration) { |
| 699 | + /// Find the start time of the previous chord |
| 700 | + float chord_start = current_time - chord_stable_time_; |
| 701 | + |
| 702 | + /// Only add if it's a new chord or first chord |
| 703 | + if (current_estimate_.chord_progression.empty() || |
| 704 | + current_estimate_.chord_progression.back().root != prev_chord_root_ || |
| 705 | + current_estimate_.chord_progression.back().quality != prev_chord_quality_) { |
| 706 | + ChordChange change; |
| 707 | + change.root = prev_chord_root_; |
| 708 | + change.quality = prev_chord_quality_; |
| 709 | + change.start_time = chord_start; |
| 710 | + change.confidence = new_confidence; |
| 711 | + current_estimate_.chord_progression.push_back(change); |
| 712 | + } |
649 | 713 | } |
650 | | - } |
651 | 714 |
|
652 | | - /// Reset for new chord |
653 | | - prev_chord_root_ = new_root; |
654 | | - prev_chord_quality_ = new_quality; |
655 | | - chord_stable_time_ = frame_duration; |
| 715 | + /// Reset for new chord |
| 716 | + prev_chord_root_ = new_root; |
| 717 | + prev_chord_quality_ = new_quality; |
| 718 | + chord_stable_time_ = frame_duration; |
| 719 | + } |
656 | 720 | } |
657 | 721 | } |
658 | 722 | } |
@@ -689,6 +753,52 @@ void StreamAnalyzer::update_progressive_estimate(float current_time) { |
689 | 753 | } |
690 | 754 | } |
691 | 755 |
|
| 756 | + /// Update chord progression using batch-style analysis (same as ChordAnalyzer) |
| 757 | + if (config_.compute_chroma && chroma_frame_count_ > 0) { |
| 758 | + float time_since_chord_analysis = current_time - last_chord_analysis_time_; |
| 759 | + constexpr float kChordAnalysisInterval = 2.0f; // Update every 2 seconds |
| 760 | + constexpr int kMinFramesForAnalysis = 50; // ~1 second of audio |
| 761 | + |
| 762 | + if (time_since_chord_analysis >= kChordAnalysisInterval && |
| 763 | + chroma_frame_count_ >= kMinFramesForAnalysis) { |
| 764 | + /// Transpose accumulated chroma from [frame][chroma] to [chroma][frame] |
| 765 | + int n_frames = chroma_frame_count_; |
| 766 | + std::vector<float> transposed_chroma(12 * n_frames); |
| 767 | + for (int f = 0; f < n_frames; ++f) { |
| 768 | + for (int c = 0; c < 12; ++c) { |
| 769 | + transposed_chroma[c * n_frames + f] = accumulated_chroma_[f * 12 + c]; |
| 770 | + } |
| 771 | + } |
| 772 | + |
| 773 | + /// Create Chroma object from accumulated data |
| 774 | + Chroma chroma_obj(std::move(transposed_chroma), 12, n_frames, config_.sample_rate, |
| 775 | + config_.hop_length); |
| 776 | + |
| 777 | + /// Run ChordAnalyzer with same settings as batch analysis |
| 778 | + ChordConfig chord_config; |
| 779 | + chord_config.smoothing_window = 2.0f; // Same as batch |
| 780 | + chord_config.min_duration = 0.3f; |
| 781 | + chord_config.use_triads_only = true; |
| 782 | + chord_config.use_beat_sync = false; // No beat sync in streaming |
| 783 | + |
| 784 | + ChordAnalyzer chord_analyzer(chroma_obj, chord_config); |
| 785 | + |
| 786 | + /// Update chord progression from ChordAnalyzer results |
| 787 | + current_estimate_.chord_progression.clear(); |
| 788 | + for (const auto& chord : chord_analyzer.chords()) { |
| 789 | + ChordChange change; |
| 790 | + change.root = static_cast<int>(chord.root); |
| 791 | + change.quality = static_cast<int>(chord.quality); |
| 792 | + change.start_time = chord.start; |
| 793 | + change.confidence = chord.confidence; |
| 794 | + current_estimate_.chord_progression.push_back(change); |
| 795 | + } |
| 796 | + |
| 797 | + last_chord_analysis_time_ = current_time; |
| 798 | + current_estimate_.updated = true; |
| 799 | + } |
| 800 | + } |
| 801 | + |
692 | 802 | /// Update bar-synchronized chord tracking |
693 | 803 | if (config_.compute_chroma) { |
694 | 804 | update_bar_chord_tracking(current_time); |
@@ -915,14 +1025,17 @@ void StreamAnalyzer::reset(size_t base_sample_offset) { |
915 | 1025 | onset_accumulator_.clear(); |
916 | 1026 | chroma_sum_.fill(0.0f); |
917 | 1027 | chroma_frame_count_ = 0; |
| 1028 | + accumulated_chroma_.clear(); |
918 | 1029 | last_key_update_time_ = 0.0f; |
919 | 1030 | last_bpm_update_time_ = 0.0f; |
| 1031 | + last_chord_analysis_time_ = 0.0f; |
920 | 1032 | current_estimate_ = ProgressiveEstimate(); |
921 | 1033 |
|
922 | 1034 | /// Reset chord progression tracking |
923 | 1035 | prev_chord_root_ = -1; |
924 | 1036 | prev_chord_quality_ = -1; |
925 | 1037 | chord_stable_time_ = 0.0f; |
| 1038 | + chroma_history_.clear(); |
926 | 1039 |
|
927 | 1040 | /// Reset bar tracking state |
928 | 1041 | bar_tracking_active_ = false; |
|
0 commit comments