diff --git a/CLAUDE.md b/CLAUDE.md index b9613eb1..bb8a98b3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -430,20 +430,20 @@ All extracted Player modules follow a standardized naming pattern where the modu | player_core | `PlayerCore_` | `PlayerCore_buildGameInfo()`, `PlayerCore_processAVInfo()` | | player_menu | `PlayerMenu_` | `PlayerMenu_init()`, `PlayerMenuNav_navigate()` | | player_env | `PlayerEnv_` | `PlayerEnv_setRotation()`, `PlayerEnv_handleGeometry()` | -| player_cpu | `PlayerCPU_` | `PlayerCPU_update()`, `PlayerCPU_detectFrequencies()` | +| cpu | `CPU_` | `CPU_update()`, `CPU_detectFrequencies()` | | player_game | `PlayerGame_` | `PlayerGame_parseExtensions()`, `PlayerGame_detectM3uPath()` | | player_scaler | `PlayerScaler_` | `PlayerScaler_calculate()` | **Type naming:** Types follow the same pattern with `Player[Module]TypeName`: -- `PlayerCPUState`, `PlayerCPUConfig`, `PlayerCPUDecision` +- `CPUState`, `CPUConfig`, `CPUDecision` - `PlayerOption`, `PlayerOptionList` - `PlayerMemoryResult`, `PlayerStateResult` **Constants:** Module-specific constants use `PLAYER_MODULE_` prefix: -- `PLAYER_CPU_MAX_FREQUENCIES` -- `PLAYER_CPU_DEFAULT_WINDOW_FRAMES` +- `CPU_MAX_FREQUENCIES` +- `CPU_DEFAULT_WINDOW_FRAMES` - `PLAYER_MEM_OK`, `PLAYER_STATE_OK` This standardization makes it immediately clear which module owns each function and prevents naming collisions as the codebase grows. @@ -517,7 +517,7 @@ See `.clang-format` for complete style definition. 
| Player core AV processing | `workspace/all/player/player_core.c` | | Player memory persistence | `workspace/all/player/player_memory.c` | | Player save states | `workspace/all/player/player_state.c` | -| Player CPU scaling | `workspace/all/player/player_cpu.c` | +| CPU scaling | `workspace/all/common/cpu.c` | | Player input handling | `workspace/all/player/player_input.c` | | Player save paths | `workspace/all/player/player_paths.c` | | Launcher Entry type | `workspace/all/launcher/launcher_entry.c` | @@ -569,7 +569,7 @@ To enable comprehensive testing, complex logic has been extracted from large fil | player_scaler.c | 26 | player.c | Video scaling geometry calculations | | player_core.c | 23 | player.c | Core AV info processing, aspect ratio calculation | | effect_system.c | 43 | platform files | Visual effect state management | -| player_cpu.c | 42 | player.c | Auto CPU scaling algorithm | +| cpu.c | 42 | player.c | CPU topology + auto scaling algorithm | | player_utils.c | 41 | player.c | Core name extraction, string utilities | | player_menu.c | 41 | player.c | In-game menu, context pattern validation | | nointro_parser.c | 39 | (original) | No-Intro ROM naming conventions | diff --git a/Makefile.qa b/Makefile.qa index 4ec03689..ff06e450 100644 --- a/Makefile.qa +++ b/Makefile.qa @@ -236,7 +236,7 @@ TEST_UNITY = tests/vendor/unity/unity.c PATHS_STUB = tests/support/paths_stub.c # All test executables (built from tests/unit/ and tests/integration/) -TEST_EXECUTABLES = tests/utils_test tests/nointro_parser_test tests/pad_test tests/gfx_text_test tests/audio_resampler_test tests/player_paths_test tests/launcher_utils_test tests/m3u_parser_test tests/launcher_file_utils_test tests/map_parser_test tests/collection_parser_test tests/recent_parser_test tests/recent_writer_test tests/recent_runtime_test tests/directory_utils_test tests/binary_file_utils_test tests/ui_layout_test tests/str_compare_test tests/effect_system_test tests/effect_generate_test 
tests/player_utils_test tests/player_config_test tests/player_options_test tests/platform_variant_test tests/launcher_entry_test tests/directory_index_test tests/player_archive_test tests/player_memory_test tests/player_state_test tests/launcher_launcher_test tests/player_cpu_test tests/player_input_test tests/launcher_state_test tests/player_menu_test tests/player_env_test tests/player_game_test tests/player_scaler_test tests/player_core_test tests/launcher_directory_test tests/launcher_navigation_test tests/launcher_thumbnail_test tests/launcher_context_test tests/emu_cache_test tests/res_cache_test tests/render_common_test tests/integration_workflows_test tests/log_test tests/frame_pacer_test +TEST_EXECUTABLES = tests/utils_test tests/nointro_parser_test tests/pad_test tests/gfx_text_test tests/audio_resampler_test tests/player_paths_test tests/launcher_utils_test tests/m3u_parser_test tests/launcher_file_utils_test tests/map_parser_test tests/collection_parser_test tests/recent_parser_test tests/recent_writer_test tests/recent_runtime_test tests/directory_utils_test tests/binary_file_utils_test tests/ui_layout_test tests/str_compare_test tests/effect_system_test tests/effect_generate_test tests/player_utils_test tests/player_config_test tests/player_options_test tests/platform_variant_test tests/launcher_entry_test tests/directory_index_test tests/player_archive_test tests/player_memory_test tests/player_state_test tests/launcher_launcher_test tests/cpu_test tests/player_input_test tests/launcher_state_test tests/player_menu_test tests/player_env_test tests/player_game_test tests/player_scaler_test tests/player_core_test tests/launcher_directory_test tests/launcher_navigation_test tests/launcher_thumbnail_test tests/launcher_context_test tests/emu_cache_test tests/res_cache_test tests/render_common_test tests/integration_workflows_test tests/log_test tests/sync_manager_test # Default targets: use Docker for consistency test: docker-test @@ -408,14 +408,15 @@ 
tests/launcher_launcher_test: tests/unit/all/launcher/test_launcher_launcher.c w @echo "Building launcher command tests..." @$(CC) -o $@ $^ $(TEST_INCLUDES) $(TEST_CFLAGS) -# Build auto CPU scaling tests (pure algorithm, no external dependencies) -tests/player_cpu_test: tests/unit/all/player/test_player_cpu.c workspace/all/player/player_cpu.c $(TEST_UNITY) - @echo "Building auto CPU scaling tests..." +# Build CPU scaling tests (pure algorithm, no external dependencies) +tests/cpu_test: tests/unit/all/common/test_cpu.c workspace/all/common/cpu.c $(TEST_UNITY) + @echo "Building CPU scaling tests..." @$(CC) -o $@ $^ $(TEST_INCLUDES) $(TEST_CFLAGS) -# Build frame pacing tests (pure algorithm, no external dependencies) -tests/frame_pacer_test: tests/unit/all/player/test_frame_pacer.c workspace/all/player/frame_pacer.c $(TEST_UNITY) - @echo "Building frame pacer tests..." +# Build sync manager tests (vsync measurement and mode switching) +# Note: Uses test stub for getMicroseconds, not utils.c version +tests/sync_manager_test: tests/unit/all/player/test_sync_manager.c workspace/all/player/sync_manager.c workspace/all/common/log.c $(TEST_UNITY) + @echo "Building sync manager tests..." @$(CC) -o $@ $^ $(TEST_INCLUDES) $(TEST_CFLAGS) -lm # Build input handling tests (pure state queries and mapping lookups) diff --git a/docs/audio-rate-control.md b/docs/audio-rate-control.md index 88b1c5fa..73f31669 100644 --- a/docs/audio-rate-control.md +++ b/docs/audio-rate-control.md @@ -13,16 +13,55 @@ Retro game consoles are highly synchronous - audio generation is locked to video **The fundamental challenge**: Synchronize to vsync (smooth video) while never underrunning or blocking on audio. -## The Algorithm +## Runtime-Adaptive Sync System -### Arntzen's Core Formula +LessUI uses a runtime-adaptive approach that measures the actual display refresh rate and selects the appropriate sync mode automatically. 
-The paper's pure proportional control adjusts resampling ratio based on buffer fill: +### Two Sync Modes + +| Mode | Timing Source | Audio Handling | When Used | +| --------------- | --------------------- | --------------------------------- | -------------------------------- | +| **Audio Clock** | Blocking audio writes | Fixed ratio (no rate control) | Startup default, Hz mismatch >1% | +| **Vsync** | Display vsync | P rate control (±0.8% max adjust) | Hz mismatch <1% from game fps | + +### Mode Selection Algorithm ``` -error = 1 - 2×fill -adjustment = error × d -ratio = 1 - adjustment +1. Start in Audio Clock mode (safe default, works on all hardware) +2. Measure actual display Hz via vsync timing (~2 seconds warmup) +3. If measured Hz within 1% of game fps → switch to Vsync mode +4. Monitor for drift; fall back to Audio Clock if Hz becomes unstable +``` + +This eliminates compile-time mode selection and handles hardware variance automatically. + +## Audio Clock Mode + +When display Hz differs significantly from game fps (>1%), rate control cannot compensate without audible pitch changes. Instead: + +- Audio writes **block** when the buffer is full +- Audio hardware clock drives emulation timing +- Frame duplication occurs naturally (less visible than frame skipping) +- No rate control needed - the blocking provides natural backpressure + +**Benefits:** + +- Works with any display refresh rate +- Audio buffer stays naturally stable +- No controller oscillation or windup + +## Vsync Mode (Rate Control Active) + +When display Hz closely matches game fps (<1%), vsync provides timing and rate control keeps the audio buffer stable. 
+ +### Arntzen's Proportional Control + +The paper's proportional control adjusts resampling ratio based on buffer fill: + +```c +error = 1 - 2 * fill; // +1 when empty, 0 at half, -1 when full +adjustment = error * d; // Bounded by ±d +ratio = 1 - adjustment; // Resampling ratio ``` **Behavior:** @@ -33,64 +72,41 @@ ratio = 1 - adjustment The paper proves this converges exponentially to a stable equilibrium. -### Our Extension: Dual-Timescale PI Controller +### Why Pure P Works -Pure proportional control works when the host display/audio clocks match the emulated system. On cheap handheld hardware, persistent clock mismatches cause the buffer to settle away from 50%. +Our 1% Hz tolerance for vsync mode ensures we're within the paper's "reasonably close" bounds: -We extend Arntzen with an integral term on a **separate, slower timescale**: +- **Arntzen tested with:** 0.36% Hz mismatch, d=0.5% → 1.4x headroom +- **Our parameters:** up to 1% Hz mismatch, d=0.8% → 0.8x headroom at the 1% gate limit (above 1x once mismatch is under 0.8%) -```c -// Fast timescale (proportional): immediate response to buffer jitter -float error = 1.0f - 2.0f * fill; -float p_term = error * d; - -// Slow timescale (integral): learns persistent clock offset over ~5 seconds -error_avg = α * error + (1-α) * error_avg; // Smooth error first -integral += error_avg * ki; // Then integrate -integral = clamp(integral, -0.02, +0.02); // Limit to ±2% - -// Combined adjustment -float adjustment = p_term + integral; -``` - -**Key insight**: Original PI failed because both terms operated on the same timescale, causing them to fight. By smoothing error before integrating (~5 seconds), the integral only sees persistent trends, not per-frame noise. +The 1% gate ensures devices in vsync mode have mismatch bounded within what proportional control can handle. Devices outside this range fall back to audio-clock mode where rate control isn't needed. 
### Parameters -| Parameter | Value | Purpose | -| ---------- | -------- | ------------------------------------------------------ | -| **d** | 1.0% | Proportional gain. Handles frame-to-frame jitter. | -| **ki** | 0.00005 | Integral gain. Learns persistent clock offset. | -| **α** | 0.003 | Error smoothing (~333 frames / 5.5 seconds at 60fps). | -| **clamp** | ±2% | Max integral correction. Handles hardware clock drift. | -| **buffer** | 5 frames | ~83ms latency. Headroom for timing variance. | +| Parameter | Value | Purpose | +| ---------- | -------- | ----------------------------------------------------------- | +| **d** | 0.8% | Proportional gain. Handles frame-to-frame jitter. | +| **buffer** | 8 frames | ~133ms latency. Matches RetroArch handheld default (128ms). | ## Implementation Details -### Per-Frame Integral Update +### Sync Mode Callbacks -The integral must update **once per frame**, not once per audio batch. Some cores (e.g., 64-bit snes9x) use per-sample audio callbacks, calling `SND_batchSamples()` ~535 times per frame. Without this fix, effective ki = 535× intended, causing wild oscillation. 
+The audio system queries the sync manager to determine behavior: ```c -// Called once per frame from main loop, before core.run() -void SND_newFrame(void) { - SDL_LockAudio(); - - float fill = SND_getBufferFillLevel(); - float error = 1.0f - 2.0f * fill; - - // Update smoothed error and integral (once per frame) - error_avg = α * error + (1-α) * error_avg; - integral += error_avg * ki; - integral = clamp(integral, -0.02, +0.02); - - SDL_UnlockAudio(); -} +// Set by player at init +SND_setSyncCallbacks( + SyncManager_shouldUseRateControl, // true in Vsync mode + SyncManager_shouldBlockAudio // true in Audio Clock mode +); + +// In SND_batchSamples() +bool should_block = snd.should_block_audio(); +bool should_use_rate_control = !should_block && snd.should_use_rate_control(); ``` -### Thread Safety - -Rate control state is shared between the main thread (integral updates) and audio thread (buffer reads). All shared state access requires `SDL_LockAudio()` to prevent torn reads on 64-bit ARM where float operations aren't atomic. +This decouples the audio system from sync mode decisions. ### Sample Rate Policy @@ -104,41 +120,14 @@ int PLAT_pickSampleRate(int requested, int max) { Forcing a different rate (e.g., always 48kHz when core wants 32kHz) causes unnecessary resampling and wider buffer swings. -### Vsync Cadence - -When a libretro core skips rendering (passes NULL to video_refresh), we still flip to maintain vsync timing: - -```c -if (!data) { - frame_ready_for_flip = 1; // Still flip to maintain vsync cadence - return; -} -``` - -Without this, skipped frames cause: no vsync wait → 4ms frame → next frame waits 2 vblanks → 30ms frame. This creates 20% buffer oscillation even with perfect rate control. 
- ## Tuning Results - -Tested across three platforms with different timing characteristics: - -| Device | Fill | Variance | Integral | Underruns | Notes | -| ---------- | ---- | -------- | -------- | --------- | ---------------------------- | -| rg35xxplus | 59% | ±8% | +0.15% | 0 | Rock solid | -| tg5040 | 61% | ±16% | -0.71% | 0 | Integral learns clock offset | -| miyoomini | 64% | ±14% | +0.42% | 0 | Fixed by sample rate policy | - -**Key findings:** - -- d=0.010 (1.0%) is optimal for handheld timing variance (paper's 0.2-0.5% is for desktop) -- Integral converges in ~15-20 seconds to steady-state offset -- Each device has different clock characteristics that the integral learns - ## Code References -- PI controller: `workspace/all/common/api.c` (SND_calculateRateAdjust, SND_newFrame) -- Parameters: `workspace/all/common/api.c` (lines 1640-1652) +- Sync manager: `workspace/all/player/sync_manager.c` (mode selection, Hz measurement) +- Rate control: `workspace/all/common/api.c` (`SND_calculateRateAdjust`) +- Sync callbacks: `workspace/all/common/api.c` (`SND_setSyncCallbacks`) +- Parameters: `workspace/all/common/defines.h` (`SND_RATE_CONTROL_D`) - Resampler: `workspace/all/common/audio_resampler.c` -- Sample rate policy: `workspace//platform/platform.c` (PLAT_pickSampleRate) +- Sample rate policy: `workspace/<platform>/platform/platform.c` (`PLAT_pickSampleRate`) ## References diff --git a/docs/auto-cpu-scaling.md b/docs/auto-cpu-scaling.md index e419db08..0dedbacf 100644 --- a/docs/auto-cpu-scaling.md +++ b/docs/auto-cpu-scaling.md @@ -8,7 +8,7 @@ Dynamic CPU frequency scaling for libretro emulation based on frame timing. Add an "Auto" CPU speed option that dynamically scales between existing power levels (POWERSAVE/NORMAL/PERFORMANCE) based on real-time emulation performance, saving battery when possible and boosting when needed. -**Status:** ✅ Granular frequency scaling implemented. Auto mode now uses all available CPU frequencies detected from the system. 
+**Status:** ✅ Topology-aware scaling implemented. Supports multi-cluster ARM SoCs (big.LITTLE, tri-cluster) with governor-based PerfState ladder, plus granular frequency scaling for single-cluster devices. ## Design Approach @@ -169,10 +169,10 @@ while (!quit) { ### Two-Layer Architecture -| Layer | Handles | Magnitude | Speed | -| --------------------- | -------------------------- | ----------------------------- | ---------- | -| **Rate control (PI)** | Jitter + persistent drift | ±1% (proportional) + integral | Per-frame | -| **CPU scaling** | Sustained performance gaps | 10-50%+ | Per-second | +| Layer | Handles | Magnitude | Speed | +| -------------------- | -------------------------- | -------------------- | ---------- | +| **Rate control (P)** | Frame-to-frame jitter | ±0.8% (proportional) | Per-frame | +| **CPU scaling** | Sustained performance gaps | 10-50%+ | Per-second | Rate control handles small timing variations. CPU scaling handles sustained performance problems that rate control can't fix. @@ -255,11 +255,17 @@ Auto CPU scaling uses a **two-thread design** to keep the main emulation loop re ### Background Thread (CPU Applier) - Polls every 50ms checking for target changes -- When target ≠ current, applies the change -- Calls `PWR_setCPUSpeed()` which may fork `system("overclock.elf")` -- Updates current level after successful application +- When target ≠ current, applies the change: + - **Topology mode**: Calls `CPU_applyPerfState()` to set governors on all clusters, queues affinity change for main thread + - **Granular mode**: Calls `PLAT_setCPUFrequency()` to set frequency via sysfs + - **Fallback mode**: Calls `PWR_setCPUSpeed()` which may fork `system("overclock.elf")` +- Updates current level/state after successful application - Stops cleanly when exiting auto mode +**Topology mode thread safety:** + +CPU affinity must be set from the emulation thread (not background thread) because `sched_setaffinity(0, ...)` affects the calling thread. 
The background thread sets `pending_affinity` under mutex, and the main thread applies it on next frame. + ### Thread Safety ```c @@ -439,23 +445,20 @@ if (SND_getUnderrunCount() > last_underrun_count) { The **d parameter** determines how much pitch adjustment the rate control algorithm can apply for jitter compensation. See [docs/audio-rate-control.md](audio-rate-control.md) for the full algorithm derivation. -**Current implementation (PI Controller):** +**Current implementation (Proportional Controller):** ```c -// Rate control gains (api.c) -#define SND_RATE_CONTROL_D_DEFAULT 0.010f // 1.0% - proportional gain -#define SND_RATE_CONTROL_KI 0.00005f // integral gain (drift correction) -#define SND_ERROR_AVG_ALPHA 0.003f // error smoothing (~333 frame average) -#define SND_INTEGRAL_CLAMP 0.02f // ±2% max drift correction +// Rate control gain (defines.h) +#define SND_RATE_CONTROL_D 0.012f // 1.2% max pitch adjustment ``` -**Why dual-timescale PI controller works:** +**Why pure proportional control works:** -- Error smoothing (α=0.003) filters jitter before it reaches the integral term -- Proportional term (d=1.0%) provides immediate response to buffer level changes -- Integral term operates on slower timescale, learning persistent clock offset -- Integral clamped to ±2% handles hardware clock mismatch up to ±2% -- P and I can't fight because they operate on different timescales +- Vsync mode only activates when display Hz is within 1% of game fps +- With d=1.2% and max 1% mismatch, we have 1.2x headroom (similar to Arntzen's 1.4x) +- Proportional term provides immediate response to buffer level changes +- Buffer settles at stable equilibrium (may not be exactly 50%, but stable) +- Devices outside 1% tolerance fall back to audio-clock mode (no rate control needed) ### Audio Buffer Size @@ -466,7 +469,7 @@ snd.buffer_video_frames = 5; snd.frame_count = snd.buffer_video_frames * snd.sample_rate_in / snd.frame_rate; ``` -With the PI controller, the buffer settles near 
50-65% fill depending on device clock characteristics, providing headroom for jitter and ~42ms effective latency. +With proportional control, the buffer settles at a stable equilibrium. The 8-frame buffer (~133ms) provides substantial headroom for CPU frequency transitions and timing variance. ## Benchmark Methodology @@ -510,30 +513,30 @@ The discovered frequency steps and performance data come from a custom CPU bench - [Dynamic Rate Control for Retro Game Emulators](https://docs.libretro.com/guides/ratecontrol.pdf) - Hans-Kristian Arntzen, 2012 - [docs/audio-rate-control.md](audio-rate-control.md) - Our rate control implementation -- [workspace/all/common/api.c](../workspace/all/common/api.c) - `SND_calculateRateAdjust()`, `PWR_getAvailableCPUFrequencies_sysfs()`, `PWR_setCPUFrequency_sysfs()` +- [workspace/all/common/api.c](../workspace/all/common/api.c) - `SND_calculateRateAdjust()`, `PWR_getAvailableCPUFrequencies_sysfs()`, `PWR_setCPUFrequency_sysfs()`, `PWR_detectCPUTopology()`, `PWR_setCPUGovernor()`, `PWR_setThreadAffinity()` - [workspace/all/common/api.h](../workspace/all/common/api.h) - `PLAT_getAvailableCPUFrequencies()`, `PLAT_setCPUFrequency()` API - [workspace/all/player/player.c](../workspace/all/player/player.c) - Main emulation loop, `updateAutoCPU()`, `auto_cpu_detectFrequencies()` +- [workspace/all/common/cpu.c](../workspace/all/common/cpu.c) - CPU scaling algorithm, `CPU_buildPerfStates()`, `CPU_applyPerfState()`, `CPU_getPerformancePercent()` +- [workspace/all/common/cpu.h](../workspace/all/common/cpu.h) - CPU scaling types and API - [workspace/all/paks/Benchmark/](../workspace/all/paks/Benchmark/) - CPU frequency benchmark tool ## Tuning Status -| Parameter | Current | Notes | -| ----------------------- | ------------------- | ------------------------------------------------- | -| Rate control d | 1.0% | Proportional gain - handles frame-to-frame jitter | -| Rate control ki | 0.00005 | Integral gain - learns persistent clock offset | -| Error 
smoothing α | 0.003 (~333 frames) | Separates P and I timescales | -| Integral clamp | ±2% | Max drift correction (handles hardware variance) | -| Audio buffer | 5 frames (~83ms) | Effective latency ~42ms at 50% fill | -| Window size | 30 frames (~500ms) | Filters noise, responsive to changes | -| Utilization high | 85% | Frame time >85% of budget = boost | -| Utilization low | 55% | Frame time <55% of budget = reduce | -| Target util | 70% | Target utilization after frequency change | -| Max step (reduce/panic) | 2 | Max frequency steps down (boost unlimited) | -| Min frequency | 400 MHz | Floor for frequency scaling | -| Boost windows | 2 (~1s) | Fast response to performance issues | -| Reduce windows | 4 (~2s) | Conservative to prevent oscillation | -| Startup grace | 300 frames (~5s) | Starts at max freq, then scales | -| Percentile | 90th | Ignores outliers (loading screens) | +| Parameter | Current | Notes | +| ---------------- | ------------------ | -------------------------------------------------------- | +| Rate control d | 0.8% | Proportional gain - gentler than 1.2% with larger buffer | +| Audio buffer | 8 frames (~133ms) | Matches RetroArch handheld default, CPU scaling headroom | +| Window size | 30 frames (~500ms) | Filters noise, responsive to changes | +| Utilization high | 85% | Frame time >85% of budget = boost | +| Utilization low | 55% | Frame time <55% of budget = reduce | +| Target util | 70% | Target utilization after frequency change | +| Max step down | 1 | Max frequency steps when reducing | +| Panic step up | 2 | Frequency steps on underrun emergency | +| Min frequency | 400 MHz | Floor for frequency scaling | +| Boost windows | 2 (~1s) | Fast response to performance issues | +| Reduce windows | 4 (~2s) | Conservative to prevent oscillation | +| Startup grace | 300 frames (~5s) | Starts at max freq, then scales | +| Percentile | 90th | Ignores outliers (loading screens) | ### Display Rate Handling @@ -545,9 +548,9 @@ Display refresh rate 
is queried from SDL at init via `SDL_GetCurrentDisplayMode( | tg5040 | 60 Hz | 60.10 Hz (NES) | 60/60.10 = 0.9983 | | miyoomini | 60 Hz | 60.10 Hz (NES) | 60/60.10 = 0.9983 | -**Note:** SDL typically reports rounded integer refresh rates (60 Hz). The actual display rate may vary slightly (59.71-60.5 Hz measured via vsync timing). The PI controller's integral term learns and corrects for any mismatch over time. +**Note:** SDL typically reports rounded integer refresh rates (60 Hz). The actual display rate may vary slightly (59.71-60.5 Hz measured via vsync timing). The sync manager measures actual Hz and gates vsync mode to within 1% of game fps. -**How it works:** The PI controller adjusts the resampling ratio based on buffer fill. The proportional term (d) handles jitter, while the integral term slowly learns the persistent timing offset to maintain exactly 50% buffer fill. +**How it works:** The proportional controller adjusts the resampling ratio based on buffer fill. Buffer below 50% → produce more samples; above 50% → produce fewer. This converges to stable equilibrium. 
### Debug HUD @@ -574,11 +577,14 @@ The debug overlay uses all 4 corners to show performance and scaling info: - Manual mode: `L1 b:48%` (level + buffer fill) - Auto mode (fallback): `L1 u:52% b:48%` (level + utilization + buffer fill) - Auto mode (granular): `1200 u:52% b:48%` (frequency in MHz + utilization + buffer fill) +- Auto mode (topology): `T3/5 60% u:52% b:48%` (state/max + perf% + utilization + buffer fill) **Key metrics:** - `L0/L1/L2` = CPU level (POWERSAVE/NORMAL/PERFORMANCE) - used in manual and fallback modes - `1200` = CPU frequency in MHz (e.g., 1200 = 1.2 GHz) - used in granular auto mode +- `T3/5` = PerfState index / max (e.g., state 3 of 5) - used in topology auto mode +- `60%` = Normalized performance level (0-100%) - topology mode only - `u:XX%` = Frame timing utilization (90th percentile, % of frame budget) - `b:XX%` = Audio buffer fill (should converge to ~50%) @@ -603,7 +609,7 @@ After implementing the unified RateMeter system with dual clock correction (disp - Low quality: frame timing drops → auto scaler correctly reduces CPU - The system responds to actual emulation workload, not arbitrary core labels -3. **No feedback loops** - Buffer fill is influenced by the PI controller rate adjustment and dynamic buffer sizing. Using it for CPU scaling would create two control systems fighting over the same signal. +3. **No feedback loops** - Buffer fill is influenced by the rate control adjustment and dynamic buffer sizing. Using it for CPU scaling would create two control systems fighting over the same signal. 
**The two-layer separation is optimal:** @@ -612,14 +618,81 @@ After implementing the unified RateMeter system with dual clock correction (disp | Rate control | Audio/video sync | Per-frame (~16ms) | Buffer fill | Resampler ratio adjustment | | CPU scaling | Performance headroom | Per-second (~1-2s) | Frame timing | CPU frequency | -### Granular Frequency Scaling (Implemented) +### Multi-Cluster Topology Mode (Implemented) + +Modern ARM SoCs use heterogeneous CPU clusters (big.LITTLE, tri-cluster) where different cores have different performance/power characteristics. Auto mode now detects and leverages this topology. + +**How it works:** + +1. **Detection**: Enumerates `/sys/devices/system/cpu/cpufreq/policy{0,1,...}` at startup +2. **Classification**: Sorts clusters by max frequency, assigns LITTLE/BIG/PRIME types +3. **PerfState Ladder**: Builds a progression of performance states using governors +4. **Application**: Sets governors and CPU affinity to guide the emulation thread + +**Governor-based approach (not frequency bounds):** + +Instead of manipulating `scaling_min_freq`/`scaling_max_freq`, we use governors: + +| Governor | Behavior | Use Case | +| ------------- | --------------------------------------- | ----------------------- | +| `powersave` | Runs at minimum frequency | Inactive clusters, idle | +| `schedutil` | Kernel dynamically scales based on load | Balanced workloads | +| `performance` | Runs at maximum frequency | Demanding workloads | + +**Why governors instead of frequency bounds:** + +- Works WITH the kernel's frequency scaling intelligence +- `schedutil` finds optimal frequency automatically +- Inactive clusters truly idle at `powersave` (power savings) +- No fighting between our algorithm and the kernel + +**PerfState Ladder Structure:** + +``` +Dual-cluster (LITTLE + BIG): + State 0: LITTLE powersave (active), BIG powersave ← lightest + State 1: LITTLE schedutil (active), BIG powersave + State 2: LITTLE performance (active), BIG powersave 
+ State 3: BIG powersave (active), LITTLE powersave + State 4: BIG schedutil (active), LITTLE powersave + State 5: BIG performance (active), LITTLE powersave ← heaviest + +Tri-cluster adds 3 more states for PRIME (6-8) +``` + +**CPU Affinity:** + +Each PerfState sets CPU affinity to guide the emulation thread to the active cluster: + +```c +// State 0-2: Run on LITTLE cores (mask 0x0F for CPUs 0-3) +// State 3-5: Run on BIG cores (mask 0xF0 for CPUs 4-7) +sched_setaffinity(0, sizeof(set), &set); +``` + +**Cluster Classification:** -Auto mode now uses **all available CPU frequencies** detected from the system via `scaling_available_frequencies` sysfs interface. +- `LITTLE`: First cluster (lowest max frequency) +- `BIG`: Middle clusters +- `PRIME`: Last cluster if single-core OR >10% faster than previous + +**Example SoC configurations:** + +| SoC | Clusters | PerfStates | +| -------------- | --------------------- | ----------------- | +| Allwinner A53 | 4×A53 (single) | 0 (granular mode) | +| Allwinner H700 | 4×A53 (single) | 0 (granular mode) | +| Allwinner A523 | 4×A55 + 4×A76 | 6 | +| SD865 | 4×A55 + 3×A77 + 1×A77 | 9 | + +### Granular Frequency Scaling (Single-Cluster Fallback) + +For single-cluster devices, auto mode uses **all available CPU frequencies** detected from the system via `scaling_available_frequencies` sysfs interface. 
**Key features:** - Runtime frequency detection via `PLAT_getAvailableCPUFrequencies()` -- Direct frequency setting via `PLAT_setCPUFrequency()` +- Direct frequency setting via `PLAT_setCPUFrequency()` with `userspace` governor - Linear performance scaling for intelligent frequency selection - Minimum frequency floor (400 MHz) filters out unusably slow frequencies - Automatic fallback to 3-level mode if detection fails @@ -629,14 +702,14 @@ Auto mode now uses **all available CPU frequencies** detected from the system vi - Performance scales linearly with frequency: `new_util = current_util × (current_freq / new_freq)` - Target 70% utilization after frequency changes - **Boost**: Uses linear prediction, no step limit (aggressive is safe) -- **Reduce**: Uses linear prediction, max 2 steps (conservative to avoid underruns) +- **Reduce**: Uses linear prediction, max 1 step (conservative to avoid underruns) - **Panic**: Boost by max 2 steps on underrun, 4s cooldown - **Startup**: Begin at max frequency during 5s grace period **Preset mapping for manual modes:** -- POWERSAVE: ~25% up from minimum frequency -- NORMAL: ~75% of max frequency +- POWERSAVE: ~55% of max frequency +- NORMAL: ~80% of max frequency - PERFORMANCE: max frequency **Example on miyoomini (6 frequencies detected: 400, 600, 800, 1000, 1100, 1600 kHz):** @@ -646,6 +719,25 @@ Old: POWERSAVE → NORMAL → PERFORMANCE (3 steps) New: 400 → 600 → 800 → 1000 → 1100 → 1600 (6 steps, granular) ``` +### Unified API + +Helper functions provide a consistent interface regardless of scaling mode: + +```c +// Get normalized performance level (0-100%) +int CPU_getPerformancePercent(const CPUState* state); +// - Topology: (current_state / max_state) * 100 +// - Granular: (current_index / max_index) * 100 +// - Fallback: level * 50 (0=0%, 1=50%, 2=100%) +// - Returns -1 if scaling disabled + +// Get mode name for logging/debugging +const char* CPU_getModeName(const CPUState* state); +// - Returns: "topology", "granular", 
"fallback", or "disabled" +``` + +These functions enable mode-agnostic debugging, logging, and potential future UI elements. + ### Frequency Band Analysis Comprehensive analysis of benchmark data from all platforms revealed optimization opportunities. @@ -659,6 +751,33 @@ Comprehensive analysis of benchmark data from all platforms revealed optimizatio **Analysis output:** See `scripts/analyze-cpu-bands.py` and `scripts/analyze-frequency-strategies.py` for detailed frequency analysis and strategy comparison. +### Audio Clock Mode Buffer Range + +In Audio Clock mode, the CPU scaler can't rely on utilization metrics (blocking audio makes frame timing unreliable). Instead, it uses **time-based probing** with buffer-guided timing. + +**Problem discovered (TG5050):** When display Hz differs significantly from game fps (e.g., 62.9Hz vs 60.1fps), and `SDL_GL_SetSwapInterval(0)` doesn't actually disable vsync: + +1. Buffer fills due to timing mismatch (display Hz > game fps) +2. Utilization appears artificially high (~90%) because blocking time inflates frame time +3. CPU never reduces because util never drops below threshold + +**Solution:** In Audio Clock mode, use time-based probing with buffer-guided timing: + +| Buffer Level | Wait Time | Rationale | +| ------------ | ---------- | ---------------------------------------------------- | +| < 40% | N/A | Don't reduce (need headroom for transition) | +| 40-75% | 8 windows | Normal timing - reduce after ~4 seconds of stability | +| > 75% | 16 windows | Pathological timing - wait ~8 seconds before probing | + +**Key insight:** We can't trust utilization metrics in AC mode, so we: + +1. Probe by reducing after a stability period (time-based, not util-based) +2. Rely on the panic path to boost back if reduction causes underruns +3. 
Use buffer level to guide timing - high buffer gets longer wait because it indicates + problematic timing where reductions are more likely to cause issues + +**Files changed:** `workspace/all/player/player.c` (all three CPU scaling modes) + ### Threshold Validation The 55% LOW threshold and 85% HIGH threshold were chosen empirically but are now validated: diff --git a/docs/changes.md b/docs/changes.md index 7c0e69f8..45c6a6e3 100644 --- a/docs/changes.md +++ b/docs/changes.md @@ -56,7 +56,7 @@ The main files `player.c` (peaked at ~7200 lines) and `launcher.c` (peaked at ~2 | player_core | Core AV info processing, aspect ratio | | player_menu | In-game menu system | | player_env | Libretro environment callback handlers | -| player_cpu | Auto CPU scaling algorithm | +| cpu (common) | Auto CPU scaling algorithm | | player_game | ZIP parsing, extension matching, M3U detection | | player_scaler | Video scaling geometry calculations | | player_archive | 7z/ZIP archive extraction | @@ -110,19 +110,15 @@ Extracted duplicated rendering code from platform files into shared modules, wit **Replaced basic audio handling with adaptive resampling and rate control.** - **Linear interpolation resampling** for smooth audio at any sample rate -- **Dual-timescale PI controller** for stable audio synchronization - - Smooths error signal (0.9) to filter jitter - - Quadratic integral weighting for faster convergence far from 50% - - Integral clamped to ±1% for persistent hardware drift correction -- **Dynamic rate control** that adjusts playback speed to prevent buffer underruns - - Parameters tuned for handheld timing variance (d=0.010, 5-frame buffer) - - `SND_newFrame()` updates integral once per frame to prevent over-accumulation +- **Proportional rate control** (Arntzen algorithm) for stable audio synchronization + - Adjusts resampling ratio based on buffer fill level + - Parameters tuned for handheld timing variance (d=1.2%, 5-frame buffer) +- **Runtime-adaptive sync system** that 
measures display Hz and selects appropriate mode + - Vsync mode when display Hz within 1% of game fps (rate control active) + - Audio-clock mode otherwise (blocking writes, no rate control needed) - **Audio buffer status callback** enabling cores to implement frameskip -- **Dual sync modes** with compile-time selection: - - **Vsync mode** (default): Frame pacing via Bresenham accumulator, non-blocking audio writes - - **Audioclock mode** (M17): Audio hardware clock drives timing, blocking writes when buffer full -The audioclock mode fixes audio stuttering on devices with unstable vsync (like M17). +The runtime-adaptive system automatically selects audio-clock mode for devices with unstable vsync, ensuring smooth audio on all hardware. ### Removed Legacy Audio Code diff --git a/docs/libretro-compliance.md b/docs/libretro-compliance.md index c5e6ed90..1949bf9f 100644 --- a/docs/libretro-compliance.md +++ b/docs/libretro-compliance.md @@ -403,14 +403,14 @@ This section compares LessUI's libretro implementation against [RetroArch](https ### Audio Callbacks (`retro_audio_sample_t`, `retro_audio_sample_batch_t`) -| Aspect | RetroArch | LessUI | Match | -| ------------------- | ------------------------------------------------------------- | --------------------------------------------------- | ----- | -| **Sample callback** | Accumulates to conversion buffer, flushes at chunk size | Passes directly to `SND_batchSamples` | ✅ | -| **Batch callback** | Processes in chunks up to `AUDIO_CHUNK_SIZE_NONBLOCKING >> 1` | Processes via ring buffer in `SND_batchSamples` | ✅ | -| **Return value** | Returns frames processed | Returns frames processed (or `frames` during FF) | ✅ | -| **Resampling** | Converts to float, applies DSP, resamples with rate control | Linear interpolation resampler with PI rate control | ✅ | -| **Rate control** | Monitors buffer space, adjusts ratio dynamically | Dual-timescale PI controller (Arntzen-based) | ✅ | -| **Fast-forward** | Adjusts ratio with EMA 
smoothing, clamps to 0.0625-16x range | Skips audio entirely during fast-forward | ⚠️ | +| Aspect | RetroArch | LessUI | Match | +| ------------------- | ------------------------------------------------------------- | -------------------------------------------------- | ----- | +| **Sample callback** | Accumulates to conversion buffer, flushes at chunk size | Passes directly to `SND_batchSamples` | ✅ | +| **Batch callback** | Processes in chunks up to `AUDIO_CHUNK_SIZE_NONBLOCKING >> 1` | Processes via ring buffer in `SND_batchSamples` | ✅ | +| **Return value** | Returns frames processed | Returns frames processed (or `frames` during FF) | ✅ | +| **Resampling** | Converts to float, applies DSP, resamples with rate control | Linear interpolation resampler with P rate control | ✅ | +| **Rate control** | Monitors buffer space, adjusts ratio dynamically | Proportional controller (Arntzen algorithm) | ✅ | +| **Fast-forward** | Adjusts ratio with EMA smoothing, clamps to 0.0625-16x range | Skips audio entirely during fast-forward | ⚠️ | **Note:** LessUI skips audio during fast-forward rather than pitch-adjusting like RetroArch. This is simpler and appropriate for handheld use where audio fidelity during FF is less important. 
diff --git a/docs/minarch-refactoring.md b/docs/minarch-refactoring.md index a9b6458d..2ef5580d 100644 --- a/docs/minarch-refactoring.md +++ b/docs/minarch-refactoring.md @@ -54,7 +54,7 @@ player.c (orchestration, main loop, SDL integration) │ ├── player_core.c ─ AV info processing, aspect ratio │ ├── player_env.c ─ Libretro environment callbacks │ ├── player_game.c ─ Game file handling, ZIP parsing - │ └── player_cpu.c ─ Auto CPU frequency scaling + │ └── cpu.c (common) ─ Auto CPU frequency scaling │ └── UI ├── player_menu.c ─ In-game menu system @@ -122,12 +122,12 @@ player.c (orchestration, main loop, SDL integration) ### Core Integration Layer -| Module | Lines | Responsibility | Tests | -| --------------- | ----- | -------------------------------------------------------- | ----- | -| `player_core.c` | ~150 | Build game info, calculate aspect ratio, process AV info | 23 | -| `player_env.c` | ~400 | Handle 30+ libretro environment callbacks | 51 | -| `player_game.c` | ~300 | Extension parsing, ZIP headers, M3U detection | 46 | -| `player_cpu.c` | ~350 | Auto CPU frequency scaling algorithm | 42 | +| Module | Lines | Responsibility | Tests | +| ---------------- | ----- | -------------------------------------------------------- | ----- | +| `player_core.c` | ~150 | Build game info, calculate aspect ratio, process AV info | 23 | +| `player_env.c` | ~400 | Handle 30+ libretro environment callbacks | 51 | +| `player_game.c` | ~300 | Extension parsing, ZIP headers, M3U detection | 46 | +| `cpu.c` (common) | ~350 | Auto CPU frequency scaling algorithm | 42 | **Key decisions:** @@ -340,7 +340,7 @@ workspace/all/player/ # Emulator frontend ├── player_core.h/c # Core AV processing ├── player_env.h/c # Environment callbacks ├── player_game.h/c # Game file handling -├── player_cpu.h/c # CPU scaling +├── (cpu.h/c in common/) # CPU scaling (moved to common/) ├── player_menu.h/c # Menu system └── player_menu_types.h # Menu types @@ -356,7 +356,7 @@ tests/unit/all/common/ # 
Unit tests ├── test_player_core.c ├── test_player_env.c ├── test_player_game.c -├── test_player_cpu.c +├── test_cpu.c # (moved to common/) └── test_player_menu.c ``` @@ -476,7 +476,7 @@ Renamed **405 function references** across 8 modules to follow consistent `Playe | player_state | 5 | `Player_readState` → `PlayerState_read` | | player_utils | 3 | `Player_getCoreName` → `PlayerUtils_getCoreName` | | player_zip | 2 | `Player_zipCopy` → `PlayerZip_copy` | -| player_cpu | (earlier) | `AutoCPU_update` → `PlayerCPU_update` | +| cpu (common) | (earlier) | `AutoCPU_update` → `CPU_update` | **Benefits:** @@ -490,7 +490,7 @@ Renamed **405 function references** across 8 modules to follow consistent `Playe - Added comprehensive naming convention guide to CLAUDE.md - Includes table of all module prefixes and example functions -- Documents type naming (`PlayerCPUState`) and constant naming (`PLAYER_CPU_MAX`) +- Documents type naming (`CPUState`) and constant naming (`CPU_MAX_*`) --- diff --git a/scripts/run-coverage.sh b/scripts/run-coverage.sh index d3a51600..cbb797ed 100755 --- a/scripts/run-coverage.sh +++ b/scripts/run-coverage.sh @@ -121,8 +121,8 @@ declare -a TEST_BUILDS=( "player_memory_test:tests/unit/all/player/test_player_memory.c workspace/all/player/player_memory.c tests/support/libretro_mocks.c tests/support/test_temp.c:-D_GNU_SOURCE" "player_state_test:tests/unit/all/player/test_player_state.c workspace/all/player/player_state.c workspace/all/player/player_paths.c workspace/all/common/utils.c workspace/all/common/nointro_parser.c workspace/all/common/log.c tests/support/libretro_mocks.c:-D_DEFAULT_SOURCE" "launcher_launcher_test:tests/unit/all/launcher/test_launcher_launcher.c workspace/all/launcher/launcher_launcher.c:" - "player_cpu_test:tests/unit/all/player/test_player_cpu.c workspace/all/player/player_cpu.c:" - "frame_pacer_test:tests/unit/all/player/test_frame_pacer.c workspace/all/player/frame_pacer.c:-lm" + "cpu_test:tests/unit/all/common/test_cpu.c 
workspace/all/common/cpu.c:" + "sync_manager_test:tests/unit/all/player/test_sync_manager.c workspace/all/player/sync_manager.c workspace/all/common/utils.c workspace/all/common/nointro_parser.c:-lm" "player_input_test:tests/unit/all/player/test_player_input.c workspace/all/player/player_input.c:" "launcher_state_test:tests/unit/all/launcher/test_launcher_state.c workspace/all/launcher/launcher_state.c workspace/all/common/stb_ds_impl.c:" "player_menu_test:tests/unit/all/player/test_player_menu.c workspace/all/player/player_context.c tests/support/menu_state_stub.c tests/support/sdl_fakes.c workspace/all/common/utils.c workspace/all/common/nointro_parser.c workspace/all/common/log.c:-I tests/support/SDL -I workspace/all/player/libretro-common/include -I tests/vendor/fff -D_DEFAULT_SOURCE" diff --git a/tests/README.md b/tests/README.md index 3bfa06a8..4aff3d0a 100644 --- a/tests/README.md +++ b/tests/README.md @@ -58,7 +58,7 @@ tests/ │ ├── test_player_utils.c # Player utilities │ ├── test_player_config.c # Config path generation │ ├── test_player_options.c # Option management -│ ├── test_player_cpu.c # CPU scaling algorithm +│ ├── (see common/test_cpu.c) # CPU scaling (now in common/) │ ├── test_player_menu.c # Menu system │ ├── test_player_env.c # Environment callbacks │ └── ... 
# Other player module tests diff --git a/tests/unit/all/common/test_cpu.c b/tests/unit/all/common/test_cpu.c new file mode 100644 index 00000000..c4d01a9b --- /dev/null +++ b/tests/unit/all/common/test_cpu.c @@ -0,0 +1,1753 @@ +/** + * test_cpu.c - Unit tests for auto CPU scaling + * + * Tests the CPU frequency scaling algorithm including: + * - Frequency detection and preset calculation + * - Nearest frequency index lookup + * - Utilization-based scaling decisions + * - Panic path (underrun handling) + * - Granular vs fallback modes + * - Frame timing percentile calculation + * - Single-frequency/scaling-disabled scenarios (M17-like devices) + * - Grace period after frequency changes (prevents cascade panics) + * - Stability decay (earns back blocked frequencies over time) + * - Step-by-1 behavior for predictable scaling + * + * 100 tests organized by functionality. + */ + +#include "unity.h" +#include "cpu.h" + +#include <string.h> + +/////////////////////////////// +// Stubs for API functions called by cpu.c +// These allow unit testing without linking api.c +/////////////////////////////// + +// Track calls for verification in tests +static int stub_governor_calls = 0; +static int stub_last_policy_id = -1; +static char stub_last_governor[32] = {0}; +static int stub_affinity_calls = 0; +static int stub_last_affinity_mask = 0; + +int PWR_setCPUGovernor(int policy_id, const char* governor) { + stub_governor_calls++; + stub_last_policy_id = policy_id; + if (governor) { + strncpy(stub_last_governor, governor, sizeof(stub_last_governor) - 1); + stub_last_governor[sizeof(stub_last_governor) - 1] = '\0'; + } + return 0; // Success +} + +int PWR_setThreadAffinity(int cpu_mask) { + stub_affinity_calls++; + stub_last_affinity_mask = cpu_mask; + return 0; // Success +} + +static void reset_stubs(void) { + stub_governor_calls = 0; + stub_last_policy_id = -1; + stub_last_governor[0] = '\0'; + stub_affinity_calls = 0; + stub_last_affinity_mask = 0; +} + +// Test state and config +static 
CPUState state; +static CPUConfig config; + +// Forward declaration for helper function (defined later with topology tests) +static void setup_dual_cluster_topology(CPUState* s); + +/////////////////////////////// +// Test Setup/Teardown +/////////////////////////////// + +void setUp(void) { + CPU_initState(&state); + CPU_initConfig(&config); + reset_stubs(); +} + +void tearDown(void) { + // Nothing to clean up +} + +/////////////////////////////// +// Config Initialization Tests +/////////////////////////////// + +void test_initConfig_sets_defaults(void) { + CPUConfig c; + CPU_initConfig(&c); + + // Verify values are sensible (not testing exact defaults) + TEST_ASSERT_GREATER_THAN(0, c.window_frames); + TEST_ASSERT_GREATER_THAN(c.util_low, c.util_high); + TEST_ASSERT_GREATER_THAN(0, c.util_low); + TEST_ASSERT_LESS_THAN(100, c.util_high); + TEST_ASSERT_GREATER_THAN(0, c.boost_windows); + TEST_ASSERT_GREATER_THAN(0, c.reduce_windows); + TEST_ASSERT_GREATER_THAN(0, c.startup_grace); + TEST_ASSERT_GREATER_OR_EQUAL(0, c.min_freq_khz); // Can be 0 (panic failsafe handles low freqs) + TEST_ASSERT_GREATER_THAN(0, c.target_util); + TEST_ASSERT_LESS_OR_EQUAL(100, c.target_util); + TEST_ASSERT_GREATER_THAN(0, c.max_step_down); + TEST_ASSERT_GREATER_THAN(0, c.panic_step_up); + TEST_ASSERT_GREATER_THAN(0, c.min_buffer_for_reduce); // Must have a minimum buffer level + TEST_ASSERT_LESS_OR_EQUAL(100, c.min_buffer_for_reduce); +} + +void test_initState_zeros_state(void) { + CPUState s; + memset(&s, 0xFF, sizeof(s)); // Fill with garbage + CPU_initState(&s); + + TEST_ASSERT_EQUAL(0, s.freq_count); + TEST_ASSERT_EQUAL(0, s.target_index); + TEST_ASSERT_EQUAL(0, s.use_granular); + TEST_ASSERT_EQUAL(0, s.frame_count); + TEST_ASSERT_EQUAL(16667, s.frame_budget_us); // 60fps default +} + +/////////////////////////////// +// findNearestIndex Tests +/////////////////////////////// + +void test_findNearestIndex_empty_array(void) { + int result = CPU_findNearestIndex(NULL, 0, 1000000); + 
TEST_ASSERT_EQUAL(0, result); +} + +void test_findNearestIndex_exact_match(void) { + int freqs[] = {400000, 600000, 800000, 1000000}; + int result = CPU_findNearestIndex(freqs, 4, 800000); + TEST_ASSERT_EQUAL(2, result); +} + +void test_findNearestIndex_nearest_lower(void) { + int freqs[] = {400000, 600000, 800000, 1000000}; + // 750000 is closer to 800000 than 600000 + int result = CPU_findNearestIndex(freqs, 4, 750000); + TEST_ASSERT_EQUAL(2, result); +} + +void test_findNearestIndex_nearest_higher(void) { + int freqs[] = {400000, 600000, 800000, 1000000}; + // 650000 is closer to 600000 than 800000 + int result = CPU_findNearestIndex(freqs, 4, 650000); + TEST_ASSERT_EQUAL(1, result); +} + +void test_findNearestIndex_below_min(void) { + int freqs[] = {400000, 600000, 800000}; + int result = CPU_findNearestIndex(freqs, 3, 100000); + TEST_ASSERT_EQUAL(0, result); +} + +void test_findNearestIndex_above_max(void) { + int freqs[] = {400000, 600000, 800000}; + int result = CPU_findNearestIndex(freqs, 3, 2000000); + TEST_ASSERT_EQUAL(2, result); +} + +/////////////////////////////// +// detectFrequencies Tests +/////////////////////////////// + +void test_detectFrequencies_filters_below_minimum(void) { + // Set explicit min_freq_khz to test filtering behavior + config.min_freq_khz = 400000; + int raw[] = {100000, 200000, 300000, 400000, 600000, 800000}; + CPU_detectFrequencies(&state, &config, raw, 6); + + // Should only keep 400000, 600000, 800000 (at or above min_freq_khz) + TEST_ASSERT_EQUAL(3, state.freq_count); + TEST_ASSERT_EQUAL(400000, state.frequencies[0]); + TEST_ASSERT_EQUAL(600000, state.frequencies[1]); + TEST_ASSERT_EQUAL(800000, state.frequencies[2]); +} + +void test_detectFrequencies_enables_granular_mode(void) { + int raw[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, raw, 4); + + TEST_ASSERT_EQUAL(1, state.use_granular); + TEST_ASSERT_EQUAL(1, state.frequencies_detected); +} + +void 
test_detectFrequencies_disables_scaling_with_one_freq(void) { + int raw[] = {800000}; // Only one frequency + CPU_detectFrequencies(&state, &config, raw, 1); + + TEST_ASSERT_EQUAL(1, state.scaling_disabled); // Scaling disabled + TEST_ASSERT_EQUAL(0, state.use_granular); + TEST_ASSERT_EQUAL(1, state.freq_count); + TEST_ASSERT_EQUAL(1, state.frequencies_detected); +} + +void test_detectFrequencies_disables_scaling_with_zero_freqs(void) { + CPU_detectFrequencies(&state, &config, NULL, 0); + + TEST_ASSERT_EQUAL(1, state.scaling_disabled); // Scaling disabled + TEST_ASSERT_EQUAL(0, state.use_granular); + TEST_ASSERT_EQUAL(0, state.freq_count); + TEST_ASSERT_EQUAL(1, state.frequencies_detected); +} + +void test_detectFrequencies_enables_scaling_with_multiple_freqs(void) { + int raw[] = {400000, 600000, 800000}; + CPU_detectFrequencies(&state, &config, raw, 3); + + TEST_ASSERT_EQUAL(0, state.scaling_disabled); // Scaling enabled + TEST_ASSERT_EQUAL(1, state.use_granular); + TEST_ASSERT_EQUAL(3, state.freq_count); +} + +void test_detectFrequencies_calculates_preset_indices(void) { + // Frequencies: 400, 600, 800, 1000 MHz + // Max = 1000000 + // POWERSAVE (55%): 550000 -> nearest is 600000 (index 1) + // NORMAL (80%): 800000 -> exact match (index 2) + // PERFORMANCE (100%): 1000000 (index 3) + int raw[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, raw, 4); + + TEST_ASSERT_EQUAL(1, state.preset_indices[CPU_LEVEL_POWERSAVE]); + TEST_ASSERT_EQUAL(2, state.preset_indices[CPU_LEVEL_NORMAL]); + TEST_ASSERT_EQUAL(3, state.preset_indices[CPU_LEVEL_PERFORMANCE]); +} + +/////////////////////////////// +// reset Tests +/////////////////////////////// + +void test_reset_clears_monitoring_state(void) { + state.frame_count = 100; + state.high_util_windows = 5; + state.low_util_windows = 3; + state.panic_cooldown = 8; + + CPU_reset(&state, &config, 60.0, 0); + + TEST_ASSERT_EQUAL(0, state.frame_count); + TEST_ASSERT_EQUAL(0, state.high_util_windows); + 
TEST_ASSERT_EQUAL(0, state.low_util_windows); + TEST_ASSERT_EQUAL(0, state.panic_cooldown); + TEST_ASSERT_EQUAL(0, state.startup_frames); +} + +void test_reset_calculates_frame_budget_60fps(void) { + CPU_reset(&state, &config, 60.0, 0); + TEST_ASSERT_EQUAL(16666, state.frame_budget_us); // 1000000/60 +} + +void test_reset_calculates_frame_budget_50fps(void) { + CPU_reset(&state, &config, 50.0, 0); + TEST_ASSERT_EQUAL(20000, state.frame_budget_us); // 1000000/50 +} + +void test_reset_defaults_to_60fps_on_zero(void) { + CPU_reset(&state, &config, 0.0, 0); + TEST_ASSERT_EQUAL(16667, state.frame_budget_us); +} + +void test_reset_stores_initial_underruns(void) { + CPU_reset(&state, &config, 60.0, 42); + TEST_ASSERT_EQUAL(42, state.last_underrun); +} + +/////////////////////////////// +// recordFrameTime Tests +/////////////////////////////// + +void test_recordFrameTime_stores_in_ring_buffer(void) { + CPU_recordFrameTime(&state, 15000); + CPU_recordFrameTime(&state, 16000); + CPU_recordFrameTime(&state, 17000); + + TEST_ASSERT_EQUAL(15000, state.frame_times[0]); + TEST_ASSERT_EQUAL(16000, state.frame_times[1]); + TEST_ASSERT_EQUAL(17000, state.frame_times[2]); + TEST_ASSERT_EQUAL(3, state.frame_time_index); +} + +void test_recordFrameTime_wraps_at_buffer_size(void) { + // Fill buffer + for (int i = 0; i < CPU_FRAME_BUFFER_SIZE; i++) { + CPU_recordFrameTime(&state, 10000 + i); + } + // Add one more - should wrap to index 0 + CPU_recordFrameTime(&state, 99999); + + TEST_ASSERT_EQUAL(99999, state.frame_times[0]); + TEST_ASSERT_EQUAL(CPU_FRAME_BUFFER_SIZE + 1, state.frame_time_index); +} + +/////////////////////////////// +// percentile90 Tests +/////////////////////////////// + +void test_percentile90_empty_returns_zero(void) { + uint64_t result = CPU_percentile90(NULL, 0); + TEST_ASSERT_EQUAL(0, result); +} + +void test_percentile90_single_value(void) { + uint64_t times[] = {12345}; + uint64_t result = CPU_percentile90(times, 1); + TEST_ASSERT_EQUAL(12345, result); +} + 
+void test_percentile90_ten_values(void) { + // Values 1-10, 90th percentile index = (10 * 90) / 100 = 9, sorted[9] = 10 + uint64_t times[] = {5, 3, 8, 1, 9, 2, 7, 4, 10, 6}; + uint64_t result = CPU_percentile90(times, 10); + TEST_ASSERT_EQUAL(10, result); +} + +void test_percentile90_ignores_outliers(void) { + // 9 normal values + 1 huge outlier + // Sorted: 10,11,12,13,14,15,16,17,18,1000000 + // 90% of 10 = 9, so index 9 = 1000000 + // Despite the test name, with exactly 10 samples the p90 index lands on the spike, so it is NOT filtered out here + uint64_t times[] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 1000000}; + uint64_t result = CPU_percentile90(times, 10); + // Index 9 (90%) is the outlier + TEST_ASSERT_EQUAL(1000000, result); +} + +/////////////////////////////// +// predictFrequency Tests +/////////////////////////////// + +void test_predictFrequency_boost_case(void) { + // At 1000MHz with 90% util, want 70% util + // new_freq = 1000 * 90 / 70 = 1285 + int result = CPU_predictFrequency(1000000, 90, 70); + TEST_ASSERT_EQUAL(1285714, result); +} + +void test_predictFrequency_reduce_case(void) { + // At 1000MHz with 40% util, want 70% util + // new_freq = 1000 * 40 / 70 = 571 + int result = CPU_predictFrequency(1000000, 40, 70); + TEST_ASSERT_EQUAL(571428, result); +} + +void test_predictFrequency_zero_target_returns_current(void) { + int result = CPU_predictFrequency(1000000, 50, 0); + TEST_ASSERT_EQUAL(1000000, result); +} + +/////////////////////////////// +// getPresetPercentage Tests +/////////////////////////////// + +void test_getPresetPercentage_powersave(void) { + TEST_ASSERT_EQUAL(55, CPU_getPresetPercentage(CPU_LEVEL_POWERSAVE)); +} + +void test_getPresetPercentage_normal(void) { + TEST_ASSERT_EQUAL(80, CPU_getPresetPercentage(CPU_LEVEL_NORMAL)); +} + +void test_getPresetPercentage_performance(void) { + TEST_ASSERT_EQUAL(100, CPU_getPresetPercentage(CPU_LEVEL_PERFORMANCE)); +} + +/////////////////////////////// +// Unified Performance Level Tests +/////////////////////////////// + +void 
test_getPerformancePercent_topology_mode(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + // At state 0 of 5 (0%) + state.current_state = 0; + TEST_ASSERT_EQUAL(0, CPU_getPerformancePercent(&state)); + + // At state 3 of 5 (60%) + state.current_state = 3; + TEST_ASSERT_EQUAL(60, CPU_getPerformancePercent(&state)); + + // At state 5 of 5 (100%) + state.current_state = 5; + TEST_ASSERT_EQUAL(100, CPU_getPerformancePercent(&state)); +} + +void test_getPerformancePercent_granular_mode(void) { + int raw[] = {600000, 800000, 1000000, 1200000, 1400000}; + CPU_detectFrequencies(&state, &config, raw, 5); + + // At index 0 of 4 (0%) + state.current_index = 0; + TEST_ASSERT_EQUAL(0, CPU_getPerformancePercent(&state)); + + // At index 2 of 4 (50%) + state.current_index = 2; + TEST_ASSERT_EQUAL(50, CPU_getPerformancePercent(&state)); + + // At index 4 of 4 (100%) + state.current_index = 4; + TEST_ASSERT_EQUAL(100, CPU_getPerformancePercent(&state)); +} + +void test_getPerformancePercent_fallback_mode(void) { + state.use_topology = 0; + state.use_granular = 0; + state.scaling_disabled = 0; + + state.current_level = 0; + TEST_ASSERT_EQUAL(0, CPU_getPerformancePercent(&state)); + + state.current_level = 1; + TEST_ASSERT_EQUAL(50, CPU_getPerformancePercent(&state)); + + state.current_level = 2; + TEST_ASSERT_EQUAL(100, CPU_getPerformancePercent(&state)); +} + +void test_getPerformancePercent_disabled_returns_negative(void) { + state.scaling_disabled = 1; + state.use_topology = 0; + TEST_ASSERT_EQUAL(-1, CPU_getPerformancePercent(&state)); +} + +void test_getPerformancePercent_null_returns_negative(void) { + TEST_ASSERT_EQUAL(-1, CPU_getPerformancePercent(NULL)); +} + +void test_getModeName_topology(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + TEST_ASSERT_EQUAL_STRING("topology", CPU_getModeName(&state)); +} + +void test_getModeName_granular(void) { + int raw[] = {600000, 800000, 1000000}; + 
CPU_detectFrequencies(&state, &config, raw, 3); + TEST_ASSERT_EQUAL_STRING("granular", CPU_getModeName(&state)); +} + +void test_getModeName_fallback(void) { + state.use_topology = 0; + state.use_granular = 0; + state.scaling_disabled = 0; + TEST_ASSERT_EQUAL_STRING("fallback", CPU_getModeName(&state)); +} + +void test_getModeName_disabled(void) { + state.scaling_disabled = 1; + state.use_topology = 0; + TEST_ASSERT_EQUAL_STRING("disabled", CPU_getModeName(&state)); +} + +void test_getModeName_null(void) { + TEST_ASSERT_EQUAL_STRING("disabled", CPU_getModeName(NULL)); +} + +/////////////////////////////// +// update Tests - Skip Conditions +/////////////////////////////// + +void test_update_skips_during_fast_forward(void) { + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, true, false, 0, 100, &result); + + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, result.decision); +} + +void test_update_skips_during_menu(void) { + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, true, 0, 100, &result); + + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); +} + +void test_update_skips_during_grace_period(void) { + config.startup_grace = 300; + state.startup_frames = 100; // Not yet at grace period + + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); + TEST_ASSERT_EQUAL(101, state.startup_frames); // Incremented +} + +void test_update_skips_when_scaling_disabled(void) { + // Simulate M17-like single-frequency device + int raw[] = {1200000}; // Only one frequency (like M17) + CPU_detectFrequencies(&state, &config, raw, 1); + + TEST_ASSERT_EQUAL(1, state.scaling_disabled); // Pre-condition: scaling disabled + + // Even with valid state and frame times, should skip + state.startup_frames = config.startup_grace; + state.frame_count = config.window_frames - 1; + state.frame_budget_us = 16667; + for (int i = 0; 
i < 30; i++) { + CPU_recordFrameTime(&state, 15000); // High utilization + } + + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, &result); + + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, result.decision); +} + +void test_update_skips_when_no_frequencies(void) { + // Edge case: no frequencies at all + CPU_detectFrequencies(&state, &config, NULL, 0); + + TEST_ASSERT_EQUAL(1, state.scaling_disabled); + + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, &result); + + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); +} + +/////////////////////////////// +// update Tests - Panic Path +/////////////////////////////// + +void test_update_panic_on_underrun_granular(void) { + // Setup: granular mode, not at max + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; // Past grace + state.target_index = 1; // At 600MHz + state.last_underrun = 0; + + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, &result); + + TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(2, state.target_index); // Boosted by panic_step_up=1 (1+1=2) + TEST_ASSERT_EQUAL(8, state.panic_cooldown); +} + +void test_update_panic_on_underrun_fallback(void) { + // Setup: fallback mode + state.use_granular = 0; + state.startup_frames = config.startup_grace; + state.target_level = 0; // At powersave + state.last_underrun = 0; + + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, &result); + + TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(1, state.target_level); // Boosted by panic_step_up=1 (0+1=1) +} + +void test_update_no_panic_when_at_max(void) { + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + 
state.startup_frames = config.startup_grace; + state.target_index = 3; // Already at max + state.last_underrun = 0; + + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Should not panic, just update underrun tracking + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(3, state.target_index); // Still at max +} + +/////////////////////////////// +// update Tests - Window Completion +/////////////////////////////// + +void test_update_waits_for_full_window(void) { + int freqs[] = {400000, 600000, 800000}; + CPU_detectFrequencies(&state, &config, freqs, 3); + state.startup_frames = config.startup_grace; + state.frame_count = 10; // Not yet at window_frames (30) + + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(11, state.frame_count); // Incremented +} + +/////////////////////////////// +// update Tests - Boost/Reduce +/////////////////////////////// + +void test_update_boost_on_high_util_granular(void) { + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; // 600MHz + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; // About to trigger + + // Add frame times that result in high utilization (~90%) + state.frame_budget_us = 16667; // 60fps + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 15000); // 90% of 16667 + } + + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, &result); + + TEST_ASSERT_EQUAL(CPU_DECISION_BOOST, decision); + TEST_ASSERT_TRUE(state.target_index > 1); // Moved up +} + +void test_update_reduce_on_low_util_granular(void) { + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = 
config.startup_grace; + state.target_index = 3; // 1000MHz + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + state.panic_cooldown = 0; + + // Add frame times that result in low utilization (~40%) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // 40% of 16667 + } + + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, &result); + + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); + TEST_ASSERT_TRUE(state.target_index < 3); // Moved down +} + +void test_update_no_reduce_during_cooldown(void) { + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows; // Would trigger reduce + state.panic_cooldown = 5; // But in cooldown! + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // Low util + } + + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Should NOT reduce due to cooldown + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(3, state.target_index); + TEST_ASSERT_EQUAL(4, state.panic_cooldown); // Decremented +} + +void test_update_boost_fallback_mode(void) { + state.use_granular = 0; + state.startup_frames = config.startup_grace; + state.target_level = 0; + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 15000); + } + + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_BOOST, decision); + TEST_ASSERT_EQUAL(1, state.target_level); +} + +void test_update_reduce_fallback_mode(void) { + 
state.use_granular = 0; + state.startup_frames = config.startup_grace; + state.target_level = 2; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); + } + + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); + TEST_ASSERT_EQUAL(1, state.target_level); +} + +void test_update_sweet_spot_resets_counters(void) { + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 2; + state.frame_count = config.window_frames - 1; + state.high_util_windows = 1; + state.low_util_windows = 1; + + // Add frame times that result in sweet spot utilization (~70%) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 11667); // ~70% of 16667 + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Counters should be reset + TEST_ASSERT_EQUAL(0, state.high_util_windows); + TEST_ASSERT_EQUAL(0, state.low_util_windows); +} + +void test_update_reduce_blocked_by_low_buffer(void) { + // Setup: granular mode at high frequency, ready to reduce + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.current_index = 3; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Add low utilization frame times (would normally trigger reduce) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // Low util + } + + // Pass buffer_fill below threshold (default is 40) + unsigned low_buffer = config.min_buffer_for_reduce - 1; + CPUDecision decision = CPU_update(&state, &config, 
false, false, 0, low_buffer, NULL); + + // Should NOT reduce because buffer is too low + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(3, state.target_index); // Still at max frequency + + // low_util_windows should have incremented but no reduce happened + TEST_ASSERT_EQUAL(config.reduce_windows, state.low_util_windows); +} + +void test_update_reduce_allowed_with_healthy_buffer(void) { + // Same setup as above + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.current_index = 3; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // Low util + } + + // Pass buffer_fill at threshold (default is 40) + unsigned healthy_buffer = config.min_buffer_for_reduce; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, healthy_buffer, NULL); + + // Should reduce because buffer is healthy + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); + TEST_ASSERT_EQUAL(2, state.target_index); // Reduced from 3 to 2 +} + +void test_update_reduce_no_grace_period(void) { + // Setup: ready to reduce + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.current_index = 3; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // Low util + } + + // Healthy buffer so reduce should happen + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); + + // Verify NO grace period was set (unlike boost which sets grace) + 
TEST_ASSERT_EQUAL(0, state.panic_grace); +} + +/////////////////////////////// +// Topology Tests +/////////////////////////////// + +void test_initTopology_zeros_topology(void) { + CPUTopology t; + memset(&t, 0xFF, sizeof(t)); // Fill with garbage + CPU_initTopology(&t); + + TEST_ASSERT_EQUAL(0, t.cluster_count); + TEST_ASSERT_EQUAL(0, t.state_count); + TEST_ASSERT_EQUAL(0, t.topology_detected); +} + +void test_parseCPUList_single_cpu(void) { + int count = 0; + int mask = CPU_parseCPUList("0", &count); + TEST_ASSERT_EQUAL(1, count); + TEST_ASSERT_EQUAL(0x1, mask); // CPU 0 +} + +void test_parseCPUList_range(void) { + int count = 0; + int mask = CPU_parseCPUList("0-3", &count); + TEST_ASSERT_EQUAL(4, count); + TEST_ASSERT_EQUAL(0xF, mask); // CPUs 0-3 +} + +void test_parseCPUList_mixed(void) { + int count = 0; + int mask = CPU_parseCPUList("0-3,7", &count); + TEST_ASSERT_EQUAL(5, count); + TEST_ASSERT_EQUAL(0x8F, mask); // CPUs 0-3 and 7 +} + +void test_parseCPUList_single_high_cpu(void) { + int count = 0; + int mask = CPU_parseCPUList("7", &count); + TEST_ASSERT_EQUAL(1, count); + TEST_ASSERT_EQUAL(0x80, mask); // CPU 7 +} + +void test_parseCPUList_empty_string(void) { + int count = 0; + int mask = CPU_parseCPUList("", &count); + TEST_ASSERT_EQUAL(0, count); + TEST_ASSERT_EQUAL(0, mask); +} + +void test_classifyClusters_single_is_little(void) { + CPUCluster clusters[1]; + clusters[0].max_khz = 1800000; + clusters[0].cpu_count = 4; + + CPU_classifyClusters(clusters, 1); + + TEST_ASSERT_EQUAL(CPU_CLUSTER_LITTLE, clusters[0].type); +} + +void test_classifyClusters_dual_little_big(void) { + CPUCluster clusters[2]; + // Sorted by max_khz ascending + // Use frequencies with <10% gap to get BIG (not PRIME) classification + clusters[0].max_khz = 1800000; + clusters[0].cpu_count = 4; + clusters[1].max_khz = 1900000; // ~5.5% higher, should be BIG + clusters[1].cpu_count = 4; + + CPU_classifyClusters(clusters, 2); + + TEST_ASSERT_EQUAL(CPU_CLUSTER_LITTLE, clusters[0].type); 
+ TEST_ASSERT_EQUAL(CPU_CLUSTER_BIG, clusters[1].type); +} + +void test_classifyClusters_tri_little_big_prime(void) { + CPUCluster clusters[3]; + // SD865-like: Silver, Gold, Prime + clusters[0].max_khz = 1800000; + clusters[0].cpu_count = 4; + clusters[1].max_khz = 2420000; + clusters[1].cpu_count = 3; + clusters[2].max_khz = 2840000; + clusters[2].cpu_count = 1; // Prime is single-core + + CPU_classifyClusters(clusters, 3); + + TEST_ASSERT_EQUAL(CPU_CLUSTER_LITTLE, clusters[0].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_BIG, clusters[1].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_PRIME, clusters[2].type); +} + +void test_classifyClusters_dual_prime_by_frequency_gap(void) { + CPUCluster clusters[2]; + // >10% frequency gap makes highest PRIME even with multiple cores + clusters[0].max_khz = 1800000; + clusters[0].cpu_count = 4; + clusters[1].max_khz = 2200000; // >10% higher + clusters[1].cpu_count = 4; + + CPU_classifyClusters(clusters, 2); + + TEST_ASSERT_EQUAL(CPU_CLUSTER_LITTLE, clusters[0].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_PRIME, clusters[1].type); +} + +void test_pickRepresentativeFreqs_single_freq(void) { + CPUCluster c; + c.frequencies[0] = 1800000; + c.freq_count = 1; + + int low, mid, high; + CPU_pickRepresentativeFreqs(&c, &low, &mid, &high); + + TEST_ASSERT_EQUAL(1800000, low); + TEST_ASSERT_EQUAL(1800000, mid); + TEST_ASSERT_EQUAL(1800000, high); +} + +void test_pickRepresentativeFreqs_multiple_freqs(void) { + CPUCluster c; + c.frequencies[0] = 400000; + c.frequencies[1] = 800000; + c.frequencies[2] = 1200000; + c.frequencies[3] = 1600000; + c.frequencies[4] = 2000000; + c.freq_count = 5; + + int low, mid, high; + CPU_pickRepresentativeFreqs(&c, &low, &mid, &high); + + TEST_ASSERT_EQUAL(400000, low); + TEST_ASSERT_EQUAL(1200000, mid); // freqs[5/2] = freqs[2] + TEST_ASSERT_EQUAL(2000000, high); +} + +// Helper to set up a dual-cluster topology +static void setup_dual_cluster_topology(CPUState* s) { + s->topology.cluster_count = 2; + 
s->topology.topology_detected = 1; // Mark as detected so buildPerfStates works + + // LITTLE cluster (policy 0, CPUs 0-3) + s->topology.clusters[0].policy_id = 0; + s->topology.clusters[0].cpu_mask = 0x0F; + s->topology.clusters[0].cpu_count = 4; + s->topology.clusters[0].frequencies[0] = 600000; + s->topology.clusters[0].frequencies[1] = 1200000; + s->topology.clusters[0].frequencies[2] = 1800000; + s->topology.clusters[0].freq_count = 3; + s->topology.clusters[0].min_khz = 600000; + s->topology.clusters[0].max_khz = 1800000; + s->topology.clusters[0].type = CPU_CLUSTER_LITTLE; + + // BIG cluster (policy 4, CPUs 4-7) + s->topology.clusters[1].policy_id = 4; + s->topology.clusters[1].cpu_mask = 0xF0; + s->topology.clusters[1].cpu_count = 4; + s->topology.clusters[1].frequencies[0] = 800000; + s->topology.clusters[1].frequencies[1] = 1600000; + s->topology.clusters[1].frequencies[2] = 2400000; + s->topology.clusters[1].freq_count = 3; + s->topology.clusters[1].min_khz = 800000; + s->topology.clusters[1].max_khz = 2400000; + s->topology.clusters[1].type = CPU_CLUSTER_BIG; +} + +void test_buildPerfStates_dual_cluster_creates_six_states(void) { + setup_dual_cluster_topology(&state); + + CPU_buildPerfStates(&state, &config); + + TEST_ASSERT_EQUAL(6, state.topology.state_count); + TEST_ASSERT_EQUAL(1, state.use_topology); +} + +void test_buildPerfStates_dual_cluster_state_progression(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + // State 0: LITTLE powersave, BIG powersave, affinity = LITTLE + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[0].cluster_governor[0]); + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[0].cluster_governor[1]); + TEST_ASSERT_EQUAL(0, state.topology.states[0].active_cluster_idx); + TEST_ASSERT_EQUAL(0x0F, state.topology.states[0].cpu_affinity_mask); // LITTLE CPUs + + // State 1: LITTLE schedutil, BIG powersave + TEST_ASSERT_EQUAL(CPU_GOV_SCHEDUTIL, 
state.topology.states[1].cluster_governor[0]); + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[1].cluster_governor[1]); + + // State 2: LITTLE performance, BIG powersave + TEST_ASSERT_EQUAL(CPU_GOV_PERFORMANCE, state.topology.states[2].cluster_governor[0]); + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[2].cluster_governor[1]); + + // State 3: BIG powersave, LITTLE powersave, affinity = BIG + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[3].cluster_governor[0]); + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[3].cluster_governor[1]); + TEST_ASSERT_EQUAL(1, state.topology.states[3].active_cluster_idx); + TEST_ASSERT_EQUAL(0xF0, state.topology.states[3].cpu_affinity_mask); // BIG CPUs + + // State 5: BIG performance (highest state) + TEST_ASSERT_EQUAL(CPU_GOV_PERFORMANCE, state.topology.states[5].cluster_governor[1]); +} + +void test_buildPerfStates_single_cluster_skips_topology(void) { + state.topology.cluster_count = 1; + + CPU_buildPerfStates(&state, &config); + + TEST_ASSERT_EQUAL(0, state.topology.state_count); + TEST_ASSERT_EQUAL(0, state.use_topology); +} + +void test_applyPerfState_calls_governors(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.target_state = 0; + state.current_state = -1; + + int result = CPU_applyPerfState(&state); + + TEST_ASSERT_EQUAL(0, result); + // Should call governor for each cluster (2 clusters = 2 calls) + TEST_ASSERT_EQUAL(2, stub_governor_calls); +} + +void test_applyPerfState_does_not_set_affinity_directly(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.target_state = 0; + state.current_state = -1; + state.pending_affinity = 0; + + CPU_applyPerfState(&state); + + // applyPerfState should NOT set pending_affinity or call PWR_setThreadAffinity + // The caller is responsible for setting pending_affinity under mutex + TEST_ASSERT_EQUAL(0, state.pending_affinity); + 
TEST_ASSERT_EQUAL(0, stub_affinity_calls); +} + +void test_applyPerfState_updates_current_state(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.target_state = 3; + state.current_state = -1; + + CPU_applyPerfState(&state); + + TEST_ASSERT_EQUAL(3, state.current_state); +} + +void test_update_topology_boost_increments_state(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 2; + state.current_state = 2; + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + + // High utilization frames (>85%) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 15000); // ~90% + } + + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_BOOST, decision); + TEST_ASSERT_EQUAL(3, state.target_state); +} + +void test_update_topology_reduce_decrements_state(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 4; + state.current_state = 4; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Low utilization frames (<55%) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // ~40% + } + + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); + TEST_ASSERT_LESS_THAN(4, state.target_state); +} + +void test_update_topology_reduce_blocked_by_low_buffer(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 4; + state.current_state = 4; + state.frame_count = config.window_frames - 1; + 
state.low_util_windows = config.reduce_windows - 1; + + // Low utilization frames (<55%) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // ~40% + } + + // Buffer below threshold + unsigned low_buffer = config.min_buffer_for_reduce - 1; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, low_buffer, NULL); + + // Should NOT reduce because buffer is too low + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(4, state.target_state); // Still at original state +} + +void test_update_topology_reduce_allowed_with_healthy_buffer(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 4; + state.current_state = 4; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Low utilization frames (<55%) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // ~40% + } + + // Buffer at threshold + unsigned healthy_buffer = config.min_buffer_for_reduce; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, healthy_buffer, NULL); + + // Should reduce because buffer is healthy + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); + TEST_ASSERT_LESS_THAN(4, state.target_state); +} + +void test_update_topology_panic_jumps_states(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 1; + state.current_state = 1; + state.last_underrun = 0; + + // Underrun detected + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_GREATER_THAN(1, state.target_state); +} + +void test_update_topology_no_boost_at_max_state(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, 
&config); + + state.startup_frames = config.startup_grace; + state.target_state = 5; // Already at max + state.current_state = 5; + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + + // High utilization frames + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 15000); + } + + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(5, state.target_state); +} + +void test_update_topology_no_reduce_at_min_state(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 0; // Already at min + state.current_state = 0; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Low utilization frames + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); + } + + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(0, state.target_state); +} + +/////////////////////////////// +// Grace Period Tests +/////////////////////////////// + +void test_panic_grace_ignores_underruns(void) { + // Setup: granular mode with grace period active + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 30; // Grace period active + + // Underrun occurs during grace period + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Should NOT panic - grace period protects + TEST_ASSERT_NOT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(1, state.target_index); // Unchanged +} + +void 
test_panic_grace_allows_panic_when_expired(void) { + // Setup: granular mode with grace period expired + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 0; // Grace period expired + + // Underrun occurs after grace period + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Should panic normally + TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(2, state.target_index); // Boosted by 1 +} + +void test_panic_sets_grace_period(void) { + // Setup: granular mode + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 0; + + // Trigger panic + CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Grace period should be set + TEST_ASSERT_EQUAL(CPU_PANIC_GRACE_FRAMES, state.panic_grace); +} + +void test_panic_resets_stability_streak(void) { + // Setup: granular mode with stability streak + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 0; + state.stability_streak = 5; // Had some stability + + // Trigger panic + CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Stability streak should be reset + TEST_ASSERT_EQUAL(0, state.stability_streak); +} + +void test_panic_grace_decrements_each_update(void) { + // Setup: granular mode with grace period + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.panic_grace = 10; + + // Call update (no underrun, not 
completing a window) + state.frame_count = 0; + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Grace should decrement + TEST_ASSERT_EQUAL(9, state.panic_grace); +} + +void test_panic_grace_topology_mode(void) { + // Setup: topology mode with grace period active + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + state.startup_frames = config.startup_grace; + state.target_state = 1; + state.current_state = 1; + state.last_underrun = 0; + state.panic_grace = 30; // Grace period active + + // Underrun occurs during grace period + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Should NOT panic + TEST_ASSERT_NOT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(1, state.target_state); // Unchanged +} + +void test_grace_underruns_tracked_during_grace(void) { + // Setup: granular mode with grace period active + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 30; + state.grace_underruns = 0; + + // Underrun occurs during grace period + CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Grace underruns should be tracked + TEST_ASSERT_EQUAL(1, state.grace_underruns); +} + +void test_grace_period_override_on_max_underruns(void) { + // Setup: granular mode with grace period active but near max underruns + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 30; // Grace period still active + state.grace_underruns = CPU_PANIC_GRACE_MAX_UNDERRUNS - 1; // One more triggers override + + // Underrun occurs - should exceed max and trigger panic despite grace + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); + 
+ // Should PANIC despite grace period (catastrophic failure override) + TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(2, state.target_index); // Boosted +} + +void test_grace_underruns_reset_on_panic(void) { + // Setup: granular mode, trigger a panic + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 0; // No grace period + state.grace_underruns = 3; // Some accumulated + + // Underrun occurs - triggers panic + CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Grace underruns should be reset (along with grace period being set) + TEST_ASSERT_EQUAL(0, state.grace_underruns); + TEST_ASSERT_EQUAL(CPU_PANIC_GRACE_FRAMES, state.panic_grace); +} + +void test_stability_decay_does_not_affect_lower_frequencies(void) { + // Setup: stable at 800MHz (index 2), 400MHz (index 0) is blocked + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 2; // At 800MHz + + // Block 400MHz + state.panic_count[0] = CPU_PANIC_THRESHOLD; + state.stability_streak = CPU_STABILITY_DECAY_WINDOWS - 1; + state.frame_count = config.window_frames - 1; + + // Complete a stable window + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 10000); + } + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // 400MHz should still be blocked (below current, not decayed) + TEST_ASSERT_EQUAL(CPU_PANIC_THRESHOLD, state.panic_count[0]); +} + +/////////////////////////////// +// Stability Decay Tests +/////////////////////////////// + +void test_stability_streak_increments_on_stable_window(void) { + // Setup: granular mode, complete a window without panic + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, 
&config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.frame_count = config.window_frames - 1; + state.stability_streak = 0; + + // Add frame times for a complete window (low util, sweet spot) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 10000); // ~60% - in sweet spot + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Stability streak should increment + TEST_ASSERT_EQUAL(1, state.stability_streak); +} + +void test_stability_decay_after_threshold_windows(void) { + // Setup: granular mode with panic counts at index 1 (600MHz) + // Stability at 600MHz should decay 600/800/1000 but NOT 400MHz + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; // At 600MHz + state.frame_count = config.window_frames - 1; + state.stability_streak = CPU_STABILITY_DECAY_WINDOWS - 1; // One more for decay + + // Set panic counts: below, at, and above current index + state.panic_count[0] = 2; // Below current - should NOT decay + state.panic_count[1] = 2; // At current - should decay + state.panic_count[2] = 1; // Above current - should decay + state.panic_count[3] = 0; // Above current - stays 0 + + // Add frame times for stable window + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 10000); // ~60% + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Only current index and above should decay + TEST_ASSERT_EQUAL(2, state.panic_count[0]); // Below - unchanged + TEST_ASSERT_EQUAL(1, state.panic_count[1]); // At current: 2 -> 1 + TEST_ASSERT_EQUAL(0, state.panic_count[2]); // Above: 1 -> 0 + TEST_ASSERT_EQUAL(0, state.panic_count[3]); // Above: stays 0 + // Stability streak should reset after decay + TEST_ASSERT_EQUAL(0, state.stability_streak); +} + +void 
test_stability_decay_unblocks_frequency(void) { + // Setup: frequency 1 (600MHz) is blocked, we're stable at that frequency + // Only being stable AT a frequency can unblock it (not being stable above it) + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; // At 600MHz - same as blocked frequency + + // Block frequency 1 (panic_count at threshold) + state.panic_count[1] = CPU_PANIC_THRESHOLD; + + // Run enough stable windows to decay + state.frame_budget_us = 16667; + for (int w = 0; w < CPU_PANIC_THRESHOLD; w++) { + // Each iteration: reach decay threshold, then check + for (int s = 0; s < CPU_STABILITY_DECAY_WINDOWS; s++) { + state.frame_count = config.window_frames - 1; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 10000); + } + CPU_update(&state, &config, false, false, 0, 100, NULL); + } + } + + // After enough decays, frequency should be unblocked + TEST_ASSERT_LESS_THAN(CPU_PANIC_THRESHOLD, state.panic_count[1]); +} + +void test_no_stability_increment_during_panic(void) { + // Setup: a panic happens this frame + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 0; + state.stability_streak = 3; + + // Panic happens + CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Stability streak should be reset, not incremented + TEST_ASSERT_EQUAL(0, state.stability_streak); +} + +/////////////////////////////// +// Step-by-1 Tests +/////////////////////////////// + +void test_panic_step_default_is_one(void) { + CPUConfig cfg; + CPU_initConfig(&cfg); + + TEST_ASSERT_EQUAL(1, cfg.panic_step_up); +} + +void test_granular_boost_steps_by_one(void) { + // Setup: granular mode, ready to boost + int freqs[] = {400000, 600000, 800000, 1000000, 1200000}; + 
CPU_detectFrequencies(&state, &config, freqs, 5); + state.startup_frames = config.startup_grace; + state.target_index = 1; // At 600MHz + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + + // High utilization (would predict big jump with old algorithm) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 16000); // ~96% - would have jumped more before + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Should only step by 1 + TEST_ASSERT_EQUAL(2, state.target_index); // 1 -> 2, not 1 -> 4 +} + +void test_granular_boost_extreme_util_still_steps_by_one(void) { + // Setup: granular mode at lowest freq, ready to boost + int freqs[] = {400000, 600000, 800000, 1000000, 1200000}; + CPU_detectFrequencies(&state, &config, freqs, 5); + state.startup_frames = config.startup_grace; + state.target_index = 0; // At 400MHz (lowest) + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + + // Extreme utilization - 200% would predict 400*200/70 = 1142MHz (index 4) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 33334); // 200% utilization + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Should only step by 1 even with extreme utilization + TEST_ASSERT_EQUAL(1, state.target_index); // 0 -> 1, NOT 0 -> 4 +} + +void test_granular_boost_sets_grace_period(void) { + // Setup: granular mode, ready to boost + int freqs[] = {400000, 600000, 800000, 1000000, 1200000}; + CPU_detectFrequencies(&state, &config, freqs, 5); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + state.panic_grace = 0; + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 15000); // 90% utilization + } + + 
CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Boost should set grace period + TEST_ASSERT_EQUAL(CPU_PANIC_GRACE_FRAMES, state.panic_grace); +} + +void test_granular_reduce_no_grace_period(void) { + // Setup: granular mode, ready to reduce + int freqs[] = {400000, 600000, 800000, 1000000, 1200000}; + CPU_detectFrequencies(&state, &config, freqs, 5); + state.startup_frames = config.startup_grace; + state.target_index = 4; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + state.panic_grace = 0; + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 5000); // 30% utilization + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Reduce should NOT set grace period (if we underrun, frequency is too slow) + TEST_ASSERT_EQUAL(0, state.panic_grace); +} + +void test_granular_reduce_steps_by_one(void) { + // Setup: granular mode, ready to reduce + int freqs[] = {400000, 600000, 800000, 1000000, 1200000}; + CPU_detectFrequencies(&state, &config, freqs, 5); + state.startup_frames = config.startup_grace; + state.target_index = 4; // At 1200MHz + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Low utilization (would predict big drop with old algorithm) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 3333); // ~20% - would have dropped more before + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Should only step by 1 + TEST_ASSERT_EQUAL(3, state.target_index); // 4 -> 3, not 4 -> 0 +} + +/////////////////////////////// +// Test Runner +/////////////////////////////// + +int main(void) { + UNITY_BEGIN(); + + // Config/State init + RUN_TEST(test_initConfig_sets_defaults); + RUN_TEST(test_initState_zeros_state); + + // findNearestIndex + RUN_TEST(test_findNearestIndex_empty_array); + 
RUN_TEST(test_findNearestIndex_exact_match); + RUN_TEST(test_findNearestIndex_nearest_lower); + RUN_TEST(test_findNearestIndex_nearest_higher); + RUN_TEST(test_findNearestIndex_below_min); + RUN_TEST(test_findNearestIndex_above_max); + + // detectFrequencies + RUN_TEST(test_detectFrequencies_filters_below_minimum); + RUN_TEST(test_detectFrequencies_enables_granular_mode); + RUN_TEST(test_detectFrequencies_disables_scaling_with_one_freq); + RUN_TEST(test_detectFrequencies_disables_scaling_with_zero_freqs); + RUN_TEST(test_detectFrequencies_enables_scaling_with_multiple_freqs); + RUN_TEST(test_detectFrequencies_calculates_preset_indices); + + // reset + RUN_TEST(test_reset_clears_monitoring_state); + RUN_TEST(test_reset_calculates_frame_budget_60fps); + RUN_TEST(test_reset_calculates_frame_budget_50fps); + RUN_TEST(test_reset_defaults_to_60fps_on_zero); + RUN_TEST(test_reset_stores_initial_underruns); + + // recordFrameTime + RUN_TEST(test_recordFrameTime_stores_in_ring_buffer); + RUN_TEST(test_recordFrameTime_wraps_at_buffer_size); + + // percentile90 + RUN_TEST(test_percentile90_empty_returns_zero); + RUN_TEST(test_percentile90_single_value); + RUN_TEST(test_percentile90_ten_values); + RUN_TEST(test_percentile90_ignores_outliers); + + // predictFrequency + RUN_TEST(test_predictFrequency_boost_case); + RUN_TEST(test_predictFrequency_reduce_case); + RUN_TEST(test_predictFrequency_zero_target_returns_current); + + // getPresetPercentage + RUN_TEST(test_getPresetPercentage_powersave); + RUN_TEST(test_getPresetPercentage_normal); + RUN_TEST(test_getPresetPercentage_performance); + + // getPerformancePercent (unified) + RUN_TEST(test_getPerformancePercent_topology_mode); + RUN_TEST(test_getPerformancePercent_granular_mode); + RUN_TEST(test_getPerformancePercent_fallback_mode); + RUN_TEST(test_getPerformancePercent_disabled_returns_negative); + RUN_TEST(test_getPerformancePercent_null_returns_negative); + + // getModeName (unified) + RUN_TEST(test_getModeName_topology); + 
RUN_TEST(test_getModeName_granular); + RUN_TEST(test_getModeName_fallback); + RUN_TEST(test_getModeName_disabled); + RUN_TEST(test_getModeName_null); + + // update - skip conditions + RUN_TEST(test_update_skips_during_fast_forward); + RUN_TEST(test_update_skips_during_menu); + RUN_TEST(test_update_skips_during_grace_period); + RUN_TEST(test_update_skips_when_scaling_disabled); + RUN_TEST(test_update_skips_when_no_frequencies); + + // update - panic + RUN_TEST(test_update_panic_on_underrun_granular); + RUN_TEST(test_update_panic_on_underrun_fallback); + RUN_TEST(test_update_no_panic_when_at_max); + + // update - window + RUN_TEST(test_update_waits_for_full_window); + + // update - boost/reduce + RUN_TEST(test_update_boost_on_high_util_granular); + RUN_TEST(test_update_reduce_on_low_util_granular); + RUN_TEST(test_update_no_reduce_during_cooldown); + RUN_TEST(test_update_boost_fallback_mode); + RUN_TEST(test_update_reduce_fallback_mode); + RUN_TEST(test_update_sweet_spot_resets_counters); + RUN_TEST(test_update_reduce_blocked_by_low_buffer); + RUN_TEST(test_update_reduce_allowed_with_healthy_buffer); + RUN_TEST(test_update_reduce_no_grace_period); + + // Topology - initialization + RUN_TEST(test_initTopology_zeros_topology); + + // Topology - CPU list parsing + RUN_TEST(test_parseCPUList_single_cpu); + RUN_TEST(test_parseCPUList_range); + RUN_TEST(test_parseCPUList_mixed); + RUN_TEST(test_parseCPUList_single_high_cpu); + RUN_TEST(test_parseCPUList_empty_string); + + // Topology - cluster classification + RUN_TEST(test_classifyClusters_single_is_little); + RUN_TEST(test_classifyClusters_dual_little_big); + RUN_TEST(test_classifyClusters_tri_little_big_prime); + RUN_TEST(test_classifyClusters_dual_prime_by_frequency_gap); + + // Topology - representative frequencies + RUN_TEST(test_pickRepresentativeFreqs_single_freq); + RUN_TEST(test_pickRepresentativeFreqs_multiple_freqs); + + // Topology - PerfState building + 
RUN_TEST(test_buildPerfStates_dual_cluster_creates_six_states); + RUN_TEST(test_buildPerfStates_dual_cluster_state_progression); + RUN_TEST(test_buildPerfStates_single_cluster_skips_topology); + + // Topology - PerfState application + RUN_TEST(test_applyPerfState_calls_governors); + RUN_TEST(test_applyPerfState_does_not_set_affinity_directly); + RUN_TEST(test_applyPerfState_updates_current_state); + + // Topology - update decisions + RUN_TEST(test_update_topology_boost_increments_state); + RUN_TEST(test_update_topology_reduce_decrements_state); + RUN_TEST(test_update_topology_reduce_blocked_by_low_buffer); + RUN_TEST(test_update_topology_reduce_allowed_with_healthy_buffer); + RUN_TEST(test_update_topology_panic_jumps_states); + RUN_TEST(test_update_topology_no_boost_at_max_state); + RUN_TEST(test_update_topology_no_reduce_at_min_state); + + // Grace period + RUN_TEST(test_panic_grace_ignores_underruns); + RUN_TEST(test_panic_grace_allows_panic_when_expired); + RUN_TEST(test_panic_sets_grace_period); + RUN_TEST(test_panic_resets_stability_streak); + RUN_TEST(test_panic_grace_decrements_each_update); + RUN_TEST(test_panic_grace_topology_mode); + RUN_TEST(test_grace_underruns_tracked_during_grace); + RUN_TEST(test_grace_period_override_on_max_underruns); + RUN_TEST(test_grace_underruns_reset_on_panic); + + // Stability decay + RUN_TEST(test_stability_streak_increments_on_stable_window); + RUN_TEST(test_stability_decay_after_threshold_windows); + RUN_TEST(test_stability_decay_unblocks_frequency); + RUN_TEST(test_no_stability_increment_during_panic); + RUN_TEST(test_stability_decay_does_not_affect_lower_frequencies); + + // Step-by-1 behavior + RUN_TEST(test_panic_step_default_is_one); + RUN_TEST(test_granular_boost_steps_by_one); + RUN_TEST(test_granular_boost_extreme_util_still_steps_by_one); + RUN_TEST(test_granular_boost_sets_grace_period); + RUN_TEST(test_granular_reduce_no_grace_period); + RUN_TEST(test_granular_reduce_steps_by_one); + + return UNITY_END(); +} 
diff --git a/tests/unit/all/player/test_frame_pacer.c b/tests/unit/all/player/test_frame_pacer.c deleted file mode 100644 index 7fbb5484..00000000 --- a/tests/unit/all/player/test_frame_pacer.c +++ /dev/null @@ -1,385 +0,0 @@ -/** - * test_frame_pacer.c - Unit tests for frame pacing - * - * Tests the Bresenham-style frame pacing algorithm including: - * - Initialization with Q16.16 fixed-point - * - Direct mode detection - * - Paced mode accumulator behavior - * - First frame always steps - * - Long-run stability (no drift) - * - Reset functionality - */ - -#include "unity.h" -#include "frame_pacer.h" - -// Stub for PLAT_getDisplayHz - not tested here, just needed for linking -double PLAT_getDisplayHz(void) { - return 60.0; -} - -// Q16.16 scale factor for test assertions -#define Q16_SCALE 65536 - -// Test state -static FramePacer pacer; - -/////////////////////////////// -// Test Setup/Teardown -/////////////////////////////// - -void setUp(void) { - // Fresh pacer for each test - FramePacer_init(&pacer, 60.0, 60.0); -} - -void tearDown(void) { - // Nothing to clean up -} - -/////////////////////////////// -// Initialization Tests -/////////////////////////////// - -void test_init_60fps_60hz_direct_mode(void) { - FramePacer_init(&pacer, 60.0, 60.0); - - TEST_ASSERT_EQUAL_INT32(60 * Q16_SCALE, pacer.game_fps_q16); - TEST_ASSERT_EQUAL_INT32(60 * Q16_SCALE, pacer.display_hz_q16); - TEST_ASSERT_TRUE(pacer.direct_mode); - // Accumulator initialized to display_hz for first-frame-steps - TEST_ASSERT_EQUAL_INT32(60 * Q16_SCALE, pacer.accumulator); -} - -void test_init_5994fps_60hz_direct_mode(void) { - // NTSC 59.94fps @ 60Hz = 0.1% diff → direct mode (within 2% tolerance) - FramePacer_init(&pacer, 59.94, 60.0); - - TEST_ASSERT_TRUE(pacer.direct_mode); -} - -void test_init_60fps_61hz_direct_mode(void) { - // 60fps @ 61Hz = 1.6% diff → direct mode (within 2% tolerance) - // This is the kind of hardware variance audio rate control can handle - FramePacer_init(&pacer, 60.0, 
61.0); - - TEST_ASSERT_TRUE(pacer.direct_mode); -} - -void test_init_60fps_63hz_paced_mode(void) { - // 60fps @ 63Hz = 4.8% diff → paced mode (outside 2% tolerance) - FramePacer_init(&pacer, 60.0, 63.0); - - TEST_ASSERT_FALSE(pacer.direct_mode); -} - -void test_init_60fps_72hz_paced_mode(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - TEST_ASSERT_EQUAL_INT32(60 * Q16_SCALE, pacer.game_fps_q16); - TEST_ASSERT_EQUAL_INT32(72 * Q16_SCALE, pacer.display_hz_q16); - TEST_ASSERT_FALSE(pacer.direct_mode); - // Accumulator initialized to display_hz for first-frame-steps - TEST_ASSERT_EQUAL_INT32(72 * Q16_SCALE, pacer.accumulator); -} - -void test_init_50fps_60hz_paced_mode(void) { - // PAL games on NTSC display - FramePacer_init(&pacer, 50.0, 60.0); - - TEST_ASSERT_FALSE(pacer.direct_mode); -} - -void test_init_30fps_60hz_paced_mode(void) { - // Half-speed games - FramePacer_init(&pacer, 30.0, 60.0); - - TEST_ASSERT_FALSE(pacer.direct_mode); -} - -void test_init_preserves_fractional_fps(void) { - // 59.73fps (SNES) should preserve precision - FramePacer_init(&pacer, 59.73, 60.0); - - // 59.73 * 65536 = 3,913,359.28 -> 3,913,359 - int32_t expected = (int32_t)(59.73 * Q16_SCALE); - TEST_ASSERT_EQUAL_INT32(expected, pacer.game_fps_q16); -} - -/////////////////////////////// -// Direct Mode Tests -/////////////////////////////// - -void test_direct_mode_always_steps(void) { - FramePacer_init(&pacer, 60.0, 60.0); - TEST_ASSERT_TRUE(pacer.direct_mode); - - // Every call should return true - for (int i = 0; i < 100; i++) { - TEST_ASSERT_TRUE(FramePacer_step(&pacer)); - } -} - -void test_isDirectMode_returns_correct_value(void) { - FramePacer_init(&pacer, 60.0, 60.0); - TEST_ASSERT_TRUE(FramePacer_isDirectMode(&pacer)); - - FramePacer_init(&pacer, 60.0, 72.0); - TEST_ASSERT_FALSE(FramePacer_isDirectMode(&pacer)); -} - -/////////////////////////////// -// Paced Mode Tests (60fps @ 72Hz) -/////////////////////////////// - -void test_60fps_72hz_first_vsync_steps(void) { - 
FramePacer_init(&pacer, 60.0, 72.0); - - // First vsync: acc = 72, >= 72 -> step (first frame always steps) - TEST_ASSERT_TRUE(FramePacer_step(&pacer)); -} - -void test_60fps_72hz_second_vsync_repeats(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // First vsync: step - FramePacer_step(&pacer); - - // Second vsync: acc = 60, < 72 -> repeat - TEST_ASSERT_FALSE(FramePacer_step(&pacer)); -} - -void test_60fps_72hz_pattern_6_vsyncs(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // With acc starting at 72 (display_hz), pattern is: - // Vsync 1: acc=72, >=72 -> step, acc = 72 - 72 + 60 = 60 - // Vsync 2: acc=60, <72 -> repeat, acc = 60 + 60 = 120 - // Vsync 3: acc=120, >=72 -> step, acc = 120 - 72 + 60 = 108 - // Vsync 4: acc=108, >=72 -> step, acc = 108 - 72 + 60 = 96 - // Vsync 5: acc=96, >=72 -> step, acc = 96 - 72 + 60 = 84 - // Vsync 6: acc=84, >=72 -> step, acc = 84 - 72 + 60 = 72 - // Result: 5 steps, 1 repeat in 6 vsyncs = 83.3% = 60/72 - - bool results[6]; - for (int i = 0; i < 6; i++) { - results[i] = FramePacer_step(&pacer); - } - - TEST_ASSERT_TRUE(results[0]); // step - TEST_ASSERT_FALSE(results[1]); // repeat - TEST_ASSERT_TRUE(results[2]); // step - TEST_ASSERT_TRUE(results[3]); // step - TEST_ASSERT_TRUE(results[4]); // step - TEST_ASSERT_TRUE(results[5]); // step - - // Count: 5 steps, 1 repeat - int steps = 0; - for (int i = 0; i < 6; i++) { - if (results[i]) steps++; - } - TEST_ASSERT_EQUAL(5, steps); -} - -/////////////////////////////// -// Paced Mode Tests (50fps @ 60Hz - PAL) -/////////////////////////////// - -void test_50fps_60hz_pattern_6_vsyncs(void) { - FramePacer_init(&pacer, 50.0, 60.0); - - // 50fps @ 60Hz = step 50/60 = 83.3% of vsyncs - // Pattern with acc starting at 60: - // Vsync 1: acc=60, >=60 -> step, acc = 60 - 60 + 50 = 50 - // Vsync 2: acc=50, <60 -> repeat, acc = 50 + 50 = 100 - // Vsync 3: acc=100, >=60 -> step, acc = 100 - 60 + 50 = 90 - // Vsync 4: acc=90, >=60 -> step, acc = 90 - 60 + 50 = 80 - // Vsync 5: acc=80, 
>=60 -> step, acc = 80 - 60 + 50 = 70 - // Vsync 6: acc=70, >=60 -> step, acc = 70 - 60 + 50 = 60 - - bool results[6]; - for (int i = 0; i < 6; i++) { - results[i] = FramePacer_step(&pacer); - } - - TEST_ASSERT_TRUE(results[0]); // step - TEST_ASSERT_FALSE(results[1]); // repeat - TEST_ASSERT_TRUE(results[2]); // step - TEST_ASSERT_TRUE(results[3]); // step - TEST_ASSERT_TRUE(results[4]); // step - TEST_ASSERT_TRUE(results[5]); // step -} - -/////////////////////////////// -// Paced Mode Tests (30fps @ 60Hz) -/////////////////////////////// - -void test_30fps_60hz_alternates(void) { - FramePacer_init(&pacer, 30.0, 60.0); - - // 30fps @ 60Hz = step every other frame - // Vsync 1: acc=60, >=60 -> step, acc = 60 - 60 + 30 = 30 - // Vsync 2: acc=30, <60 -> repeat, acc = 30 + 30 = 60 - // Vsync 3: acc=60, >=60 -> step, acc = 60 - 60 + 30 = 30 - // Vsync 4: acc=30, <60 -> repeat, acc = 30 + 30 = 60 - - TEST_ASSERT_TRUE(FramePacer_step(&pacer)); // step - TEST_ASSERT_FALSE(FramePacer_step(&pacer)); // repeat - TEST_ASSERT_TRUE(FramePacer_step(&pacer)); // step - TEST_ASSERT_FALSE(FramePacer_step(&pacer)); // repeat -} - -/////////////////////////////// -// Long-Run Stability Tests -/////////////////////////////// - -void test_60fps_72hz_long_run_correct_ratio(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - int steps = 0; - int total_vsyncs = 7200; // 100 seconds at 72Hz - - for (int i = 0; i < total_vsyncs; i++) { - if (FramePacer_step(&pacer)) { - steps++; - } - } - - // Expected: 60/72 * 7200 = 6000 steps exactly - TEST_ASSERT_EQUAL(6000, steps); -} - -void test_50fps_60hz_long_run_correct_ratio(void) { - FramePacer_init(&pacer, 50.0, 60.0); - - int steps = 0; - int total_vsyncs = 6000; // 100 seconds at 60Hz - - for (int i = 0; i < total_vsyncs; i++) { - if (FramePacer_step(&pacer)) { - steps++; - } - } - - // Expected: 50/60 * 6000 = 5000 steps exactly - TEST_ASSERT_EQUAL(5000, steps); -} - -void test_30fps_60hz_long_run_correct_ratio(void) { - 
FramePacer_init(&pacer, 30.0, 60.0); - - int steps = 0; - int total_vsyncs = 6000; - - for (int i = 0; i < total_vsyncs; i++) { - if (FramePacer_step(&pacer)) { - steps++; - } - } - - // Expected: 30/60 * 6000 = 3000 steps exactly - TEST_ASSERT_EQUAL(3000, steps); -} - -void test_accumulator_stays_bounded(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // Run for many iterations and verify accumulator never exceeds 2x display_hz - // (theoretical max is display_hz + game_fps - 1) - int32_t max_expected = pacer.display_hz_q16 + pacer.game_fps_q16; - for (int i = 0; i < 10000; i++) { - FramePacer_step(&pacer); - TEST_ASSERT_LESS_THAN(max_expected, pacer.accumulator); - } -} - -/////////////////////////////// -// Reset Tests -/////////////////////////////// - -void test_reset_to_display_hz(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // Build up some accumulator - FramePacer_step(&pacer); - FramePacer_step(&pacer); - - // Reset - FramePacer_reset(&pacer); - - // Should be back to display_hz - TEST_ASSERT_EQUAL_INT32(pacer.display_hz_q16, pacer.accumulator); -} - -void test_reset_ensures_next_step(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // Drain accumulator - for (int i = 0; i < 10; i++) { - FramePacer_step(&pacer); - } - - // Reset - FramePacer_reset(&pacer); - - // Next call should step (accumulator = display_hz) - TEST_ASSERT_TRUE(FramePacer_step(&pacer)); -} - -void test_reset_preserves_settings(void) { - FramePacer_init(&pacer, 60.0, 72.0); - FramePacer_step(&pacer); - - FramePacer_reset(&pacer); - - // Settings should be preserved - TEST_ASSERT_EQUAL_INT32(60 * Q16_SCALE, pacer.game_fps_q16); - TEST_ASSERT_EQUAL_INT32(72 * Q16_SCALE, pacer.display_hz_q16); - TEST_ASSERT_FALSE(pacer.direct_mode); -} - -/////////////////////////////// -// Test Runner -/////////////////////////////// - -int main(void) { - UNITY_BEGIN(); - - // Initialization and tolerance tests - RUN_TEST(test_init_60fps_60hz_direct_mode); - 
RUN_TEST(test_init_5994fps_60hz_direct_mode); - RUN_TEST(test_init_60fps_61hz_direct_mode); // within 2% tolerance - RUN_TEST(test_init_60fps_63hz_paced_mode); // outside 2% tolerance - RUN_TEST(test_init_60fps_72hz_paced_mode); - RUN_TEST(test_init_50fps_60hz_paced_mode); - RUN_TEST(test_init_30fps_60hz_paced_mode); - RUN_TEST(test_init_preserves_fractional_fps); - - // Direct mode - RUN_TEST(test_direct_mode_always_steps); - RUN_TEST(test_isDirectMode_returns_correct_value); - - // 60fps @ 72Hz - RUN_TEST(test_60fps_72hz_first_vsync_steps); - RUN_TEST(test_60fps_72hz_second_vsync_repeats); - RUN_TEST(test_60fps_72hz_pattern_6_vsyncs); - - // 50fps @ 60Hz (PAL) - RUN_TEST(test_50fps_60hz_pattern_6_vsyncs); - - // 30fps @ 60Hz - RUN_TEST(test_30fps_60hz_alternates); - - // Long-run stability - RUN_TEST(test_60fps_72hz_long_run_correct_ratio); - RUN_TEST(test_50fps_60hz_long_run_correct_ratio); - RUN_TEST(test_30fps_60hz_long_run_correct_ratio); - RUN_TEST(test_accumulator_stays_bounded); - - // Reset - RUN_TEST(test_reset_to_display_hz); - RUN_TEST(test_reset_ensures_next_step); - RUN_TEST(test_reset_preserves_settings); - - return UNITY_END(); -} diff --git a/tests/unit/all/player/test_player_cpu.c b/tests/unit/all/player/test_player_cpu.c deleted file mode 100644 index 6f9160aa..00000000 --- a/tests/unit/all/player/test_player_cpu.c +++ /dev/null @@ -1,651 +0,0 @@ -/** - * test_player_cpu.c - Unit tests for auto CPU scaling - * - * Tests the CPU frequency scaling algorithm including: - * - Frequency detection and preset calculation - * - Nearest frequency index lookup - * - Utilization-based scaling decisions - * - Panic path (underrun handling) - * - Granular vs fallback modes - * - Frame timing percentile calculation - * - Single-frequency/scaling-disabled scenarios (M17-like devices) - * - * 46 tests organized by functionality. 
- */ - -#include "unity.h" -#include "player_cpu.h" - -#include - -// Test state and config -static PlayerCPUState state; -static PlayerCPUConfig config; - -/////////////////////////////// -// Test Setup/Teardown -/////////////////////////////// - -void setUp(void) { - PlayerCPU_initState(&state); - PlayerCPU_initConfig(&config); -} - -void tearDown(void) { - // Nothing to clean up -} - -/////////////////////////////// -// Config Initialization Tests -/////////////////////////////// - -void test_initConfig_sets_defaults(void) { - PlayerCPUConfig c; - PlayerCPU_initConfig(&c); - - // Verify values are sensible (not testing exact defaults) - TEST_ASSERT_GREATER_THAN(0, c.window_frames); - TEST_ASSERT_GREATER_THAN(c.util_low, c.util_high); - TEST_ASSERT_GREATER_THAN(0, c.util_low); - TEST_ASSERT_LESS_THAN(100, c.util_high); - TEST_ASSERT_GREATER_THAN(0, c.boost_windows); - TEST_ASSERT_GREATER_THAN(0, c.reduce_windows); - TEST_ASSERT_GREATER_THAN(0, c.startup_grace); - TEST_ASSERT_GREATER_OR_EQUAL(0, c.min_freq_khz); // Can be 0 (panic failsafe handles low freqs) - TEST_ASSERT_GREATER_THAN(0, c.target_util); - TEST_ASSERT_LESS_OR_EQUAL(100, c.target_util); - TEST_ASSERT_GREATER_THAN(0, c.max_step_down); - TEST_ASSERT_GREATER_THAN(0, c.panic_step_up); -} - -void test_initState_zeros_state(void) { - PlayerCPUState s; - memset(&s, 0xFF, sizeof(s)); // Fill with garbage - PlayerCPU_initState(&s); - - TEST_ASSERT_EQUAL(0, s.freq_count); - TEST_ASSERT_EQUAL(0, s.target_index); - TEST_ASSERT_EQUAL(0, s.use_granular); - TEST_ASSERT_EQUAL(0, s.frame_count); - TEST_ASSERT_EQUAL(16667, s.frame_budget_us); // 60fps default -} - -/////////////////////////////// -// findNearestIndex Tests -/////////////////////////////// - -void test_findNearestIndex_empty_array(void) { - int result = PlayerCPU_findNearestIndex(NULL, 0, 1000000); - TEST_ASSERT_EQUAL(0, result); -} - -void test_findNearestIndex_exact_match(void) { - int freqs[] = {400000, 600000, 800000, 1000000}; - int result = 
PlayerCPU_findNearestIndex(freqs, 4, 800000); - TEST_ASSERT_EQUAL(2, result); -} - -void test_findNearestIndex_nearest_lower(void) { - int freqs[] = {400000, 600000, 800000, 1000000}; - // 750000 is closer to 800000 than 600000 - int result = PlayerCPU_findNearestIndex(freqs, 4, 750000); - TEST_ASSERT_EQUAL(2, result); -} - -void test_findNearestIndex_nearest_higher(void) { - int freqs[] = {400000, 600000, 800000, 1000000}; - // 650000 is closer to 600000 than 800000 - int result = PlayerCPU_findNearestIndex(freqs, 4, 650000); - TEST_ASSERT_EQUAL(1, result); -} - -void test_findNearestIndex_below_min(void) { - int freqs[] = {400000, 600000, 800000}; - int result = PlayerCPU_findNearestIndex(freqs, 3, 100000); - TEST_ASSERT_EQUAL(0, result); -} - -void test_findNearestIndex_above_max(void) { - int freqs[] = {400000, 600000, 800000}; - int result = PlayerCPU_findNearestIndex(freqs, 3, 2000000); - TEST_ASSERT_EQUAL(2, result); -} - -/////////////////////////////// -// detectFrequencies Tests -/////////////////////////////// - -void test_detectFrequencies_filters_below_minimum(void) { - // Set explicit min_freq_khz to test filtering behavior - config.min_freq_khz = 400000; - int raw[] = {100000, 200000, 300000, 400000, 600000, 800000}; - PlayerCPU_detectFrequencies(&state, &config, raw, 6); - - // Should only keep 400000, 600000, 800000 (at or above min_freq_khz) - TEST_ASSERT_EQUAL(3, state.freq_count); - TEST_ASSERT_EQUAL(400000, state.frequencies[0]); - TEST_ASSERT_EQUAL(600000, state.frequencies[1]); - TEST_ASSERT_EQUAL(800000, state.frequencies[2]); -} - -void test_detectFrequencies_enables_granular_mode(void) { - int raw[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, raw, 4); - - TEST_ASSERT_EQUAL(1, state.use_granular); - TEST_ASSERT_EQUAL(1, state.frequencies_detected); -} - -void test_detectFrequencies_disables_scaling_with_one_freq(void) { - int raw[] = {800000}; // Only one frequency - 
PlayerCPU_detectFrequencies(&state, &config, raw, 1); - - TEST_ASSERT_EQUAL(1, state.scaling_disabled); // Scaling disabled - TEST_ASSERT_EQUAL(0, state.use_granular); - TEST_ASSERT_EQUAL(1, state.freq_count); - TEST_ASSERT_EQUAL(1, state.frequencies_detected); -} - -void test_detectFrequencies_disables_scaling_with_zero_freqs(void) { - PlayerCPU_detectFrequencies(&state, &config, NULL, 0); - - TEST_ASSERT_EQUAL(1, state.scaling_disabled); // Scaling disabled - TEST_ASSERT_EQUAL(0, state.use_granular); - TEST_ASSERT_EQUAL(0, state.freq_count); - TEST_ASSERT_EQUAL(1, state.frequencies_detected); -} - -void test_detectFrequencies_enables_scaling_with_multiple_freqs(void) { - int raw[] = {400000, 600000, 800000}; - PlayerCPU_detectFrequencies(&state, &config, raw, 3); - - TEST_ASSERT_EQUAL(0, state.scaling_disabled); // Scaling enabled - TEST_ASSERT_EQUAL(1, state.use_granular); - TEST_ASSERT_EQUAL(3, state.freq_count); -} - -void test_detectFrequencies_calculates_preset_indices(void) { - // Frequencies: 400, 600, 800, 1000 MHz - // Max = 1000000 - // POWERSAVE (55%): 550000 -> nearest is 600000 (index 1) - // NORMAL (80%): 800000 -> exact match (index 2) - // PERFORMANCE (100%): 1000000 (index 3) - int raw[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, raw, 4); - - TEST_ASSERT_EQUAL(1, state.preset_indices[PLAYER_CPU_LEVEL_POWERSAVE]); - TEST_ASSERT_EQUAL(2, state.preset_indices[PLAYER_CPU_LEVEL_NORMAL]); - TEST_ASSERT_EQUAL(3, state.preset_indices[PLAYER_CPU_LEVEL_PERFORMANCE]); -} - -/////////////////////////////// -// reset Tests -/////////////////////////////// - -void test_reset_clears_monitoring_state(void) { - state.frame_count = 100; - state.high_util_windows = 5; - state.low_util_windows = 3; - state.panic_cooldown = 8; - - PlayerCPU_reset(&state, &config, 60.0, 0); - - TEST_ASSERT_EQUAL(0, state.frame_count); - TEST_ASSERT_EQUAL(0, state.high_util_windows); - TEST_ASSERT_EQUAL(0, state.low_util_windows); - 
TEST_ASSERT_EQUAL(0, state.panic_cooldown); - TEST_ASSERT_EQUAL(0, state.startup_frames); -} - -void test_reset_calculates_frame_budget_60fps(void) { - PlayerCPU_reset(&state, &config, 60.0, 0); - TEST_ASSERT_EQUAL(16666, state.frame_budget_us); // 1000000/60 -} - -void test_reset_calculates_frame_budget_50fps(void) { - PlayerCPU_reset(&state, &config, 50.0, 0); - TEST_ASSERT_EQUAL(20000, state.frame_budget_us); // 1000000/50 -} - -void test_reset_defaults_to_60fps_on_zero(void) { - PlayerCPU_reset(&state, &config, 0.0, 0); - TEST_ASSERT_EQUAL(16667, state.frame_budget_us); -} - -void test_reset_stores_initial_underruns(void) { - PlayerCPU_reset(&state, &config, 60.0, 42); - TEST_ASSERT_EQUAL(42, state.last_underrun); -} - -/////////////////////////////// -// recordFrameTime Tests -/////////////////////////////// - -void test_recordFrameTime_stores_in_ring_buffer(void) { - PlayerCPU_recordFrameTime(&state, 15000); - PlayerCPU_recordFrameTime(&state, 16000); - PlayerCPU_recordFrameTime(&state, 17000); - - TEST_ASSERT_EQUAL(15000, state.frame_times[0]); - TEST_ASSERT_EQUAL(16000, state.frame_times[1]); - TEST_ASSERT_EQUAL(17000, state.frame_times[2]); - TEST_ASSERT_EQUAL(3, state.frame_time_index); -} - -void test_recordFrameTime_wraps_at_buffer_size(void) { - // Fill buffer - for (int i = 0; i < PLAYER_CPU_FRAME_BUFFER_SIZE; i++) { - PlayerCPU_recordFrameTime(&state, 10000 + i); - } - // Add one more - should wrap to index 0 - PlayerCPU_recordFrameTime(&state, 99999); - - TEST_ASSERT_EQUAL(99999, state.frame_times[0]); - TEST_ASSERT_EQUAL(PLAYER_CPU_FRAME_BUFFER_SIZE + 1, state.frame_time_index); -} - -/////////////////////////////// -// percentile90 Tests -/////////////////////////////// - -void test_percentile90_empty_returns_zero(void) { - uint64_t result = PlayerCPU_percentile90(NULL, 0); - TEST_ASSERT_EQUAL(0, result); -} - -void test_percentile90_single_value(void) { - uint64_t times[] = {12345}; - uint64_t result = PlayerCPU_percentile90(times, 1); - 
TEST_ASSERT_EQUAL(12345, result); -} - -void test_percentile90_ten_values(void) { - // Values 1-10, 90th percentile index = (10 * 90) / 100 = 9, sorted[9] = 10 - uint64_t times[] = {5, 3, 8, 1, 9, 2, 7, 4, 10, 6}; - uint64_t result = PlayerCPU_percentile90(times, 10); - TEST_ASSERT_EQUAL(10, result); -} - -void test_percentile90_ignores_outliers(void) { - // 9 normal values + 1 huge outlier - // Sorted: 10,11,12,13,14,15,16,17,18,1000000 - // 90% of 10 = 9, so index 9 = 1000000 - // But we want the frame times to show typical load, not spikes - uint64_t times[] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 1000000}; - uint64_t result = PlayerCPU_percentile90(times, 10); - // Index 9 (90%) is the outlier - TEST_ASSERT_EQUAL(1000000, result); -} - -/////////////////////////////// -// predictFrequency Tests -/////////////////////////////// - -void test_predictFrequency_boost_case(void) { - // At 1000MHz with 90% util, want 70% util - // new_freq = 1000 * 90 / 70 = 1285 - int result = PlayerCPU_predictFrequency(1000000, 90, 70); - TEST_ASSERT_EQUAL(1285714, result); -} - -void test_predictFrequency_reduce_case(void) { - // At 1000MHz with 40% util, want 70% util - // new_freq = 1000 * 40 / 70 = 571 - int result = PlayerCPU_predictFrequency(1000000, 40, 70); - TEST_ASSERT_EQUAL(571428, result); -} - -void test_predictFrequency_zero_target_returns_current(void) { - int result = PlayerCPU_predictFrequency(1000000, 50, 0); - TEST_ASSERT_EQUAL(1000000, result); -} - -/////////////////////////////// -// getPresetPercentage Tests -/////////////////////////////// - -void test_getPresetPercentage_powersave(void) { - TEST_ASSERT_EQUAL(55, PlayerCPU_getPresetPercentage(PLAYER_CPU_LEVEL_POWERSAVE)); -} - -void test_getPresetPercentage_normal(void) { - TEST_ASSERT_EQUAL(80, PlayerCPU_getPresetPercentage(PLAYER_CPU_LEVEL_NORMAL)); -} - -void test_getPresetPercentage_performance(void) { - TEST_ASSERT_EQUAL(100, PlayerCPU_getPresetPercentage(PLAYER_CPU_LEVEL_PERFORMANCE)); -} - 
-/////////////////////////////// -// update Tests - Skip Conditions -/////////////////////////////// - -void test_update_skips_during_fast_forward(void) { - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, true, false, 0, &result); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, decision); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, result.decision); -} - -void test_update_skips_during_menu(void) { - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, true, 0, &result); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, decision); -} - -void test_update_skips_during_grace_period(void) { - config.startup_grace = 300; - state.startup_frames = 100; // Not yet at grace period - - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, decision); - TEST_ASSERT_EQUAL(101, state.startup_frames); // Incremented -} - -void test_update_skips_when_scaling_disabled(void) { - // Simulate M17-like single-frequency device - int raw[] = {1200000}; // Only one frequency (like M17) - PlayerCPU_detectFrequencies(&state, &config, raw, 1); - - TEST_ASSERT_EQUAL(1, state.scaling_disabled); // Pre-condition: scaling disabled - - // Even with valid state and frame times, should skip - state.startup_frames = config.startup_grace; - state.frame_count = config.window_frames - 1; - state.frame_budget_us = 16667; - for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 15000); // High utilization - } - - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, &result); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, decision); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, result.decision); -} - -void test_update_skips_when_no_frequencies(void) { - // Edge case: no frequencies at all - PlayerCPU_detectFrequencies(&state, &config, NULL, 0); - - TEST_ASSERT_EQUAL(1, 
state.scaling_disabled); - - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, &result); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, decision); -} - -/////////////////////////////// -// update Tests - Panic Path -/////////////////////////////// - -void test_update_panic_on_underrun_granular(void) { - // Setup: granular mode, not at max - int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); - state.startup_frames = config.startup_grace; // Past grace - state.target_index = 1; // At 600MHz - state.last_underrun = 0; - - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 1, &result); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_PANIC, decision); - TEST_ASSERT_EQUAL(3, state.target_index); // Boosted by panic_step_up=2 (1+2=3) - TEST_ASSERT_EQUAL(8, state.panic_cooldown); -} - -void test_update_panic_on_underrun_fallback(void) { - // Setup: fallback mode - state.use_granular = 0; - state.startup_frames = config.startup_grace; - state.target_level = 0; // At powersave - state.last_underrun = 0; - - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 1, &result); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_PANIC, decision); - TEST_ASSERT_EQUAL(2, state.target_level); // Boosted to max -} - -void test_update_no_panic_when_at_max(void) { - int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); - state.startup_frames = config.startup_grace; - state.target_index = 3; // Already at max - state.last_underrun = 0; - - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 1, NULL); - - // Should not panic, just update underrun tracking - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_NONE, decision); - TEST_ASSERT_EQUAL(3, state.target_index); // Still at max -} - -/////////////////////////////// -// 
update Tests - Window Completion -/////////////////////////////// - -void test_update_waits_for_full_window(void) { - int freqs[] = {400000, 600000, 800000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 3); - state.startup_frames = config.startup_grace; - state.frame_count = 10; // Not yet at window_frames (30) - - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_NONE, decision); - TEST_ASSERT_EQUAL(11, state.frame_count); // Incremented -} - -/////////////////////////////// -// update Tests - Boost/Reduce -/////////////////////////////// - -void test_update_boost_on_high_util_granular(void) { - int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); - state.startup_frames = config.startup_grace; - state.target_index = 1; // 600MHz - state.frame_count = config.window_frames - 1; - state.high_util_windows = config.boost_windows - 1; // About to trigger - - // Add frame times that result in high utilization (~90%) - state.frame_budget_us = 16667; // 60fps - for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 15000); // 90% of 16667 - } - - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, &result); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_BOOST, decision); - TEST_ASSERT_TRUE(state.target_index > 1); // Moved up -} - -void test_update_reduce_on_low_util_granular(void) { - int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); - state.startup_frames = config.startup_grace; - state.target_index = 3; // 1000MHz - state.frame_count = config.window_frames - 1; - state.low_util_windows = config.reduce_windows - 1; - state.panic_cooldown = 0; - - // Add frame times that result in low utilization (~40%) - state.frame_budget_us = 16667; - for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 6667); // 40% 
of 16667 - } - - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, &result); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_REDUCE, decision); - TEST_ASSERT_TRUE(state.target_index < 3); // Moved down -} - -void test_update_no_reduce_during_cooldown(void) { - int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); - state.startup_frames = config.startup_grace; - state.target_index = 3; - state.frame_count = config.window_frames - 1; - state.low_util_windows = config.reduce_windows; // Would trigger reduce - state.panic_cooldown = 5; // But in cooldown! - - state.frame_budget_us = 16667; - for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 6667); // Low util - } - - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); - - // Should NOT reduce due to cooldown - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_NONE, decision); - TEST_ASSERT_EQUAL(3, state.target_index); - TEST_ASSERT_EQUAL(4, state.panic_cooldown); // Decremented -} - -void test_update_boost_fallback_mode(void) { - state.use_granular = 0; - state.startup_frames = config.startup_grace; - state.target_level = 0; - state.frame_count = config.window_frames - 1; - state.high_util_windows = config.boost_windows - 1; - - state.frame_budget_us = 16667; - for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 15000); - } - - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_BOOST, decision); - TEST_ASSERT_EQUAL(1, state.target_level); -} - -void test_update_reduce_fallback_mode(void) { - state.use_granular = 0; - state.startup_frames = config.startup_grace; - state.target_level = 2; - state.frame_count = config.window_frames - 1; - state.low_util_windows = config.reduce_windows - 1; - - state.frame_budget_us = 16667; - for (int i = 0; i < 30; i++) { - 
PlayerCPU_recordFrameTime(&state, 6667); - } - - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); - - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_REDUCE, decision); - TEST_ASSERT_EQUAL(1, state.target_level); -} - -void test_update_sweet_spot_resets_counters(void) { - int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); - state.startup_frames = config.startup_grace; - state.target_index = 2; - state.frame_count = config.window_frames - 1; - state.high_util_windows = 1; - state.low_util_windows = 1; - - // Add frame times that result in sweet spot utilization (~70%) - state.frame_budget_us = 16667; - for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 11667); // ~70% of 16667 - } - - PlayerCPU_update(&state, &config, false, false, 0, NULL); - - // Counters should be reset - TEST_ASSERT_EQUAL(0, state.high_util_windows); - TEST_ASSERT_EQUAL(0, state.low_util_windows); -} - -/////////////////////////////// -// Test Runner -/////////////////////////////// - -int main(void) { - UNITY_BEGIN(); - - // Config/State init - RUN_TEST(test_initConfig_sets_defaults); - RUN_TEST(test_initState_zeros_state); - - // findNearestIndex - RUN_TEST(test_findNearestIndex_empty_array); - RUN_TEST(test_findNearestIndex_exact_match); - RUN_TEST(test_findNearestIndex_nearest_lower); - RUN_TEST(test_findNearestIndex_nearest_higher); - RUN_TEST(test_findNearestIndex_below_min); - RUN_TEST(test_findNearestIndex_above_max); - - // detectFrequencies - RUN_TEST(test_detectFrequencies_filters_below_minimum); - RUN_TEST(test_detectFrequencies_enables_granular_mode); - RUN_TEST(test_detectFrequencies_disables_scaling_with_one_freq); - RUN_TEST(test_detectFrequencies_disables_scaling_with_zero_freqs); - RUN_TEST(test_detectFrequencies_enables_scaling_with_multiple_freqs); - RUN_TEST(test_detectFrequencies_calculates_preset_indices); - - // reset - RUN_TEST(test_reset_clears_monitoring_state); - 
RUN_TEST(test_reset_calculates_frame_budget_60fps); - RUN_TEST(test_reset_calculates_frame_budget_50fps); - RUN_TEST(test_reset_defaults_to_60fps_on_zero); - RUN_TEST(test_reset_stores_initial_underruns); - - // recordFrameTime - RUN_TEST(test_recordFrameTime_stores_in_ring_buffer); - RUN_TEST(test_recordFrameTime_wraps_at_buffer_size); - - // percentile90 - RUN_TEST(test_percentile90_empty_returns_zero); - RUN_TEST(test_percentile90_single_value); - RUN_TEST(test_percentile90_ten_values); - RUN_TEST(test_percentile90_ignores_outliers); - - // predictFrequency - RUN_TEST(test_predictFrequency_boost_case); - RUN_TEST(test_predictFrequency_reduce_case); - RUN_TEST(test_predictFrequency_zero_target_returns_current); - - // getPresetPercentage - RUN_TEST(test_getPresetPercentage_powersave); - RUN_TEST(test_getPresetPercentage_normal); - RUN_TEST(test_getPresetPercentage_performance); - - // update - skip conditions - RUN_TEST(test_update_skips_during_fast_forward); - RUN_TEST(test_update_skips_during_menu); - RUN_TEST(test_update_skips_during_grace_period); - RUN_TEST(test_update_skips_when_scaling_disabled); - RUN_TEST(test_update_skips_when_no_frequencies); - - // update - panic - RUN_TEST(test_update_panic_on_underrun_granular); - RUN_TEST(test_update_panic_on_underrun_fallback); - RUN_TEST(test_update_no_panic_when_at_max); - - // update - window - RUN_TEST(test_update_waits_for_full_window); - - // update - boost/reduce - RUN_TEST(test_update_boost_on_high_util_granular); - RUN_TEST(test_update_reduce_on_low_util_granular); - RUN_TEST(test_update_no_reduce_during_cooldown); - RUN_TEST(test_update_boost_fallback_mode); - RUN_TEST(test_update_reduce_fallback_mode); - RUN_TEST(test_update_sweet_spot_resets_counters); - - return UNITY_END(); -} diff --git a/tests/unit/all/player/test_player_utils.c b/tests/unit/all/player/test_player_utils.c index f624ac7b..97624e8c 100644 --- a/tests/unit/all/player/test_player_utils.c +++ b/tests/unit/all/player/test_player_utils.c 
@@ -10,7 +10,7 @@ * - PlayerUtils_escapeSingleQuotes - Shell quote escaping * * For option-related tests, see test_player_options.c - * For CPU frequency tests, see test_player_cpu.c + * For CPU frequency tests, see test_cpu.c (in common/) */ #include "unity.h" diff --git a/tests/unit/all/player/test_sync_manager.c b/tests/unit/all/player/test_sync_manager.c new file mode 100644 index 00000000..dbe8e3bf --- /dev/null +++ b/tests/unit/all/player/test_sync_manager.c @@ -0,0 +1,350 @@ +/** + * test_sync_manager.c - Unit tests for audio/video sync mode management + * + * Tests the runtime-adaptive sync mode switching including: + * - Initialization (starts in AUDIO_CLOCK mode) + * - Vsync measurement with circular buffer and stddev-based convergence + * - Mode switching based on measured Hz + * - Drift detection and fallback to AUDIO_CLOCK + * - shouldRunCore (always returns true) + * - shouldUseRateControl (always true, both modes use rate control) + * - shouldBlockAudio based on mode + */ + +#include "unity.h" +#include "sync_manager.h" +#include <stdbool.h> +#include <stdint.h> + +// Stub for getMicroseconds - returns controllable time for vsync measurement +static uint64_t mock_time_us = 0; +uint64_t getMicroseconds(void) { + return mock_time_us; +} + +// Stub for LOG_info - suppress output during tests +void LOG_info(const char* fmt, ...) 
{ + (void)fmt; +} + +// Test state +static SyncManager manager; + +/////////////////////////////// +// Test Setup/Teardown +/////////////////////////////// + +void setUp(void) { + SyncManager_init(&manager, 60.0, 60.0); + mock_time_us = 1000000; // Start at 1 second +} + +void tearDown(void) { + // No cleanup needed +} + +/////////////////////////////// +// Initialization Tests +/////////////////////////////// + +void test_init_starts_in_audio_clock_mode(void) { + TEST_ASSERT_EQUAL(SYNC_MODE_AUDIO_CLOCK, SyncManager_getMode(&manager)); +} + +void test_init_stores_game_fps(void) { + SyncManager_init(&manager, 59.94, 60.0); + TEST_ASSERT_EQUAL_FLOAT(59.94, manager.game_fps); +} + +void test_init_stores_display_hz(void) { + SyncManager_init(&manager, 60.0, 72.0); + TEST_ASSERT_EQUAL_FLOAT(72.0, manager.display_hz); +} + +void test_init_with_zero_hz_defaults_to_60(void) { + SyncManager_init(&manager, 60.0, 0.0); + TEST_ASSERT_EQUAL_FLOAT(60.0, manager.display_hz); +} + +void test_init_measurement_not_stable(void) { + TEST_ASSERT_FALSE(SyncManager_isMeasurementStable(&manager)); +} + +/////////////////////////////// +// Vsync Measurement Tests +/////////////////////////////// + +void test_first_vsync_just_records_timestamp(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + TEST_ASSERT_EQUAL_UINT64(1000000, manager.last_vsync_time); + TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); +} + +void test_second_vsync_records_interval(void) { + // First call - just records timestamp + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + TEST_ASSERT_EQUAL(0, manager.sample_count); + + // Second call - 16.667ms later (60Hz) - records first interval + mock_time_us = 1016667; + SyncManager_recordVsync(&manager); + + // Should have recorded the interval (measured_hz only set when stable) + TEST_ASSERT_EQUAL(1, manager.sample_count); + TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); // Not stable yet +} + +void test_rejects_outlier_too_low(void) { 
+ mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // 30Hz (too low, < 50Hz threshold) + mock_time_us = 1033333; + SyncManager_recordVsync(&manager); + + // Should be rejected, no measurement + TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); + TEST_ASSERT_EQUAL(0, manager.sample_count); +} + +void test_rejects_outlier_too_high(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // 150Hz (too high, > 120Hz threshold) + mock_time_us = 1006667; + SyncManager_recordVsync(&manager); + + // Should be rejected + TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); +} + +void test_rejects_zero_interval(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Same timestamp (division by zero protection) + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Should be rejected + TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); +} + +/////////////////////////////// +// Mode Switching Tests +/////////////////////////////// + +void test_switches_to_vsync_when_compatible(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Simulate 120 samples at 60Hz (compatible with 60fps game) + for (int i = 0; i < 120; i++) { + mock_time_us += 16667; // 60Hz interval + SyncManager_recordVsync(&manager); + } + + // Should switch to VSYNC mode (< 1% mismatch) + TEST_ASSERT_EQUAL(SYNC_MODE_VSYNC, SyncManager_getMode(&manager)); + TEST_ASSERT_TRUE(SyncManager_isMeasurementStable(&manager)); +} + +void test_stays_in_audio_clock_when_incompatible(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Simulate 120 samples at 68Hz (incompatible with 60fps game, 13% mismatch) + for (int i = 0; i < 120; i++) { + mock_time_us += 14706; // 68Hz interval + SyncManager_recordVsync(&manager); + } + + // Should stay in AUDIO_CLOCK mode (> 1% mismatch) + TEST_ASSERT_EQUAL(SYNC_MODE_AUDIO_CLOCK, SyncManager_getMode(&manager)); + 
TEST_ASSERT_TRUE(SyncManager_isMeasurementStable(&manager)); +} + +void test_measurement_stable_after_60_samples(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // With consistent samples, should converge after SYNC_MIN_SAMPLES (60) + // Not stable until we have 60+ samples with low stddev + for (int i = 0; i < 59; i++) { + mock_time_us += 16667; + SyncManager_recordVsync(&manager); + TEST_ASSERT_FALSE(SyncManager_isMeasurementStable(&manager)); + } + + // 60th sample - should now be stable (consistent samples = low stddev) + mock_time_us += 16667; + SyncManager_recordVsync(&manager); + TEST_ASSERT_TRUE(SyncManager_isMeasurementStable(&manager)); +} + +void test_drift_detection_switches_back_to_audio_clock(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Initial measurement at 60Hz - should switch to VSYNC + for (int i = 0; i < 120; i++) { + mock_time_us += 16667; + SyncManager_recordVsync(&manager); + } + TEST_ASSERT_EQUAL(SYNC_MODE_VSYNC, SyncManager_getMode(&manager)); + + // Drift to 65Hz over 300 frames (> 1% mismatch) + for (int i = 0; i < 300; i++) { + mock_time_us += 15385; // 65Hz interval + SyncManager_recordVsync(&manager); + } + + // Should detect drift and switch back to AUDIO_CLOCK + TEST_ASSERT_EQUAL(SYNC_MODE_AUDIO_CLOCK, SyncManager_getMode(&manager)); +} + +/////////////////////////////// +// API Tests +/////////////////////////////// + +void test_should_run_core_always_returns_true(void) { + TEST_ASSERT_TRUE(SyncManager_shouldRunCore(&manager)); + + // Even after switching to VSYNC + manager.mode = SYNC_MODE_VSYNC; + TEST_ASSERT_TRUE(SyncManager_shouldRunCore(&manager)); +} + +void test_should_use_rate_control_in_vsync_mode(void) { + manager.mode = SYNC_MODE_VSYNC; + TEST_ASSERT_TRUE(SyncManager_shouldUseRateControl(&manager)); +} + +void test_should_use_rate_control_in_audio_clock_too(void) { + // Both modes now use rate control as buffer health mechanism + manager.mode = 
SYNC_MODE_AUDIO_CLOCK; + TEST_ASSERT_TRUE(SyncManager_shouldUseRateControl(&manager)); +} + +void test_should_block_audio_in_audio_clock_mode(void) { + manager.mode = SYNC_MODE_AUDIO_CLOCK; + TEST_ASSERT_TRUE(SyncManager_shouldBlockAudio(&manager)); +} + +void test_should_not_block_audio_in_vsync_mode(void) { + manager.mode = SYNC_MODE_VSYNC; + TEST_ASSERT_FALSE(SyncManager_shouldBlockAudio(&manager)); +} + +void test_get_measured_hz_returns_zero_when_not_stable(void) { + TEST_ASSERT_EQUAL_FLOAT(0.0, SyncManager_getMeasuredHz(&manager)); +} + +void test_get_measured_hz_returns_value_when_stable(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // 120 samples at 60Hz + for (int i = 0; i < 120; i++) { + mock_time_us += 16667; + SyncManager_recordVsync(&manager); + } + + double measured = SyncManager_getMeasuredHz(&manager); + TEST_ASSERT_FLOAT_WITHIN(0.5, 60.0, measured); +} + +void test_mode_name_audio_clock(void) { + TEST_ASSERT_EQUAL_STRING("Audio Clock", SyncManager_getModeName(SYNC_MODE_AUDIO_CLOCK)); +} + +void test_mode_name_vsync(void) { + TEST_ASSERT_EQUAL_STRING("Vsync", SyncManager_getModeName(SYNC_MODE_VSYNC)); +} + +/////////////////////////////// +// Edge Cases +/////////////////////////////// + +void test_mean_smooths_noisy_measurements(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Alternate between 59.5Hz and 60.5Hz (simulating light jitter) + // stddev/mean < 1% so it should still converge + for (int i = 0; i < 60; i++) { + if (i % 2 == 0) { + mock_time_us += 16807; // 59.5Hz + } else { + mock_time_us += 16529; // 60.5Hz + } + SyncManager_recordVsync(&manager); + } + + // Mean should be ~60Hz + double measured = SyncManager_getMeasuredHz(&manager); + TEST_ASSERT_FLOAT_WITHIN(1.0, 60.0, measured); +} + +void test_drift_check_only_after_stable(void) { + // Before stable, drift check shouldn't run + manager.measurement_stable = false; + manager.mode = SYNC_MODE_VSYNC; + + mock_time_us = 1000000; + 
for (int i = 0; i < 300; i++) { + mock_time_us += 15385; // 65Hz (should trigger drift) + SyncManager_recordVsync(&manager); + } + + // Mode shouldn't change (not stable yet) + TEST_ASSERT_EQUAL(SYNC_MODE_VSYNC, manager.mode); +} + +/////////////////////////////// +// Test Runner +/////////////////////////////// + +int main(void) { + UNITY_BEGIN(); + + // Initialization tests + RUN_TEST(test_init_starts_in_audio_clock_mode); + RUN_TEST(test_init_stores_game_fps); + RUN_TEST(test_init_stores_display_hz); + RUN_TEST(test_init_with_zero_hz_defaults_to_60); + RUN_TEST(test_init_measurement_not_stable); + + // Vsync measurement tests + RUN_TEST(test_first_vsync_just_records_timestamp); + RUN_TEST(test_second_vsync_records_interval); + RUN_TEST(test_rejects_outlier_too_low); + RUN_TEST(test_rejects_outlier_too_high); + RUN_TEST(test_rejects_zero_interval); + + // Mode switching tests + RUN_TEST(test_switches_to_vsync_when_compatible); + RUN_TEST(test_stays_in_audio_clock_when_incompatible); + RUN_TEST(test_measurement_stable_after_60_samples); + RUN_TEST(test_drift_detection_switches_back_to_audio_clock); + + // API tests + RUN_TEST(test_should_run_core_always_returns_true); + RUN_TEST(test_should_use_rate_control_in_vsync_mode); + RUN_TEST(test_should_use_rate_control_in_audio_clock_too); + RUN_TEST(test_should_block_audio_in_audio_clock_mode); + RUN_TEST(test_should_not_block_audio_in_vsync_mode); + RUN_TEST(test_get_measured_hz_returns_zero_when_not_stable); + RUN_TEST(test_get_measured_hz_returns_value_when_stable); + RUN_TEST(test_mode_name_audio_clock); + RUN_TEST(test_mode_name_vsync); + + // Edge cases + RUN_TEST(test_mean_smooths_noisy_measurements); + RUN_TEST(test_drift_check_only_after_stable); + + return UNITY_END(); +} diff --git a/workspace/all/common/api.c b/workspace/all/common/api.c index 9d8f4e6f..5c568bac 100644 --- a/workspace/all/common/api.c +++ b/workspace/all/common/api.c @@ -21,6 +21,9 @@ * - Font resources managed through TTF_CloseFont */ +// 
Enable GNU extensions for CPU affinity macros (must be before any includes) +#define _GNU_SOURCE + #include #include #include @@ -710,6 +713,28 @@ void GFX_present(GFX_Renderer* renderer) { PLAT_present(renderer); } +/** + * Default (weak) implementation of debug HUD rendering (software). + * Player overrides this to render debug overlay before flip. + */ +FALLBACK_IMPLEMENTATION void PLAT_renderDebugHUD(SDL_Surface* surface) { + (void)surface; + // No-op by default +} + +/** + * Default (weak) implementation of debug HUD buffer (hardware/GL). + * Player overrides this to provide RGBA buffer for GL compositing. + */ +FALLBACK_IMPLEMENTATION uint32_t* PLAT_getDebugHUDBuffer(int src_w, int src_h, int screen_w, + int screen_h) { + (void)src_w; + (void)src_h; + (void)screen_w; + (void)screen_h; + return NULL; // No HUD by default +} + /** * Waits for vsync without presenting new content. * @@ -1653,14 +1678,6 @@ void GFX_blitText(TTF_Font* ttf_font, char* str, int leading, SDL_Color color, S // SND_RATE_CONTROL_D is defined in defines.h (platforms can override) // See docs/audio-rate-control.md for tuning guidance. -// Dual-timescale PI controller: integral operates on smoothed error to avoid fighting proportional -// ki: Integral gain - very slow accumulation for persistent drift only -// alpha: Error smoothing factor (~300 frame average, ~5 seconds at 60fps) -// clamp: Max integral magnitude (handles up to ±2% persistent clock mismatch) -#define SND_RATE_CONTROL_KI 0.00005f -#define SND_ERROR_AVG_ALPHA 0.003f -#define SND_INTEGRAL_CLAMP 0.02f - // SND_BUFFER_SAMPLES is defined in defines.h (platforms can override) // Sound context manages the ring buffer and resampling @@ -1678,6 +1695,12 @@ static struct SND_Context { int frame_out; // Read position int frame_filled; // Last consumed position + // Thread synchronization for blocking audio writes + // In audio-clock mode, writers block via SDL_CondWait when buffer is full. 
+ // The callback signals when space becomes available. + SDL_mutex* mutex; // Protects buffer access + SDL_cond* cond; // Signals space available (for blocking writes) + // Linear interpolation resampler with dynamic rate control AudioResampler resampler; @@ -1694,9 +1717,7 @@ static struct SND_Context { double cumulative_total_adjust; // Sum of total_adjust values applied uint64_t total_adjust_count; // Number of total_adjust applications - // Rate control state (persistent across frames) - float rate_integral; // PI integral term (accumulates from smoothed error) - float error_avg; // Smoothed error for slow integral timescale + // Rate control state float last_rate_adjust; // Last computed adjustment (for snapshot without side effects) // SDL callback timing diagnostics @@ -1705,6 +1726,10 @@ static struct SND_Context { uint64_t callback_interval_sum; // Sum of intervals (for average) unsigned callback_samples_min; // Min samples requested unsigned callback_samples_max; // Max samples requested + + // Sync mode callbacks (set by player via SND_setSyncCallbacks) + SND_SyncCallback should_use_rate_control; + SND_SyncCallback should_block_audio; } snd = {0}; /** @@ -1714,18 +1739,25 @@ static struct SND_Context { * Reads samples from the ring buffer and writes them to the output stream. * If buffer runs dry, repeats last sample or outputs silence. 
* + * Thread synchronization: + * - Uses snd.mutex to protect buffer access + * - Signals snd.cond after draining to wake blocked writers (audio-clock mode) + * * @param userdata Unused user data pointer * @param stream Output audio buffer to fill * @param len Length of output buffer in bytes * * @note Runs on SDL's audio thread, not the main thread */ -static void SND_audioCallback(void* userdata, uint8_t* stream, int len) { // plat_sound_callback - - // return (void)memset(stream,0,len); // TODO: tmp, silent +static void SND_audioCallback(void* userdata, uint8_t* stream, int len) { + (void)userdata; - if (snd.frame_count == 0) + if (snd.frame_count == 0 || !snd.mutex) { + memset(stream, 0, len); return; + } + + SDL_LockMutex(snd.mutex); int16_t* out = (int16_t*)stream; len /= (sizeof(int16_t) * 2); @@ -1768,7 +1800,7 @@ static void SND_audioCallback(void* userdata, uint8_t* stream, int len) { // pla // Log underrun with context (every occurrence - these are critical events) float fill_before = (float)(requested - len) / (float)snd.frame_count * 100.0f; - LOG_warn("Audio underrun #%u: needed %d more samples (had %d/%d, fill was %.0f%%)\n", + LOG_warn("Audio underrun #%u: needed %d more samples (had %d/%d, fill was %.0f%%)", snd.underrun_count, len, requested - len, requested, fill_before); if (snd.frame_filled >= 0 && snd.frame_filled < (int)snd.frame_count) { @@ -1784,39 +1816,56 @@ static void SND_audioCallback(void* userdata, uint8_t* stream, int len) { // pla memset(out, 0, len * sizeof(int16_t) * 2); } } + + // Signal writers that space is available (wakes blocked audio-clock writers) + SDL_CondSignal(snd.cond); + + SDL_UnlockMutex(snd.mutex); } /** * Allocates the audio ring buffer. * - * Buffer size is SND_BUFFER_SAMPLES (~83ms at 48kHz with 4000 samples). - * Locks audio thread during resize to prevent corruption. + * Buffer size is SND_BUFFER_SAMPLES (~133ms at 48kHz, ~8 video frames at 60fps). + * Locks mutex during resize to prevent corruption. 
* - * @note Called during init + * @note Called during init (before audio thread starts) */ static void SND_resizeBuffer(void) { snd.frame_count = SND_BUFFER_SAMPLES; if (snd.frame_count == 0) return; - SDL_LockAudio(); + // Lock mutex if available (may be called during init before mutex exists) + if (snd.mutex) + SDL_LockMutex(snd.mutex); int buffer_bytes = snd.frame_count * sizeof(SND_Frame); void* new_buffer = realloc(snd.buffer, buffer_bytes); if (!new_buffer) { LOG_error("Failed to allocate audio buffer (%d bytes)\n", buffer_bytes); - SDL_UnlockAudio(); + if (snd.mutex) + SDL_UnlockMutex(snd.mutex); return; } snd.buffer = new_buffer; memset(snd.buffer, 0, buffer_bytes); - snd.frame_in = 0; + // Pre-fill buffer to 50% with silence to give headroom at startup. + // This prevents immediate underruns before the core has a chance to + // produce real audio samples. The silence will be gradually replaced + // as the core submits audio. + snd.frame_in = snd.frame_count / 2; snd.frame_out = 0; snd.frame_filled = snd.frame_count - 1; - SDL_UnlockAudio(); + if (snd.mutex) + SDL_UnlockMutex(snd.mutex); + + LOG_info("Audio buffer allocated: %d samples (%d bytes, ~%.1fms at %dHz, pre-filled to 50%%)", + snd.frame_count, buffer_bytes, (float)snd.frame_count / snd.sample_rate_out * 1000.0f, + snd.sample_rate_out); } /** @@ -1839,98 +1888,174 @@ static float SND_getBufferFillLevel(void) { } /** - * Calculates dynamic rate adjustment using a dual-timescale PI controller. + * Calculate dynamic rate adjustment using proportional control. * - * Extends the Arntzen algorithm with an integral term on a separate (slower) - * timescale to correct persistent hardware drift without fighting proportional. + * Based on Arntzen's "Dynamic Rate Control for Retro Game Emulators" (2012). 
+ * Pure proportional control adjusts resampling ratio based on buffer fill: * - * Dual-timescale PI: - * error = (1 - 2*fill) - * p_term = error * d // Fast: frame-to-frame jitter - * error_avg = α*error + (1-α)*error_avg // Smooth error (~100 frames) - * integral += error_avg * ki // Slow: learns persistent offset - * adjustment = p_term + integral + * error = (1 - 2*fill) // +1 when empty, 0 at half, -1 when full + * adjustment = error * d // Bounded by ±d * - * Key insight: Original PI failed because both terms operated on same timescale, - * causing them to fight. By smoothing error before integrating, the integral - * only sees persistent trends, not per-frame noise. + * Buffer behavior: + * - Empty (fill=0): error=+1 → produce MORE samples → fill buffer + * - Half (fill=0.5): error=0 → maintain equilibrium + * - Full (fill=1): error=-1 → produce FEWER samples → drain buffer * - * Tuning guide: - * d: Higher = faster jitter response, more pitch variation (0.005-0.025) - * ki: Integral gain, 100× slower than error averaging (0.00005) - * α: Error smoothing factor, ~100 frame average (0.01) + * The paper proves this converges to stable equilibrium. Called on both + * SND_batchSamples paths: always when blocking; in vsync mode if enabled. 
* - * Our resampler divides by ratio_adjust (larger = fewer outputs), so: - * ratio_adjust = 1 - adjustment - * - * @return Rate adjustment factor for resampler step size + * @return Rate adjustment factor for resampler (1.0 - adjustment) */ static float SND_calculateRateAdjust(void) { float fill = SND_getBufferFillLevel(); - - // Arntzen error formula: positive when buffer low, negative when high - // Buffer low (fill<0.5) → produce more samples (adjustment > 0) - // Buffer high (fill>0.5) → produce fewer samples (adjustment < 0) float error = 1.0f - 2.0f * fill; - - // Fast timescale (proportional): immediate response to buffer level changes - float p_term = error * SND_RATE_CONTROL_D; - - // Slow timescale (integral): persistent offset learned in SND_newFrame() - // Integral is updated once per frame, not here (avoids N updates for N audio batches) - float adjustment = p_term + snd.rate_integral; + float adjustment = error * SND_RATE_CONTROL_D; // Invert for our resampler convention (larger ratio = fewer outputs) snd.last_rate_adjust = 1.0f - adjustment; return snd.last_rate_adjust; } +/** + * Helper to calculate available write space in FIFO. + * Caller must hold snd.mutex. + */ +static int SND_getWriteAvailable(void) { + if (snd.frame_in >= snd.frame_out) { + return snd.frame_count - (snd.frame_in - snd.frame_out) - 1; + } else { + return snd.frame_out - snd.frame_in - 1; + } +} + /** * Writes a batch of audio samples to the ring buffer. 
* - Two implementations based on sync mode: + * Runtime adaptive behavior based on sync mode: * - * SYNC_MODE_AUDIOCLOCK (audio-driven timing): - * - Blocks when buffer is full (up to 10ms) - * - Audio hardware clock drives emulation timing + * Audio-clock mode (should_block_audio = true): + * - TRUE blocking via SDL_CondWait when buffer is full + * - Audio hardware clock drives emulation timing naturally - * - Fixed 1.0 resampling ratio (no dynamic rate control) - * - For devices with unstable vsync + * - Proportional rate control (SND_calculateRateAdjust) for buffer health + * - Blocking provides frame pacing without SDL_Delay * - * Default (vsync-driven timing): + * Vsync mode (should_block_audio = false): * - Non-blocking with dynamic rate control * - Adjusts pitch ±0.5% to maintain buffer at 50% full - * - For devices with stable vsync + * - For devices with stable vsync (<1% mismatch) * * @param frames Array of audio frames to write * @param frame_count Number of frames in array * @return Number of frames consumed */ -size_t SND_batchSamples(const SND_Frame* frames, - size_t frame_count) { // plat_sound_write / plat_sound_write_resample - - if (snd.frame_count == 0) +size_t SND_batchSamples(const SND_Frame* frames, size_t frame_count) { + if (snd.frame_count == 0 || !snd.mutex) return 0; -#ifdef SYNC_MODE_AUDIOCLOCK - // ======================================================================== - // AUDIOCLOCK MODE: Blocking writes with audio hardware timing - // ======================================================================== + // Check sync mode via callback (defaults to vsync mode if not set) + bool should_block = snd.should_block_audio && snd.should_block_audio(); + bool should_use_rate_control = + !should_block && (!snd.should_use_rate_control || snd.should_use_rate_control()); + + if (should_block) { + // ======================================================================== + // AUDIO-CLOCK MODE: TRUE blocking when buffer is full + // 
======================================================================== + // + // 
Strategy: Write to FIFO until full, then block via SDL_CondWait. + // The audio callback signals the cond when it drains samples. + // This provides natural backpressure from the audio hardware clock - + // no SDL_Delay needed; blocking IS the timing mechanism. + + SDL_LockMutex(snd.mutex); + + size_t consumed = 0; + while (frame_count > 0) { + // Calculate available space + int available = SND_getWriteAvailable(); + + // If buffer is nearly full (>90%), block until callback drains + int overflow_threshold = snd.frame_count / 10; // 10% available = 90% full + while (available < overflow_threshold) { + // Block with timeout to allow checking for shutdown + // 100ms timeout: long enough to not spin, short enough for responsive shutdown + int wait_result = SDL_CondWaitTimeout(snd.cond, snd.mutex, 100); + (void)wait_result; + + // Check if we're shutting down (SND_quit sets initialized=0) + if (!snd.initialized) { + SDL_UnlockMutex(snd.mutex); + return consumed; + } + + available = SND_getWriteAvailable(); + } + + // Audio-clock mode: gentle rate control as buffer health mechanism. + // Like RetroArch, we use proportional rate control (±0.5%) in both modes + // to handle timing variations when true blocking can't provide pacing + // (e.g., platform can't disable vsync). This is gentle enough that + // underruns will still occur if CPU is truly too slow. 
+ float ratio = SND_calculateRateAdjust(); + + AudioRingBuffer ring = { + .frames = snd.buffer, + .capacity = snd.frame_count, + .write_pos = snd.frame_in, + .read_pos = snd.frame_out, + }; + + ResampleResult result = + AudioResampler_resample(&snd.resampler, &ring, frames, frame_count, ratio); + + snd.frame_in = ring.write_pos; + snd.samples_in += result.frames_consumed; + snd.samples_written += result.frames_written; + + frames += result.frames_consumed; + frame_count -= result.frames_consumed; + consumed += result.frames_consumed; + } + + SDL_UnlockMutex(snd.mutex); + + return consumed; + + } else { + // ======================================================================== + // VSYNC MODE: Non-blocking with dynamic rate control + // ======================================================================== + + SDL_LockMutex(snd.mutex); + + // Determine resampling ratio + float total_adjust; + if (should_use_rate_control) { + // Dynamic rate control: adjust based on buffer fill + total_adjust = SND_calculateRateAdjust(); - SDL_LockAudio(); + // Track cumulative adjust for diagnostics + snd.cumulative_total_adjust += total_adjust; + snd.total_adjust_count++; + } else { + // No rate control: fixed 1.0 ratio + total_adjust = 1.0f; + } - size_t consumed = 0; - while (frame_count > 0) { - int tries = 0; + // Estimate output size for diagnostics + int estimated_output = + AudioResampler_estimateOutput(&snd.resampler, frame_count, total_adjust); - // Wait for audio callback to drain buffer (up to 10ms) - while (tries < 10 && snd.frame_in == snd.frame_filled) { - tries++; - SDL_UnlockAudio(); - SDL_Delay(1); - SDL_LockAudio(); + // Calculate available space + int available = SND_getWriteAvailable(); + + // Warn if buffer nearly full + if (available < estimated_output) { + LOG_warn("Audio buffer nearly full: %d available, %d needed (fill=%.0f%%)", available, + estimated_output, SND_getBufferFillLevel() * 100.0f); } - // Write samples with fixed 1.0 ratio (no rate 
control) + // Resample into ring buffer AudioRingBuffer ring = { .frames = snd.buffer, .capacity = snd.frame_count, @@ -1939,84 +2064,16 @@ size_t SND_batchSamples(const SND_Frame* frames, }; ResampleResult result = - AudioResampler_resample(&snd.resampler, &ring, frames, frame_count, 1.0f); + AudioResampler_resample(&snd.resampler, &ring, frames, frame_count, total_adjust); snd.frame_in = ring.write_pos; snd.samples_in += result.frames_consumed; snd.samples_written += result.frames_written; - frames += result.frames_consumed; - frame_count -= result.frames_consumed; - consumed += result.frames_consumed; - } - - SDL_UnlockAudio(); - return consumed; - -#else - // ======================================================================== - // VSYNC MODE: Non-blocking with dynamic rate control - // ======================================================================== - - SDL_LockAudio(); - - // Dynamic rate control per Arntzen paper: adjust resampling ratio based on buffer fill - // Buffer empty → produce more samples (fill up), buffer full → produce fewer (drain) - // The system naturally converges to a stable equilibrium point - float total_adjust = SND_calculateRateAdjust(); - - // Note: Debug logging moved to player's unified snapshot logging (SND_getSnapshot) - - // Estimate how many OUTPUT frames we'll produce (may be more than input when upsampling) - int estimated_output = AudioResampler_estimateOutput(&snd.resampler, frame_count, total_adjust); - - // Calculate how much space is available in the ring buffer (for diagnostics) - int available; - if (snd.frame_in >= snd.frame_out) { - available = snd.frame_count - (snd.frame_in - snd.frame_out) - 1; - } else { - available = snd.frame_out - snd.frame_in - 1; - } + SDL_UnlockMutex(snd.mutex); - // Warn if buffer is nearly full (indicates rate control failure) - // The resampler will handle buffer full gracefully (partial write + save state) - if (available < estimated_output) { - LOG_warn( - "Audio buffer nearly 
full: %d available, %d needed (fill=%.0f%%) - rate control may " - "be failing\n", - available, estimated_output, SND_getBufferFillLevel() * 100.0f); + return result.frames_consumed; } - - // Set up ring buffer wrapper for the resampler - AudioRingBuffer ring = { - .frames = snd.buffer, - .capacity = snd.frame_count, - .write_pos = snd.frame_in, - .read_pos = snd.frame_out, - }; - - // Resample with combined adjustment (base correction + dynamic rate control) - ResampleResult result = - AudioResampler_resample(&snd.resampler, &ring, frames, frame_count, total_adjust); - - // Update ring buffer write position - snd.frame_in = ring.write_pos; - - // Track sample flow for diagnostics - snd.samples_in += result.frames_consumed; // Input samples consumed by resampler - snd.samples_written += result.frames_written; // Output samples written to buffer - - // Track cumulative total_adjust for window-averaged comparisons - snd.cumulative_total_adjust += total_adjust; - snd.total_adjust_count++; - - // Note: frame_filled is managed by the audio callback (SND_audioCallback) - // to track what has been consumed. We don't update it here. 
- - SDL_UnlockAudio(); - - return result.frames_consumed; -#endif } /** @@ -2045,9 +2102,24 @@ void SND_init(double sample_rate, double frame_rate) { // plat_sound_init LOG_debug("Current audio driver: %s\n", SDL_GetCurrentAudioDriver()); #endif + // Preserve sync mode callbacks across reinit + SND_SyncCallback saved_rate_control = snd.should_use_rate_control; + SND_SyncCallback saved_block_audio = snd.should_block_audio; + memset(&snd, 0, sizeof(struct SND_Context)); snd.frame_rate = frame_rate; + // Restore callbacks + snd.should_use_rate_control = saved_rate_control; + snd.should_block_audio = saved_block_audio; + + // Create synchronization primitives for blocking audio writes + snd.mutex = SDL_CreateMutex(); + snd.cond = SDL_CreateCond(); + if (!snd.mutex || !snd.cond) { + LOG_error("Failed to create audio sync primitives"); + } + SDL_AudioSpec spec_in; SDL_AudioSpec spec_out; @@ -2082,14 +2154,16 @@ void SND_init(double sample_rate, double frame_rate) { // plat_sound_init * Gets current audio buffer fill level as a percentage. * * Used by libretro cores for audio-based frameskip decisions. - * Thread-safe: locks audio to read consistent buffer state. + * Thread-safe: locks mutex to read consistent buffer state. * * @return Fill level 0-100 (0 = empty, 100 = full) */ unsigned SND_getBufferOccupancy(void) { - SDL_LockAudio(); + if (!snd.mutex) + return 0; + SDL_LockMutex(snd.mutex); float fill = SND_getBufferFillLevel(); - SDL_UnlockAudio(); + SDL_UnlockMutex(snd.mutex); return (unsigned)(fill * 100.0f); } @@ -2102,10 +2176,17 @@ unsigned SND_getBufferOccupancy(void) { * For auto CPU scaling, underruns are an emergency signal - if rate control * stress is high AND underruns are occurring, immediate CPU boost is needed. * + * Thread-safe: locks mutex to read consistent value. 
+ * * @return Number of underruns since SND_init() or last SND_resetUnderrunCount() */ unsigned SND_getUnderrunCount(void) { - return snd.underrun_count; + if (!snd.mutex) + return 0; + SDL_LockMutex(snd.mutex); + unsigned count = snd.underrun_count; + SDL_UnlockMutex(snd.mutex); + return count; } /** @@ -2113,52 +2194,32 @@ unsigned SND_getUnderrunCount(void) { * * Call after handling an underrun event (e.g., after boosting CPU) * to track new underruns going forward. + * + * Thread-safe: locks mutex to write consistent value. */ void SND_resetUnderrunCount(void) { + if (!snd.mutex) + return; + SDL_LockMutex(snd.mutex); snd.underrun_count = 0; + SDL_UnlockMutex(snd.mutex); } /** * Signals start of a new video frame for audio rate control. * - * Updates the PI integral term once per frame based on current buffer fill. - * Call once per frame before core.run() produces audio. - * - * This prevents the integral from accumulating N times when cores use - * per-sample audio callbacks (audio_sample_callback instead of batch). - * Some cores (e.g., 64-bit snes9x) call audio ~535 times per frame. + * Previously used for PI integral updates, now a no-op since we use + * pure proportional control (Arntzen algorithm). Kept for API compatibility. 
*/ void SND_newFrame(void) { -#ifdef SYNC_MODE_AUDIOCLOCK - // No-op in audioclock mode - no rate control needed - return; -#else - if (!snd.initialized) - return; - - SDL_LockAudio(); - - float fill = SND_getBufferFillLevel(); - float error = 1.0f - 2.0f * fill; - - // Update smoothed error and integral (once per frame) - snd.error_avg = SND_ERROR_AVG_ALPHA * error + (1.0f - SND_ERROR_AVG_ALPHA) * snd.error_avg; - snd.rate_integral += snd.error_avg * SND_RATE_CONTROL_KI; - - // Clamp integral to prevent windup (handles up to ±2% clock mismatch) - if (snd.rate_integral > SND_INTEGRAL_CLAMP) - snd.rate_integral = SND_INTEGRAL_CLAMP; - if (snd.rate_integral < -SND_INTEGRAL_CLAMP) - snd.rate_integral = -SND_INTEGRAL_CLAMP; - - SDL_UnlockAudio(); -#endif + // No-op: pure proportional control doesn't need per-frame state updates + (void)0; } /** * Captures an atomic snapshot of all audio state for diagnostics. * - * All values are read while holding the audio lock to ensure consistency. + * All values are read while holding the mutex to ensure consistency. * Includes buffer state, sample flow counters, and rate control parameters. 
* * @return Snapshot of current audio state @@ -2166,7 +2227,10 @@ void SND_newFrame(void) { SND_Snapshot SND_getSnapshot(void) { SND_Snapshot snap = {0}; - SDL_LockAudio(); + if (!snd.mutex) + return snap; + + SDL_LockMutex(snd.mutex); // Timestamp for delta calculations snap.timestamp_us = getMicroseconds(); @@ -2183,14 +2247,11 @@ SND_Snapshot SND_getSnapshot(void) { snap.samples_consumed = snd.samples_consumed; snap.samples_requested = snd.samples_requested; - // Rate control parameters (PI controller - read last computed values to avoid side effects) + // Rate control parameters (proportional control - read last computed value) snap.frame_rate = snd.frame_rate; snap.rate_adjust = snd.last_rate_adjust; snap.total_adjust = snd.last_rate_adjust; - snap.rate_integral = snd.rate_integral; snap.rate_control_d = SND_RATE_CONTROL_D; - snap.rate_control_ki = SND_RATE_CONTROL_KI; - snap.error_avg = snd.error_avg; // Resampler state snap.sample_rate_in = snd.sample_rate_in; @@ -2220,7 +2281,7 @@ SND_Snapshot SND_getSnapshot(void) { snap.callback_avg_interval_ms = 0; } - SDL_UnlockAudio(); + SDL_UnlockMutex(snd.mutex); return snap; } @@ -2228,16 +2289,31 @@ SND_Snapshot SND_getSnapshot(void) { /** * Shuts down the audio subsystem and frees resources. * - * Pauses audio, closes SDL audio device, frees ring buffer. + * Sets initialized=0 first to signal blocked writers to exit, + * then pauses audio, closes device, and frees resources. * Safe to call even if audio was never initialized. 
*/ -void SND_quit(void) { // plat_sound_finish +void SND_quit(void) { if (!snd.initialized) return; + // Signal shutdown first - wakes any blocked SDL_CondWaitTimeout + snd.initialized = 0; + + // Pause and close audio - SDL_CloseAudio waits for callback to complete SDL_PauseAudio(1); SDL_CloseAudio(); + // Destroy synchronization primitives (safe now that callback has stopped) + if (snd.cond) { + SDL_DestroyCond(snd.cond); + snd.cond = NULL; + } + if (snd.mutex) { + SDL_DestroyMutex(snd.mutex); + snd.mutex = NULL; + } + if (snd.buffer) { free(snd.buffer); snd.buffer = NULL; @@ -2288,13 +2364,16 @@ void SND_setMinLatency(unsigned latency_ms) { LOG_info("SET_MINIMUM_AUDIO_LATENCY: %ums - resizing buffer from %zu to %zu samples", latency_ms, snd.frame_count, required_samples); - SDL_LockAudio(); + if (!snd.mutex) + return; + + SDL_LockMutex(snd.mutex); size_t buffer_bytes = required_samples * sizeof(SND_Frame); void* new_buffer = realloc(snd.buffer, buffer_bytes); if (!new_buffer) { LOG_error("Failed to allocate audio buffer (%zu bytes)", buffer_bytes); - SDL_UnlockAudio(); + SDL_UnlockMutex(snd.mutex); return; } snd.buffer = new_buffer; @@ -2306,7 +2385,22 @@ void SND_setMinLatency(unsigned latency_ms) { snd.frame_out = 0; snd.frame_filled = snd.frame_count - 1; - SDL_UnlockAudio(); + SDL_UnlockMutex(snd.mutex); +} + +/** + * Configure sync mode callbacks for runtime adaptive behavior. + * + * The audio system uses these callbacks to adapt its behavior based on + * the current sync mode (audio-clock vs vsync). 
+ * + * @param should_use_rate_control Callback returning true if audio rate control should run + * @param should_block_audio Callback returning true if audio writes should block + */ +void SND_setSyncCallbacks(SND_SyncCallback should_use_rate_control, + SND_SyncCallback should_block_audio) { + snd.should_use_rate_control = should_use_rate_control; + snd.should_block_audio = should_block_audio; } /////////////////////////////// @@ -3328,6 +3422,363 @@ int PWR_setCPUFrequency_sysfs(int freq_khz) { return -1; } +/////////////////////////////// +// Multi-cluster CPU topology support +/////////////////////////////// + +// Include cpu.h for topology types +#include "cpu.h" + +// Base path for cpufreq policies +#define CPUFREQ_BASE_PATH "/sys/devices/system/cpu/cpufreq" + +/** + * Comparison function for sorting clusters by max_khz ascending. + */ +static int compare_cluster_by_max_khz(const void* a, const void* b) { + const CPUCluster* ca = (const CPUCluster*)a; + const CPUCluster* cb = (const CPUCluster*)b; + if (ca->max_khz < cb->max_khz) + return -1; + if (ca->max_khz > cb->max_khz) + return 1; + return 0; +} + +/** + * Reads an integer from a sysfs file. + * + * @param path Full path to sysfs file + * @return Value read, or 0 on failure + */ +static int read_sysfs_int(const char* path) { + FILE* fp = fopen(path, "r"); + if (!fp) + return 0; + + int value = 0; + if (fscanf(fp, "%d", &value) != 1) { + value = 0; + } + (void)fclose(fp); + return value; +} + +/** + * Reads available frequencies from a sysfs file into a cluster. 
+ * + * @param path Path to scaling_available_frequencies + * @param cluster Cluster to populate + * @return Number of frequencies read + */ +static int read_cluster_frequencies(const char* path, CPUCluster* cluster) { + FILE* fp = fopen(path, "r"); + if (!fp) + return 0; + + char buffer[256]; + int count = 0; + + if (fgets(buffer, sizeof(buffer), fp) != NULL) { + char* token = strtok(buffer, " \t\n"); + while (token != NULL && count < CPU_MAX_FREQS_PER_CLUSTER) { + int freq = atoi(token); + if (freq > 0) { + cluster->frequencies[count++] = freq; + } + token = strtok(NULL, " \t\n"); + } + } + (void)fclose(fp); + + // Sort frequencies ascending + if (count > 1) { + qsort(cluster->frequencies, count, sizeof(int), compare_int_asc); + } + + cluster->freq_count = count; + return count; +} + +/** + * Parses related_cpus file to get CPU mask and count. + * + * Format can be: "0 1 2 3" or "0-3" or "0-3 5 7-8" + * + * @param path Path to related_cpus file + * @param cpu_mask Output: bitmask of CPUs + * @param cpu_count Output: number of CPUs + * @return 1 on success, 0 on failure + */ +static int parse_related_cpus(const char* path, int* cpu_mask, int* cpu_count) { + FILE* fp = fopen(path, "r"); + if (!fp) + return 0; + + char buffer[128]; + *cpu_mask = 0; + *cpu_count = 0; + + if (fgets(buffer, sizeof(buffer), fp) != NULL) { + char* ptr = buffer; + while (*ptr) { + // Skip whitespace + while (*ptr == ' ' || *ptr == '\t' || *ptr == '\n') + ptr++; + if (!*ptr) + break; + + // Parse number + int start = atoi(ptr); + while (*ptr >= '0' && *ptr <= '9') + ptr++; + + int end = start; + if (*ptr == '-') { + // Range: "0-3" + ptr++; + end = atoi(ptr); + while (*ptr >= '0' && *ptr <= '9') + ptr++; + } + + // Add CPUs to mask (check for duplicates) + for (int cpu = start; cpu <= end && cpu < 32; cpu++) { + int bit = 1 << cpu; + if (!(*cpu_mask & bit)) { + *cpu_mask |= bit; + (*cpu_count)++; + } + } + + // Skip comma if present + if (*ptr == ',') + ptr++; + } + } + (void)fclose(fp); + 
return (*cpu_count > 0) ? 1 : 0; +} + +int PWR_detectCPUTopology(struct CPUTopology* topology) { + if (!topology) { + return 0; + } + + // Initialize topology + memset(topology, 0, sizeof(*topology)); + + // Enumerate policies (0, 1, 2, ... up to 15) + // Policies may not be contiguous (e.g., policy0, policy4, policy7) + char path[256]; + int cluster_count = 0; + + for (int policy_id = 0; policy_id < 16 && cluster_count < CPU_MAX_CLUSTERS; policy_id++) { + (void)snprintf(path, sizeof(path), "%s/policy%d", CPUFREQ_BASE_PATH, policy_id); + + // Check if policy directory exists by trying to read cpuinfo_max_freq + char max_freq_path[256]; + (void)snprintf(max_freq_path, sizeof(max_freq_path), "%s/cpuinfo_max_freq", path); + int max_khz = read_sysfs_int(max_freq_path); + if (max_khz <= 0) { + continue; // Policy doesn't exist + } + + CPUCluster* cluster = &topology->clusters[cluster_count]; + cluster->policy_id = policy_id; + cluster->max_khz = max_khz; + + // Read min freq + char min_freq_path[256]; + (void)snprintf(min_freq_path, sizeof(min_freq_path), "%s/cpuinfo_min_freq", path); + cluster->min_khz = read_sysfs_int(min_freq_path); + + // Read related_cpus + char cpus_path[256]; + (void)snprintf(cpus_path, sizeof(cpus_path), "%s/related_cpus", path); + if (!parse_related_cpus(cpus_path, &cluster->cpu_mask, &cluster->cpu_count)) { + LOG_warn("PWR_detectCPUTopology: failed to parse related_cpus for policy%d\n", + policy_id); + continue; + } + + // Read available frequencies + char freqs_path[256]; + (void)snprintf(freqs_path, sizeof(freqs_path), "%s/scaling_available_frequencies", path); + read_cluster_frequencies(freqs_path, cluster); + + // If no frequencies available, use min/max as fallback + if (cluster->freq_count == 0 && cluster->min_khz > 0 && cluster->max_khz > 0) { + cluster->frequencies[0] = cluster->min_khz; + cluster->frequencies[1] = (cluster->min_khz + cluster->max_khz) / 2; + cluster->frequencies[2] = cluster->max_khz; + cluster->freq_count = 3; + } + 
+ LOG_debug("PWR_detectCPUTopology: policy%d: cpus=%d (mask=0x%x), %d-%d kHz, %d freqs\n", + policy_id, cluster->cpu_count, cluster->cpu_mask, cluster->min_khz, + cluster->max_khz, cluster->freq_count); + + cluster_count++; + } + + if (cluster_count == 0) { + LOG_info("PWR_detectCPUTopology: no clusters detected\n"); + return 0; + } + + // Sort clusters by max_khz ascending (LITTLE → BIG → PRIME) + if (cluster_count > 1) { + qsort(topology->clusters, cluster_count, sizeof(CPUCluster), compare_cluster_by_max_khz); + } + + // Classify clusters (LITTLE/BIG/PRIME) + CPU_classifyClusters(topology->clusters, cluster_count); + + // Log classification results + const char* type_names[] = {"LITTLE", "BIG", "PRIME"}; + for (int i = 0; i < cluster_count; i++) { + CPUCluster* cluster = &topology->clusters[i]; + LOG_info("PWR_detectCPUTopology: cluster %d (policy%d): %s, %d CPUs, %d-%d kHz\n", i, + cluster->policy_id, type_names[cluster->type], cluster->cpu_count, + cluster->min_khz, cluster->max_khz); + } + + topology->cluster_count = cluster_count; + topology->topology_detected = 1; + + LOG_info("PWR_detectCPUTopology: detected %d cluster(s), multi-cluster=%s\n", cluster_count, + (cluster_count > 1) ? 
"yes" : "no"); + + return cluster_count; +} + +int PWR_setCPUClusterBounds(int policy_id, int min_khz, int max_khz) { + char path[256]; + int result = 0; + + // Write min_freq if specified + if (min_khz > 0) { + (void)snprintf(path, sizeof(path), "%s/policy%d/scaling_min_freq", CPUFREQ_BASE_PATH, + policy_id); + FILE* fp = fopen(path, "w"); + if (fp) { + (void)fprintf(fp, "%d\n", min_khz); + (void)fclose(fp); + } else { + LOG_warn("PWR_setCPUClusterBounds: failed to write min_freq for policy%d\n", policy_id); + result = -1; + } + } + + // Write max_freq if specified + if (max_khz > 0) { + (void)snprintf(path, sizeof(path), "%s/policy%d/scaling_max_freq", CPUFREQ_BASE_PATH, + policy_id); + FILE* fp = fopen(path, "w"); + if (fp) { + (void)fprintf(fp, "%d\n", max_khz); + (void)fclose(fp); + } else { + LOG_warn("PWR_setCPUClusterBounds: failed to write max_freq for policy%d\n", policy_id); + result = -1; + } + } + + return result; +} + +int PWR_setCPUGovernor(int policy_id, const char* governor) { + if (!governor) { + return -1; + } + + char path[256]; + (void)snprintf(path, sizeof(path), "%s/policy%d/scaling_governor", CPUFREQ_BASE_PATH, + policy_id); + + FILE* fp = fopen(path, "w"); + if (!fp) { + LOG_warn("PWR_setCPUGovernor: failed to open %s\n", path); + return -1; + } + + int written = fprintf(fp, "%s\n", governor); + int close_result = fclose(fp); + + if (written < 0 || close_result != 0) { + LOG_warn("PWR_setCPUGovernor: write failed for policy%d governor %s\n", policy_id, + governor); + return -1; + } + + LOG_debug("PWR_setCPUGovernor: set policy%d governor to %s\n", policy_id, governor); + return 0; +} + +int PWR_setLowPowerMode(void) { + int clusters_configured = 0; + + // Enumerate all cpufreq policies and set to powersave + for (int policy_id = 0; policy_id < 16; policy_id++) { + char path[256]; + (void)snprintf(path, sizeof(path), "%s/policy%d/scaling_governor", CPUFREQ_BASE_PATH, + policy_id); + + // Check if policy exists + if (access(path, F_OK) != 0) 
+ continue; + + // Set to powersave governor + if (PWR_setCPUGovernor(policy_id, "powersave") == 0) { + clusters_configured++; + } + } + + if (clusters_configured > 0) { + LOG_info("PWR_setLowPowerMode: set %d cluster(s) to powersave\n", clusters_configured); + } else { + // Single-cluster device without cpufreq policies + PLAT_setCPUSpeed(CPU_SPEED_POWERSAVE); + } + + return clusters_configured; +} + +#if defined(__linux__) +#include + +int PWR_setThreadAffinity(int cpu_mask) { + if (cpu_mask <= 0) { + return -1; + } + + cpu_set_t set; + CPU_ZERO(&set); + + for (int cpu = 0; cpu < 32; cpu++) { + if (cpu_mask & (1 << cpu)) { + CPU_SET(cpu, &set); + } + } + + // Set affinity for current thread + if (sched_setaffinity(0, sizeof(set), &set) != 0) { + LOG_warn("PWR_setThreadAffinity: sched_setaffinity failed: %s\n", strerror(errno)); + return -1; + } + + LOG_debug("PWR_setThreadAffinity: set affinity mask to 0x%x\n", cpu_mask); + return 0; +} +#else +// Non-Linux platforms: no-op +int PWR_setThreadAffinity(int cpu_mask) { + (void)cpu_mask; + return 0; +} +#endif + /////////////////////////////// // Platform utility functions /////////////////////////////// diff --git a/workspace/all/common/api.h b/workspace/all/common/api.h index de916940..11ecfbcf 100644 --- a/workspace/all/common/api.h +++ b/workspace/all/common/api.h @@ -850,10 +850,31 @@ void SND_resetUnderrunCount(void); /** * Signals start of a new video frame for audio rate control. - * Call once per frame before core.run() to limit integral updates. + * Currently a no-op (pure proportional control). Kept for API compatibility. */ void SND_newFrame(void); +/** + * Callback type for sync mode queries. + * + * Used by audio system to query sync manager for runtime mode decisions. + * + * @return true if the feature should be enabled, false otherwise + */ +typedef bool (*SND_SyncCallback)(void); + +/** + * Configure sync mode callbacks for runtime adaptive behavior. 
+ * + * The audio system uses these callbacks to adapt its behavior based on + * the current sync mode (audio-clock vs vsync). + * + * @param should_use_rate_control Callback returning true if audio rate control should run + * @param should_block_audio Callback returning true if audio writes should block + */ +void SND_setSyncCallbacks(SND_SyncCallback should_use_rate_control, + SND_SyncCallback should_block_audio); + /** * Shuts down the audio subsystem. */ @@ -891,14 +912,11 @@ typedef struct { uint64_t samples_consumed; // Total samples consumed by audio callback uint64_t samples_requested; // Total samples requested by SDL callback - // Rate control parameters (PI controller based on Arntzen algorithm) + // Rate control parameters (proportional control based on Arntzen algorithm) float frame_rate; // Core frame rate (e.g., 60.0988) float rate_adjust; // Dynamic rate control adjustment (1.0 ± d) float total_adjust; // Same as rate_adjust (no separate corrections) - float rate_integral; // PI controller integral term (drift correction) float rate_control_d; // Proportional gain - float rate_control_ki; // Integral gain - float error_avg; // Smoothed error (for debugging integral behavior) // Resampler state int sample_rate_in; // Input sample rate (from core) @@ -1386,6 +1404,32 @@ scaler_t PLAT_getScaler(GFX_Renderer* renderer); */ void PLAT_present(GFX_Renderer* renderer); +/** + * Render debug HUD overlay to display surface (software rendering). + * + * Called by PLAT_present() implementations before buffer flip. + * Weak default does nothing; player provides implementation. + * + * @param surface Final display surface to render HUD onto (RGB565) + */ +FALLBACK_IMPLEMENTATION void PLAT_renderDebugHUD(SDL_Surface* surface); + +/** + * Get debug HUD buffer for GL compositing (hardware rendering). + * + * Called by SDL2_present() GLES path before swap buffers. + * Returns an RGBA8888 buffer that will be composited over the game frame. 
+ * Weak default returns NULL (no HUD); player provides implementation. + * + * @param src_w Source (game) width for HUD text generation + * @param src_h Source (game) height for HUD text generation + * @param screen_w Screen width in pixels + * @param screen_h Screen height in pixels + * @return RGBA8888 pixel buffer (screen_w x screen_h) or NULL if no HUD + */ +FALLBACK_IMPLEMENTATION uint32_t* PLAT_getDebugHUDBuffer(int src_w, int src_h, int screen_w, + int screen_h); + /** * Platform-specific overscan support check. * @@ -1521,6 +1565,73 @@ int PWR_getAvailableCPUFrequencies_sysfs(int* frequencies, int max_count); */ int PWR_setCPUFrequency_sysfs(int freq_khz); +/////////////////////////////// +// Multi-cluster CPU topology support +/////////////////////////////// + +// Forward declarations from cpu.h (avoid circular include) +struct CPUTopology; + +/** + * Detects CPU topology from sysfs. + * + * Enumerates /sys/devices/system/cpu/cpufreq/policy{0,1,...} and reads: + * - related_cpus: Which CPUs belong to this cluster + * - cpuinfo_min_freq / cpuinfo_max_freq: Frequency bounds + * - scaling_available_frequencies: Available frequency steps + * + * Clusters are sorted by max_khz ascending (LITTLE → BIG → PRIME). + * + * @param topology Output structure to populate + * @return Number of clusters found (0 on failure, 1 for single-cluster) + */ +int PWR_detectCPUTopology(struct CPUTopology* topology); + +/** + * Sets frequency bounds for a CPU cluster. + * + * For multi-cluster mode with schedutil governor, writes to: + * - /sys/devices/system/cpu/cpufreq/policy{N}/scaling_min_freq + * - /sys/devices/system/cpu/cpufreq/policy{N}/scaling_max_freq + * + * @param policy_id Policy number (0, 4, 7, etc.) + * @param min_khz Minimum frequency in kHz (0 = don't change) + * @param max_khz Maximum frequency in kHz (0 = don't change) + * @return 0 on success, -1 on failure + */ +int PWR_setCPUClusterBounds(int policy_id, int min_khz, int max_khz); + +/** + * Sets CPU governor for a cluster. 
+ * + * @param policy_id Policy number (0, 4, 7, etc.) + * @param governor Governor name ("userspace", "schedutil", etc.) + * @return 0 on success, -1 on failure + */ +int PWR_setCPUGovernor(int policy_id, const char* governor); + +/** + * Sets CPU affinity for the current thread. + * + * Uses sched_setaffinity() on Linux to restrict the calling thread to specific CPUs; no-op returning 0 elsewhere. + * + * @param cpu_mask Bitmask of allowed CPUs (bit 0 = CPU0, bit 1 = CPU1, etc.) + * @return 0 on success, -1 on failure + */ +int PWR_setThreadAffinity(int cpu_mask); + +/** + * Sets all CPU clusters to low-power mode. + * + * On multi-cluster devices: sets all cpufreq policies to "powersave" governor. + * On single-cluster devices: uses PLAT_setCPUSpeed(CPU_SPEED_POWERSAVE). + * + * Use this for non-gaming contexts (menus, tools) to save power and reduce heat. + * + * @return Number of clusters configured (0 for single-cluster devices) + */ +int PWR_setLowPowerMode(void); + /** * Platform-specific rumble/vibration control. * diff --git a/workspace/all/common/api_types.h b/workspace/all/common/api_types.h index 1fd7169e..dc008fde 100644 --- a/workspace/all/common/api_types.h +++ b/workspace/all/common/api_types.h @@ -8,6 +8,7 @@ #ifndef __API_TYPES_H__ #define __API_TYPES_H__ +#include #include /////////////////////////////// diff --git a/workspace/all/common/build.mk b/workspace/all/common/build.mk index 5a29dc99..5e9774fd 100644 --- a/workspace/all/common/build.mk +++ b/workspace/all/common/build.mk @@ -67,7 +67,7 @@ include $(COMMON_DIR)/cflags.mk ########################################################### # Paths and sources -INCDIR = -I. -I$(COMMON_DIR)/ -I$(PLATFORM_DIR)/ -I$(PLATFORM_DEPTH)all/player/libretro-common/include -isystem $(PLATFORM_DEPTH)all/vendor/stb $(EXTRA_INCDIR) +INCDIR = -I. 
-I$(COMMON_DIR)/ -I$(PLATFORM_DIR)/ -I$(PLATFORM_DEPTH)all/player/ -I$(PLATFORM_DEPTH)all/player/libretro-common/include -isystem $(PLATFORM_DEPTH)all/vendor/stb $(EXTRA_INCDIR) COMMON_SOURCE = \ $(COMMON_DIR)/utils.c \ @@ -81,6 +81,7 @@ COMMON_SOURCE = \ $(COMMON_DIR)/scaler.c \ $(COMMON_DIR)/platform_variant.c \ $(COMMON_DIR)/paths.c \ + $(COMMON_DIR)/cpu.c \ $(PLATFORM_DIR)/platform.c # Add shared rendering modules diff --git a/workspace/all/common/cpu.c b/workspace/all/common/cpu.c new file mode 100644 index 00000000..0d428c5b --- /dev/null +++ b/workspace/all/common/cpu.c @@ -0,0 +1,845 @@ +/** + * cpu.c - CPU scaling and topology utilities + * + * Implements CPU topology detection and dynamic frequency scaling. + * Used by both launcher (topology detection) and player (autoscaling). + * + * The autoscaling algorithm uses frame execution time (90th percentile) to + * determine CPU utilization, then adjusts frequency to maintain target. + * + * Key concepts: + * - Performance scales linearly with frequency + * - Boost aggressively (jump to predicted frequency) to avoid stuttering + * - Reduce conservatively (limited steps) to avoid oscillation + * - Panic path on audio underrun with cooldown + */ + +#include "cpu.h" + +#include +#include + +/////////////////////////////// +// Comparison for qsort +/////////////////////////////// + +static int compare_uint64(const void* a, const void* b) { + uint64_t va = *(const uint64_t*)a; + uint64_t vb = *(const uint64_t*)b; + if (va < vb) + return -1; + if (va > vb) + return 1; + return 0; +} + +/////////////////////////////// +// Public Functions +/////////////////////////////// + +void CPU_initConfig(CPUConfig* config) { + config->window_frames = CPU_DEFAULT_WINDOW_FRAMES; + config->util_high = CPU_DEFAULT_UTIL_HIGH; + config->util_low = CPU_DEFAULT_UTIL_LOW; + config->boost_windows = CPU_DEFAULT_BOOST_WINDOWS; + config->reduce_windows = CPU_DEFAULT_REDUCE_WINDOWS; + config->startup_grace = CPU_DEFAULT_STARTUP_GRACE; + 
config->min_freq_khz = CPU_DEFAULT_MIN_FREQ_KHZ; + config->target_util = CPU_DEFAULT_TARGET_UTIL; + config->max_step_down = CPU_DEFAULT_MAX_STEP_DOWN; + config->panic_step_up = CPU_DEFAULT_PANIC_STEP_UP; + config->min_buffer_for_reduce = CPU_DEFAULT_MIN_BUFFER_FOR_REDUCE; +} + +void CPU_initState(CPUState* state) { + memset(state, 0, sizeof(CPUState)); + // Set sensible defaults + state->frame_budget_us = 16667; // 60fps default +} + +int CPU_findNearestIndex(const int* frequencies, int count, int target_khz) { + if (count <= 0) + return 0; + + int best_idx = 0; + int best_diff = abs(frequencies[0] - target_khz); + + for (int i = 1; i < count; i++) { + int diff = abs(frequencies[i] - target_khz); + if (diff < best_diff) { + best_diff = diff; + best_idx = i; + } + } + return best_idx; +} + +void CPU_detectFrequencies(CPUState* state, const CPUConfig* config, const int* raw_frequencies, + int raw_count) { + // Filter frequencies below minimum threshold + state->freq_count = 0; + for (int i = 0; i < raw_count && state->freq_count < CPU_MAX_FREQUENCIES; i++) { + if (raw_frequencies[i] >= config->min_freq_khz) { + state->frequencies[state->freq_count++] = raw_frequencies[i]; + } + } + + // Disable scaling if only 0 or 1 frequency available (nothing to scale) + if (state->freq_count <= 1) { + state->scaling_disabled = 1; + state->use_granular = 0; + state->frequencies_detected = 1; + return; + } + + state->scaling_disabled = 0; + state->use_granular = 1; + + // Calculate preset indices based on percentage of max frequency + int max_freq = state->frequencies[state->freq_count - 1]; + + // POWERSAVE: 55% of max + int ps_target = max_freq * 55 / 100; + state->preset_indices[CPU_LEVEL_POWERSAVE] = + CPU_findNearestIndex(state->frequencies, state->freq_count, ps_target); + + // NORMAL: 80% of max + int normal_target = max_freq * 80 / 100; + state->preset_indices[CPU_LEVEL_NORMAL] = + CPU_findNearestIndex(state->frequencies, state->freq_count, normal_target); + + // 
PERFORMANCE: max frequency + state->preset_indices[CPU_LEVEL_PERFORMANCE] = state->freq_count - 1; + + state->frequencies_detected = 1; +} + +void CPU_reset(CPUState* state, const CPUConfig* config, double fps, unsigned current_underruns) { + (void)config; // May be used in future for configurable grace period + + state->frame_count = 0; + state->high_util_windows = 0; + state->low_util_windows = 0; + state->last_underrun = current_underruns; + state->startup_frames = 0; + state->panic_cooldown = 0; + state->frame_time_index = 0; + + // Calculate frame budget from FPS + if (fps > 0) { + state->frame_budget_us = (uint64_t)(1000000.0 / fps); + } else { + state->frame_budget_us = 16667; // Default to 60fps + } + + // Clear frame time buffer + memset(state->frame_times, 0, sizeof(state->frame_times)); +} + +void CPU_recordFrameTime(CPUState* state, uint64_t frame_time_us) { + state->frame_times[state->frame_time_index % CPU_FRAME_BUFFER_SIZE] = frame_time_us; + state->frame_time_index++; +} + +uint64_t CPU_percentile90(const uint64_t* frame_times, int count) { + if (count <= 0) + return 0; + + // Limit to buffer size + if (count > CPU_FRAME_BUFFER_SIZE) + count = CPU_FRAME_BUFFER_SIZE; + + // Copy and sort + uint64_t sorted[CPU_FRAME_BUFFER_SIZE]; + memcpy(sorted, frame_times, count * sizeof(uint64_t)); + qsort(sorted, count, sizeof(uint64_t), compare_uint64); + + // 90th percentile index + int p90_idx = (count * 90) / 100; + if (p90_idx >= count) + p90_idx = count - 1; + + return sorted[p90_idx]; +} + +int CPU_predictFrequency(int current_freq, int current_util, int target_util) { + if (target_util <= 0) + return current_freq; + + // Linear scaling: new_freq = current_freq * current_util / target_util + return current_freq * current_util / target_util; +} + +int CPU_getPresetPercentage(CPULevel level) { + switch (level) { + case CPU_LEVEL_POWERSAVE: + return 55; + case CPU_LEVEL_NORMAL: + return 80; + case CPU_LEVEL_PERFORMANCE: + default: + return 100; + } +} + +int 
CPU_getPerformancePercent(const CPUState* state) { + if (!state) + return -1; + + if (state->scaling_disabled && !state->use_topology) { + return -1; + } + + if (state->use_topology) { + // Topology mode: normalize state index to 0-100 + int max_state = state->topology.state_count - 1; + if (max_state <= 0) + return 100; + int current = state->current_state; + if (current < 0) + current = state->target_state; + return (current * 100) / max_state; + } else if (state->use_granular) { + // Granular mode: normalize frequency index to 0-100 + int max_idx = state->freq_count - 1; + if (max_idx <= 0) + return 100; + return (state->current_index * 100) / max_idx; + } else { + // Fallback mode: 0=0%, 1=50%, 2=100% + return state->current_level * 50; + } +} + +const char* CPU_getModeName(const CPUState* state) { + if (!state) + return "disabled"; + + if (state->scaling_disabled && !state->use_topology) { + return "disabled"; + } + + if (state->use_topology) { + return "topology"; + } else if (state->use_granular) { + return "granular"; + } else { + return "fallback"; + } +} + +CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forward, bool show_menu, + unsigned current_underruns, unsigned buffer_fill_percent, + CPUResult* result) { + // Initialize result if provided + if (result) { + result->decision = CPU_DECISION_NONE; + result->new_index = state->target_index; + result->new_level = state->target_level; + result->utilization = 0; + result->p90_time = 0; + } + + // Skip if scaling is disabled (0 or 1 frequency available) AND not using topology mode + if (state->scaling_disabled && !state->use_topology) { + if (result) + result->decision = CPU_DECISION_SKIP; + return CPU_DECISION_SKIP; + } + + // Skip during special states + if (fast_forward || show_menu) { + if (result) + result->decision = CPU_DECISION_SKIP; + return CPU_DECISION_SKIP; + } + + // Startup grace period + if (state->startup_frames < config->startup_grace) { + state->startup_frames++; 
+ if (result) + result->decision = CPU_DECISION_SKIP; + return CPU_DECISION_SKIP; + } + + // Decrement panic grace period (ignore underruns after frequency change) + if (state->panic_grace > 0) { + state->panic_grace--; + } + + // Get current indices based on mode + int current_idx = state->target_index; + int current_level = state->target_level; + int current_state_idx = state->target_state; + int max_idx = state->freq_count - 1; + if (max_idx < 0) + max_idx = 0; + int max_state = state->topology.state_count - 1; + if (max_state < 0) + max_state = 0; + + // Check if at max based on mode + bool at_max; + if (state->use_topology) { + at_max = (current_state_idx >= max_state); + } else if (state->use_granular) { + at_max = (current_idx >= max_idx); + } else { + at_max = (current_level >= 2); + } + + // Track underruns during grace period + bool underrun_detected = (current_underruns > state->last_underrun); + if (underrun_detected && state->panic_grace > 0) { + state->grace_underruns++; + } + + // Emergency: check for underruns (panic path) + // Skip if in grace period UNLESS too many underruns (catastrophic failure) + bool grace_exceeded = (state->grace_underruns >= CPU_PANIC_GRACE_MAX_UNDERRUNS); + if (underrun_detected && !at_max && (state->panic_grace == 0 || grace_exceeded)) { + // Underrun detected - boost by panic_step_up + if (state->use_topology) { + int new_state = current_state_idx + config->panic_step_up; + if (new_state > max_state) + new_state = max_state; + state->target_state = new_state; + if (result) { + result->decision = CPU_DECISION_PANIC; + result->new_index = new_state; // Use new_index for state index + } + } else if (state->use_granular) { + int new_idx = current_idx + config->panic_step_up; + if (new_idx > max_idx) + new_idx = max_idx; + state->target_index = new_idx; + if (result) { + result->decision = CPU_DECISION_PANIC; + result->new_index = new_idx; + } + } else { + int new_level = current_level + config->panic_step_up; + if (new_level 
> 2) + new_level = 2; + state->target_level = new_level; + if (result) { + result->decision = CPU_DECISION_PANIC; + result->new_level = new_level; + } + } + + state->high_util_windows = 0; + state->low_util_windows = 0; + state->stability_streak = 0; + state->panic_cooldown = 8; // ~4 seconds before allowing reduction + state->panic_grace = CPU_PANIC_GRACE_FRAMES; // Ignore underruns while new freq settles + state->grace_underruns = 0; + state->last_underrun = 0; // Reset after handling + + return CPU_DECISION_PANIC; + } + + // Update underrun tracking (even if at max) + if (current_underruns > state->last_underrun) { + state->last_underrun = current_underruns; + } + + // Count frames in current window + state->frame_count++; + + // Check if window is complete + if (state->frame_count < config->window_frames) { + return CPU_DECISION_NONE; + } + + // Calculate 90th percentile frame time + int samples = state->frame_time_index; + if (samples > CPU_FRAME_BUFFER_SIZE) + samples = CPU_FRAME_BUFFER_SIZE; + + if (samples < 5) { + // Not enough samples - reset and wait + state->frame_count = 0; + return CPU_DECISION_NONE; + } + + uint64_t p90_time = CPU_percentile90(state->frame_times, samples); + + // Calculate utilization as percentage of frame budget + unsigned util = 0; + if (state->frame_budget_us > 0) { + util = (unsigned)((p90_time * 100) / state->frame_budget_us); + if (util > 200) + util = 200; // Cap at 200% for sanity + } + + if (result) { + result->utilization = util; + result->p90_time = p90_time; + } + + CPUDecision decision = CPU_DECISION_NONE; + + if (state->use_topology) { + // Topology mode: multi-cluster PerfState scaling + // Decrement panic cooldown + if (state->panic_cooldown > 0) { + state->panic_cooldown--; + } + + if (util > config->util_high) { + // Need more performance + state->high_util_windows++; + state->low_util_windows = 0; + + if (state->high_util_windows >= config->boost_windows && + current_state_idx < max_state) { + // Step up one state 
at a time (conservative approach for multi-cluster) + int new_state = current_state_idx + 1; + if (new_state > max_state) + new_state = max_state; + + state->target_state = new_state; + state->high_util_windows = 0; + decision = CPU_DECISION_BOOST; + + if (result) { + result->decision = CPU_DECISION_BOOST; + result->new_index = new_state; + } + } + } else if (util < config->util_low) { + // Can reduce power + state->low_util_windows++; + state->high_util_windows = 0; + + // Only reduce if: enough windows, cooldown expired, buffer healthy + bool reduce_ok = (state->low_util_windows >= config->reduce_windows) && + (state->panic_cooldown == 0) && (current_state_idx > 0) && + (buffer_fill_percent >= config->min_buffer_for_reduce); + + if (reduce_ok) { + // Step down one state at a time + int new_state = current_state_idx - config->max_step_down; + if (new_state < 0) + new_state = 0; + + state->target_state = new_state; + state->low_util_windows = 0; + decision = CPU_DECISION_REDUCE; + + if (result) { + result->decision = CPU_DECISION_REDUCE; + result->new_index = new_state; + } + } + } else { + // In sweet spot - reset counters + state->high_util_windows = 0; + state->low_util_windows = 0; + } + } else if (state->use_granular) { + // Granular mode: linear frequency scaling + + // Decrement panic cooldown + if (state->panic_cooldown > 0) { + state->panic_cooldown--; + } + + if (util > config->util_high) { + // Need more performance + state->high_util_windows++; + state->low_util_windows = 0; + + if (state->high_util_windows >= config->boost_windows && current_idx < max_idx) { + // Step up by 1 - simple and predictable + int new_idx = current_idx + 1; + if (new_idx > max_idx) + new_idx = max_idx; + + state->target_index = new_idx; + state->high_util_windows = 0; + state->panic_grace = CPU_PANIC_GRACE_FRAMES; + state->grace_underruns = 0; + decision = CPU_DECISION_BOOST; + + if (result) { + result->decision = CPU_DECISION_BOOST; + result->new_index = new_idx; + } + } + } 
else if (util < config->util_low) { + // Can reduce power + state->low_util_windows++; + state->high_util_windows = 0; + + // Only reduce if: enough windows, panic cooldown expired, buffer healthy + bool reduce_ok = (state->low_util_windows >= config->reduce_windows) && + (state->panic_cooldown == 0) && (current_idx > 0) && + (buffer_fill_percent >= config->min_buffer_for_reduce); + + if (reduce_ok) { + // Step down by 1 - simple and predictable + int new_idx = current_idx - 1; + + // Skip blocked frequencies + while (new_idx >= 0 && state->panic_count[new_idx] >= CPU_PANIC_THRESHOLD) { + new_idx--; + } + + if (new_idx >= 0) { + state->target_index = new_idx; + state->low_util_windows = 0; + // No grace period on reduce - if we underrun, frequency is too slow + decision = CPU_DECISION_REDUCE; + + if (result) { + result->decision = CPU_DECISION_REDUCE; + result->new_index = new_idx; + } + } + } + } else { + // In sweet spot - reset counters + state->high_util_windows = 0; + state->low_util_windows = 0; + } + } else { + // Fallback mode: 3-level scaling + + // Decrement panic cooldown + if (state->panic_cooldown > 0) { + state->panic_cooldown--; + } + + if (util > config->util_high) { + state->high_util_windows++; + state->low_util_windows = 0; + } else if (util < config->util_low) { + state->low_util_windows++; + state->high_util_windows = 0; + } else { + state->high_util_windows = 0; + state->low_util_windows = 0; + } + + // Boost if sustained high utilization + if (state->high_util_windows >= config->boost_windows && current_level < 2) { + int new_level = current_level + 1; + state->target_level = new_level; + state->high_util_windows = 0; + state->panic_grace = CPU_PANIC_GRACE_FRAMES; + state->grace_underruns = 0; + decision = CPU_DECISION_BOOST; + + if (result) { + result->decision = CPU_DECISION_BOOST; + result->new_level = new_level; + } + } + + // Reduce if sustained low utilization (and panic cooldown expired, buffer healthy) + if (state->low_util_windows >= 
config->reduce_windows && current_level > 0 && + state->panic_cooldown == 0 && buffer_fill_percent >= config->min_buffer_for_reduce) { + int new_level = current_level - 1; + state->target_level = new_level; + state->low_util_windows = 0; + // No grace period on reduce - if we underrun, frequency is too slow + decision = CPU_DECISION_REDUCE; + + if (result) { + result->decision = CPU_DECISION_REDUCE; + result->new_level = new_level; + } + } + } + + // Track stability for panic count decay + // If we reached here, no panic happened during this window + state->stability_streak++; + if (state->stability_streak >= CPU_STABILITY_DECAY_WINDOWS) { + // Earned stability - decay panic counts for current freq and above only + // Being stable at 600MHz proves 800/1000/1200 are fine too, but not 400MHz + for (int i = current_idx; i < state->freq_count; i++) { + if (state->panic_count[i] > 0) { + state->panic_count[i]--; + } + } + state->stability_streak = 0; + } + + // Reset window counter + state->frame_count = 0; + + return decision; +} + +/////////////////////////////// +// Multi-cluster topology functions +/////////////////////////////// + +// Forward declaration for PWR functions (defined in api.c) +extern int PWR_setCPUGovernor(int policy_id, const char* governor); +extern int PWR_setThreadAffinity(int cpu_mask); + +/** + * Returns the governor string for a given governor type. 
+ */ +static const char* governor_name(CPUGovernor gov) { + switch (gov) { + case CPU_GOV_POWERSAVE: + return "powersave"; + case CPU_GOV_SCHEDUTIL: + return "schedutil"; + case CPU_GOV_PERFORMANCE: + return "performance"; + default: + return "schedutil"; + } +} + +void CPU_initTopology(CPUTopology* topology) { + memset(topology, 0, sizeof(CPUTopology)); +} + +int CPU_parseCPUList(const char* str, int* cpu_count) { + if (!str || !cpu_count) { + if (cpu_count) + *cpu_count = 0; + return 0; + } + + int mask = 0; + *cpu_count = 0; + + const char* ptr = str; + while (*ptr) { + // Skip whitespace and commas + while (*ptr == ' ' || *ptr == '\t' || *ptr == '\n' || *ptr == ',') + ptr++; + if (!*ptr) + break; + + // Parse number + int start = 0; + while (*ptr >= '0' && *ptr <= '9') { + start = start * 10 + (*ptr - '0'); + ptr++; + } + + int end = start; + if (*ptr == '-') { + // Range: "0-3" + ptr++; + end = 0; + while (*ptr >= '0' && *ptr <= '9') { + end = end * 10 + (*ptr - '0'); + ptr++; + } + } + + // Add CPUs to mask + for (int cpu = start; cpu <= end && cpu < 32; cpu++) { + if (!(mask & (1 << cpu))) { + mask |= (1 << cpu); + (*cpu_count)++; + } + } + } + + return mask; +} + +void CPU_classifyClusters(CPUCluster* clusters, int count) { + if (!clusters || count <= 0) + return; + + for (int i = 0; i < count; i++) { + CPUCluster* cluster = &clusters[i]; + + if (i == 0) { + // First cluster (lowest max_khz) is always LITTLE + cluster->type = CPU_CLUSTER_LITTLE; + } else if (i == count - 1) { + // Last cluster might be PRIME if single CPU or significantly faster + int prev_max = clusters[i - 1].max_khz; + int freq_gap_percent = 0; + if (prev_max > 0) { + freq_gap_percent = ((cluster->max_khz - prev_max) * 100) / prev_max; + } + + if (cluster->cpu_count == 1 || freq_gap_percent > 10) { + cluster->type = CPU_CLUSTER_PRIME; + } else { + cluster->type = CPU_CLUSTER_BIG; + } + } else { + // Middle clusters are BIG + cluster->type = CPU_CLUSTER_BIG; + } + } +} + +void 
CPU_pickRepresentativeFreqs(const CPUCluster* cluster, int* low_khz, int* mid_khz, + int* high_khz) { + if (!cluster || cluster->freq_count <= 0) { + if (low_khz) + *low_khz = 0; + if (mid_khz) + *mid_khz = 0; + if (high_khz) + *high_khz = 0; + return; + } + + // Low: first frequency + if (low_khz) { + *low_khz = cluster->frequencies[0]; + } + + // Mid: middle frequency + if (mid_khz) { + int mid_idx = cluster->freq_count / 2; + *mid_khz = cluster->frequencies[mid_idx]; + } + + // High: last frequency + if (high_khz) { + *high_khz = cluster->frequencies[cluster->freq_count - 1]; + } +} + +/** + * Builds a single PerfState entry using governors instead of frequency bounds. + * + * @param state PerfState to populate + * @param cluster_count Number of clusters in topology + * @param active_cluster_idx Index of the active cluster for this state + * @param clusters Array of cluster info + * @param governor_level 0=powersave, 1=schedutil, 2=performance for active cluster + */ +static void build_perf_state(CPUPerfState* state, int cluster_count, int active_cluster_idx, + const CPUCluster* clusters, int governor_level) { + memset(state, 0, sizeof(*state)); + + state->active_cluster_idx = active_cluster_idx; + state->cpu_affinity_mask = 0; + + // Set governors for all clusters + for (int i = 0; i < cluster_count && i < CPU_MAX_CLUSTERS; i++) { + const CPUCluster* cluster = &clusters[i]; + + if (i == active_cluster_idx) { + // Active cluster: use the specified governor level + switch (governor_level) { + case 0: + state->cluster_governor[i] = CPU_GOV_POWERSAVE; + break; + case 1: + state->cluster_governor[i] = CPU_GOV_SCHEDUTIL; + break; + case 2: + default: + state->cluster_governor[i] = CPU_GOV_PERFORMANCE; + break; + } + // Add active cluster to affinity + state->cpu_affinity_mask |= cluster->cpu_mask; + } else { + // Inactive clusters: powersave (let them idle/sleep) + state->cluster_governor[i] = CPU_GOV_POWERSAVE; + } + } +} + +void CPU_buildPerfStates(CPUState* state, 
const CPUConfig* config) { + (void)config; // Reserved for future configuration + + CPUTopology* topo = &state->topology; + + if (!topo->topology_detected || topo->cluster_count <= 1) { + // Single-cluster or no topology: don't use PerfState mode + topo->state_count = 0; + state->use_topology = 0; + return; + } + + int cluster_count = topo->cluster_count; + int state_idx = 0; + + // Build states for each cluster tier using governors + // Structure: 3 governor levels per cluster (powersave/schedutil/performance) + // + // Dual-cluster (LITTLE + BIG): + // 0: LITTLE powersave, BIG powersave - lightest workloads + // 1: LITTLE schedutil, BIG powersave - light workloads (kernel finds sweet spot) + // 2: LITTLE performance, BIG powersave - moderate workloads + // 3: BIG powersave, LITTLE powersave - heavier workloads (conserve power) + // 4: BIG schedutil, LITTLE powersave - heavy workloads (kernel scales) + // 5: BIG performance, LITTLE powersave - demanding workloads + // + // Tri-cluster adds 3 more states for PRIME (6-8) + + for (int cluster_idx = 0; cluster_idx < cluster_count && state_idx < CPU_MAX_PERF_STATES; + cluster_idx++) { + // 3 governor levels per cluster + for (int gov_level = 0; gov_level < 3 && state_idx < CPU_MAX_PERF_STATES; gov_level++) { + CPUPerfState* ps = &topo->states[state_idx]; + build_perf_state(ps, cluster_count, cluster_idx, topo->clusters, gov_level); + + // For PRIME cluster, include BIG in affinity (allow scheduler some flexibility) + if (cluster_idx == cluster_count - 1 && cluster_count >= 3 && + topo->clusters[cluster_idx].type == CPU_CLUSTER_PRIME) { + // Add BIG cluster(s) to affinity + for (int i = 1; i < cluster_idx; i++) { + if (topo->clusters[i].type == CPU_CLUSTER_BIG) { + ps->cpu_affinity_mask |= topo->clusters[i].cpu_mask; + } + } + } + + state_idx++; + } + } + + topo->state_count = state_idx; + state->use_topology = 1; + state->target_state = state_idx - 1; // Start at highest (performance on fastest cluster) + 
state->current_state = -1; // Not yet applied +} + +int CPU_applyPerfState(CPUState* state) { + CPUTopology* topo = &state->topology; + + if (!state->use_topology || topo->state_count <= 0) { + return -1; + } + + int target = state->target_state; + if (target < 0) + target = 0; + if (target >= topo->state_count) + target = topo->state_count - 1; + + CPUPerfState* ps = &topo->states[target]; + int result = 0; + + // Apply governors to each cluster + for (int i = 0; i < topo->cluster_count; i++) { + int policy_id = topo->clusters[i].policy_id; + const char* gov = governor_name(ps->cluster_governor[i]); + + if (PWR_setCPUGovernor(policy_id, gov) != 0) { + result = -1; + } + } + + // Note: pending_affinity is NOT set here to avoid race conditions. + // The caller is responsible for setting pending_affinity under mutex + // after this function returns. See auto_cpu_scaling_thread(). + + // Update current state + state->current_state = target; + + return result; +} diff --git a/workspace/all/common/cpu.h b/workspace/all/common/cpu.h new file mode 100644 index 00000000..ae47b719 --- /dev/null +++ b/workspace/all/common/cpu.h @@ -0,0 +1,439 @@ +/** + * cpu.h - CPU scaling and topology utilities + * + * Provides types and functions for CPU topology detection and dynamic + * frequency scaling. Used by both the launcher (for topology detection) + * and player (for performance-based autoscaling). + * + * Three scaling modes are supported: + * - Topology mode: Multi-cluster SoCs (big.LITTLE, etc.) 
using PerfState ladder + * - Granular mode: Single-cluster with all available frequencies (linear scaling) + * - Fallback mode: 3 fixed levels (powersave/normal/performance) + * + * Topology mode: + * - Detects CPU clusters via sysfs and builds a performance state ladder + * - Uses GOVERNORS (powersave/schedutil/performance) rather than frequency bounds + * - Works WITH the kernel's frequency scaling instead of fighting it + * - Creates a gradient: 3 states per cluster tier (powersave/schedutil/performance) + * - Progresses: LITTLE tier → BIG tier → PRIME tier (if available) + * - Uses CPU affinity to guide which cluster the emulation thread runs on + * + * Designed for testability with injectable state and callbacks. + */ + +#ifndef __CPU_H__ +#define __CPU_H__ + +#include +#include + +/** + * Maximum number of CPU frequencies that can be detected. + */ +#define CPU_MAX_FREQUENCIES 32 + +/** + * Ring buffer size for frame timing samples. + */ +#define CPU_FRAME_BUFFER_SIZE 64 + +/** + * Default tuning constants. + * These can be overridden via CPUConfig. 
+ */ +#define CPU_DEFAULT_WINDOW_FRAMES 30 // ~500ms at 60fps +#define CPU_DEFAULT_UTIL_HIGH 85 // Boost threshold (%) +#define CPU_DEFAULT_UTIL_LOW 55 // Reduce threshold (%) +#define CPU_DEFAULT_BOOST_WINDOWS 2 // Windows before boost (~1s) +#define CPU_DEFAULT_REDUCE_WINDOWS 4 // Windows before reduce (~2s) +#define CPU_DEFAULT_STARTUP_GRACE 300 // Frames to skip (~5s at 60fps) +#define CPU_DEFAULT_MIN_FREQ_KHZ 0 // No minimum (panic failsafe handles problematic freqs) +#define CPU_DEFAULT_TARGET_UTIL 70 // Target utilization after change +#define CPU_DEFAULT_MAX_STEP_DOWN 1 // Max frequency steps when reducing +#define CPU_DEFAULT_PANIC_STEP_UP 1 // Frequency steps on panic (underrun) +#define CPU_PANIC_THRESHOLD 3 // Block frequency after this many panics +#define CPU_PANIC_GRACE_FRAMES 60 // Frames to ignore underruns after freq change (~1s at 60fps) +#define CPU_PANIC_GRACE_MAX_UNDERRUNS 5 // Max underruns during grace before panic anyway +#define CPU_STABILITY_DECAY_WINDOWS 8 // Stable windows before decaying panic counts (~4s) +// Windows before reduce in audio-clock mode (~4s) +// Higher than normal (4) since util metrics are unreliable +#define CPU_AUDIO_CLOCK_REDUCE_WINDOWS 8 +#define CPU_DEFAULT_MIN_BUFFER_FOR_REDUCE 40 // Min audio buffer % to allow reduce + +/** + * Multi-cluster topology constants. + */ +#define CPU_MAX_CLUSTERS 8 // Maximum CPU clusters (policies) +#define CPU_MAX_PERF_STATES 16 // Maximum performance states in ladder +#define CPU_MAX_FREQS_PER_CLUSTER 16 // Maximum frequencies per cluster + +/** + * Preset level indices. + */ +typedef enum { CPU_LEVEL_POWERSAVE = 0, CPU_LEVEL_NORMAL = 1, CPU_LEVEL_PERFORMANCE = 2 } CPULevel; + +/** + * Cluster type classification based on relative performance. + * Determined by sorting clusters by max_khz and analyzing the distribution. 
+ */ +typedef enum { + CPU_CLUSTER_LITTLE = 0, // Efficiency cores (lowest max_khz) + CPU_CLUSTER_BIG = 1, // Performance cores (middle) + CPU_CLUSTER_PRIME = 2, // Premium core (highest max_khz, often single) +} CPUClusterType; + +/** + * Governor types for PerfState ladder. + * + * Instead of manipulating frequency bounds, we use governors to create + * a gradient of performance levels within each cluster tier: + * - POWERSAVE: runs at minimum frequency (very efficient) + * - SCHEDUTIL: dynamic scaling based on load (balanced) + * - PERFORMANCE: runs at maximum frequency (full power) + */ +typedef enum { + CPU_GOV_POWERSAVE = 0, // Min frequency - for light workloads + CPU_GOV_SCHEDUTIL = 1, // Dynamic scaling - kernel finds sweet spot + CPU_GOV_PERFORMANCE = 2, // Max frequency - for demanding workloads +} CPUGovernor; + +/** + * Information about a single CPU cluster (cpufreq policy). + * Each cluster represents a group of CPUs that share a frequency. + */ +typedef struct { + int policy_id; // Policy number (0, 4, 7, etc. from policyN) + int cpu_mask; // Bitmask of CPUs in this cluster + int cpu_count; // Number of CPUs in cluster + int frequencies[CPU_MAX_FREQS_PER_CLUSTER]; // Available frequencies (kHz, sorted ascending) + int freq_count; // Number of frequencies + int min_khz; // cpuinfo_min_freq + int max_khz; // cpuinfo_max_freq + CPUClusterType type; // LITTLE/BIG/PRIME classification +} CPUCluster; + +/** + * A performance state represents one step in the autoscaler's ladder. + * + * Instead of manipulating frequency bounds, each state specifies: + * - Which cluster is "active" (where the emulation thread should run) + * - What governor to use on each cluster + * - CPU affinity to guide the scheduler + * + * This works WITH the kernel's frequency scaling rather than against it. 
+ */ +typedef struct { + CPUGovernor cluster_governor[CPU_MAX_CLUSTERS]; // Governor per cluster + int cpu_affinity_mask; // Bitmask of CPUs for emulation thread + int active_cluster_idx; // Which cluster is the "active" one +} CPUPerfState; + +/** + * Complete CPU topology information detected from sysfs. + * Populated by PWR_detectCPUTopology() at initialization. + */ +typedef struct CPUTopology { + CPUCluster clusters[CPU_MAX_CLUSTERS]; // Detected clusters (sorted by max_khz) + int cluster_count; // Number of clusters detected + CPUPerfState states[CPU_MAX_PERF_STATES]; // Performance state ladder + int state_count; // Number of states in ladder + int topology_detected; // 1 if detection completed successfully +} CPUTopology; + +/** + * Decision type returned by CPU_update(). + */ +typedef enum { + CPU_DECISION_NONE = 0, // No change needed + CPU_DECISION_BOOST, // Increase frequency/level + CPU_DECISION_REDUCE, // Decrease frequency/level + CPU_DECISION_PANIC, // Emergency boost (underrun detected) + CPU_DECISION_SKIP // Skipped (grace period, menu, etc.) +} CPUDecision; + +/** + * Configuration constants for auto CPU scaling. + */ +typedef struct { + int window_frames; // Frames per monitoring window + unsigned int util_high; // High utilization threshold (%) + unsigned int util_low; // Low utilization threshold (%) + int boost_windows; // Consecutive windows before boost + int reduce_windows; // Consecutive windows before reduce + int startup_grace; // Grace period frames at startup + int min_freq_khz; // Minimum frequency to consider (kHz) + unsigned int target_util; // Target utilization after frequency change + int max_step_down; // Max frequency steps when reducing + int panic_step_up; // Frequency steps on panic (underrun) + unsigned int min_buffer_for_reduce; // Min audio buffer % to allow reduce +} CPUConfig; + +/** + * State for auto CPU scaling. + * All fields can be inspected for testing. 
+ */ +typedef struct { + // Frequency array (populated by detectFrequencies) + int frequencies[CPU_MAX_FREQUENCIES]; // Available frequencies (kHz, sorted low→high) + int freq_count; // Number of valid frequencies + + // Granular mode state + int target_index; // Target frequency index (set by algorithm) + int current_index; // Actually applied frequency index + int preset_indices[3]; // Preset mappings [POWERSAVE, NORMAL, PERFORMANCE] + int use_granular; // 1 if granular mode, 0 for 3-level fallback + + // Fallback mode state (3-level) + int target_level; // Target level (0-2) + int current_level; // Actually applied level + + // Monitoring state + int frame_count; // Frames in current window + int high_util_windows; // Consecutive high-util windows + int low_util_windows; // Consecutive low-util windows + unsigned last_underrun; // Last seen underrun count + int startup_frames; // Frames since start (for grace period) + int panic_cooldown; // Windows to wait after panic + + // Frame timing data + uint64_t frame_times[CPU_FRAME_BUFFER_SIZE]; // Ring buffer of frame times (us) + int frame_time_index; // Current ring buffer position + uint64_t frame_budget_us; // Target frame time (from fps) + + // Flags for frequency detection + int frequencies_detected; // 1 if frequencies have been detected + int scaling_disabled; // 1 if scaling is disabled (0 or 1 frequency available) + + // Per-frequency panic tracking (failsafe for problematic frequencies) + int panic_count[CPU_MAX_FREQUENCIES]; // Count of panics at each frequency + + // Grace period and stability tracking + int panic_grace; // Frames remaining where underruns are ignored after freq change + int grace_underruns; // Underruns accumulated during grace period + int stability_streak; // Consecutive windows without panic (for decay) + + // Multi-cluster topology support + CPUTopology topology; // Detected CPU topology + int target_state; // Target PerfState index (multi-cluster mode) + int current_state; // 
Currently applied PerfState index + int use_topology; // 1 = multi-cluster mode active + int pending_affinity; // CPU mask to apply from main thread (0 = none pending) +} CPUState; + +/** + * Result of an update operation (for detailed testing). + */ +typedef struct { + CPUDecision decision; // What decision was made + int new_index; // New frequency index (if granular) + int new_level; // New level (if fallback) + unsigned utilization; // Calculated utilization (%) + uint64_t p90_time; // 90th percentile frame time +} CPUResult; + +/** + * Initializes config with default values. + * + * @param config Config to initialize + */ +void CPU_initConfig(CPUConfig* config); + +/** + * Initializes state to empty/zero state. + * + * @param state State to initialize + */ +void CPU_initState(CPUState* state); + +/** + * Finds the index of the nearest frequency to the target. + * + * @param frequencies Array of frequencies in kHz + * @param count Number of frequencies in array + * @param target_khz Target frequency to find + * @return Index of nearest frequency (0 if count <= 0) + */ +int CPU_findNearestIndex(const int* frequencies, int count, int target_khz); + +/** + * Detects available CPU frequencies and initializes granular scaling. + * + * Populates state->frequencies and state->preset_indices based on + * available system frequencies. + * + * @param state State to populate + * @param config Configuration (uses min_freq_khz) + * @param raw_frequencies Array of frequencies from platform + * @param raw_count Number of frequencies from platform + */ +void CPU_detectFrequencies(CPUState* state, const CPUConfig* config, const int* raw_frequencies, + int raw_count); + +/** + * Resets auto CPU state for a new session. + * + * Called when entering auto mode or starting a new game. 
+ * + * @param state State to reset + * @param config Configuration + * @param fps Game's target FPS (for frame budget calculation) + * @param current_underruns Current underrun count from audio system + */ +void CPU_reset(CPUState* state, const CPUConfig* config, double fps, unsigned current_underruns); + +/** + * Records a frame time sample. + * + * Called after each frame with the execution time of core.run(). + * + * @param state State to update + * @param frame_time_us Frame execution time in microseconds + */ +void CPU_recordFrameTime(CPUState* state, uint64_t frame_time_us); + +/** + * Main update function - determines if CPU frequency should change. + * + * Should be called once per frame when in auto mode. + * Returns a decision indicating what action should be taken. + * + * @param state Current state (will be modified) + * @param config Configuration constants + * @param fast_forward True if fast-forwarding (skip scaling) + * @param show_menu True if menu is showing (skip scaling) + * @param current_underruns Current underrun count from audio + * @param buffer_fill_percent Current audio buffer fill (0-100) + * @param result Optional output for detailed result info + * @return Decision type (NONE, BOOST, REDUCE, PANIC, SKIP) + */ +CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forward, bool show_menu, + unsigned current_underruns, unsigned buffer_fill_percent, CPUResult* result); + +/** + * Calculates the recommended frequency for a target utilization. + * + * Uses linear scaling: new_freq = current_freq * current_util / target_util + * + * @param current_freq Current frequency in kHz + * @param current_util Current utilization percentage + * @param target_util Target utilization percentage + * @return Recommended frequency in kHz + */ +int CPU_predictFrequency(int current_freq, int current_util, int target_util); + +/** + * Returns the percentage of max frequency for a preset level. 
+ * + * @param level Preset level (0=POWERSAVE, 1=NORMAL, 2=PERFORMANCE) + * @return Percentage of max frequency (55, 80, or 100) + */ +int CPU_getPresetPercentage(CPULevel level); + +/** + * Returns the current performance level as a normalized percentage (0-100). + * + * Provides a unified view of CPU scaling state regardless of mode: + * - Topology mode: (current_state / (state_count - 1)) * 100 + * - Granular mode: (current_index / (freq_count - 1)) * 100 + * - Fallback mode: current_level * 50 (0=0%, 1=50%, 2=100%) + * + * @param state CPU state to query + * @return Performance level 0-100, or -1 if scaling disabled + */ +int CPU_getPerformancePercent(const CPUState* state); + +/** + * Returns a string describing the current CPU scaling mode. + * + * @param state CPU state to query + * @return "topology", "granular", "fallback", or "disabled" + */ +const char* CPU_getModeName(const CPUState* state); + +/** + * Calculates the 90th percentile of frame times. + * + * @param frame_times Array of frame times + * @param count Number of samples (uses min of count and buffer size) + * @return 90th percentile value + */ +uint64_t CPU_percentile90(const uint64_t* frame_times, int count); + +/////////////////////////////// +// Multi-cluster topology functions +/////////////////////////////// + +/** + * Initializes topology structure to empty state. + * + * @param topology Topology to initialize + */ +void CPU_initTopology(CPUTopology* topology); + +/** + * Builds the PerfState ladder from detected topology. 
+ * + * Creates a progression of performance states using governors: + * - Single-cluster: No states built (use existing frequency array) + * - Dual-cluster: 6 states (LITTLE powersave/schedutil/performance, + * BIG powersave/schedutil/performance) + * - Tri-cluster: 9 states (add PRIME powersave/schedutil/performance) + * + * Each state sets: + * - Active cluster's governor (powersave/schedutil/performance) + * - Inactive clusters to powersave (let them idle) + * - CPU affinity to guide emulation thread to active cluster + * + * @param state CPU state with populated topology.clusters + * @param config Configuration + */ +void CPU_buildPerfStates(CPUState* state, const CPUConfig* config); + +/** + * Applies a PerfState by setting cluster governors and thread affinity. + * + * Called by background thread when target_state != current_state. + * Sets governors on all clusters and queues affinity change for main thread. + * + * @param state CPU state with target_state set + * @return 0 on success, -1 on failure + */ +int CPU_applyPerfState(CPUState* state); + +/** + * Parses a CPU list string (e.g., "0-3" or "0 1 2 3") into a bitmask. + * + * @param str CPU list string from sysfs (e.g., "0-3,5,7-8") + * @param cpu_count Output: number of CPUs in the list + * @return Bitmask of CPUs + */ +int CPU_parseCPUList(const char* str, int* cpu_count); + +/** + * Classifies clusters based on their relative performance. + * + * After clusters are sorted by max_khz, this assigns LITTLE/BIG/PRIME types: + * - clusters[0] = LITTLE + * - clusters[N-1] = PRIME if single CPU or >10% faster than next + * - Middle clusters = BIG + * + * @param clusters Array of clusters (must be sorted by max_khz ascending) + * @param count Number of clusters + */ +void CPU_classifyClusters(CPUCluster* clusters, int count); + +/** + * Picks 3 representative frequencies from a cluster's available frequencies. 
+ * + * Selects low (min), mid (middle), and high (max) frequencies for building + * the PerfState ladder. + * + * @param cluster Cluster with populated frequencies + * @param low_khz Output: low frequency (freqs[0]) + * @param mid_khz Output: mid frequency (freqs[count/2]) + * @param high_khz Output: high frequency (freqs[count-1]) + */ +void CPU_pickRepresentativeFreqs(const CPUCluster* cluster, int* low_khz, int* mid_khz, + int* high_khz); + +#endif // __CPU_H__ diff --git a/workspace/all/common/defines.h b/workspace/all/common/defines.h index 897a1444..e6960b58 100644 --- a/workspace/all/common/defines.h +++ b/workspace/all/common/defines.h @@ -371,23 +371,25 @@ /** * Audio ring buffer size in samples (stereo frames). - * Controls how much audio is buffered ahead (~85ms at 48kHz with 4096 samples). - * Lower values reduce latency, higher values prevent underruns on slow devices. + * Controls how much audio is buffered ahead (~133ms at 48kHz, ~8 video frames at 60fps). + * Close to RetroArch's 128ms default for handheld devices. + * Provides headroom for CPU frequency scaling and timing variance. * Platforms can override this in platform.h if needed. */ #ifndef SND_BUFFER_SAMPLES -#define SND_BUFFER_SAMPLES 4096 +#define SND_BUFFER_SAMPLES 6400 #endif /** * Rate control proportional gain (d parameter from Arntzen paper). * Controls maximum pitch deviation for buffer level compensation. * Higher values = more aggressive correction, faster response to jitter. - * Paper recommends 0.2-0.5%, but handhelds need 1.0-1.5% due to timing variance. + * Paper recommends 0.2-0.5%; handhelds typically use 0.5-1.0%. + * Set to 0.8% - gentler than previous 1.2% since larger buffer provides more headroom. * Platforms can override this in platform.h if needed.
*/ #ifndef SND_RATE_CONTROL_D -#define SND_RATE_CONTROL_D 0.012f +#define SND_RATE_CONTROL_D 0.008f #endif /////////////////////////////// diff --git a/workspace/all/common/gl_video.c b/workspace/all/common/gl_video.c index e26e4936..55a5cff4 100644 --- a/workspace/all/common/gl_video.c +++ b/workspace/all/common/gl_video.c @@ -1974,6 +1974,20 @@ void GLVideo_swapBuffers(void) { SDL_GL_SwapWindow(window); } +int GLVideo_setVsync(int enabled) { + if (!gl_state.context_ready) { + return -1; + } + + int result = SDL_GL_SetSwapInterval(enabled ? 1 : 0); + if (result == 0) { + LOG_info("GL video: vsync %s", enabled ? "enabled" : "disabled"); + } else { + LOG_warn("GL video: failed to set vsync: %s", SDL_GetError()); + } + return result; +} + void GLVideo_clear(void) { if (!gl_state.context_ready) { return; diff --git a/workspace/all/common/gl_video.h b/workspace/all/common/gl_video.h index 7be3e112..991835ae 100644 --- a/workspace/all/common/gl_video.h +++ b/workspace/all/common/gl_video.h @@ -284,6 +284,18 @@ void GLVideo_presentSurface(SDL_Surface* surface); */ void GLVideo_swapBuffers(void); +/** + * Set vsync mode for the GL context. + * + * In audio-clock mode, vsync should be disabled so audio blocking + * is the sole timing source. In vsync mode, vsync should be enabled + * for tear-free rendering. + * + * @param enabled 1 to enable vsync, 0 to disable + * @return 0 on success, -1 on failure + */ +int GLVideo_setVsync(int enabled); + /** * Clear the screen to black. 
* @@ -462,6 +474,11 @@ static inline void GLVideo_presentSurface(SDL_Surface* surface) { static inline void GLVideo_swapBuffers(void) {} +static inline int GLVideo_setVsync(int enabled) { + (void)enabled; + return -1; +} + static inline void GLVideo_clear(void) {} static inline void GLVideo_renderHUD(const uint32_t* pixels, int width, int height, int screen_w, diff --git a/workspace/all/common/render_sdl2.c b/workspace/all/common/render_sdl2.c index cd37c555..d14670b2 100644 --- a/workspace/all/common/render_sdl2.c +++ b/workspace/all/common/render_sdl2.c @@ -243,6 +243,20 @@ SDL_Surface* SDL2_initVideo(SDL2_RenderContext* ctx, int width, int height, SDL_DestroyWindow(ctx->window); return NULL; } + +#if !HAS_OPENGLES + // Create HUD texture for debug overlay (RGBA for alpha blending) + ctx->hud_texture = SDL_CreateTexture(ctx->renderer, SDL_PIXELFORMAT_RGBA8888, + SDL_TEXTUREACCESS_STREAMING, w, h); + if (!ctx->hud_texture) { + LOG_warn("SDL2_initVideo: Failed to create HUD texture: %s", SDL_GetError()); + // Non-fatal, continue without HUD support + } else { + SDL_SetTextureBlendMode(ctx->hud_texture, SDL_BLENDMODE_BLEND); + } + ctx->hud_surface = NULL; // Not used - PLAT_getDebugHUDBuffer provides RGBA buffer +#endif + LOG_debug("SDL2_initVideo: Surfaces created successfully"); // Store dimensions @@ -298,6 +312,9 @@ void SDL2_quitVideo(SDL2_RenderContext* ctx) { SDL_DestroyTexture(ctx->target); if (ctx->effect) SDL_DestroyTexture(ctx->effect); + if (ctx->hud_texture) + SDL_DestroyTexture(ctx->hud_texture); + // Note: hud_surface is unused (set to NULL) - PLAT_getDebugHUDBuffer provides RGBA buffer SDL_DestroyTexture(ctx->texture); // Destroy renderer and window @@ -392,6 +409,15 @@ void SDL2_present(SDL2_RenderContext* ctx, GFX_Renderer* renderer) { // Draw software frame (with effect support) GLVideo_drawSoftwareFrame(&src_rect, &dst_rect, rotation, sharpness, renderer->visual_scale); + + // Render debug HUD overlay if available (player provides implementation) 
+ uint32_t* hud_buf = PLAT_getDebugHUDBuffer(renderer->src_w, renderer->src_h, ctx->device_width, + ctx->device_height); + if (hud_buf) { + GLVideo_renderHUD(hud_buf, ctx->device_width, ctx->device_height, ctx->device_width, + ctx->device_height); + } + GLVideo_swapBuffers(); #else @@ -474,6 +500,19 @@ void SDL2_present(SDL2_RenderContext* ctx, GFX_Renderer* renderer) { } } + // Render debug HUD overlay if available (player provides implementation) + if (ctx->hud_texture) { + uint32_t* hud_buf = PLAT_getDebugHUDBuffer(renderer->src_w, renderer->src_h, + ctx->device_width, ctx->device_height); + if (hud_buf) { + // Upload RGBA buffer to texture + SDL_UpdateTexture(ctx->hud_texture, NULL, hud_buf, + ctx->device_width * (int)sizeof(uint32_t)); + // Composite HUD texture over game (fullscreen, no rotation) + SDL_RenderCopy(ctx->renderer, ctx->hud_texture, NULL, NULL); + } + } + SDL_RenderPresent(ctx->renderer); #endif } diff --git a/workspace/all/common/render_sdl2.h b/workspace/all/common/render_sdl2.h index d65c1426..7e88491d 100644 --- a/workspace/all/common/render_sdl2.h +++ b/workspace/all/common/render_sdl2.h @@ -68,10 +68,12 @@ typedef struct SDL2_RenderContext { SDL_Texture* texture; // Main texture (source resolution) SDL_Texture* target; // Intermediate texture for crisp scaling SDL_Texture* effect; // Effect overlay texture + SDL_Texture* hud_texture; // Debug HUD overlay texture // Surfaces SDL_Surface* buffer; // Wrapper for texture lock (unused pixels pointer) SDL_Surface* screen; // Main screen surface for UI rendering + SDL_Surface* hud_surface; // Debug HUD surface (currently unused; HUD pixels come from PLAT_getDebugHUDBuffer) // Video dimensions int width; // Current source width diff --git a/workspace/all/launcher/Makefile b/workspace/all/launcher/Makefile index b00c467a..b9ae3563 100644 --- a/workspace/all/launcher/Makefile +++ b/workspace/all/launcher/Makefile @@ -21,10 +21,11 @@ SDL ?= SDL ########################################################### TARGET = launcher -INCDIR = -I.
-I../common/ -I../player/libretro-common/include/ -isystem ../vendor/stb/ -I../../$(PLATFORM)/platform/ +INCDIR = -I. -I../common/ -I../player/ -I../player/libretro-common/include/ -isystem ../vendor/stb/ -I../../$(PLATFORM)/platform/ SOURCE = $(TARGET).c ../common/scaler.c ../common/utils.c ../common/nointro_parser.c \ ../common/api.c ../common/ui_layout.c ../common/log.c ../common/pad.c ../common/paths.c \ ../common/gfx_text.c ../common/platform_variant.c ../common/stb_ds_impl.c \ + ../common/cpu.c \ launcher_entry.c launcher_launcher.c directory_index.c launcher_str_compare.c \ launcher_state.c launcher_m3u.c launcher_map.c launcher_file_utils.c launcher_directory.c \ launcher_context.c launcher_navigation.c launcher_thumbnail.c recent_file.c \ diff --git a/workspace/all/launcher/launcher.c b/workspace/all/launcher/launcher.c index fcfe1a2f..e7b9e814 100644 --- a/workspace/all/launcher/launcher.c +++ b/workspace/all/launcher/launcher.c @@ -1772,7 +1772,7 @@ int main(int argc, char* argv[]) { Menu_init(); // Reduce CPU speed for menu browsing (saves power and heat) - PWR_setCPUSpeed(CPU_SPEED_POWERSAVE); + PWR_setLowPowerMode(); PAD_reset(); int dirty = 1; // Set to 1 when screen needs redraw diff --git a/workspace/all/paks/Tools/Clock/src/clock.c b/workspace/all/paks/Tools/Clock/src/clock.c index 8d200f84..043419ae 100644 --- a/workspace/all/paks/Tools/Clock/src/clock.c +++ b/workspace/all/paks/Tools/Clock/src/clock.c @@ -51,7 +51,7 @@ enum { */ int main(int argc, char* argv[]) { Paths_init(); - PWR_setCPUSpeed(CPU_SPEED_IDLE); + PWR_setLowPowerMode(); SDL_Surface* screen = GFX_init(MODE_MAIN); if (screen == NULL) { diff --git a/workspace/all/paks/Tools/Input/src/minput.c b/workspace/all/paks/Tools/Input/src/minput.c index 78dd7236..4deab503 100644 --- a/workspace/all/paks/Tools/Input/src/minput.c +++ b/workspace/all/paks/Tools/Input/src/minput.c @@ -106,7 +106,7 @@ static void blitButton(char* label, SDL_Surface* dst, int pressed, int x, int y, * @return 
EXIT_SUCCESS on normal exit */ int main(int argc, char* argv[]) { - PWR_setCPUSpeed(CPU_SPEED_IDLE); + PWR_setLowPowerMode(); SDL_Surface* screen = GFX_init(MODE_MAIN); if (screen == NULL) { diff --git a/workspace/all/player/Makefile b/workspace/all/player/Makefile index be17ebfb..9ba61f33 100644 --- a/workspace/all/player/Makefile +++ b/workspace/all/player/Makefile @@ -27,11 +27,11 @@ SOURCE = $(TARGET).c ../common/scaler.c ../common/utils.c ../common/nointro_pars ../common/gfx_text.c ../launcher/launcher_file_utils.c ../common/platform_variant.c \ ../common/paths.c \ player_archive.c player_memory.c player_state.c \ - player_paths.c player_cpu.c player_input.c player_mappings.c \ + player_paths.c ../common/cpu.c player_input.c player_mappings.c \ player_video_convert.c player_rotation.c player_config.c player_context.c \ player_menu.c player_env.c player_game.c player_scaler.c player_core.c \ ../common/gl_video.c \ - frame_pacer.c \ + sync_manager.c \ ../../$(PLATFORM)/platform/platform.c # Add shared rendering modules diff --git a/workspace/all/player/frame_pacer.c b/workspace/all/player/frame_pacer.c deleted file mode 100644 index 73869664..00000000 --- a/workspace/all/player/frame_pacer.c +++ /dev/null @@ -1,73 +0,0 @@ -/** - * frame_pacer.c - Display-agnostic frame pacing implementation - * - * Uses Q16.16 fixed-point arithmetic for precision without float drift. - * Q16.16 means: 16 bits integer, 16 bits fraction (multiply by 65536). 
- */ - -#include "frame_pacer.h" -#include <math.h> - -// Platform function we need - declared in api.h but we avoid including it -// to keep frame_pacer testable without SDL dependencies -extern double PLAT_getDisplayHz(void); - -// Q16.16 conversion factor -#define Q16_SHIFT 16 -#define Q16_SCALE 65536.0 - -void FramePacer_init(FramePacer* pacer, double game_fps, double display_hz) { - // Fallback to 60Hz if display_hz detection failed - if (display_hz <= 0.0) { - display_hz = 60.0; - } - - // Convert to Q16.16 fixed-point for precise integer math - // 59.73fps becomes 3,913,359 (59.73 * 65536) - pacer->game_fps_q16 = (int32_t)(game_fps * Q16_SCALE); - pacer->display_hz_q16 = (int32_t)(display_hz * Q16_SCALE); - - // Initialize accumulator to display_hz so first vsync triggers a step - // This avoids showing a black/stale frame on startup - pacer->accumulator = pacer->display_hz_q16; - - // Direct mode if rates are within tolerance - // This handles 59.94fps @ 60Hz, etc. - double diff = fabs(game_fps - display_hz) / display_hz; - pacer->direct_mode = (diff < FRAME_PACER_TOLERANCE); -} - -bool FramePacer_step(FramePacer* pacer) { - // Direct mode: always step - if (pacer->direct_mode) { - return true; - } - - // Bresenham accumulator: check threshold THEN add - // Since we initialized to display_hz, first call will step - if (pacer->accumulator >= pacer->display_hz_q16) { - pacer->accumulator -= pacer->display_hz_q16; - pacer->accumulator += pacer->game_fps_q16; - return true; - } - - // Not enough accumulated - repeat frame - pacer->accumulator += pacer->game_fps_q16; - return false; -} - -void FramePacer_reset(FramePacer* pacer) { - // Reset to display_hz so next vsync triggers a step - pacer->accumulator = pacer->display_hz_q16; -} - -bool FramePacer_isDirectMode(const FramePacer* pacer) { - return pacer->direct_mode; -} - -double FramePacer_getDisplayHz(void) { - // Use platform-provided display Hz directly.
- // On SDL2 platforms, this queries SDL_GetCurrentDisplayMode(). - // On SDL1 or platforms where SDL doesn't know, this returns a hardcoded value. - return PLAT_getDisplayHz(); -} diff --git a/workspace/all/player/frame_pacer.h b/workspace/all/player/frame_pacer.h deleted file mode 100644 index 9f4e012f..00000000 --- a/workspace/all/player/frame_pacer.h +++ /dev/null @@ -1,115 +0,0 @@ -/** - * frame_pacer.h - Display-agnostic frame pacing - * - * Decouples emulation timing from display refresh rate using a Bresenham-style - * fixed-point accumulator. Determines each vsync whether to step emulation or - * repeat the previous frame. - * - * Example: 60fps game on 72Hz display - * - Vsync 1: acc >= hz -> step, acc -= hz (first frame always steps) - * - Vsync 2: acc < hz -> repeat - * - Vsync 3: acc >= hz -> step, acc -= hz - * - Result: 5 steps per 6 vsyncs (83.3%) = 60fps - * - * Benefits: - * - Q16.16 fixed-point: no floating-point drift, stable forever - * - Direct mode bypass: zero overhead on 60Hz displays - * - Pure functions: fully testable, no SDL/globals - */ - -#ifndef __FRAME_PACER_H__ -#define __FRAME_PACER_H__ - -#include <stdbool.h> -#include <stdint.h> - -/** - * Tolerance for "exact match" detection (direct mode bypass). - * - * Based on RetroArch's dynamic rate control research (Arntzen, 2012): - * - Audio pitch changes ≤0.5% are inaudible to most listeners - * - RetroArch's audio rate control can compensate for ±2% drift - * - Speed changes ≤2% are imperceptible in gameplay - * - * Using 2% allows direct mode (no frame pacing overhead) when the mismatch - * is small enough for audio rate control to handle without perceptible - * pitch shift. Larger mismatches (e.g., 60fps @ 72Hz = 20%) use Bresenham - * pacing to maintain correct speed with frame repeats.
- * - * Examples at 2% tolerance: - * - 59.94fps @ 60Hz → direct mode (0.1% diff) ✓ - * - 60.0fps @ 61Hz → direct mode (1.6% diff) ✓ - * - 50.0fps @ 60Hz → paced mode (16.7% diff) - * - 60.0fps @ 72Hz → paced mode (16.7% diff) - */ -#define FRAME_PACER_TOLERANCE 0.02 - -/** - * Frame pacing state. - * - * Uses Q16.16 fixed-point (multiply by 65536) to preserve fractional precision - * while avoiding floating point drift. For example, 59.73fps becomes 3,913,359. - */ -typedef struct { - int32_t game_fps_q16; // Game FPS in Q16.16 fixed-point - int32_t display_hz_q16; // Display Hz in Q16.16 fixed-point - int32_t accumulator; // Bresenham accumulator (Q16.16) - bool direct_mode; // True if fps ~= hz (skip accumulator) -} FramePacer; - -/** - * Initialize pacer for given game and display rates. - * - * Automatically detects if rates are close enough to use direct mode - * (within FRAME_PACER_TOLERANCE). - * - * Accumulator is initialized to display_hz so the first vsync always - * triggers a step (avoids showing a black/stale frame). - * - * @param pacer Pacer state to initialize - * @param game_fps Game's target FPS (e.g., 60.0, 59.94, 50.0) - * @param display_hz Display refresh rate in Hz (e.g., 60.0, 72.0) - */ -void FramePacer_init(FramePacer* pacer, double game_fps, double display_hz); - -/** - * Call once per vsync. Returns true if emulation should step. - * - * In direct mode, always returns true. - * In paced mode, uses Bresenham accumulator to decide. - * - * @param pacer Pacer state (accumulator will be modified) - * @return true if core.run() should be called, false to repeat last frame - */ -bool FramePacer_step(FramePacer* pacer); - -/** - * Reset accumulator to initial state (display_hz). - * - * Call on game load, state load, or any timing discontinuity. - * Ensures first frame after reset will step. - * - * @param pacer Pacer state to reset - */ -void FramePacer_reset(FramePacer* pacer); - -/** - * Check if pacer is in direct mode. 
- * - * @param pacer Pacer state - * @return true if direct mode (no pacing needed) - */ -bool FramePacer_isDirectMode(const FramePacer* pacer); - -/** - * Gets display refresh rate for frame pacing. - * - * Calls PLAT_getDisplayHz() which either: - * - Queries SDL_GetCurrentDisplayMode() on SDL2 platforms - * - Returns a hardcoded value for the platform's panel - * - * @return Display Hz (e.g., 60.0, 72.0, 73.0) - */ -double FramePacer_getDisplayHz(void); - -#endif // __FRAME_PACER_H__ diff --git a/workspace/all/player/player.c b/workspace/all/player/player.c index f8c7d728..7706c9c8 100644 --- a/workspace/all/player/player.c +++ b/workspace/all/player/player.c @@ -54,9 +54,9 @@ #include #include +#include "../common/cpu.h" #include "api.h" #include "defines.h" -#include "frame_pacer.h" #include "gl_video.h" #include "launcher_file_utils.h" #include "libretro.h" @@ -66,7 +66,6 @@ #include "player_config.h" #include "player_context.h" #include "player_core.h" -#include "player_cpu.h" #include "player_env.h" #include "player_game.h" #include "player_input.h" @@ -80,7 +79,9 @@ #include "player_scaler.h" #include "player_state.h" #include "player_video_convert.h" +#include "render_common.h" #include "scaler.h" +#include "sync_manager.h" #include "utils.h" /////////////////////////////////////// @@ -151,15 +152,16 @@ static int overclock = 3; // CPU speed (0=powersave, 1=normal, 2=performance, 3= // Auto CPU Scaling State (when overclock == 3) // Uses frame timing (core.run() execution time) to dynamically adjust CPU speed. -// State and config are managed via player_cpu.h structs for testability. -static PlayerCPUState auto_cpu_state; -static PlayerCPUConfig auto_cpu_config; +// State and config are managed via cpu.h structs for testability. 
+static CPUState auto_cpu_state; +static CPUConfig auto_cpu_config; static uint64_t auto_cpu_last_frame_start = 0; // For measuring core.run() time -// Frame Pacing State -// Decouples emulation from display refresh for non-60Hz displays (e.g., M17 @ 72Hz). -// See frame_pacer.h for algorithm details. -static FramePacer frame_pacer; +// Sync Manager State +// Manages audio/video synchronization mode (audio-clock vs vsync). +// Starts in audio-clock mode (safe), switches to vsync if compatible (<1% Hz mismatch). +// See sync_manager.h for details. +static SyncManager sync_manager; // Background thread for applying CPU changes without blocking main loop static pthread_t auto_cpu_thread; @@ -843,11 +845,37 @@ static struct Config config = * Thread safety: Uses auto_cpu_mutex to protect shared state. */ static void* auto_cpu_scaling_thread(void* arg) { - LOG_debug("Auto CPU thread: started (granular=%d, freq_count=%d)\n", - auto_cpu_state.use_granular, auto_cpu_state.freq_count); + LOG_debug("Auto CPU thread: started (topology=%d, granular=%d, freq_count=%d)\n", + auto_cpu_state.use_topology, auto_cpu_state.use_granular, auto_cpu_state.freq_count); while (auto_cpu_thread_running) { - if (auto_cpu_state.use_granular) { + if (auto_cpu_state.use_topology) { + // Multi-cluster topology mode: apply PerfState changes + pthread_mutex_lock(&auto_cpu_mutex); + int target_state = auto_cpu_state.target_state; + int current_state = auto_cpu_state.current_state; + pthread_mutex_unlock(&auto_cpu_mutex); + + if (target_state != current_state && target_state >= 0 && + target_state < auto_cpu_state.topology.state_count) { + LOG_debug("Auto CPU: applying PerfState %d/%d\n", target_state, + auto_cpu_state.topology.state_count - 1); + + int result = CPU_applyPerfState(&auto_cpu_state); + if (result != 0) { + LOG_warn("Auto CPU: failed to apply PerfState %d\n", target_state); + } + + // Set pending_affinity under mutex (main thread will apply it) + // This avoids race condition with main 
thread reading pending_affinity + CPUPerfState* ps = &auto_cpu_state.topology.states[target_state]; + pthread_mutex_lock(&auto_cpu_mutex); + if (ps->cpu_affinity_mask > 0) { + auto_cpu_state.pending_affinity = ps->cpu_affinity_mask; + } + pthread_mutex_unlock(&auto_cpu_mutex); + } + } else if (auto_cpu_state.use_granular) { // Granular frequency mode pthread_mutex_lock(&auto_cpu_mutex); int target_idx = auto_cpu_state.target_index; @@ -980,6 +1008,23 @@ static void auto_cpu_setTargetIndex(int index) { pthread_mutex_unlock(&auto_cpu_mutex); } +/** + * Requests a PerfState change (non-blocking, topology mode). + * + * @param state Target PerfState index + */ +static void auto_cpu_setTargetState(int state) { + int max_state = auto_cpu_state.topology.state_count - 1; + if (state < 0) + state = 0; + if (state > max_state) + state = max_state; + + pthread_mutex_lock(&auto_cpu_mutex); + auto_cpu_state.target_state = state; + pthread_mutex_unlock(&auto_cpu_mutex); +} + /** * Gets the current frequency index (thread-safe). */ @@ -1006,8 +1051,7 @@ static int auto_cpu_getCurrentFrequency(void) { * Wrapper around module function for convenience. 
*/ static int auto_cpu_findNearestIndex(int target_khz) { - return PlayerCPU_findNearestIndex(auto_cpu_state.frequencies, auto_cpu_state.freq_count, - target_khz); + return CPU_findNearestIndex(auto_cpu_state.frequencies, auto_cpu_state.freq_count, target_khz); } /** @@ -1022,6 +1066,54 @@ static int auto_cpu_findNearestIndex(int target_khz) { * - PERFORMANCE: 100% (max frequency) */ static void auto_cpu_detectFrequencies(void) { + // First, try topology detection for multi-cluster SoCs + int cluster_count = PWR_detectCPUTopology(&auto_cpu_state.topology); + + if (cluster_count >= 2) { + // Multi-cluster detected - use topology mode + auto_cpu_state.use_topology = 1; + auto_cpu_state.use_granular = 0; + + // Build the PerfState ladder (3 governor levels per cluster tier) + CPU_buildPerfStates(&auto_cpu_state, &auto_cpu_config); + + // Note: governors are now set by applyPerfState(), not upfront + // This lets each PerfState control its own governor configuration + + LOG_info("Auto CPU: topology mode enabled, %d clusters, %d PerfStates\n", cluster_count, + auto_cpu_state.topology.state_count); + + // Log cluster info + for (int c = 0; c < cluster_count; c++) { + CPUCluster* cluster = &auto_cpu_state.topology.clusters[c]; + const char* type_str = cluster->type == CPU_CLUSTER_PRIME ? "PRIME" + : cluster->type == CPU_CLUSTER_BIG ? "BIG" + : cluster->type == CPU_CLUSTER_LITTLE ? 
"LITTLE" + : "?"; + LOG_debug("Auto CPU: cluster %d (policy%d): %s, %d CPUs, %d-%d MHz\n", c, + cluster->policy_id, type_str, cluster->cpu_count, cluster->min_khz / 1000, + cluster->max_khz / 1000); + } + + // Log PerfState ladder (governor-based) + static const char* gov_names[] = {"powersave", "schedutil", "performance"}; + for (int s = 0; s < auto_cpu_state.topology.state_count; s++) { + CPUPerfState* ps = &auto_cpu_state.topology.states[s]; + LOG_debug("Auto CPU: PerfState %d: cluster %d, affinity=0x%x\n", s, + ps->active_cluster_idx, ps->cpu_affinity_mask); + for (int c = 0; c < cluster_count; c++) { + int gov = ps->cluster_governor[c]; + const char* gov_str = (gov >= 0 && gov <= 2) ? gov_names[gov] : "?"; + LOG_debug(" cluster %d: %s\n", c, gov_str); + } + } + + return; + } + + // Single-cluster or no topology - fall back to traditional mode + auto_cpu_state.use_topology = 0; + int raw_count = PLAT_getAvailableCPUFrequencies(auto_cpu_state.frequencies, CPU_MAX_FREQUENCIES); @@ -1080,6 +1172,9 @@ static void resetAutoCPUState(void) { auto_cpu_state.startup_frames = 0; auto_cpu_state.frame_time_index = 0; auto_cpu_state.panic_cooldown = 0; + auto_cpu_state.panic_grace = 0; + auto_cpu_state.grace_underruns = 0; + auto_cpu_state.stability_streak = 0; // Reset panic tracking (menu changes may allow lower frequencies to work) memset(auto_cpu_state.panic_count, 0, sizeof(auto_cpu_state.panic_count)); @@ -1103,9 +1198,16 @@ static void resetAutoCPUState(void) { // Note: target/current frequency set by setOverclock() after this call - LOG_info("Auto CPU: enabled, frame budget=%lluus (%.2f fps), granular=%d\n", - (unsigned long long)auto_cpu_state.frame_budget_us, core.fps, - auto_cpu_state.use_granular); + if (auto_cpu_state.use_topology) { + LOG_info("Auto CPU: enabled (topology mode), frame budget=%lluus (%.2f fps), " + "clusters=%d, states=%d\n", + (unsigned long long)auto_cpu_state.frame_budget_us, core.fps, + auto_cpu_state.topology.cluster_count, 
auto_cpu_state.topology.state_count); + } else { + LOG_info("Auto CPU: enabled, frame budget=%lluus (%.2f fps), granular=%d\n", + (unsigned long long)auto_cpu_state.frame_budget_us, core.fps, + auto_cpu_state.use_granular); + } LOG_debug( "Auto CPU: util thresholds high=%d%% low=%d%%, windows boost=%d reduce=%d, grace=%d\n", auto_cpu_config.util_high, auto_cpu_config.util_low, auto_cpu_config.boost_windows, @@ -1133,7 +1235,21 @@ void setOverclock(int i) { resetAutoCPUState(); // Start at max frequency to avoid startup stutter during grace period // Background thread will scale down as needed after grace period - if (auto_cpu_state.use_granular) { + if (auto_cpu_state.use_topology) { + // Multi-cluster mode: start at highest PerfState + int start_state = auto_cpu_state.topology.state_count - 1; + pthread_mutex_lock(&auto_cpu_mutex); + auto_cpu_state.target_state = start_state; + auto_cpu_state.current_state = -1; // Force apply on first thread iteration + pthread_mutex_unlock(&auto_cpu_mutex); + // Apply initial state immediately (thread will maintain it) + CPU_applyPerfState(&auto_cpu_state); + // Apply affinity directly since we're on the main (emulation) thread + CPUPerfState* ps = &auto_cpu_state.topology.states[start_state]; + if (ps->cpu_affinity_mask > 0) { + PWR_setThreadAffinity(ps->cpu_affinity_mask); + } + } else if (auto_cpu_state.use_granular) { int start_idx = auto_cpu_state.preset_indices[2]; // PERFORMANCE - start high int start_freq = auto_cpu_state.frequencies[start_idx]; PLAT_setCPUFrequency(start_freq); @@ -1154,9 +1270,6 @@ void setOverclock(int i) { } } -// Vsync rate for diagnostics (currently unused, would need measurement to populate) -static float current_vsync_hz = 0; - /** * Updates auto CPU scaling based on frame timing (core.run() execution time). 
* @@ -1164,15 +1277,23 @@ static float current_vsync_hz = 0; * Uses the 90th percentile of frame execution times to determine CPU utilization, * which directly measures emulation performance independent of audio/display timing. * - * Granular Mode Algorithm: - * - Performance scales linearly with frequency - * - Boost: Jump to predicted optimal frequency (no step limit) - * - Reduce: Limited to max_step_down indices to prevent underruns - * - Panic: Boost by panic_step_up on underrun, with cooldown + * Three scaling modes (selected at init based on hardware capabilities): + * - Topology: Multi-cluster CPUs with PerfStates (big.LITTLE) + * - Granular: Single-cluster with fine-grained frequency steps + * - Fallback: Simple 3-level low/medium/high scaling + * + * All modes use the same basic algorithm: + * - Measure utilization as frame_time / frame_budget + * - Boost after sustained high util (>85% for boost_windows) + * - Reduce after sustained low util (<55% for reduce_windows) + * - Panic boost on audio underrun (immediate, with cooldown) * - * Fallback Mode Algorithm (3 levels): - * - Count consecutive high/low util windows - * - Boost after 2 high-util windows (~1s), reduce after 4 low-util windows (~2s) + * Audio Clock mode special handling: + * In Audio Clock sync mode, blocking audio writes make utilization metrics + * unreliable (frame time includes blocking wait). Instead of util-based + * decisions, we use conservative time-based reduction: after 8 stable windows + * (~4s), step down one level. This prevents wasting power while avoiding + * aggressive changes that could cause underruns. 
*/ static void updateAutoCPU(void) { // Skip if not in auto mode or during special states @@ -1192,37 +1313,82 @@ static void updateAutoCPU(void) { pthread_mutex_lock(&auto_cpu_mutex); int current_idx = auto_cpu_state.target_index; int current_level = auto_cpu_state.target_level; + int current_state = auto_cpu_state.target_state; + int pending_affinity = auto_cpu_state.pending_affinity; + auto_cpu_state.pending_affinity = 0; // Clear after reading pthread_mutex_unlock(&auto_cpu_mutex); + // Apply pending affinity from background thread (must be done from main thread) + if (pending_affinity > 0) { + PWR_setThreadAffinity(pending_affinity); + } + + // Decrement panic grace period (ignore underruns after frequency change) + if (auto_cpu_state.panic_grace > 0) { + auto_cpu_state.panic_grace--; + } + // Emergency: check for actual underruns (panic path) + // Skip if in grace period - new frequency needs time to refill audio buffer unsigned underruns = SND_getUnderrunCount(); int max_idx = auto_cpu_state.freq_count - 1; - int at_max = auto_cpu_state.use_granular ? 
(current_idx >= max_idx) : (current_level >= 2); + int max_state = auto_cpu_state.topology.state_count - 1; + int at_max; + if (auto_cpu_state.use_topology) { + at_max = (current_state >= max_state); + } else if (auto_cpu_state.use_granular) { + at_max = (current_idx >= max_idx); + } else { + at_max = (current_level >= 2); + } + + // Track underruns during grace period + bool underrun_detected = (underruns > auto_cpu_state.last_underrun); + if (underrun_detected && auto_cpu_state.panic_grace > 0) { + auto_cpu_state.grace_underruns++; + } + + // Override grace period if too many underruns (catastrophic failure) + bool grace_exceeded = (auto_cpu_state.grace_underruns >= CPU_PANIC_GRACE_MAX_UNDERRUNS); - if (underruns > auto_cpu_state.last_underrun && !at_max) { + if (underrun_detected && !at_max && (auto_cpu_state.panic_grace == 0 || grace_exceeded)) { // Underrun detected - track panic and boost unsigned audio_fill = SND_getBufferOccupancy(); - // Track panic at current frequency (for failsafe blocking). - // If a frequency can't keep up, all lower frequencies are also blocked - // because lower freq = less CPU throughput = guaranteed worse performance. - if (auto_cpu_state.use_granular && current_idx >= 0 && - current_idx < PLAYER_CPU_MAX_FREQUENCIES) { + // Track panic at current state/frequency (for failsafe blocking). + // If a state can't keep up, it gets blocked after CPU_PANIC_THRESHOLD panics. 
+ if (auto_cpu_state.use_topology && current_state >= 0 && + current_state < CPU_MAX_FREQUENCIES) { + auto_cpu_state.panic_count[current_state]++; + + if (auto_cpu_state.panic_count[current_state] >= CPU_PANIC_THRESHOLD) { + LOG_warn("Auto CPU: BLOCKING state %d after %d panics (audio=%u%%)\n", + current_state, auto_cpu_state.panic_count[current_state], audio_fill); + } + } else if (auto_cpu_state.use_granular && current_idx >= 0 && + current_idx < CPU_MAX_FREQUENCIES) { auto_cpu_state.panic_count[current_idx]++; - if (auto_cpu_state.panic_count[current_idx] >= PLAYER_CPU_PANIC_THRESHOLD) { + if (auto_cpu_state.panic_count[current_idx] >= CPU_PANIC_THRESHOLD) { LOG_warn("Auto CPU: BLOCKING %d kHz and below after %d panics (audio=%u%%)\n", auto_cpu_state.frequencies[current_idx], auto_cpu_state.panic_count[current_idx], audio_fill); // Block this frequency and all below - they can't possibly work // if this one failed (lower freq = strictly less performance) for (int i = 0; i <= current_idx; i++) { - auto_cpu_state.panic_count[i] = PLAYER_CPU_PANIC_THRESHOLD; + auto_cpu_state.panic_count[i] = CPU_PANIC_THRESHOLD; } } } - if (auto_cpu_state.use_granular) { + if (auto_cpu_state.use_topology) { + int new_state = current_state + auto_cpu_config.panic_step_up; + if (new_state > max_state) + new_state = max_state; + auto_cpu_setTargetState(new_state); + LOG_warn("Auto CPU: PANIC - underrun, boosting state %d→%d (audio=%u%%)\n", + current_state, new_state, audio_fill); + } else if (auto_cpu_state.use_granular) { int new_idx = current_idx + auto_cpu_config.panic_step_up; if (new_idx > max_idx) new_idx = max_idx; @@ -1231,6 +1397,16 @@ static void updateAutoCPU(void) { auto_cpu_state.frequencies[current_idx], auto_cpu_state.frequencies[new_idx], audio_fill); } else { + // Fallback mode - track panic at current level + if (current_level >= 0 && current_level < 3) { + auto_cpu_state.panic_count[current_level]++; + + if (auto_cpu_state.panic_count[current_level] >= 
CPU_PANIC_THRESHOLD) { + LOG_warn("Auto CPU: BLOCKING level %d after %d panics (audio=%u%%)\n", + current_level, auto_cpu_state.panic_count[current_level], audio_fill); + } + } + int new_level = current_level + auto_cpu_config.panic_step_up; if (new_level > 2) new_level = 2; @@ -1240,8 +1416,12 @@ static void updateAutoCPU(void) { } auto_cpu_state.high_util_windows = 0; auto_cpu_state.low_util_windows = 0; + auto_cpu_state.stability_streak = 0; // Cooldown: wait 8 windows (~4 seconds) before allowing reduction auto_cpu_state.panic_cooldown = 8; + // Grace period: ignore underruns while new frequency refills audio buffer + auto_cpu_state.panic_grace = CPU_PANIC_GRACE_FRAMES; + auto_cpu_state.grace_underruns = 0; SND_resetUnderrunCount(); auto_cpu_state.last_underrun = 0; return; @@ -1276,10 +1456,120 @@ static void updateAutoCPU(void) { util = 200; // Cap at 200% for sanity } - if (auto_cpu_state.use_granular) { - // Granular mode: use linear performance scaling to find optimal frequency - // Performance scales linearly with frequency, so: - // new_util = current_util * (current_freq / new_freq) + if (auto_cpu_state.use_topology) { + // Topology mode: step through PerfStates one at a time + // Unlike granular mode, we don't predict - just step conservatively + + // Decrement panic cooldown each window + if (auto_cpu_state.panic_cooldown > 0) { + auto_cpu_state.panic_cooldown--; + } + + // Check if we're in Audio Clock mode (blocking audio makes util unreliable) + bool in_audio_clock = (SyncManager_getMode(&sync_manager) == SYNC_MODE_AUDIO_CLOCK); + + if (in_audio_clock) { + // Audio Clock: time-based reduction (util is unreliable due to blocking audio) + // We use time-based probing: after stability period, try reducing. + // If it causes underruns, panic path will boost back. 
+ // + // Buffer level guides timing: + // - 40-75%: Normal range, reduce after 8 windows (~4s) + // - >75%: High buffer (Hz/fps mismatch), reduce after 16 windows (~8s) + // Longer delay because high buffer means timing is pathological + // - <40%: Don't reduce (need headroom for transition) + auto_cpu_state.low_util_windows++; + unsigned audio_fill = SND_getBufferOccupancy(); + + // Determine required stability windows based on buffer level + int required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS; // 8 windows (~4s) + if (audio_fill > 75) { + // High buffer = pathological timing, be more conservative + required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS * 2; // 16 windows (~8s) + } + + bool buffer_ok = (audio_fill >= 40); // Lower bound only - need headroom + if (auto_cpu_state.low_util_windows >= required_windows && + auto_cpu_state.panic_cooldown == 0 && buffer_ok && current_state > 0) { + int new_state = current_state - 1; + // Skip blocked states (too many panics at that state) + while (new_state >= 0 && + auto_cpu_state.panic_count[new_state] >= CPU_PANIC_THRESHOLD) { + new_state--; + } + if (new_state >= 0) { + auto_cpu_setTargetState(new_state); + auto_cpu_state.low_util_windows = 0; + LOG_debug("Auto CPU: REDUCE state %d→%d (AC mode, buf=%u%%, wait=%d)\n", + current_state, new_state, audio_fill, required_windows); + } else { + // All lower states blocked, just reset counter + auto_cpu_state.low_util_windows = 0; + } + } + } else if (util > auto_cpu_config.util_high) { + // Need more performance - step up + auto_cpu_state.high_util_windows++; + auto_cpu_state.low_util_windows = 0; + + if (auto_cpu_state.high_util_windows >= auto_cpu_config.boost_windows && + current_state < max_state) { + int new_state = current_state + 1; + auto_cpu_setTargetState(new_state); + auto_cpu_state.high_util_windows = 0; + auto_cpu_state.panic_grace = CPU_PANIC_GRACE_FRAMES; + auto_cpu_state.grace_underruns = 0; + LOG_debug("Auto CPU: BOOST state %d→%d (util=%u%%)\n", 
current_state, new_state, + util); + } + } else if (util < auto_cpu_config.util_low) { + // Can reduce power - step down + auto_cpu_state.low_util_windows++; + auto_cpu_state.high_util_windows = 0; + + // Only reduce if: enough windows, cooldown expired, buffer healthy + int reduce_ok = + (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows) && + (auto_cpu_state.panic_cooldown == 0) && (current_state > 0); + + if (reduce_ok) { + // Step down by max_step_down (usually 1) + int new_state = current_state - auto_cpu_config.max_step_down; + if (new_state < 0) + new_state = 0; + // Skip blocked states (too many panics at that state) + while (new_state >= 0 && + auto_cpu_state.panic_count[new_state] >= CPU_PANIC_THRESHOLD) { + new_state--; + } + if (new_state >= 0) { + auto_cpu_setTargetState(new_state); + auto_cpu_state.low_util_windows = 0; + // No grace period on reduce - if we underrun, frequency is too slow + LOG_debug("Auto CPU: REDUCE state %d→%d (util=%u%%)\n", current_state, + new_state, util); + } else { + // All lower states blocked + auto_cpu_state.low_util_windows = 0; + } + } + } else { + // In sweet spot - reset counters + auto_cpu_state.high_util_windows = 0; + auto_cpu_state.low_util_windows = 0; + } + + // Sampled debug logging (every 4th window = ~2 seconds) + static int debug_window_count_topo = 0; + if (++debug_window_count_topo >= 4) { + debug_window_count_topo = 0; + SND_Snapshot snap = SND_getSnapshot(); + LOG_debug("Auto CPU: fill=%u%% adj=%.4f util=%u%% state=%d/%d\n", snap.fill_pct, + snap.total_adjust, util, current_state, max_state); + } + } else if (auto_cpu_state.use_granular) { + // Granular mode: step through available frequencies one at a time + // Skips frequencies that have caused repeated underruns (panic-blocked) int current_freq = auto_cpu_state.frequencies[current_idx]; @@ -1288,28 +1578,53 @@ static void updateAutoCPU(void) { auto_cpu_state.panic_cooldown--; } - if (util > auto_cpu_config.util_high) { + // Check if 
we're in Audio Clock mode (blocking audio makes util unreliable) + bool in_audio_clock = (SyncManager_getMode(&sync_manager) == SYNC_MODE_AUDIO_CLOCK); + + if (in_audio_clock) { + // Audio Clock: time-based reduction (util is unreliable due to blocking audio) + // Use time-based probing with buffer-guided timing. + auto_cpu_state.low_util_windows++; + unsigned audio_fill = SND_getBufferOccupancy(); + + // High buffer = pathological timing, wait longer before probing + int required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS; + if (audio_fill > 75) { + required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS * 2; + } + + bool buffer_ok = (audio_fill >= 40); + if (auto_cpu_state.low_util_windows >= required_windows && + auto_cpu_state.panic_cooldown == 0 && buffer_ok && current_idx > 0) { + int new_idx = current_idx - 1; + // Skip blocked frequencies + while (new_idx >= 0 && + auto_cpu_state.panic_count[new_idx] >= CPU_PANIC_THRESHOLD) { + new_idx--; + } + if (new_idx >= 0) { + int new_freq = auto_cpu_state.frequencies[new_idx]; + auto_cpu_setTargetIndex(new_idx); + auto_cpu_state.low_util_windows = 0; + LOG_debug("Auto CPU: REDUCE %d→%d kHz (AC mode, buf=%u%%, wait=%d)\n", + current_freq, new_freq, audio_fill, required_windows); + } else { + auto_cpu_state.low_util_windows = 0; + } + } + } else if (util > auto_cpu_config.util_high) { // Need more performance - step up auto_cpu_state.high_util_windows++; auto_cpu_state.low_util_windows = 0; if (auto_cpu_state.high_util_windows >= auto_cpu_config.boost_windows && current_idx < max_idx) { - // Find next frequency that would bring util to target (sweet spot) - // Using: new_util = util * (current_freq / new_freq) - // So: new_freq = current_freq * util / target_util - // No step limit - linear scaling prediction is accurate, boost aggressively - int needed_freq = current_freq * (int)util / auto_cpu_config.target_util; - int new_idx = auto_cpu_findNearestIndex(needed_freq); - - // Ensure we actually go higher - if (new_idx <= 
current_idx) - new_idx = current_idx + 1; - if (new_idx > max_idx) - new_idx = max_idx; - + // Step up by 1 - simple and predictable + int new_idx = current_idx + 1; auto_cpu_setTargetIndex(new_idx); auto_cpu_state.high_util_windows = 0; + auto_cpu_state.panic_grace = CPU_PANIC_GRACE_FRAMES; + auto_cpu_state.grace_underruns = 0; LOG_debug("Auto CPU: BOOST %d→%d kHz (util=%u%%)\n", current_freq, auto_cpu_state.frequencies[new_idx], util); } @@ -1318,47 +1633,29 @@ static void updateAutoCPU(void) { auto_cpu_state.low_util_windows++; auto_cpu_state.high_util_windows = 0; - // Only reduce if: enough consecutive low windows AND panic cooldown expired + // Only reduce if: enough windows, cooldown expired, buffer healthy int reduce_ok = (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows) && (auto_cpu_state.panic_cooldown == 0) && (current_idx > 0); if (reduce_ok) { - // Find frequency that would bring util up to target (sweet spot) - // new_util = util * (current_freq / new_freq) - // new_freq = current_freq * util / target_util - int needed_freq = current_freq * (int)util / auto_cpu_config.target_util; - int new_idx = auto_cpu_findNearestIndex(needed_freq); - - // Ensure we actually go lower - if (new_idx >= current_idx) - new_idx = current_idx - 1; - if (new_idx < 0) - new_idx = 0; - - // Limit reduction to max_step_down indices at once - if (current_idx - new_idx > auto_cpu_config.max_step_down) { - new_idx = current_idx - auto_cpu_config.max_step_down; - } + // Step down by 1 - simple and predictable + int new_idx = current_idx - 1; - // Skip blocked frequencies - find first unblocked one above new_idx. - // Frequencies get blocked when they cause repeated panics. 
+ // Skip blocked frequencies while (new_idx >= 0 && - auto_cpu_state.panic_count[new_idx] >= PLAYER_CPU_PANIC_THRESHOLD) { - new_idx++; - if (new_idx >= current_idx) { - // All lower frequencies blocked - stay at current - break; - } + auto_cpu_state.panic_count[new_idx] >= CPU_PANIC_THRESHOLD) { + new_idx--; } // Don't reduce if no safe frequency found - if (new_idx >= current_idx) { + if (new_idx < 0) { auto_cpu_state.low_util_windows = 0; } else { int new_freq = auto_cpu_state.frequencies[new_idx]; auto_cpu_setTargetIndex(new_idx); auto_cpu_state.low_util_windows = 0; + // No grace period on reduce - if we underrun, frequency is too slow LOG_debug("Auto CPU: REDUCE %d→%d kHz (util=%u%%)\n", current_freq, new_freq, util); } @@ -1374,13 +1671,52 @@ static void updateAutoCPU(void) { if (++debug_window_count >= 4) { debug_window_count = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% freq=%dkHz idx=%d/%d\n", - snap.fill_pct, snap.rate_integral, snap.total_adjust, util, current_freq, - current_idx, max_idx); + LOG_debug("Auto CPU: fill=%u%% adj=%.4f util=%u%% freq=%dkHz idx=%d/%d\n", + snap.fill_pct, snap.total_adjust, util, current_freq, current_idx, + max_idx); } } else { // Fallback mode: 3-level scaling (original algorithm) - if (util > auto_cpu_config.util_high) { + + // Decrement panic cooldown each window + if (auto_cpu_state.panic_cooldown > 0) { + auto_cpu_state.panic_cooldown--; + } + + // Check if we're in Audio Clock mode (blocking audio makes util unreliable) + bool in_audio_clock = (SyncManager_getMode(&sync_manager) == SYNC_MODE_AUDIO_CLOCK); + + if (in_audio_clock) { + // Audio Clock: time-based reduction (util is unreliable due to blocking audio) + // Use time-based probing with buffer-guided timing. 
+ auto_cpu_state.low_util_windows++; + unsigned audio_fill = SND_getBufferOccupancy(); + + // High buffer = pathological timing, wait longer before probing + int required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS; + if (audio_fill > 75) { + required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS * 2; + } + + bool buffer_ok = (audio_fill >= 40); + if (auto_cpu_state.low_util_windows >= required_windows && + auto_cpu_state.panic_cooldown == 0 && buffer_ok && current_level > 0) { + int new_level = current_level - 1; + // Skip blocked levels + while (new_level >= 0 && + auto_cpu_state.panic_count[new_level] >= CPU_PANIC_THRESHOLD) { + new_level--; + } + if (new_level >= 0) { + auto_cpu_setTargetLevel(new_level); + auto_cpu_state.low_util_windows = 0; + LOG_debug("Auto CPU: REDUCE level %d (AC mode, buf=%u%%, wait=%d)\n", + new_level, audio_fill, required_windows); + } else { + auto_cpu_state.low_util_windows = 0; + } + } + } else if (util > auto_cpu_config.util_high) { auto_cpu_state.high_util_windows++; auto_cpu_state.low_util_windows = 0; } else if (util < auto_cpu_config.util_low) { @@ -1396,9 +1732,8 @@ static void updateAutoCPU(void) { if (++debug_window_count_fallback >= 4) { debug_window_count_fallback = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% level=%d\n", - snap.fill_pct, snap.rate_integral, snap.total_adjust, util, - current_level); + LOG_debug("Auto CPU: fill=%u%% adj=%.4f util=%u%% level=%d\n", snap.fill_pct, + snap.total_adjust, util, current_level); } // Boost if sustained high utilization @@ -1407,17 +1742,66 @@ static void updateAutoCPU(void) { int new_level = current_level + 1; auto_cpu_setTargetLevel(new_level); auto_cpu_state.high_util_windows = 0; + auto_cpu_state.panic_grace = CPU_PANIC_GRACE_FRAMES; + auto_cpu_state.grace_underruns = 0; LOG_debug("Auto CPU: BOOST level %d (util=%u%%)\n", new_level, util); } - // Reduce if sustained low utilization + // Reduce if sustained low utilization, 
buffer healthy (respects panic cooldown) if (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows && - current_level > 0) { + auto_cpu_state.panic_cooldown == 0 && current_level > 0) { int new_level = current_level - 1; - auto_cpu_setTargetLevel(new_level); - auto_cpu_state.low_util_windows = 0; - LOG_debug("Auto CPU: REDUCE level %d (util=%u%%)\n", new_level, util); + // Skip blocked levels + while (new_level >= 0 && + auto_cpu_state.panic_count[new_level] >= CPU_PANIC_THRESHOLD) { + new_level--; + } + if (new_level >= 0) { + auto_cpu_setTargetLevel(new_level); + auto_cpu_state.low_util_windows = 0; + // No grace period on reduce - if we underrun, frequency is too slow + LOG_debug("Auto CPU: REDUCE level %d (util=%u%%)\n", new_level, util); + } else { + // All lower levels blocked + auto_cpu_state.low_util_windows = 0; + } + } + } + + // Track stability for panic count decay + // If we reached here, no panic happened during this window + auto_cpu_state.stability_streak++; + if (auto_cpu_state.stability_streak >= CPU_STABILITY_DECAY_WINDOWS) { + // Earned stability - decay panic counts for current state/freq and above only + // Being stable at a state proves higher states are fine too, but not lower ones + int decayed = 0; + if (auto_cpu_state.use_topology) { + for (int i = current_state; i < auto_cpu_state.topology.state_count; i++) { + if (auto_cpu_state.panic_count[i] > 0) { + auto_cpu_state.panic_count[i]--; + decayed++; + } + } + } else if (auto_cpu_state.use_granular) { + for (int i = current_idx; i < auto_cpu_state.freq_count; i++) { + if (auto_cpu_state.panic_count[i] > 0) { + auto_cpu_state.panic_count[i]--; + decayed++; + } + } + } else { + // Fallback mode: decay for current level and above + for (int i = current_level; i < 3; i++) { + if (auto_cpu_state.panic_count[i] > 0) { + auto_cpu_state.panic_count[i]--; + decayed++; + } + } + } + if (decayed > 0) { + LOG_debug("Auto CPU: stability earned, decayed %d panic counts\n", decayed); } + 
auto_cpu_state.stability_streak = 0; } // Reset window counter (frame times stay in ring buffer) @@ -3339,6 +3723,15 @@ static const char* bitmap_font[] = { " 1" "1 1" " 111 ", + ['T'] = "11111" + " 1 " + " 1 " + " 1 " + " 1 " + " 1 " + " 1 " + " 1 " + " 1 ", ['A'] = " 1 " " 1 1 " "1 1" @@ -3357,6 +3750,24 @@ static const char* bitmap_font[] = { "1 " "1 1" " 111 ", + ['V'] = "1 1" + "1 1" + "1 1" + "1 1" + "1 1" + "1 1" + " 1 1 " + " 1 1 " + " 1 ", + ['+'] = " " + " " + " 1 " + " 1 " + "11111" + " 1 " + " 1 " + " " + " ", }; static void blitBitmapText(char* text, int ox, int oy, uint16_t* data, int stride, int width, int height) { @@ -3474,6 +3885,193 @@ static void blitBitmapTextRGBA(char* text, int ox, int oy, uint32_t* data, int s } } +/** + * Render bitmap text to an RGBA buffer with DP-based scaling. + * + * Similar to blitBitmapTextScaled but for RGBA format used by HW rendering. + * White text with black outline, transparent background, scaled for consistent sizing. + * + * @param text Text to render + * @param ox X position in pixels (negative = right-align from edge) + * @param oy Y position in pixels (negative = bottom-align from edge) + * @param data RGBA8888 pixel buffer + * @param stride Buffer width in pixels (not bytes) + * @param width Total buffer width in pixels + * @param height Total buffer height in pixels + * @param scale Scale factor (e.g., 2 = double size) + */ +static void blitBitmapTextRGBAScaled(char* text, int ox, int oy, uint32_t* data, int stride, + int width, int height, int scale) { + if (scale < 1) + scale = 1; + + const uint32_t RGBA_WHITE = 0xFFFFFFFF; + const uint32_t RGBA_BLACK = 0xFF000000; + + int len = strlen(text); + int base_w = ((CHAR_WIDTH + LETTERSPACING) * len) - 1; + int base_h = CHAR_HEIGHT; + int w = base_w * scale; + int h = base_h * scale; + + // Handle negative offsets (right/bottom alignment) + if (ox < 0) + ox = width - w + ox; + if (oy < 0) + oy = height - h + oy; + + // Bounds check - need scale px margin for 
outline + if (ox < scale || oy < scale || ox + w + scale > width || oy + h + scale > height) + return; + + data += oy * stride + ox; + + // Top outline rows + for (int outline_y = -scale; outline_y < 0; outline_y++) { + uint32_t* row = data + (ptrdiff_t)outline_y * stride; + for (int x = -scale; x < w + scale; x++) { + row[x] = RGBA_BLACK; + } + } + + // Main text rows with side outlines + for (int y = 0; y < base_h; y++) { + for (int sy = 0; sy < scale; sy++) { + uint32_t* row = data + (ptrdiff_t)(y * scale + sy) * stride; + + // Left outline + for (int x = -scale; x < 0; x++) { + row[x] = RGBA_BLACK; + } + + // Character pixels + int col = 0; + for (int i = 0; i < len; i++) { + const char* c = bitmap_font[(unsigned char)text[i]]; + if (!c) + c = bitmap_font[' ']; + for (int x = 0; x < CHAR_WIDTH; x++) { + int j = y * CHAR_WIDTH + x; + uint32_t color = (c[j] == '1') ? RGBA_WHITE : RGBA_BLACK; + for (int sx = 0; sx < scale; sx++) { + row[col * scale + sx] = color; + } + col++; + } + // Letter spacing + for (int s = 0; s < LETTERSPACING; s++) { + for (int sx = 0; sx < scale; sx++) { + row[col * scale + sx] = RGBA_BLACK; + } + col++; + } + } + + // Right outline + for (int x = 0; x < scale; x++) { + row[w + x] = RGBA_BLACK; + } + } + } + + // Bottom outline rows + for (int outline_y = 0; outline_y < scale; outline_y++) { + uint32_t* row = data + (ptrdiff_t)(h + outline_y) * stride; + for (int x = -scale; x < w + scale; x++) { + row[x] = RGBA_BLACK; + } + } +} + +/** + * Render bitmap text to an RGB565 buffer with DP-based scaling. + * + * Renders text at a scale factor determined by display points, ensuring + * consistent visual size across different screen resolutions. Each character + * is upscaled by repeating pixels. 
+ * + * @param text Text to render + * @param ox X position in pixels (negative = right-align from edge) + * @param oy Y position in pixels (negative = bottom-align from edge) + * @param data RGB565 pixel buffer + * @param stride Buffer width in pixels (not bytes) + * @param width Total buffer width in pixels + * @param height Total buffer height in pixels + * @param scale Scale factor (e.g., 2 = double size) + */ +static void blitBitmapTextScaled(char* text, int ox, int oy, uint16_t* data, int stride, int width, + int height, int scale) { + if (scale < 1) + scale = 1; + + int len = strlen(text); + int base_w = ((CHAR_WIDTH + LETTERSPACING) * len) - 1; + int base_h = CHAR_HEIGHT; + int w = base_w * scale; + int h = base_h * scale; + + // Handle negative offsets (right/bottom alignment) + if (ox < 0) + ox = width - w + ox; + if (oy < 0) + oy = height - h + oy; + + // Bounds check - need scale px margin for outline + if (ox < scale || oy < scale || ox + w + scale > width || oy + h + scale > height) + return; + + // Draw black outline (scale pixels around text) + data += oy * stride + ox; + + // Top outline rows + for (int outline_y = -scale; outline_y < 0; outline_y++) { + uint16_t* row = data + (ptrdiff_t)outline_y * stride; + memset(row - scale, 0, (size_t)(w + 2 * scale) * 2); + } + + // Main text rows with side outlines + for (int y = 0; y < base_h; y++) { + for (int sy = 0; sy < scale; sy++) { + uint16_t* row = data + (ptrdiff_t)(y * scale + sy) * stride; + + // Left outline + memset(row - scale, 0, (size_t)scale * 2); + + // Character pixels + int col = 0; + for (int i = 0; i < len; i++) { + const char* c = bitmap_font[(unsigned char)text[i]]; + if (!c) + c = bitmap_font[' ']; + for (int x = 0; x < CHAR_WIDTH; x++) { + int j = y * CHAR_WIDTH + x; + uint16_t color = (c[j] == '1') ? 
0xffff : 0x0000; + for (int sx = 0; sx < scale; sx++) { + row[col * scale + sx] = color; + } + col++; + } + // Letter spacing + for (int s = 0; s < LETTERSPACING; s++) { + for (int sx = 0; sx < scale; sx++) { + row[col * scale + sx] = 0x0000; + } + col++; + } + } + + // Right outline + memset(row + w, 0, (size_t)scale * 2); + } + } + + // Bottom outline rows + for (int outline_y = 0; outline_y < scale; outline_y++) { + uint16_t* row = data + (ptrdiff_t)(h + outline_y) * stride; + memset(row - scale, 0, (size_t)(w + 2 * scale) * 2); + } +} + /////////////////////////////////////// // Performance Counters (needed by HW HUD before video processing section) /////////////////////////////////////// @@ -3488,81 +4086,66 @@ static double use_double = 0; // System CPU usage percentage static uint32_t sec_start = 0; /////////////////////////////////////// -// HW Debug HUD +// Shared Debug HUD Logic /////////////////////////////////////// -// HUD buffer for HW rendering (allocated once, reused) -static uint32_t* hw_hud_buffer = NULL; -static int hw_hud_width = 0; -static int hw_hud_height = 0; +/** + * Debug text strings for HUD display. + * Generated once per frame, used by both SW and HW rendering paths. + */ +typedef struct DebugHUDText { + char top_left[128]; // FPS and CPU % + char top_right[128]; // Source/output resolution + char bottom_left[128]; // CPU mode and buffer fill + char bottom_right[128]; // Output/source resolution +} DebugHUDText; /** - * Render debug HUD overlay for hardware-rendered frames. + * Generate debug HUD text strings. * - * Creates an RGBA surface with the same debug info as the software path - * (FPS, CPU usage, resolution, etc.) and passes it to the HW render module - * for compositing over the game frame. + * Consolidates all debug metric formatting logic used by both software + * and hardware rendering paths. Samples audio buffer fill every 15 frames. 
* + * @param text Output structure for formatted debug strings * @param src_w Source (game) width in pixels * @param src_h Source (game) height in pixels * @param screen_w Screen width in pixels * @param screen_h Screen height in pixels */ -static void renderHWDebugHUD(int src_w, int src_h, int screen_w, int screen_h) { - // Allocate or resize HUD buffer if needed - if (!hw_hud_buffer || hw_hud_width != screen_w || hw_hud_height != screen_h) { - free(hw_hud_buffer); - hw_hud_buffer = malloc((size_t)screen_w * (size_t)screen_h * sizeof(uint32_t)); - if (!hw_hud_buffer) { - LOG_error("Failed to allocate HW HUD buffer"); - return; - } - hw_hud_width = screen_w; - hw_hud_height = screen_h; - } - - // Clear to fully transparent - memset(hw_hud_buffer, 0, (size_t)screen_w * (size_t)screen_h * sizeof(uint32_t)); - - int x = 2; - int y = 2; - char debug_text[128]; - - // Calculate scale factor for HW rendering (approximate) - int scale = 1; - if (src_w > 0 && src_h > 0) { - int scale_x = screen_w / src_w; - int scale_y = screen_h / src_h; - scale = (scale_x < scale_y) ? 
scale_x : scale_y; - if (scale < 1) - scale = 1; - } - - // Get buffer fill (sampled every 15 frames for readability) +static void generateDebugHUDText(DebugHUDText* text, int src_w, int src_h, int screen_w, + int screen_h) { + // Get buffer fill and rate adjustment (sampled every 15 frames for readability) static unsigned fill_display = 0; + static float rate_adj_display = 1.0f; static int sample_count = 0; if (++sample_count >= 15) { sample_count = 0; - fill_display = SND_getBufferOccupancy(); + SND_Snapshot snap = SND_getSnapshot(); + fill_display = snap.fill_pct; + rate_adj_display = snap.rate_adjust; + } + + // Top-left: FPS, sync mode, and rate control adjustment + // Modes: AC = audio clock, VS = vsync with rate control + // Rate adjustment shows audio stretch: >1.0 = running fast, <1.0 = running slow + float rate_pct = (rate_adj_display - 1.0f) * 100.0f; + SyncMode current_mode = SyncManager_getMode(&sync_manager); + if (current_mode == SYNC_MODE_AUDIO_CLOCK) { + (void)snprintf(text->top_left, sizeof(text->top_left), "%.1f AC", fps_double); + } else { + (void)snprintf(text->top_left, sizeof(text->top_left), "%.1f VS %+.1f%%", fps_double, + rate_pct); } - // Top-left: FPS and system CPU % -#ifdef SYNC_MODE_AUDIOCLOCK - (void)snprintf(debug_text, sizeof(debug_text), "%.0f FPS %i%% AC", fps_double, (int)use_double); -#else - (void)snprintf(debug_text, sizeof(debug_text), "%.0f FPS %i%%", fps_double, (int)use_double); -#endif - blitBitmapTextRGBA(debug_text, x, y, hw_hud_buffer, screen_w, screen_w, screen_h); - - // Top-right: Source resolution and scale factor - (void)snprintf(debug_text, sizeof(debug_text), "%ix%i %ix", src_w, src_h, scale); - blitBitmapTextRGBA(debug_text, -x, y, hw_hud_buffer, screen_w, screen_w, screen_h); + // Top-right: Source resolution + (void)snprintf(text->top_right, sizeof(text->top_right), "%ix%i", src_w, src_h); // Bottom-left: CPU info + buffer fill if (overclock == 3) { - // Auto CPU mode: show frequency/level, utilization, and 
buffer fill + // Auto CPU mode: show mode-specific info, utilization, and buffer fill pthread_mutex_lock(&auto_cpu_mutex); int current_idx = auto_cpu_state.current_index; + int current_state = auto_cpu_state.current_state; int level = auto_cpu_state.current_level; pthread_mutex_unlock(&auto_cpu_mutex); @@ -3578,29 +4161,124 @@ static void renderHWDebugHUD(int src_w, int src_h, int screen_w, int screen_h) { util = 200; } - if (auto_cpu_state.use_granular && current_idx >= 0 && - current_idx < auto_cpu_state.freq_count) { + if (auto_cpu_state.use_topology) { + // Topology mode: show state/max and performance % + int perf_pct = CPU_getPerformancePercent(&auto_cpu_state); + int max_state = auto_cpu_state.topology.state_count - 1; + (void)snprintf(text->bottom_left, sizeof(text->bottom_left), + "T%i/%i %i%% u:%u%% b:%u%%", current_state, max_state, perf_pct, util, + fill_display); + } else if (auto_cpu_state.use_granular && current_idx >= 0 && + current_idx < auto_cpu_state.freq_count) { // Granular mode: show frequency in MHz int freq_mhz = auto_cpu_state.frequencies[current_idx] / 1000; - (void)snprintf(debug_text, sizeof(debug_text), "%i u:%u%% b:%u%%", freq_mhz, util, - fill_display); + (void)snprintf(text->bottom_left, sizeof(text->bottom_left), "%i u:%u%% b:%u%%", + freq_mhz, util, fill_display); } else { // Fallback mode: show level - (void)snprintf(debug_text, sizeof(debug_text), "L%i u:%u%% b:%u%%", level, util, - fill_display); + (void)snprintf(text->bottom_left, sizeof(text->bottom_left), "L%i u:%u%% b:%u%%", level, + util, fill_display); } } else { // Manual mode: show level and buffer fill - (void)snprintf(debug_text, sizeof(debug_text), "L%i b:%u%%", overclock, fill_display); + (void)snprintf(text->bottom_left, sizeof(text->bottom_left), "L%i b:%u%%", overclock, + fill_display); } - blitBitmapTextRGBA(debug_text, x, -y, hw_hud_buffer, screen_w, screen_w, screen_h); // Bottom-right: Output resolution - (void)snprintf(debug_text, sizeof(debug_text), "%ix%i", 
screen_w, screen_h); - blitBitmapTextRGBA(debug_text, -x, -y, hw_hud_buffer, screen_w, screen_w, screen_h); + (void)snprintf(text->bottom_right, sizeof(text->bottom_right), "%ix%i", screen_w, screen_h); +} + +/////////////////////////////////////// +// HW Debug HUD +/////////////////////////////////////// + +// HUD buffer for HW rendering (allocated once, reused) +static uint32_t* hw_hud_buffer = NULL; +static int hw_hud_width = 0; +static int hw_hud_height = 0; + +/** + * Build debug HUD into RGBA buffer for GL compositing. + * + * Allocates/resizes buffer as needed and renders debug text. + * Does NOT call GLVideo_renderHUD - caller handles compositing. + * + * @param src_w Source (game) width in pixels + * @param src_h Source (game) height in pixels + * @param screen_w Screen width in pixels + * @param screen_h Screen height in pixels + * @return RGBA8888 buffer or NULL on allocation failure + */ +static uint32_t* buildDebugHUDBuffer(int src_w, int src_h, int screen_w, int screen_h) { + // Allocate or resize HUD buffer if needed + if (!hw_hud_buffer || hw_hud_width != screen_w || hw_hud_height != screen_h) { + free(hw_hud_buffer); + hw_hud_buffer = malloc((size_t)screen_w * (size_t)screen_h * sizeof(uint32_t)); + if (!hw_hud_buffer) { + LOG_error("Failed to allocate HW HUD buffer"); + hw_hud_width = 0; + hw_hud_height = 0; + return NULL; + } + hw_hud_width = screen_w; + hw_hud_height = screen_h; + } - // Pass HUD to HW renderer for compositing - GLVideo_renderHUD(hw_hud_buffer, screen_w, screen_h, screen_w, screen_h); + // Clear to fully transparent + memset(hw_hud_buffer, 0, (size_t)screen_w * (size_t)screen_h * sizeof(uint32_t)); + + // Generate debug text using shared logic + DebugHUDText text; + generateDebugHUDText(&text, src_w, src_h, screen_w, screen_h); + + // Calculate text scale based on screen height for consistent proportions + // Using screen_h/180 gives ~3-4% of screen height across devices: + // - 480px: 2x (16px = 3.3%) + // - 560px: 3x (24px = 
4.3%) + // - 720px: 4x (32px = 4.4%) + // This avoids integer quantization issues with DP-based calculation + int text_scale = screen_h / 180; + if (text_scale < 1) + text_scale = 1; + if (text_scale > 6) + text_scale = 6; + + // Debug: log HUD rendering parameters (GL path, once) + static int logged_gl = 0; + if (!logged_gl) { + LOG_info("Debug HUD GL: buffer=%dx%d, text_scale=%dx (%dpx, %.1f%% of screen)\n", screen_w, + screen_h, text_scale, text_scale * CHAR_HEIGHT, + 100.0f * text_scale * CHAR_HEIGHT / screen_h); + logged_gl = 1; + } + + // Offset from screen edges (proportional to text size) + int margin = text_scale * 2; + + // Render all four corners + blitBitmapTextRGBAScaled(text.top_left, margin, margin, hw_hud_buffer, screen_w, screen_w, + screen_h, text_scale); + blitBitmapTextRGBAScaled(text.top_right, -margin, margin, hw_hud_buffer, screen_w, screen_w, + screen_h, text_scale); + blitBitmapTextRGBAScaled(text.bottom_left, margin, -margin, hw_hud_buffer, screen_w, screen_w, + screen_h, text_scale); + blitBitmapTextRGBAScaled(text.bottom_right, -margin, -margin, hw_hud_buffer, screen_w, screen_w, + screen_h, text_scale); + + return hw_hud_buffer; +} + +/** + * Get debug HUD buffer for GL compositing (PLAT hook implementation). + * + * Called by SDL2_present() GLES path. Returns RGBA buffer to be + * composited over the game frame via GLVideo_renderHUD(). + */ +uint32_t* PLAT_getDebugHUDBuffer(int src_w, int src_h, int screen_w, int screen_h) { + if (!show_debug) + return NULL; + return buildDebugHUDBuffer(src_w, src_h, screen_w, screen_h); } /** @@ -3613,6 +4291,66 @@ static void cleanupHWDebugHUD(void) { hw_hud_height = 0; } +/////////////////////////////////////// +// SW Debug HUD +/////////////////////////////////////// + +/** + * Render debug HUD to an RGB565 surface (PLAT hook implementation). + * + * Called by PLAT_present() implementations before buffer flip. + * Uses screen-proportional scaling for consistent sizing across platforms. 
+ * + * @param surface SDL surface to render to (must be RGB565 format) + */ +void PLAT_renderDebugHUD(SDL_Surface* surface) { + if (!show_debug || !surface || !surface->pixels) + return; + + int screen_w = surface->w; + int screen_h = surface->h; + int pitch_in_pixels = surface->pitch / sizeof(uint16_t); + uint16_t* pixels = (uint16_t*)surface->pixels; + + // Generate debug text using shared logic + DebugHUDText text; + generateDebugHUDText(&text, renderer.src_w, renderer.src_h, screen_w, screen_h); + + // Calculate text scale based on screen height for consistent proportions + // Using screen_h/180 gives ~3-4% of screen height across devices: + // - 480px: 2x (16px = 3.3%) + // - 560px: 3x (24px = 4.3%) + // - 720px: 4x (32px = 4.4%) + // This avoids integer quantization issues with DP-based calculation + int text_scale = screen_h / 180; + if (text_scale < 1) + text_scale = 1; + if (text_scale > 6) + text_scale = 6; + + // Debug: log HUD rendering parameters (SW path, once) + static int logged_sw = 0; + if (!logged_sw) { + LOG_info("Debug HUD SW: surface=%dx%d, text_scale=%dx (%dpx, %.1f%% of screen)\n", screen_w, + screen_h, text_scale, text_scale * CHAR_HEIGHT, + 100.0f * text_scale * CHAR_HEIGHT / screen_h); + logged_sw = 1; + } + + // Offset from screen edges (proportional to text size) + int margin = text_scale * 2; + + // Render all four corners + blitBitmapTextScaled(text.top_left, margin, margin, pixels, pitch_in_pixels, screen_w, screen_h, + text_scale); + blitBitmapTextScaled(text.top_right, -margin, margin, pixels, pitch_in_pixels, screen_w, + screen_h, text_scale); + blitBitmapTextScaled(text.bottom_left, margin, -margin, pixels, pitch_in_pixels, screen_w, + screen_h, text_scale); + blitBitmapTextScaled(text.bottom_right, -margin, -margin, pixels, pitch_in_pixels, screen_w, + screen_h, text_scale); +} + /////////////////////////////////////// // Video Processing /////////////////////////////////////// @@ -3811,110 +4549,6 @@ static void 
video_refresh_callback_main(const void* data, unsigned width, unsign } renderer.src = rotated_data; - - // debug - render after pixel conversion so we write to RGB565 buffer - if (show_debug) { - int x = 2 + renderer.src_x; - int y = 2 + renderer.src_y; - char debug_text[128]; - int scale = renderer.scale; - if (scale == -1) - scale = 1; // nearest neighbor flag - - // Debug text rendering needs correct buffer dimensions and pitch. - // blitBitmapText expects pitch in pixels (uint16_t), not bytes. - // - // After 90°/270° rotation, the buffer dimensions are swapped (width becomes height - // and vice versa) because the image has been rotated. We detect this by checking if - // rotated_data != frame_data (indicating rotation was actually applied). - // - // blitBitmapText needs the post-rotation dimensions to correctly bounds-check text - // rendering, and the rotation buffer's pitch instead of the original pitch. - int pitch_in_pixels; - int debug_width = width; - int debug_height = height; - - if (rotated_data != frame_data) { - // Use rotation buffer pitch when rotation was applied - pitch_in_pixels = PlayerRotation_getBuffer()->pitch / sizeof(uint16_t); - if (video_state.rotation == ROTATION_90 || video_state.rotation == ROTATION_270) { - // Swap dimensions for 90°/270° rotations - debug_width = height; - debug_height = width; - } - } else { - // Use original pitch when rotation was skipped - pitch_in_pixels = rgb565_pitch / sizeof(uint16_t); - } - - // Get buffer fill (sampled every 15 frames for readability) - static unsigned fill_display = 0; - static int sample_count = 0; - if (++sample_count >= 15) { - sample_count = 0; - fill_display = SND_getBufferOccupancy(); - } - - // Top-left: FPS and system CPU % -#ifdef SYNC_MODE_AUDIOCLOCK - (void)snprintf(debug_text, sizeof(debug_text), "%.0f FPS %i%% AC", fps_double, - (int)use_double); -#else - (void)snprintf(debug_text, sizeof(debug_text), "%.0f FPS %i%%", fps_double, - (int)use_double); -#endif - 
blitBitmapText(debug_text, x, y, (uint16_t*)renderer.src, pitch_in_pixels, debug_width, - debug_height); - - // Top-right: Source resolution and scale factor - (void)snprintf(debug_text, sizeof(debug_text), "%ix%i %ix", renderer.src_w, renderer.src_h, - scale); - blitBitmapText(debug_text, -x, y, (uint16_t*)renderer.src, pitch_in_pixels, debug_width, - debug_height); - - // Bottom-left: CPU info + buffer fill (always), plus utilization when auto - if (overclock == 3) { - // Auto CPU mode: show frequency/level, utilization, and buffer fill - pthread_mutex_lock(&auto_cpu_mutex); - int current_idx = auto_cpu_state.current_index; - int level = auto_cpu_state.current_level; - pthread_mutex_unlock(&auto_cpu_mutex); - - // Calculate current utilization from most recent frame times - unsigned util = 0; - int samples = (auto_cpu_state.frame_time_index < auto_cpu_config.window_frames) - ? auto_cpu_state.frame_time_index - : auto_cpu_config.window_frames; - if (samples >= 5 && auto_cpu_state.frame_budget_us > 0) { - uint64_t p90 = percentileUint64(auto_cpu_state.frame_times, samples, 0.90f); - util = (unsigned)((p90 * 100) / auto_cpu_state.frame_budget_us); - if (util > 200) - util = 200; - } - - if (auto_cpu_state.use_granular && current_idx >= 0 && - current_idx < auto_cpu_state.freq_count) { - // Granular mode: show frequency in MHz (e.g., "1200" for 1200 MHz) - int freq_mhz = auto_cpu_state.frequencies[current_idx] / 1000; - (void)snprintf(debug_text, sizeof(debug_text), "%i u:%u%% b:%u%%", freq_mhz, util, - fill_display); - } else { - // Fallback mode: show level - (void)snprintf(debug_text, sizeof(debug_text), "L%i u:%u%% b:%u%%", level, util, - fill_display); - } - } else { - // Manual mode: show level and buffer fill (overclock 0/1/2 maps to L0/L1/L2) - (void)snprintf(debug_text, sizeof(debug_text), "L%i b:%u%%", overclock, fill_display); - } - blitBitmapText(debug_text, x, -y, (uint16_t*)renderer.src, pitch_in_pixels, debug_width, - debug_height); - - // 
Bottom-right: Output resolution - (void)snprintf(debug_text, sizeof(debug_text), "%ix%i", renderer.dst_w, renderer.dst_h); - blitBitmapText(debug_text, -x, -y, (uint16_t*)renderer.src, pitch_in_pixels, debug_width, - debug_height); - } renderer.dst = screen->pixels; // LOG_info("video_refresh_callback: %ix%i@%i %ix%i@%i",width,height,pitch,screen->w,screen->h,screen->pitch); @@ -3949,8 +4583,10 @@ void video_refresh_callback(const void* data, unsigned width, unsigned height, s core.aspect_ratio, renderer.visual_scale); // Render debug HUD overlay if enabled - if (show_debug) { - renderHWDebugHUD((int)width, (int)height, DEVICE_WIDTH, DEVICE_HEIGHT); + uint32_t* hud = + PLAT_getDebugHUDBuffer((int)width, (int)height, DEVICE_WIDTH, DEVICE_HEIGHT); + if (hud) { + GLVideo_renderHUD(hud, DEVICE_WIDTH, DEVICE_HEIGHT, DEVICE_WIDTH, DEVICE_HEIGHT); } // Swap buffers to display the frame @@ -5444,7 +6080,7 @@ static void Menu_saveState(void) { static void Menu_loadState(void) { PlayerMenu_loadState(PlayerContext_get()); - FramePacer_reset(&frame_pacer); // Reset accumulator after state load + // Note: Sync manager doesn't need reset after state load (no persistent accumulator) } static void Menu_scale(SDL_Surface* src, SDL_Surface* dst) { @@ -5717,12 +6353,135 @@ static void showFatalError(void) { } } -// Main loop implementation selected at compile-time based on sync mode -#ifdef SYNC_MODE_AUDIOCLOCK -#include "player_loop_audioclock.inc" -#else -#include "player_loop_vsync.inc" -#endif +// Sync mode callbacks for audio system +static bool sync_shouldUseRateControl(void) { + return SyncManager_shouldUseRateControl(&sync_manager); +} + +static bool sync_shouldBlockAudio(void) { + return SyncManager_shouldBlockAudio(&sync_manager); +} + +/** + * Unified main loop with runtime-adaptive sync mode. 
+ */ +static void run_main_loop(void) { + double display_hz = PLAT_getDisplayHz(); + SyncManager_init(&sync_manager, core.fps, display_hz); + SND_setSyncCallbacks(sync_shouldUseRateControl, sync_shouldBlockAudio); + + // Set vsync based on sync mode: + // - Audio-clock mode: disable vsync so audio blocking is the sole timing source + // - Vsync mode: enable vsync for tear-free rendering with display-driven timing + bool use_vsync = (SyncManager_getMode(&sync_manager) != SYNC_MODE_AUDIO_CLOCK); + GLVideo_setVsync(use_vsync ? 1 : 0); + + LOG_info("Starting main loop: %.2ffps @ %.1fHz (mode: %s, vsync=%s)\n", core.fps, display_hz, + SyncManager_getModeName(SyncManager_getMode(&sync_manager)), use_vsync ? "on" : "off"); + + PWR_warn(1); + PWR_disableAutosleep(); + + GFX_clearAll(); + GFX_present(NULL); + + LOG_debug("Special_init"); + Special_init(); + + LOG_debug("Entering main loop"); + sec_start = SDL_GetTicks(); + + while (!quit) { + GFX_startFrame(); + input_polled_this_frame = 0; + + int runs_this_vsync = fast_forward ? (max_ff_speed + 2) : 1; + + for (int run = 0; run < runs_this_vsync; run++) { + bool should_run_core = + !show_menu && + ((run == 0) ? 
(fast_forward || SyncManager_shouldRunCore(&sync_manager)) + : fast_forward); + + if (should_run_core) { + if (video_state.frame_time_cb) { + retro_usec_t frame_now = getMicroseconds(); + retro_usec_t delta; + if (fast_forward) { + delta = video_state.frame_time_ref; + } else { + if (video_state.frame_time_last == 0) { + delta = video_state.frame_time_ref; + } else { + delta = frame_now - video_state.frame_time_last; + } + video_state.frame_time_last = frame_now; + } + video_state.frame_time_cb(delta); + } + + if (core.audio_buffer_status) { + if (fast_forward) { + core.audio_buffer_status(false, 0, false); + } else { + unsigned occupancy = SND_getBufferOccupancy(); + core.audio_buffer_status(true, occupancy, occupancy < 25); + } + } + + if (!fast_forward) { + SND_newFrame(); + } + + uint64_t frame_start = getMicroseconds(); + GLVideo_bindFBO(); + core.run(); + uint64_t frame_time = getMicroseconds() - frame_start; + + if (overclock == 3 && !fast_forward && !show_menu) { + auto_cpu_state + .frame_times[auto_cpu_state.frame_time_index % CPU_FRAME_BUFFER_SIZE] = + frame_time; + auto_cpu_state.frame_time_index++; + } + } + } + + if (!GLVideo_isEnabled()) { + GFX_present(&renderer); + frame_ready_for_flip = 0; + } + + SyncManager_recordVsync(&sync_manager); + + // Update vsync if sync mode changed (e.g., audio-clock → vsync transition) + { + static SyncMode prev_mode = SYNC_MODE_AUDIO_CLOCK; + SyncMode curr_mode = SyncManager_getMode(&sync_manager); + if (curr_mode != prev_mode) { + int vsync_enabled = (curr_mode != SYNC_MODE_AUDIO_CLOCK) ? 
1 : 0; + GLVideo_setVsync(vsync_enabled); + prev_mode = curr_mode; + } + } + + limitFF(); + trackFPS(); + updateAutoCPU(); + + input_poll_callback(); + + if (show_menu) { + Menu_loop(); + + if (GLVideo_isEnabled()) { + GLVideo_bindFBO(); + } + } + + hdmimon(); + } +} int main(int argc, char* argv[]) { // Initialize logging early (reads LOG_FILE and LOG_SYNC from environment) @@ -5791,8 +6550,8 @@ int main(int argc, char* argv[]) { PlayerContext_initCallbacks(ctx, &callbacks); // Initialize auto CPU scaling config with defaults - PlayerCPU_initConfig(&auto_cpu_config); - PlayerCPU_initState(&auto_cpu_state); + CPU_initConfig(&auto_cpu_config); + CPU_initState(&auto_cpu_state); setOverclock(overclock); // default to normal // force a stack overflow to ensure asan is linked and actually working diff --git a/workspace/all/player/player_context.h b/workspace/all/player/player_context.h index ac48439c..58cbed09 100644 --- a/workspace/all/player/player_context.h +++ b/workspace/all/player/player_context.h @@ -29,7 +29,7 @@ #include #include -#include "player_cpu.h" +#include "../common/cpu.h" #include "player_internal.h" // Forward declaration for SDL_Surface (avoids pulling in SDL headers) @@ -196,8 +196,8 @@ typedef struct PlayerContext { //---------------------------------- // Auto CPU scaling //---------------------------------- - PlayerCPUState* auto_cpu_state; - PlayerCPUConfig* auto_cpu_config; + CPUState* auto_cpu_state; + CPUConfig* auto_cpu_config; //---------------------------------- // Multi-disc support diff --git a/workspace/all/player/player_cpu.c b/workspace/all/player/player_cpu.c deleted file mode 100644 index f6eaa6df..00000000 --- a/workspace/all/player/player_cpu.c +++ /dev/null @@ -1,422 +0,0 @@ -/** - * player_cpu.c - Auto CPU scaling utilities - * - * Implements dynamic CPU frequency scaling based on emulation performance. - * Extracted from player.c for testability. 
- * - * The algorithm uses frame execution time (90th percentile) to determine - * CPU utilization, then adjusts frequency to maintain target utilization. - * - * Key concepts: - * - Performance scales linearly with frequency - * - Boost aggressively (jump to predicted frequency) to avoid stuttering - * - Reduce conservatively (limited steps) to avoid oscillation - * - Panic path on audio underrun with cooldown - */ - -#include "player_cpu.h" - -#include -#include - -/////////////////////////////// -// Comparison for qsort -/////////////////////////////// - -static int compare_uint64(const void* a, const void* b) { - uint64_t va = *(const uint64_t*)a; - uint64_t vb = *(const uint64_t*)b; - if (va < vb) - return -1; - if (va > vb) - return 1; - return 0; -} - -/////////////////////////////// -// Public Functions -/////////////////////////////// - -void PlayerCPU_initConfig(PlayerCPUConfig* config) { - config->window_frames = PLAYER_CPU_DEFAULT_WINDOW_FRAMES; - config->util_high = PLAYER_CPU_DEFAULT_UTIL_HIGH; - config->util_low = PLAYER_CPU_DEFAULT_UTIL_LOW; - config->boost_windows = PLAYER_CPU_DEFAULT_BOOST_WINDOWS; - config->reduce_windows = PLAYER_CPU_DEFAULT_REDUCE_WINDOWS; - config->startup_grace = PLAYER_CPU_DEFAULT_STARTUP_GRACE; - config->min_freq_khz = PLAYER_CPU_DEFAULT_MIN_FREQ_KHZ; - config->target_util = PLAYER_CPU_DEFAULT_TARGET_UTIL; - config->max_step_down = PLAYER_CPU_DEFAULT_MAX_STEP_DOWN; - config->panic_step_up = PLAYER_CPU_DEFAULT_PANIC_STEP_UP; -} - -void PlayerCPU_initState(PlayerCPUState* state) { - memset(state, 0, sizeof(PlayerCPUState)); - // Set sensible defaults - state->frame_budget_us = 16667; // 60fps default -} - -int PlayerCPU_findNearestIndex(const int* frequencies, int count, int target_khz) { - if (count <= 0) - return 0; - - int best_idx = 0; - int best_diff = abs(frequencies[0] - target_khz); - - for (int i = 1; i < count; i++) { - int diff = abs(frequencies[i] - target_khz); - if (diff < best_diff) { - best_diff = diff; - 
best_idx = i; - } - } - return best_idx; -} - -void PlayerCPU_detectFrequencies(PlayerCPUState* state, const PlayerCPUConfig* config, - const int* raw_frequencies, int raw_count) { - // Filter frequencies below minimum threshold - state->freq_count = 0; - for (int i = 0; i < raw_count && state->freq_count < PLAYER_CPU_MAX_FREQUENCIES; i++) { - if (raw_frequencies[i] >= config->min_freq_khz) { - state->frequencies[state->freq_count++] = raw_frequencies[i]; - } - } - - // Disable scaling if only 0 or 1 frequency available (nothing to scale) - if (state->freq_count <= 1) { - state->scaling_disabled = 1; - state->use_granular = 0; - state->frequencies_detected = 1; - return; - } - - state->scaling_disabled = 0; - state->use_granular = 1; - - // Calculate preset indices based on percentage of max frequency - int max_freq = state->frequencies[state->freq_count - 1]; - - // POWERSAVE: 55% of max - int ps_target = max_freq * 55 / 100; - state->preset_indices[PLAYER_CPU_LEVEL_POWERSAVE] = - PlayerCPU_findNearestIndex(state->frequencies, state->freq_count, ps_target); - - // NORMAL: 80% of max - int normal_target = max_freq * 80 / 100; - state->preset_indices[PLAYER_CPU_LEVEL_NORMAL] = - PlayerCPU_findNearestIndex(state->frequencies, state->freq_count, normal_target); - - // PERFORMANCE: max frequency - state->preset_indices[PLAYER_CPU_LEVEL_PERFORMANCE] = state->freq_count - 1; - - state->frequencies_detected = 1; -} - -void PlayerCPU_reset(PlayerCPUState* state, const PlayerCPUConfig* config, double fps, - unsigned current_underruns) { - (void)config; // May be used in future for configurable grace period - - state->frame_count = 0; - state->high_util_windows = 0; - state->low_util_windows = 0; - state->last_underrun = current_underruns; - state->startup_frames = 0; - state->panic_cooldown = 0; - state->frame_time_index = 0; - - // Calculate frame budget from FPS - if (fps > 0) { - state->frame_budget_us = (uint64_t)(1000000.0 / fps); - } else { - state->frame_budget_us = 
16667; // Default to 60fps - } - - // Clear frame time buffer - memset(state->frame_times, 0, sizeof(state->frame_times)); -} - -void PlayerCPU_recordFrameTime(PlayerCPUState* state, uint64_t frame_time_us) { - state->frame_times[state->frame_time_index % PLAYER_CPU_FRAME_BUFFER_SIZE] = frame_time_us; - state->frame_time_index++; -} - -uint64_t PlayerCPU_percentile90(const uint64_t* frame_times, int count) { - if (count <= 0) - return 0; - - // Limit to buffer size - if (count > PLAYER_CPU_FRAME_BUFFER_SIZE) - count = PLAYER_CPU_FRAME_BUFFER_SIZE; - - // Copy and sort - uint64_t sorted[PLAYER_CPU_FRAME_BUFFER_SIZE]; - memcpy(sorted, frame_times, count * sizeof(uint64_t)); - qsort(sorted, count, sizeof(uint64_t), compare_uint64); - - // 90th percentile index - int p90_idx = (count * 90) / 100; - if (p90_idx >= count) - p90_idx = count - 1; - - return sorted[p90_idx]; -} - -int PlayerCPU_predictFrequency(int current_freq, int current_util, int target_util) { - if (target_util <= 0) - return current_freq; - - // Linear scaling: new_freq = current_freq * current_util / target_util - return current_freq * current_util / target_util; -} - -int PlayerCPU_getPresetPercentage(PlayerCPULevel level) { - switch (level) { - case PLAYER_CPU_LEVEL_POWERSAVE: - return 55; - case PLAYER_CPU_LEVEL_NORMAL: - return 80; - case PLAYER_CPU_LEVEL_PERFORMANCE: - default: - return 100; - } -} - -PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* config, - bool fast_forward, bool show_menu, unsigned current_underruns, - PlayerCPUResult* result) { - // Initialize result if provided - if (result) { - result->decision = PLAYER_CPU_DECISION_NONE; - result->new_index = state->target_index; - result->new_level = state->target_level; - result->utilization = 0; - result->p90_time = 0; - } - - // Skip if scaling is disabled (0 or 1 frequency available) - if (state->scaling_disabled) { - if (result) - result->decision = PLAYER_CPU_DECISION_SKIP; - return 
PLAYER_CPU_DECISION_SKIP; - } - - // Skip during special states - if (fast_forward || show_menu) { - if (result) - result->decision = PLAYER_CPU_DECISION_SKIP; - return PLAYER_CPU_DECISION_SKIP; - } - - // Startup grace period - if (state->startup_frames < config->startup_grace) { - state->startup_frames++; - if (result) - result->decision = PLAYER_CPU_DECISION_SKIP; - return PLAYER_CPU_DECISION_SKIP; - } - - // Get current indices (target_index is always 0..freq_count-1) - int current_idx = state->target_index; - int current_level = state->target_level; - int max_idx = state->freq_count - 1; - if (max_idx < 0) - max_idx = 0; - - // Check if at max - bool at_max = state->use_granular ? (current_idx >= max_idx) : (current_level >= 2); - - // Emergency: check for underruns (panic path) - if (current_underruns > state->last_underrun && !at_max) { - // Underrun detected - boost by panic_step_up - if (state->use_granular) { - int new_idx = current_idx + config->panic_step_up; - if (new_idx > max_idx) - new_idx = max_idx; - state->target_index = new_idx; - if (result) { - result->decision = PLAYER_CPU_DECISION_PANIC; - result->new_index = new_idx; - } - } else { - int new_level = current_level + config->panic_step_up; - if (new_level > 2) - new_level = 2; - state->target_level = new_level; - if (result) { - result->decision = PLAYER_CPU_DECISION_PANIC; - result->new_level = new_level; - } - } - - state->high_util_windows = 0; - state->low_util_windows = 0; - state->panic_cooldown = 8; // ~4 seconds before allowing reduction - state->last_underrun = 0; // Reset after handling - - return PLAYER_CPU_DECISION_PANIC; - } - - // Update underrun tracking (even if at max) - if (current_underruns > state->last_underrun) { - state->last_underrun = current_underruns; - } - - // Count frames in current window - state->frame_count++; - - // Check if window is complete - if (state->frame_count < config->window_frames) { - return PLAYER_CPU_DECISION_NONE; - } - - // Calculate 90th 
percentile frame time - int samples = state->frame_time_index; - if (samples > PLAYER_CPU_FRAME_BUFFER_SIZE) - samples = PLAYER_CPU_FRAME_BUFFER_SIZE; - - if (samples < 5) { - // Not enough samples - reset and wait - state->frame_count = 0; - return PLAYER_CPU_DECISION_NONE; - } - - uint64_t p90_time = PlayerCPU_percentile90(state->frame_times, samples); - - // Calculate utilization as percentage of frame budget - unsigned util = 0; - if (state->frame_budget_us > 0) { - util = (unsigned)((p90_time * 100) / state->frame_budget_us); - if (util > 200) - util = 200; // Cap at 200% for sanity - } - - if (result) { - result->utilization = util; - result->p90_time = p90_time; - } - - PlayerCPUDecision decision = PLAYER_CPU_DECISION_NONE; - - if (state->use_granular) { - // Granular mode: linear frequency scaling - int current_freq = state->frequencies[current_idx]; - - // Decrement panic cooldown - if (state->panic_cooldown > 0) { - state->panic_cooldown--; - } - - if (util > config->util_high) { - // Need more performance - state->high_util_windows++; - state->low_util_windows = 0; - - if (state->high_util_windows >= config->boost_windows && current_idx < max_idx) { - // Predict optimal frequency using linear scaling - int needed_freq = - PlayerCPU_predictFrequency(current_freq, util, config->target_util); - int new_idx = - PlayerCPU_findNearestIndex(state->frequencies, state->freq_count, needed_freq); - - // Ensure we actually go higher - if (new_idx <= current_idx) - new_idx = current_idx + 1; - if (new_idx > max_idx) - new_idx = max_idx; - - state->target_index = new_idx; - state->high_util_windows = 0; - decision = PLAYER_CPU_DECISION_BOOST; - - if (result) { - result->decision = PLAYER_CPU_DECISION_BOOST; - result->new_index = new_idx; - } - } - } else if (util < config->util_low) { - // Can reduce power - state->low_util_windows++; - state->high_util_windows = 0; - - // Only reduce if enough windows AND panic cooldown expired - bool reduce_ok = 
(state->low_util_windows >= config->reduce_windows) && - (state->panic_cooldown == 0) && (current_idx > 0); - - if (reduce_ok) { - // Predict lower frequency - int needed_freq = - PlayerCPU_predictFrequency(current_freq, util, config->target_util); - int new_idx = - PlayerCPU_findNearestIndex(state->frequencies, state->freq_count, needed_freq); - - // Ensure we actually go lower - if (new_idx >= current_idx) - new_idx = current_idx - 1; - if (new_idx < 0) - new_idx = 0; - - // Limit reduction to max_step_down - if (current_idx - new_idx > config->max_step_down) { - new_idx = current_idx - config->max_step_down; - } - - state->target_index = new_idx; - state->low_util_windows = 0; - decision = PLAYER_CPU_DECISION_REDUCE; - - if (result) { - result->decision = PLAYER_CPU_DECISION_REDUCE; - result->new_index = new_idx; - } - } - } else { - // In sweet spot - reset counters - state->high_util_windows = 0; - state->low_util_windows = 0; - } - } else { - // Fallback mode: 3-level scaling - if (util > config->util_high) { - state->high_util_windows++; - state->low_util_windows = 0; - } else if (util < config->util_low) { - state->low_util_windows++; - state->high_util_windows = 0; - } else { - state->high_util_windows = 0; - state->low_util_windows = 0; - } - - // Boost if sustained high utilization - if (state->high_util_windows >= config->boost_windows && current_level < 2) { - int new_level = current_level + 1; - state->target_level = new_level; - state->high_util_windows = 0; - decision = PLAYER_CPU_DECISION_BOOST; - - if (result) { - result->decision = PLAYER_CPU_DECISION_BOOST; - result->new_level = new_level; - } - } - - // Reduce if sustained low utilization - if (state->low_util_windows >= config->reduce_windows && current_level > 0) { - int new_level = current_level - 1; - state->target_level = new_level; - state->low_util_windows = 0; - decision = PLAYER_CPU_DECISION_REDUCE; - - if (result) { - result->decision = PLAYER_CPU_DECISION_REDUCE; - result->new_level 
= new_level; - } - } - } - - // Reset window counter - state->frame_count = 0; - - return decision; -} diff --git a/workspace/all/player/player_cpu.h b/workspace/all/player/player_cpu.h deleted file mode 100644 index 7b7f1583..00000000 --- a/workspace/all/player/player_cpu.h +++ /dev/null @@ -1,243 +0,0 @@ -/** - * player_cpu.h - Auto CPU scaling utilities - * - * Provides functions for dynamic CPU frequency scaling based on emulation - * performance. Uses frame timing (core.run() execution time) to determine - * optimal CPU frequency. - * - * Two modes are supported: - * - Granular mode: Uses all available CPU frequencies (linear scaling) - * - Fallback mode: Uses 3 fixed levels (powersave/normal/performance) - * - * Designed for testability with injectable state and callbacks. - * Extracted from player.c. - */ - -#ifndef __PLAYER_CPU_H__ -#define __PLAYER_CPU_H__ - -#include -#include - -/** - * Maximum number of CPU frequencies that can be detected. - */ -#define PLAYER_CPU_MAX_FREQUENCIES 32 - -/** - * Ring buffer size for frame timing samples. - */ -#define PLAYER_CPU_FRAME_BUFFER_SIZE 64 - -/** - * Default tuning constants. - * These can be overridden via PlayerCPUConfig. 
- */ -#define PLAYER_CPU_DEFAULT_WINDOW_FRAMES 30 // ~500ms at 60fps -#define PLAYER_CPU_DEFAULT_UTIL_HIGH 85 // Boost threshold (%) -#define PLAYER_CPU_DEFAULT_UTIL_LOW 55 // Reduce threshold (%) -#define PLAYER_CPU_DEFAULT_BOOST_WINDOWS 2 // Windows before boost (~1s) -#define PLAYER_CPU_DEFAULT_REDUCE_WINDOWS 4 // Windows before reduce (~2s) -#define PLAYER_CPU_DEFAULT_STARTUP_GRACE 300 // Frames to skip (~5s at 60fps) -#define PLAYER_CPU_DEFAULT_MIN_FREQ_KHZ 0 // No minimum (panic failsafe handles problematic freqs) -#define PLAYER_CPU_DEFAULT_TARGET_UTIL 70 // Target utilization after change -#define PLAYER_CPU_DEFAULT_MAX_STEP_DOWN 1 // Max frequency steps when reducing -#define PLAYER_CPU_DEFAULT_PANIC_STEP_UP 2 // Frequency steps on panic (underrun) -#define PLAYER_CPU_PANIC_THRESHOLD 3 // Block frequency after this many panics - -/** - * Preset level indices. - */ -typedef enum { - PLAYER_CPU_LEVEL_POWERSAVE = 0, - PLAYER_CPU_LEVEL_NORMAL = 1, - PLAYER_CPU_LEVEL_PERFORMANCE = 2 -} PlayerCPULevel; - -/** - * Decision type returned by PlayerCPU_update(). - */ -typedef enum { - PLAYER_CPU_DECISION_NONE = 0, // No change needed - PLAYER_CPU_DECISION_BOOST, // Increase frequency/level - PLAYER_CPU_DECISION_REDUCE, // Decrease frequency/level - PLAYER_CPU_DECISION_PANIC, // Emergency boost (underrun detected) - PLAYER_CPU_DECISION_SKIP // Skipped (grace period, menu, etc.) -} PlayerCPUDecision; - -/** - * Configuration constants for auto CPU scaling. 
- */ -typedef struct { - int window_frames; // Frames per monitoring window - unsigned int util_high; // High utilization threshold (%) - unsigned int util_low; // Low utilization threshold (%) - int boost_windows; // Consecutive windows before boost - int reduce_windows; // Consecutive windows before reduce - int startup_grace; // Grace period frames at startup - int min_freq_khz; // Minimum frequency to consider (kHz) - unsigned int target_util; // Target utilization after frequency change - int max_step_down; // Max frequency steps when reducing - int panic_step_up; // Frequency steps on panic (underrun) -} PlayerCPUConfig; - -/** - * State for auto CPU scaling. - * All fields can be inspected for testing. - */ -typedef struct { - // Frequency array (populated by detectFrequencies) - int frequencies[PLAYER_CPU_MAX_FREQUENCIES]; // Available frequencies (kHz, sorted low→high) - int freq_count; // Number of valid frequencies - - // Granular mode state - int target_index; // Target frequency index (set by algorithm) - int current_index; // Actually applied frequency index - int preset_indices[3]; // Preset mappings [POWERSAVE, NORMAL, PERFORMANCE] - int use_granular; // 1 if granular mode, 0 for 3-level fallback - - // Fallback mode state (3-level) - int target_level; // Target level (0-2) - int current_level; // Actually applied level - - // Monitoring state - int frame_count; // Frames in current window - int high_util_windows; // Consecutive high-util windows - int low_util_windows; // Consecutive low-util windows - unsigned last_underrun; // Last seen underrun count - int startup_frames; // Frames since start (for grace period) - int panic_cooldown; // Windows to wait after panic - - // Frame timing data - uint64_t frame_times[PLAYER_CPU_FRAME_BUFFER_SIZE]; // Ring buffer of frame times (us) - int frame_time_index; // Current ring buffer position - uint64_t frame_budget_us; // Target frame time (from fps) - - // Flags for frequency detection - int 
frequencies_detected; // 1 if frequencies have been detected - int scaling_disabled; // 1 if scaling is disabled (0 or 1 frequency available) - - // Per-frequency panic tracking (failsafe for problematic frequencies) - int panic_count[PLAYER_CPU_MAX_FREQUENCIES]; // Count of panics at each frequency -} PlayerCPUState; - -/** - * Result of an update operation (for detailed testing). - */ -typedef struct { - PlayerCPUDecision decision; // What decision was made - int new_index; // New frequency index (if granular) - int new_level; // New level (if fallback) - unsigned utilization; // Calculated utilization (%) - uint64_t p90_time; // 90th percentile frame time -} PlayerCPUResult; - -/** - * Initializes config with default values. - * - * @param config Config to initialize - */ -void PlayerCPU_initConfig(PlayerCPUConfig* config); - -/** - * Initializes state to empty/zero state. - * - * @param state State to initialize - */ -void PlayerCPU_initState(PlayerCPUState* state); - -/** - * Finds the index of the nearest frequency to the target. - * - * @param frequencies Array of frequencies in kHz - * @param count Number of frequencies in array - * @param target_khz Target frequency to find - * @return Index of nearest frequency (0 if count <= 0) - */ -int PlayerCPU_findNearestIndex(const int* frequencies, int count, int target_khz); - -/** - * Detects available CPU frequencies and initializes granular scaling. - * - * Populates state->frequencies and state->preset_indices based on - * available system frequencies. - * - * @param state State to populate - * @param config Configuration (uses min_freq_khz) - * @param raw_frequencies Array of frequencies from platform - * @param raw_count Number of frequencies from platform - */ -void PlayerCPU_detectFrequencies(PlayerCPUState* state, const PlayerCPUConfig* config, - const int* raw_frequencies, int raw_count); - -/** - * Resets auto CPU state for a new session. - * - * Called when entering auto mode or starting a new game. 
- * - * @param state State to reset - * @param config Configuration - * @param fps Game's target FPS (for frame budget calculation) - * @param current_underruns Current underrun count from audio system - */ -void PlayerCPU_reset(PlayerCPUState* state, const PlayerCPUConfig* config, double fps, - unsigned current_underruns); - -/** - * Records a frame time sample. - * - * Called after each frame with the execution time of core.run(). - * - * @param state State to update - * @param frame_time_us Frame execution time in microseconds - */ -void PlayerCPU_recordFrameTime(PlayerCPUState* state, uint64_t frame_time_us); - -/** - * Main update function - determines if CPU frequency should change. - * - * Should be called once per frame when in auto mode. - * Returns a decision indicating what action should be taken. - * - * @param state Current state (will be modified) - * @param config Configuration constants - * @param fast_forward True if fast-forwarding (skip scaling) - * @param show_menu True if menu is showing (skip scaling) - * @param current_underruns Current underrun count from audio - * @param result Optional output for detailed result info - * @return Decision type (NONE, BOOST, REDUCE, PANIC, SKIP) - */ -PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* config, - bool fast_forward, bool show_menu, unsigned current_underruns, - PlayerCPUResult* result); - -/** - * Calculates the recommended frequency for a target utilization. - * - * Uses linear scaling: new_freq = current_freq * current_util / target_util - * - * @param current_freq Current frequency in kHz - * @param current_util Current utilization percentage - * @param target_util Target utilization percentage - * @return Recommended frequency in kHz - */ -int PlayerCPU_predictFrequency(int current_freq, int current_util, int target_util); - -/** - * Returns the percentage of max frequency for a preset level. 
- * - * @param level Preset level (0=POWERSAVE, 1=NORMAL, 2=PERFORMANCE) - * @return Percentage of max frequency (55, 80, or 100) - */ -int PlayerCPU_getPresetPercentage(PlayerCPULevel level); - -/** - * Calculates the 90th percentile of frame times. - * - * @param frame_times Array of frame times - * @param count Number of samples (uses min of count and buffer size) - * @return 90th percentile value - */ -uint64_t PlayerCPU_percentile90(const uint64_t* frame_times, int count); - -#endif // __PLAYER_CPU_H__ diff --git a/workspace/all/player/player_loop_audioclock.inc b/workspace/all/player/player_loop_audioclock.inc deleted file mode 100644 index aeabd063..00000000 --- a/workspace/all/player/player_loop_audioclock.inc +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Audio-driven main loop with blocking audio writes. - * - * This loop is included by player.c when SYNC_MODE_AUDIOCLOCK is defined. - * - * Timing approach: - * - Audio hardware clock is the timing source - * - Core runs every loop iteration (no frame pacing) - * - SND_batchSamples() blocks when buffer is full (up to 10ms) - * - Audio callback drains buffer at hardware audio rate - * - Natural backpressure from audio blocking rate-limits emulation - * - * For devices with unstable vsync (e.g., M17). - */ - -static void run_main_loop(void) { - LOG_info("Using audioclock sync mode (audio-driven timing)\n"); - - PWR_warn(1); - PWR_disableAutosleep(); - - LOG_debug("Special_init"); - Special_init(); // after config - - LOG_debug("Entering main loop (audioclock mode)"); - sec_start = SDL_GetTicks(); - uint32_t last_ff_flip = 0; - while (!quit) { - GFX_startFrame(); - input_polled_this_frame = 0; // Reset at start of frame - - // Always run core - audio blocking in SND_batchSamples() handles timing. - // When audio buffer is full, the core will block (up to 10ms) waiting for - // the audio callback to consume samples. This naturally rate-limits emulation - // to match the audio hardware clock. 
- // - // During fast-forward: audio is skipped (no blocking), limitFF() controls speed. - - // Call frame time callback if registered (per libretro spec) - if (video_state.frame_time_cb) { - retro_usec_t frame_now = getMicroseconds(); - retro_usec_t delta; - if (fast_forward) { - // Use reference time during FF, don't update frame_time_last - // to avoid timing discontinuity when FF ends - delta = video_state.frame_time_ref; - } else { - if (video_state.frame_time_last == 0) { - delta = video_state.frame_time_ref; - } else { - delta = frame_now - video_state.frame_time_last; - } - video_state.frame_time_last = frame_now; - } - video_state.frame_time_cb(delta); - } - - // Report audio buffer status to core for frameskip decisions - // During FF, report audio inactive (no output during FF) - if (core.audio_buffer_status) { - if (fast_forward) { - core.audio_buffer_status(false, 0, false); - } else { - unsigned occupancy = SND_getBufferOccupancy(); - core.audio_buffer_status(true, occupancy, occupancy < 25); - } - } - - // Note: SND_newFrame() is not called here because audioclock mode uses - // blocking audio writes for timing, not the PI rate controller. 
- - // Measure frame execution time for auto CPU scaling - // Don't run core while menu is active - uint64_t frame_start = getMicroseconds(); - if (!show_menu) { - GLVideo_bindFBO(); - core.run(); - } - uint64_t frame_time = getMicroseconds() - frame_start; - - // Store frame time for auto CPU scaling analysis - if (overclock == 3 && !fast_forward && !show_menu) { - auto_cpu_state - .frame_times[auto_cpu_state.frame_time_index % PLAYER_CPU_FRAME_BUFFER_SIZE] = - frame_time; - auto_cpu_state.frame_time_index++; - } - - // Present frame - // During FF: throttle vsync to avoid blocking (limitFF controls speed instead) - // Normal: present every frame (may or may not wait for vsync depending on platform) - // Skip for HW rendering - frame already presented via GLVideo_present() - if (!GLVideo_isEnabled()) { - if (fast_forward) { - uint32_t now = SDL_GetTicks(); - if (now - last_ff_flip >= 30) { // Visual update every 30ms - GFX_present(&renderer); - frame_ready_for_flip = 0; - last_ff_flip = now; - } - } else { - GFX_present(&renderer); - frame_ready_for_flip = 0; - } - } - - limitFF(); - trackFPS(); - updateAutoCPU(); - - // Fallback input poll - ensures MENU button and shortcuts work even when - // core doesn't call input_poll_callback (e.g., showing error screens). - // Guard inside callback prevents double execution. - input_poll_callback(); - - if (show_menu) { - LOG_debug("Main loop: show_menu=1, entering Menu_loop"); - Menu_loop(); - LOG_debug("Main loop: returned from Menu_loop"); - } - - hdmimon(); - } -} diff --git a/workspace/all/player/player_loop_audioclock.inc.bak b/workspace/all/player/player_loop_audioclock.inc.bak deleted file mode 100644 index 6cfdd5f1..00000000 --- a/workspace/all/player/player_loop_audioclock.inc.bak +++ /dev/null @@ -1,117 +0,0 @@ -/** - * Audio-driven main loop with blocking audio writes. - * - * This loop is included by player.c when SYNC_MODE_AUDIOCLOCK is defined. 
- * - * Timing approach: - * - Audio hardware clock is the timing source - * - Core runs every loop iteration (no frame pacing) - * - SND_batchSamples() blocks when buffer is full (up to 10ms) - * - Audio callback drains buffer at hardware audio rate - * - Natural backpressure from audio blocking rate-limits emulation - * - * For devices with unstable vsync (e.g., M17). - */ - -static void run_main_loop(void) { - LOG_info("Using audioclock sync mode (audio-driven timing)\n"); - - PWR_warn(1); - PWR_disableAutosleep(); - - LOG_debug("Special_init"); - Special_init(); // after config - - LOG_debug("Entering main loop (audioclock mode)"); - sec_start = SDL_GetTicks(); - uint32_t last_ff_flip = 0; - while (!quit) { - GFX_startFrame(); - input_polled_this_frame = 0; // Reset at start of frame - - // Always run core - audio blocking in SND_batchSamples() handles timing. - // When audio buffer is full, the core will block (up to 10ms) waiting for - // the audio callback to consume samples. This naturally rate-limits emulation - // to match the audio hardware clock. - // - // During fast-forward: audio is skipped (no blocking), limitFF() controls speed. 
- - // Call frame time callback if registered (per libretro spec) - if (video_state.frame_time_cb) { - retro_usec_t frame_now = getMicroseconds(); - retro_usec_t delta; - if (fast_forward) { - // Use reference time during FF, don't update frame_time_last - // to avoid timing discontinuity when FF ends - delta = video_state.frame_time_ref; - } else { - if (video_state.frame_time_last == 0) { - delta = video_state.frame_time_ref; - } else { - delta = frame_now - video_state.frame_time_last; - } - video_state.frame_time_last = frame_now; - } - video_state.frame_time_cb(delta); - } - - // Report audio buffer status to core for frameskip decisions - // During FF, report audio inactive (no output during FF) - if (core.audio_buffer_status) { - if (fast_forward) { - core.audio_buffer_status(false, 0, false); - } else { - unsigned occupancy = SND_getBufferOccupancy(); - core.audio_buffer_status(true, occupancy, occupancy < 25); - } - } - - // Note: SND_newFrame() is not called here because audioclock mode uses - // blocking audio writes for timing, not the PI rate controller. 
- - // Measure frame execution time for auto CPU scaling - uint64_t frame_start = getMicroseconds(); - core.run(); - uint64_t frame_time = getMicroseconds() - frame_start; - - // Store frame time for auto CPU scaling analysis - if (overclock == 3 && !fast_forward && !show_menu) { - auto_cpu_state - .frame_times[auto_cpu_state.frame_time_index % PLAYER_CPU_FRAME_BUFFER_SIZE] = - frame_time; - auto_cpu_state.frame_time_index++; - } - - // Present frame - // During FF: throttle vsync to avoid blocking (limitFF controls speed instead) - // Normal: present every frame (may or may not wait for vsync depending on platform) - if (fast_forward) { - uint32_t now = SDL_GetTicks(); - if (now - last_ff_flip >= 30) { // Visual update every 30ms - GFX_present(&renderer); - frame_ready_for_flip = 0; - last_ff_flip = now; - } - } else { - GFX_present(&renderer); - frame_ready_for_flip = 0; - } - - limitFF(); - trackFPS(); - updateAutoCPU(); - - // Fallback input poll - ensures MENU button and shortcuts work even when - // core doesn't call input_poll_callback (e.g., showing error screens). - // Guard inside callback prevents double execution. - input_poll_callback(); - - if (show_menu) { - LOG_debug("Main loop: show_menu=1, entering Menu_loop"); - Menu_loop(); - LOG_debug("Main loop: returned from Menu_loop"); - } - - hdmimon(); - } -} diff --git a/workspace/all/player/player_loop_vsync.inc b/workspace/all/player/player_loop_vsync.inc deleted file mode 100644 index 743bef4f..00000000 --- a/workspace/all/player/player_loop_vsync.inc +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Vsync-driven main loop with frame pacing and audio rate control. - * - * This loop is included by player.c when SYNC_MODE_AUDIOCLOCK is not defined. 
- * - * Timing approach: - * - Display vsync is the timing source (GFX_present blocks until vsync) - * - Frame pacer uses Bresenham accumulator to decide when to step emulation - * - Audio rate control maintains buffer at 50% using dual-timescale PI controller - * - Works with any display Hz / game fps mismatch - * - * For devices with stable vsync. - */ - -static void run_main_loop(void) { - // Initialize frame pacer with display Hz - double display_hz = FramePacer_getDisplayHz(); - FramePacer_init(&frame_pacer, core.fps, display_hz); - LOG_info("Frame pacer: %.2ffps @ %.2fHz (%s) [Q16: %d/%d]\n", core.fps, display_hz, - FramePacer_isDirectMode(&frame_pacer) ? "direct" : "vsync-driven Bresenham", - frame_pacer.game_fps_q16, frame_pacer.display_hz_q16); - - // Keep audio rate control ENABLED with frame pacing. - // The integral term is slow enough (300-frame average) that it won't fight - // the per-frame step/repeat pattern. It learns the average vsync rate and - // compensates for hardware drift (e.g., 58.7Hz vs 60Hz nominal). - // Without rate control, vsync variance causes continuous audio underruns. - - PWR_warn(1); - PWR_disableAutosleep(); - - // force a vsync immediately before loop - // for better frame pacing? - GFX_clearAll(); - GFX_present(NULL); - - LOG_debug("Special_init"); - Special_init(); // after config - - LOG_debug("Entering main loop (vsync mode)"); - sec_start = SDL_GetTicks(); - while (!quit) { - GFX_startFrame(); - input_polled_this_frame = 0; // Reset at start of frame - - // Frame pacing: Bresenham accumulator decides whether to run core this vsync. - // Vsync (from GFX_present) is the timing source - each loop iteration = one display refresh. - // Core runs at its natural rate (e.g., 60fps), display refreshes at panel Hz (e.g., 72Hz). - // When display Hz > game fps: some frames are repeated (re-presented). - // - // During fast-forward: run core multiple times per vsync to achieve speedup. 
- // max_ff_speed: 0=2x, 1=3x, 2=4x, 3=5x - int runs_this_vsync = fast_forward ? (max_ff_speed + 2) : 1; - - for (int run = 0; run < runs_this_vsync; run++) { - // First run uses frame pacer, subsequent FF runs always execute - // Don't run core while menu is active - bool should_run_core = !show_menu && ((run == 0) ? (fast_forward || FramePacer_step(&frame_pacer)) - : fast_forward); - - if (should_run_core) { - // Call frame time callback if registered (per libretro spec) - if (video_state.frame_time_cb) { - retro_usec_t frame_now = getMicroseconds(); - retro_usec_t delta; - if (fast_forward) { - // Use reference time during FF, don't update frame_time_last - // to avoid timing discontinuity when FF ends - delta = video_state.frame_time_ref; - } else { - if (video_state.frame_time_last == 0) { - delta = video_state.frame_time_ref; - } else { - delta = frame_now - video_state.frame_time_last; - } - video_state.frame_time_last = frame_now; - } - video_state.frame_time_cb(delta); - } - - // Report audio buffer status to core for frameskip decisions - // During FF, report audio inactive (no output during FF) - if (core.audio_buffer_status) { - if (fast_forward) { - core.audio_buffer_status(false, 0, false); - } else { - unsigned occupancy = SND_getBufferOccupancy(); - core.audio_buffer_status(true, occupancy, occupancy < 25); - } - } - - // Update audio rate control integral (once per frame) - // Skip during FF to prevent integral windup (audio is skipped) - if (!fast_forward) { - SND_newFrame(); - } - - // Measure frame execution time for auto CPU scaling - uint64_t frame_start = getMicroseconds(); - GLVideo_bindFBO(); - core.run(); - uint64_t frame_time = getMicroseconds() - frame_start; - - // Store frame time for auto CPU scaling analysis - if (overclock == 3 && !fast_forward && !show_menu) { - auto_cpu_state.frame_times[auto_cpu_state.frame_time_index % - PLAYER_CPU_FRAME_BUFFER_SIZE] = frame_time; - auto_cpu_state.frame_time_index++; - } - } - } - - // Always 
present for vsync timing - when !should_run_core, re-presents previous frame - // Skip for HW rendering - frame already presented via GLVideo_present() - if (!GLVideo_isEnabled()) { - GFX_present(&renderer); - frame_ready_for_flip = 0; - } - - // Track performance (only once per vsync, not per FF run) - limitFF(); - trackFPS(); - updateAutoCPU(); - - // Fallback input poll - ensures MENU button and shortcuts work even when - // core doesn't call input_poll_callback (e.g., showing error screens). - // Guard inside callback prevents double execution. - input_poll_callback(); - - if (show_menu) { - Menu_loop(); - - // Rebind FBO for HW cores after menu (menu uses FBO 0) - if (GLVideo_isEnabled()) { - GLVideo_bindFBO(); - } - } - - hdmimon(); - } -} diff --git a/workspace/all/player/player_loop_vsync.inc.bak b/workspace/all/player/player_loop_vsync.inc.bak deleted file mode 100644 index f58fc2bb..00000000 --- a/workspace/all/player/player_loop_vsync.inc.bak +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Vsync-driven main loop with frame pacing and audio rate control. - * - * This loop is included by player.c when SYNC_MODE_AUDIOCLOCK is not defined. - * - * Timing approach: - * - Display vsync is the timing source (GFX_present blocks until vsync) - * - Frame pacer uses Bresenham accumulator to decide when to step emulation - * - Audio rate control maintains buffer at 50% using dual-timescale PI controller - * - Works with any display Hz / game fps mismatch - * - * For devices with stable vsync. - */ - -static void run_main_loop(void) { - // Initialize frame pacer with display Hz - double display_hz = FramePacer_getDisplayHz(); - FramePacer_init(&frame_pacer, core.fps, display_hz); - LOG_info("Frame pacer: %.2ffps @ %.2fHz (%s) [Q16: %d/%d]\n", core.fps, display_hz, - FramePacer_isDirectMode(&frame_pacer) ? "direct" : "vsync-driven Bresenham", - frame_pacer.game_fps_q16, frame_pacer.display_hz_q16); - - // Keep audio rate control ENABLED with frame pacing. 
- // The integral term is slow enough (300-frame average) that it won't fight - // the per-frame step/repeat pattern. It learns the average vsync rate and - // compensates for hardware drift (e.g., 58.7Hz vs 60Hz nominal). - // Without rate control, vsync variance causes continuous audio underruns. - - PWR_warn(1); - PWR_disableAutosleep(); - - // force a vsync immediately before loop - // for better frame pacing? - GFX_clearAll(); - GFX_present(NULL); - - LOG_debug("Special_init"); - Special_init(); // after config - - LOG_debug("Entering main loop (vsync mode)"); - sec_start = SDL_GetTicks(); - while (!quit) { - GFX_startFrame(); - input_polled_this_frame = 0; // Reset at start of frame - - // Frame pacing: Bresenham accumulator decides whether to run core this vsync. - // Vsync (from GFX_present) is the timing source - each loop iteration = one display refresh. - // Core runs at its natural rate (e.g., 60fps), display refreshes at panel Hz (e.g., 72Hz). - // When display Hz > game fps: some frames are repeated (re-presented). - // - // During fast-forward: run core multiple times per vsync to achieve speedup. - // max_ff_speed: 0=2x, 1=3x, 2=4x, 3=5x - int runs_this_vsync = fast_forward ? (max_ff_speed + 2) : 1; - - for (int run = 0; run < runs_this_vsync; run++) { - // First run uses frame pacer, subsequent FF runs always execute - bool should_run_core = (run == 0) ? 
(fast_forward || FramePacer_step(&frame_pacer)) - : fast_forward; - - if (should_run_core) { - // Call frame time callback if registered (per libretro spec) - if (video_state.frame_time_cb) { - retro_usec_t frame_now = getMicroseconds(); - retro_usec_t delta; - if (fast_forward) { - // Use reference time during FF, don't update frame_time_last - // to avoid timing discontinuity when FF ends - delta = video_state.frame_time_ref; - } else { - if (video_state.frame_time_last == 0) { - delta = video_state.frame_time_ref; - } else { - delta = frame_now - video_state.frame_time_last; - } - video_state.frame_time_last = frame_now; - } - video_state.frame_time_cb(delta); - } - - // Report audio buffer status to core for frameskip decisions - // During FF, report audio inactive (no output during FF) - if (core.audio_buffer_status) { - if (fast_forward) { - core.audio_buffer_status(false, 0, false); - } else { - unsigned occupancy = SND_getBufferOccupancy(); - core.audio_buffer_status(true, occupancy, occupancy < 25); - } - } - - // Update audio rate control integral (once per frame) - // Skip during FF to prevent integral windup (audio is skipped) - if (!fast_forward) { - SND_newFrame(); - } - - // Measure frame execution time for auto CPU scaling - uint64_t frame_start = getMicroseconds(); - core.run(); - uint64_t frame_time = getMicroseconds() - frame_start; - - // Store frame time for auto CPU scaling analysis - if (overclock == 3 && !fast_forward && !show_menu) { - auto_cpu_state.frame_times[auto_cpu_state.frame_time_index % - PLAYER_CPU_FRAME_BUFFER_SIZE] = frame_time; - auto_cpu_state.frame_time_index++; - } - } - } - - // Always present for vsync timing - when !should_run_core, re-presents previous frame - GFX_present(&renderer); - frame_ready_for_flip = 0; - - // Track performance (only once per vsync, not per FF run) - limitFF(); - trackFPS(); - updateAutoCPU(); - - // Fallback input poll - ensures MENU button and shortcuts work even when - // core doesn't call 
input_poll_callback (e.g., showing error screens). - // Guard inside callback prevents double execution. - input_poll_callback(); - - if (show_menu) { - LOG_debug("Main loop: show_menu=1, entering Menu_loop"); - Menu_loop(); - LOG_debug("Main loop: returned from Menu_loop"); - } - - hdmimon(); - } -} diff --git a/workspace/all/player/player_utils.c b/workspace/all/player/player_utils.c index def5d3a2..bfc05369 100644 --- a/workspace/all/player/player_utils.c +++ b/workspace/all/player/player_utils.c @@ -4,7 +4,7 @@ * These functions have no external dependencies and can be tested in isolation. * * For option-related functions, see player_options.c - * For CPU frequency functions, see player_cpu.c + * For CPU frequency functions, see cpu.c */ #include "player_utils.h" diff --git a/workspace/all/player/player_utils.h b/workspace/all/player/player_utils.h index dcfac086..32ce90a8 100644 --- a/workspace/all/player/player_utils.h +++ b/workspace/all/player/player_utils.h @@ -5,7 +5,7 @@ * They perform string manipulation and other pure computations. 
* * For option-related functions, see player_options.h - * For CPU frequency functions, see player_cpu.h + * For CPU frequency functions, see cpu.h */ #ifndef PLAYER_UTILS_H diff --git a/workspace/all/player/sync_manager.c b/workspace/all/player/sync_manager.c new file mode 100644 index 00000000..1a7ba381 --- /dev/null +++ b/workspace/all/player/sync_manager.c @@ -0,0 +1,229 @@ +/** + * sync_manager.c - Audio/video synchronization mode management implementation + */ + +#include "sync_manager.h" +#include "log.h" +#include "utils.h" // getMicroseconds +#include <math.h> +#include <string.h> + +// Minimum samples before checking stability +// 60 samples (~1s at 60Hz): Need enough for meaningful stddev +#define SYNC_MIN_SAMPLES 60 + +// Maximum samples before giving up on convergence +// 1800 samples (~30s at 60Hz): If not stable by then, display is unstable +#define SYNC_MAX_SAMPLES 1800 + +// Stability threshold (stddev/mean ratio) +// 1% relative deviation indicates stable measurement +#define SYNC_STABILITY_THRESHOLD 0.01 + +// Progress logging interval (DEBUG only) +// Log every 60 samples to show convergence progress +#define SYNC_LOG_INTERVAL 60 + +// Check for drift every 300 frames (~5 seconds at 60fps) +#define SYNC_DRIFT_CHECK_INTERVAL 300 + +// Tolerance for mode selection (1% mismatch) +#define SYNC_MODE_TOLERANCE 0.01 + +// Outlier rejection bounds (50-120 Hz) +#define SYNC_MIN_HZ 50.0 +#define SYNC_MAX_HZ 120.0 + +void SyncManager_init(SyncManager* manager, double game_fps, double display_hz) { + memset(manager, 0, sizeof(SyncManager)); + + // Start in AUDIO_CLOCK mode (safe default) + manager->mode = SYNC_MODE_AUDIO_CLOCK; + manager->game_fps = game_fps; + manager->display_hz = (display_hz > 0.0) ? 
display_hz : 60.0; + + LOG_info("Sync: Starting in %s mode (%.2ffps @ %.1fHz reported)", + SyncManager_getModeName(manager->mode), manager->game_fps, manager->display_hz); + LOG_info("Sync: Measuring vsync timing..."); +} + +void SyncManager_recordVsync(SyncManager* manager) { + uint64_t now = getMicroseconds(); + + // First call - just record timestamp + if (manager->last_vsync_time == 0) { + manager->last_vsync_time = now; + return; + } + + // Calculate frame interval + uint64_t interval = now - manager->last_vsync_time; + manager->last_vsync_time = now; + + // Reject zero intervals (duplicate timestamps) + if (interval == 0) { + return; + } + + // Reject outliers based on Hz (frame drops, fast presents) + double hz = 1000000.0 / (double)interval; + if (hz < SYNC_MIN_HZ || hz > SYNC_MAX_HZ) { + return; + } + + // Store interval in circular buffer + manager->frame_intervals[manager->write_index] = interval; + manager->write_index = (manager->write_index + 1) % SYNC_SAMPLE_BUFFER_SIZE; + manager->sample_count++; + + // Skip measurement logic if already stable + if (manager->measurement_stable) { + // Monitor for drift in vsync mode + if (manager->mode == SYNC_MODE_VSYNC) { + manager->last_drift_check++; + + if (manager->last_drift_check >= SYNC_DRIFT_CHECK_INTERVAL) { + manager->last_drift_check = 0; + + // Recalculate current Hz from buffer + int samples = (manager->sample_count < SYNC_SAMPLE_BUFFER_SIZE) + ? manager->sample_count + : SYNC_SAMPLE_BUFFER_SIZE; + uint64_t sum = 0; + for (int i = 0; i < samples; i++) { + sum += manager->frame_intervals[i]; + } + double mean = (double)sum / samples; + double current_hz = 1000000.0 / mean; + + // Check if drifted beyond tolerance + double mismatch = fabs(current_hz - manager->game_fps) / manager->game_fps; + if (mismatch >= SYNC_MODE_TOLERANCE) { + LOG_info("Sync: Drift detected! 
%.3fHz now differs by %.2f%% from %.2ffps", + current_hz, mismatch * 100.0, manager->game_fps); + LOG_info("Sync: Switching to %s mode (fallback for unstable display)", + SyncManager_getModeName(SYNC_MODE_AUDIO_CLOCK)); + manager->mode = SYNC_MODE_AUDIO_CLOCK; + } + } + } + return; + } + + // Check for convergence (need minimum samples first) + if (manager->sample_count < SYNC_MIN_SAMPLES) { + return; + } + + // Calculate statistics from circular buffer + int samples = (manager->sample_count < SYNC_SAMPLE_BUFFER_SIZE) ? manager->sample_count + : SYNC_SAMPLE_BUFFER_SIZE; + + // Calculate mean + uint64_t sum = 0; + for (int i = 0; i < samples; i++) { + sum += manager->frame_intervals[i]; + } + double mean = (double)sum / samples; + + // Calculate standard deviation + double variance_sum = 0.0; + for (int i = 0; i < samples; i++) { + double diff = (double)manager->frame_intervals[i] - mean; + variance_sum += diff * diff; + } + double stddev = sqrt(variance_sum / (samples - 1)); + + // Calculate confidence (relative stddev) + double confidence = stddev / mean; + double measured_hz = 1000000.0 / mean; + + // Progress logging (DEBUG only) + if (manager->sample_count % SYNC_LOG_INTERVAL == 0) { + LOG_debug("Sync: %d samples, mean=%.3fHz, confidence=%.3f%% (%s)", manager->sample_count, + measured_hz, confidence * 100.0, + confidence < SYNC_STABILITY_THRESHOLD ? "STABLE" : "measuring..."); + } + + // Check for stability + if (confidence < SYNC_STABILITY_THRESHOLD) { + // Measurement converged! 
+ manager->measurement_stable = true; + manager->measured_hz = measured_hz; + manager->measurement_confidence = confidence; + + LOG_info("Sync: Measurement stable after %d samples: %.3fHz ± %.2f%%", + manager->sample_count, manager->measured_hz, + manager->measurement_confidence * 100.0); + + // Try switching to vsync mode if compatible + double mismatch = fabs(manager->measured_hz - manager->game_fps) / manager->game_fps; + if (mismatch < SYNC_MODE_TOLERANCE) { + manager->mode = SYNC_MODE_VSYNC; + LOG_info("Sync: Switching to %s mode (%.3fHz within 1%% of %.2ffps)", + SyncManager_getModeName(manager->mode), manager->measured_hz, + manager->game_fps); + } else { + LOG_info("Sync: Staying in %s mode (%.3fHz differs by %.2f%% from %.2ffps)", + SyncManager_getModeName(manager->mode), manager->measured_hz, mismatch * 100.0, + manager->game_fps); + } + + return; + } + + // Timeout: give up if not stable after max samples + if (manager->sample_count >= SYNC_MAX_SAMPLES) { + manager->measurement_stable = true; // Stop trying + manager->measured_hz = measured_hz; + manager->measurement_confidence = confidence; + + LOG_info( + "Sync: Measurement unstable after %d samples (confidence %.2f%% > 1%%), staying in %s " + "mode", + manager->sample_count, confidence * 100.0, SyncManager_getModeName(manager->mode)); + } +} + +bool SyncManager_shouldRunCore(const SyncManager* manager) { + // Always run core every frame in both modes + // AUDIO_CLOCK: blocking audio provides timing + // VSYNC: vsync provides timing + return true; +} + +SyncMode SyncManager_getMode(const SyncManager* manager) { + return manager->mode; +} + +const char* SyncManager_getModeName(SyncMode mode) { + switch (mode) { + case SYNC_MODE_AUDIO_CLOCK: + return "Audio Clock"; + case SYNC_MODE_VSYNC: + return "Vsync"; + default: + return "Unknown"; + } +} + +bool SyncManager_shouldUseRateControl(const SyncManager* manager) { + // Both modes use rate control (±0.8%) as buffer health mechanism + // This handles timing 
variations when true blocking can't provide pacing + (void)manager; + return true; +} + +bool SyncManager_shouldBlockAudio(const SyncManager* manager) { + // Only block audio in audio clock mode + // Vsync mode uses non-blocking writes with rate control + return manager->mode == SYNC_MODE_AUDIO_CLOCK; +} + +double SyncManager_getMeasuredHz(const SyncManager* manager) { + return manager->measurement_stable ? manager->measured_hz : 0.0; +} + +bool SyncManager_isMeasurementStable(const SyncManager* manager) { + return manager->measurement_stable; +} diff --git a/workspace/all/player/sync_manager.h b/workspace/all/player/sync_manager.h new file mode 100644 index 00000000..f826639c --- /dev/null +++ b/workspace/all/player/sync_manager.h @@ -0,0 +1,194 @@ +/** + * sync_manager.h - Audio/video synchronization mode management + * + * Manages runtime switching between audio-clock and vsync timing modes. + * + * Strategy: + * - Start in AUDIO_CLOCK (safe, works on all hardware) + * - Measure actual display refresh rate via vsync timing + * - Switch to VSYNC if compatible (< 1% mismatch from game fps) + * - Monitor for drift, fall back to AUDIO_CLOCK if needed + * + * This eliminates the need for: + * - Frame pacing (Bresenham accumulator) - was problematic at >5% mismatch + * - Compile-time SYNC_MODE selection - now runtime adaptive + * - Aggressive audio rate control - only light adjustment remains (both modes) + */ + +#ifndef __SYNC_MANAGER_H__ +#define __SYNC_MANAGER_H__ + +#include <stdbool.h> +#include <stdint.h> + +/** + * Synchronization mode determines timing source. + */ +typedef enum { + /** + * Audio-clock mode: Audio hardware drives timing. + * + * Core runs every frame, audio writes block when buffer full. + * Natural backpressure from blocking maintains timing. + * Audio rate control still runs as a buffer health mechanism. 
+ * + * Benefits: + * - Works with any display refresh rate (no fps/Hz matching needed) + * - Frame duplication instead of frame skipping (less visible) + * - Audio buffer naturally stable (no rate control oscillation) + * + * Used when: + * - Initial startup (safe default) + * - Display Hz mismatch > 1% from game fps + * - Display Hz unstable (drift detected) + */ + SYNC_MODE_AUDIO_CLOCK, + + /** + * Vsync mode: Display vsync drives timing. + * + * GFX_present() blocks until vsync, providing frame timing. + * Core runs every frame (no pacing), light audio rate control + * adjusts pitch ±0.5% to maintain buffer at 50%. + * + * Benefits: + * - Minimal input latency (1 frame) + * - Perfect frame pacing when fps ≈ Hz + * - No frame duplication artifacts + * + * Used when: + * - Display Hz within 1% of game fps + * - Display Hz is stable (no drift) + */ + SYNC_MODE_VSYNC +} SyncMode; + +// Vsync measurement circular buffer size +// 512 samples (~8 seconds at 60fps) provides good rolling window +#define SYNC_SAMPLE_BUFFER_SIZE 512 + +/** + * Sync manager state. + */ +typedef struct { + SyncMode mode; // Current sync mode + double game_fps; // Game target fps (e.g., 60.0, 59.94) + double display_hz; // Reported display Hz from SDL + + // Vsync timing measurement (circular buffer) + uint64_t frame_intervals[SYNC_SAMPLE_BUFFER_SIZE]; // Frame time deltas in microseconds + int sample_count; // Total samples collected (may exceed buffer size) + int write_index; // Next write position in circular buffer + + bool measurement_stable; // True when stddev converged + double measured_hz; // Calculated from buffer mean (valid when stable) + double measurement_confidence; // Stddev/mean ratio (lower = better) + + uint32_t last_drift_check; // Frames since last drift check (resets at interval) + uint64_t last_vsync_time; // Microsecond timestamp of last vsync +} SyncManager; + +/** + * Initialize sync manager. + * + * Starts in AUDIO_CLOCK mode (safe default). 
+ * Begins vsync measurement in background. + * + * @param manager Manager state to initialize + * @param game_fps Game target fps (e.g., 60.0) + * @param display_hz Display refresh rate from SDL (e.g., 60.0) + */ +void SyncManager_init(SyncManager* manager, double game_fps, double display_hz); + +/** + * Record vsync timing and update sync mode if needed. + * + * Call this immediately after GFX_present() returns. + * Measures actual display refresh rate and switches modes when appropriate. + * + * Measurement approach: + * - Continuously collects frame intervals in circular buffer + * - Calculates mean and stddev from buffer samples + * - Measurement stable when: samples >= 60 AND stddev/mean < 1% + * - Gives up after 1800 samples (~30s) if never converges + * + * Mode transitions: + * - AUDIO_CLOCK → VSYNC: When measurement stable and mismatch < 1% + * - VSYNC → AUDIO_CLOCK: If drift > 1% detected + * + * @param manager Manager state to update + */ +void SyncManager_recordVsync(SyncManager* manager); + +/** + * Check if core should run this frame. + * + * AUDIO_CLOCK: Always returns true (core runs every frame) + * VSYNC: Always returns true (core runs every frame, no pacing) + * + * This exists for API consistency and future extensibility. + * + * @param manager Manager state + * @return true if core.run() should be called + */ +bool SyncManager_shouldRunCore(const SyncManager* manager); + +/** + * Get current sync mode. + * + * @param manager Manager state + * @return Current sync mode + */ +SyncMode SyncManager_getMode(const SyncManager* manager); + +/** + * Get mode name for logging/display. + * + * @param mode Sync mode + * @return Human-readable mode name ("Audio Clock" or "Vsync") + */ +const char* SyncManager_getModeName(SyncMode mode); + +/** + * Check if audio rate control should be active. + * + * Both modes use rate control (±0.8%) as a buffer health mechanism. 
+ * This handles timing variations when true blocking can't provide pacing + * (e.g., platform can't disable vsync, loop runs at display Hz). + * + * @param manager Manager state + * @return true if audio rate control should run (always true) + */ +bool SyncManager_shouldUseRateControl(const SyncManager* manager); + +/** + * Check if audio writes should block. + * + * AUDIO_CLOCK: Yes (blocking provides timing backpressure) + * VSYNC: No (vsync provides timing, audio is just output) + * + * @param manager Manager state + * @return true if SND_batchSamples should block when buffer full + */ +bool SyncManager_shouldBlockAudio(const SyncManager* manager); + +/** + * Get measured display Hz. + * + * Returns actual measured Hz after enough samples collected. + * Before measurement stable, returns 0.0. + * + * @param manager Manager state + * @return Measured Hz, or 0.0 if not yet measured + */ +double SyncManager_getMeasuredHz(const SyncManager* manager); + +/** + * Check if measurement is stable. 
+ * + * @param manager Manager state + * @return true once measurement has finished: converged, or gave up at the sample limit + */ +bool SyncManager_isMeasurementStable(const SyncManager* manager); + +#endif // __SYNC_MANAGER_H__ diff --git a/workspace/miyoomini/platform/platform.c b/workspace/miyoomini/platform/platform.c index 1367ece7..5c917488 100644 --- a/workspace/miyoomini/platform/platform.c +++ b/workspace/miyoomini/platform/platform.c @@ -700,6 +700,9 @@ void PLAT_present(GFX_Renderer* renderer) { GFX_BlitSurfaceExec(vid.effect, &src_rect, vid.video, &dst_rect, 0, 0, 0); } } + + // Render debug HUD overlay (game mode only) + PLAT_renderDebugHUD(vid.video); } else { // UI mode: blit screen to video if (!vid.direct) { diff --git a/workspace/miyoomini/platform/platform.h b/workspace/miyoomini/platform/platform.h index be0c048e..3c0e3e20 100644 --- a/workspace/miyoomini/platform/platform.h +++ b/workspace/miyoomini/platform/platform.h @@ -26,8 +26,7 @@ // Audio Configuration /////////////////////////////// -// More aggressive rate control for A7 device with high timing variance -#define SND_RATE_CONTROL_D 0.015f +// Uses default SND_RATE_CONTROL_D (0.012f) - cubic safety boost handles edge cases /////////////////////////////// // Video Buffer Scaling diff --git a/workspace/my355/platform/platform.h b/workspace/my355/platform/platform.h index c6d25e49..60b6be9c 100644 --- a/workspace/my355/platform/platform.h +++ b/workspace/my355/platform/platform.h @@ -36,8 +36,7 @@ // Audio Configuration /////////////////////////////// -// More aggressive rate control for device with timing variance -#define SND_RATE_CONTROL_D 0.015f +// Uses default SND_RATE_CONTROL_D (0.012f) - cubic safety boost handles edge cases /////////////////////////////// // Dependencies diff --git a/workspace/tg5040/platform/platform.h b/workspace/tg5040/platform/platform.h index 6d4c59dc..cec822ae 100644 --- a/workspace/tg5040/platform/platform.h +++ b/workspace/tg5040/platform/platform.h @@ 
-34,7 +34,7 @@ // 
Audio Configuration /////////////////////////////// -// Uses default SND_RATE_CONTROL_D (0.012f) for standard timing +// Uses default SND_RATE_CONTROL_D (0.012f) - cubic safety boost handles edge cases /////////////////////////////// // Video Buffer Scaling diff --git a/workspace/tg5050/platform/platform.h b/workspace/tg5050/platform/platform.h index 1f0dfdd2..b748dded 100644 --- a/workspace/tg5050/platform/platform.h +++ b/workspace/tg5050/platform/platform.h @@ -36,7 +36,7 @@ // Audio Configuration /////////////////////////////// -// Uses default SND_RATE_CONTROL_D (0.012f) for standard timing +// Uses default SND_RATE_CONTROL_D (0.012f) - cubic safety boost handles edge cases /////////////////////////////// // Video Buffer Scaling diff --git a/workspace/trimuismart/platform/platform.h b/workspace/trimuismart/platform/platform.h index 6c6a8f39..c8bb0a38 100644 --- a/workspace/trimuismart/platform/platform.h +++ b/workspace/trimuismart/platform/platform.h @@ -27,8 +27,7 @@ // Audio Configuration /////////////////////////////// -// More aggressive rate control for slower A7 device with high timing variance -#define SND_RATE_CONTROL_D 0.015f +// Uses default SND_RATE_CONTROL_D (0.012f) - cubic safety boost handles edge cases /////////////////////////////// // Video Buffer Scaling