From 418116347cf96fff8e41d219a41b33d7004f8d17 Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Mon, 5 Jan 2026 13:53:50 -0800 Subject: [PATCH 01/11] Add topology-aware CPU autoscaling for multi-cluster ARM SoCs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the CPU autoscaler to support big.LITTLE and tri-cluster SoCs (Allwinner A523, Snapdragon 865, etc.) while maintaining backward compatibility with single-cluster devices. Key features: - Detects CPU clusters via sysfs (/sys/devices/system/cpu/cpufreq/policyN) - Classifies clusters as LITTLE/BIG/PRIME based on max frequency and core count - Builds a PerfState ladder using governors (powersave/schedutil/performance) - Uses CPU affinity to guide the emulation thread to the appropriate cluster The governor-based approach works WITH the kernel's frequency scaling: - Our algorithm provides frame timing intelligence (which cluster tier) - The kernel provides frequency scaling intelligence (schedutil) - Inactive clusters get powersave governor, allowing them to truly idle PerfState ladder structure: - Dual-cluster: 6 states (LITTLE×3 governors, BIG×3 governors) - Tri-cluster: 9 states (adds PRIME×3 governors) - Progression: LITTLE powersave → LITTLE schedutil → LITTLE performance → BIG powersave → BIG schedutil → BIG performance → ... Single-cluster devices continue using the existing granular frequency mode with userspace governor and scaling_setspeed. 
--- tests/unit/all/player/test_player_cpu.c | 431 ++++++++++++++++++++++++ workspace/all/common/api.c | 326 ++++++++++++++++++ workspace/all/common/api.h | 55 +++ workspace/all/player/player.c | 212 +++++++++++- workspace/all/player/player_cpu.c | 364 +++++++++++++++++++- workspace/all/player/player_cpu.h | 175 +++++++++- 6 files changed, 1542 insertions(+), 21 deletions(-) diff --git a/tests/unit/all/player/test_player_cpu.c b/tests/unit/all/player/test_player_cpu.c index 6f9160aa..749b3123 100644 --- a/tests/unit/all/player/test_player_cpu.c +++ b/tests/unit/all/player/test_player_cpu.c @@ -18,6 +18,42 @@ #include +/////////////////////////////// +// Stubs for API functions called by player_cpu.c +// These allow unit testing without linking api.c +/////////////////////////////// + +// Track calls for verification in tests +static int stub_governor_calls = 0; +static int stub_last_policy_id = -1; +static char stub_last_governor[32] = {0}; +static int stub_affinity_calls = 0; +static int stub_last_affinity_mask = 0; + +int PWR_setCPUGovernor(int policy_id, const char* governor) { + stub_governor_calls++; + stub_last_policy_id = policy_id; + if (governor) { + strncpy(stub_last_governor, governor, sizeof(stub_last_governor) - 1); + stub_last_governor[sizeof(stub_last_governor) - 1] = '\0'; + } + return 0; // Success +} + +int PWR_setThreadAffinity(int cpu_mask) { + stub_affinity_calls++; + stub_last_affinity_mask = cpu_mask; + return 0; // Success +} + +static void reset_stubs(void) { + stub_governor_calls = 0; + stub_last_policy_id = -1; + stub_last_governor[0] = '\0'; + stub_affinity_calls = 0; + stub_last_affinity_mask = 0; +} + // Test state and config static PlayerCPUState state; static PlayerCPUConfig config; @@ -29,6 +65,7 @@ static PlayerCPUConfig config; void setUp(void) { PlayerCPU_initState(&state); PlayerCPU_initConfig(&config); + reset_stubs(); } void tearDown(void) { @@ -570,6 +607,363 @@ void test_update_sweet_spot_resets_counters(void) { 
TEST_ASSERT_EQUAL(0, state.low_util_windows); } +/////////////////////////////// +// Topology Tests +/////////////////////////////// + +void test_initTopology_zeros_topology(void) { + PlayerCPUTopology t; + memset(&t, 0xFF, sizeof(t)); // Fill with garbage + PlayerCPU_initTopology(&t); + + TEST_ASSERT_EQUAL(0, t.cluster_count); + TEST_ASSERT_EQUAL(0, t.state_count); + TEST_ASSERT_EQUAL(0, t.topology_detected); +} + +void test_parseCPUList_single_cpu(void) { + int count = 0; + int mask = PlayerCPU_parseCPUList("0", &count); + TEST_ASSERT_EQUAL(1, count); + TEST_ASSERT_EQUAL(0x1, mask); // CPU 0 +} + +void test_parseCPUList_range(void) { + int count = 0; + int mask = PlayerCPU_parseCPUList("0-3", &count); + TEST_ASSERT_EQUAL(4, count); + TEST_ASSERT_EQUAL(0xF, mask); // CPUs 0-3 +} + +void test_parseCPUList_mixed(void) { + int count = 0; + int mask = PlayerCPU_parseCPUList("0-3,7", &count); + TEST_ASSERT_EQUAL(5, count); + TEST_ASSERT_EQUAL(0x8F, mask); // CPUs 0-3 and 7 +} + +void test_parseCPUList_single_high_cpu(void) { + int count = 0; + int mask = PlayerCPU_parseCPUList("7", &count); + TEST_ASSERT_EQUAL(1, count); + TEST_ASSERT_EQUAL(0x80, mask); // CPU 7 +} + +void test_parseCPUList_empty_string(void) { + int count = 0; + int mask = PlayerCPU_parseCPUList("", &count); + TEST_ASSERT_EQUAL(0, count); + TEST_ASSERT_EQUAL(0, mask); +} + +void test_classifyClusters_single_is_little(void) { + PlayerCPUCluster clusters[1]; + clusters[0].max_khz = 1800000; + clusters[0].cpu_count = 4; + + PlayerCPU_classifyClusters(clusters, 1); + + TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_LITTLE, clusters[0].type); +} + +void test_classifyClusters_dual_little_big(void) { + PlayerCPUCluster clusters[2]; + // Sorted by max_khz ascending + // Use frequencies with <10% gap to get BIG (not PRIME) classification + clusters[0].max_khz = 1800000; + clusters[0].cpu_count = 4; + clusters[1].max_khz = 1900000; // ~5.5% higher, should be BIG + clusters[1].cpu_count = 4; + + 
PlayerCPU_classifyClusters(clusters, 2); + + TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_LITTLE, clusters[0].type); + TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_BIG, clusters[1].type); +} + +void test_classifyClusters_tri_little_big_prime(void) { + PlayerCPUCluster clusters[3]; + // SD865-like: Silver, Gold, Prime + clusters[0].max_khz = 1800000; + clusters[0].cpu_count = 4; + clusters[1].max_khz = 2420000; + clusters[1].cpu_count = 3; + clusters[2].max_khz = 2840000; + clusters[2].cpu_count = 1; // Prime is single-core + + PlayerCPU_classifyClusters(clusters, 3); + + TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_LITTLE, clusters[0].type); + TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_BIG, clusters[1].type); + TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_PRIME, clusters[2].type); +} + +void test_classifyClusters_dual_prime_by_frequency_gap(void) { + PlayerCPUCluster clusters[2]; + // >10% frequency gap makes highest PRIME even with multiple cores + clusters[0].max_khz = 1800000; + clusters[0].cpu_count = 4; + clusters[1].max_khz = 2200000; // >10% higher + clusters[1].cpu_count = 4; + + PlayerCPU_classifyClusters(clusters, 2); + + TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_LITTLE, clusters[0].type); + TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_PRIME, clusters[1].type); +} + +void test_pickRepresentativeFreqs_single_freq(void) { + PlayerCPUCluster c; + c.frequencies[0] = 1800000; + c.freq_count = 1; + + int low, mid, high; + PlayerCPU_pickRepresentativeFreqs(&c, &low, &mid, &high); + + TEST_ASSERT_EQUAL(1800000, low); + TEST_ASSERT_EQUAL(1800000, mid); + TEST_ASSERT_EQUAL(1800000, high); +} + +void test_pickRepresentativeFreqs_multiple_freqs(void) { + PlayerCPUCluster c; + c.frequencies[0] = 400000; + c.frequencies[1] = 800000; + c.frequencies[2] = 1200000; + c.frequencies[3] = 1600000; + c.frequencies[4] = 2000000; + c.freq_count = 5; + + int low, mid, high; + PlayerCPU_pickRepresentativeFreqs(&c, &low, &mid, &high); + + TEST_ASSERT_EQUAL(400000, low); + TEST_ASSERT_EQUAL(1200000, mid); // freqs[5/2] = freqs[2] + 
TEST_ASSERT_EQUAL(2000000, high); +} + +// Helper to set up a dual-cluster topology +static void setup_dual_cluster_topology(PlayerCPUState* s) { + s->topology.cluster_count = 2; + s->topology.topology_detected = 1; // Mark as detected so buildPerfStates works + + // LITTLE cluster (policy 0, CPUs 0-3) + s->topology.clusters[0].policy_id = 0; + s->topology.clusters[0].cpu_mask = 0x0F; + s->topology.clusters[0].cpu_count = 4; + s->topology.clusters[0].frequencies[0] = 600000; + s->topology.clusters[0].frequencies[1] = 1200000; + s->topology.clusters[0].frequencies[2] = 1800000; + s->topology.clusters[0].freq_count = 3; + s->topology.clusters[0].min_khz = 600000; + s->topology.clusters[0].max_khz = 1800000; + s->topology.clusters[0].type = PLAYER_CPU_CLUSTER_LITTLE; + + // BIG cluster (policy 4, CPUs 4-7) + s->topology.clusters[1].policy_id = 4; + s->topology.clusters[1].cpu_mask = 0xF0; + s->topology.clusters[1].cpu_count = 4; + s->topology.clusters[1].frequencies[0] = 800000; + s->topology.clusters[1].frequencies[1] = 1600000; + s->topology.clusters[1].frequencies[2] = 2400000; + s->topology.clusters[1].freq_count = 3; + s->topology.clusters[1].min_khz = 800000; + s->topology.clusters[1].max_khz = 2400000; + s->topology.clusters[1].type = PLAYER_CPU_CLUSTER_BIG; +} + +void test_buildPerfStates_dual_cluster_creates_six_states(void) { + setup_dual_cluster_topology(&state); + + PlayerCPU_buildPerfStates(&state, &config); + + TEST_ASSERT_EQUAL(6, state.topology.state_count); + TEST_ASSERT_EQUAL(1, state.use_topology); +} + +void test_buildPerfStates_dual_cluster_state_progression(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + + // State 0: LITTLE powersave, BIG powersave, affinity = LITTLE + TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[0].cluster_governor[0]); + TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[0].cluster_governor[1]); + TEST_ASSERT_EQUAL(0, 
state.topology.states[0].active_cluster_idx); + TEST_ASSERT_EQUAL(0x0F, state.topology.states[0].cpu_affinity_mask); // LITTLE CPUs + + // State 1: LITTLE schedutil, BIG powersave + TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_SCHEDUTIL, state.topology.states[1].cluster_governor[0]); + TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[1].cluster_governor[1]); + + // State 2: LITTLE performance, BIG powersave + TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_PERFORMANCE, state.topology.states[2].cluster_governor[0]); + TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[2].cluster_governor[1]); + + // State 3: BIG powersave, LITTLE powersave, affinity = BIG + TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[3].cluster_governor[0]); + TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[3].cluster_governor[1]); + TEST_ASSERT_EQUAL(1, state.topology.states[3].active_cluster_idx); + TEST_ASSERT_EQUAL(0xF0, state.topology.states[3].cpu_affinity_mask); // BIG CPUs + + // State 5: BIG performance (highest state) + TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_PERFORMANCE, state.topology.states[5].cluster_governor[1]); +} + +void test_buildPerfStates_single_cluster_skips_topology(void) { + state.topology.cluster_count = 1; + + PlayerCPU_buildPerfStates(&state, &config); + + TEST_ASSERT_EQUAL(0, state.topology.state_count); + TEST_ASSERT_EQUAL(0, state.use_topology); +} + +void test_applyPerfState_calls_governors(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + + state.target_state = 0; + state.current_state = -1; + + int result = PlayerCPU_applyPerfState(&state); + + TEST_ASSERT_EQUAL(0, result); + // Should call governor for each cluster (2 clusters = 2 calls) + TEST_ASSERT_EQUAL(2, stub_governor_calls); +} + +void test_applyPerfState_does_not_set_affinity_directly(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + + state.target_state = 0; + state.current_state = 
-1; + state.pending_affinity = 0; + + PlayerCPU_applyPerfState(&state); + + // applyPerfState should NOT set pending_affinity or call PWR_setThreadAffinity + // The caller is responsible for setting pending_affinity under mutex + TEST_ASSERT_EQUAL(0, state.pending_affinity); + TEST_ASSERT_EQUAL(0, stub_affinity_calls); +} + +void test_applyPerfState_updates_current_state(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + + state.target_state = 3; + state.current_state = -1; + + PlayerCPU_applyPerfState(&state); + + TEST_ASSERT_EQUAL(3, state.current_state); +} + +void test_update_topology_boost_increments_state(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 2; + state.current_state = 2; + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + + // High utilization frames (>85%) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + PlayerCPU_recordFrameTime(&state, 15000); // ~90% + } + + PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + + TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_BOOST, decision); + TEST_ASSERT_EQUAL(3, state.target_state); +} + +void test_update_topology_reduce_decrements_state(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 4; + state.current_state = 4; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Low utilization frames (<55%) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + PlayerCPU_recordFrameTime(&state, 6667); // ~40% + } + + PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + + TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_REDUCE, decision); + 
TEST_ASSERT_LESS_THAN(4, state.target_state); +} + +void test_update_topology_panic_jumps_states(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 1; + state.current_state = 1; + state.last_underrun = 0; + + // Underrun detected + PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 1, NULL); + + TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_PANIC, decision); + TEST_ASSERT_GREATER_THAN(1, state.target_state); +} + +void test_update_topology_no_boost_at_max_state(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 5; // Already at max + state.current_state = 5; + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + + // High utilization frames + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + PlayerCPU_recordFrameTime(&state, 15000); + } + + PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + + TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(5, state.target_state); +} + +void test_update_topology_no_reduce_at_min_state(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 0; // Already at min + state.current_state = 0; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Low utilization frames + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + PlayerCPU_recordFrameTime(&state, 6667); + } + + PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + + TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(0, state.target_state); +} + /////////////////////////////// // 
Test Runner /////////////////////////////// @@ -647,5 +1041,42 @@ int main(void) { RUN_TEST(test_update_reduce_fallback_mode); RUN_TEST(test_update_sweet_spot_resets_counters); + // Topology - initialization + RUN_TEST(test_initTopology_zeros_topology); + + // Topology - CPU list parsing + RUN_TEST(test_parseCPUList_single_cpu); + RUN_TEST(test_parseCPUList_range); + RUN_TEST(test_parseCPUList_mixed); + RUN_TEST(test_parseCPUList_single_high_cpu); + RUN_TEST(test_parseCPUList_empty_string); + + // Topology - cluster classification + RUN_TEST(test_classifyClusters_single_is_little); + RUN_TEST(test_classifyClusters_dual_little_big); + RUN_TEST(test_classifyClusters_tri_little_big_prime); + RUN_TEST(test_classifyClusters_dual_prime_by_frequency_gap); + + // Topology - representative frequencies + RUN_TEST(test_pickRepresentativeFreqs_single_freq); + RUN_TEST(test_pickRepresentativeFreqs_multiple_freqs); + + // Topology - PerfState building + RUN_TEST(test_buildPerfStates_dual_cluster_creates_six_states); + RUN_TEST(test_buildPerfStates_dual_cluster_state_progression); + RUN_TEST(test_buildPerfStates_single_cluster_skips_topology); + + // Topology - PerfState application + RUN_TEST(test_applyPerfState_calls_governors); + RUN_TEST(test_applyPerfState_does_not_set_affinity_directly); + RUN_TEST(test_applyPerfState_updates_current_state); + + // Topology - update decisions + RUN_TEST(test_update_topology_boost_increments_state); + RUN_TEST(test_update_topology_reduce_decrements_state); + RUN_TEST(test_update_topology_panic_jumps_states); + RUN_TEST(test_update_topology_no_boost_at_max_state); + RUN_TEST(test_update_topology_no_reduce_at_min_state); + return UNITY_END(); } diff --git a/workspace/all/common/api.c b/workspace/all/common/api.c index 9d8f4e6f..a443dd03 100644 --- a/workspace/all/common/api.c +++ b/workspace/all/common/api.c @@ -21,6 +21,9 @@ * - Font resources managed through TTF_CloseFont */ +// Enable GNU extensions for CPU affinity macros (must be before 
any includes) +#define _GNU_SOURCE + #include #include #include @@ -3328,6 +3331,329 @@ int PWR_setCPUFrequency_sysfs(int freq_khz) { return -1; } +/////////////////////////////// +// Multi-cluster CPU topology support +/////////////////////////////// + +// Include player_cpu.h for topology types +#include "../player/player_cpu.h" + +// Base path for cpufreq policies +#define CPUFREQ_BASE_PATH "/sys/devices/system/cpu/cpufreq" + +/** + * Comparison function for sorting clusters by max_khz ascending. + */ +static int compare_cluster_by_max_khz(const void* a, const void* b) { + const PlayerCPUCluster* ca = (const PlayerCPUCluster*)a; + const PlayerCPUCluster* cb = (const PlayerCPUCluster*)b; + return ca->max_khz - cb->max_khz; +} + +/** + * Reads an integer from a sysfs file. + * + * @param path Full path to sysfs file + * @return Value read, or 0 on failure + */ +static int read_sysfs_int(const char* path) { + FILE* fp = fopen(path, "r"); + if (!fp) + return 0; + + int value = 0; + if (fscanf(fp, "%d", &value) != 1) { + value = 0; + } + (void)fclose(fp); + return value; +} + +/** + * Reads available frequencies from a sysfs file into a cluster. 
+ * + * @param path Path to scaling_available_frequencies + * @param cluster Cluster to populate + * @return Number of frequencies read + */ +static int read_cluster_frequencies(const char* path, PlayerCPUCluster* cluster) { + FILE* fp = fopen(path, "r"); + if (!fp) + return 0; + + char buffer[256]; + int count = 0; + + if (fgets(buffer, sizeof(buffer), fp) != NULL) { + char* token = strtok(buffer, " \t\n"); + while (token != NULL && count < PLAYER_CPU_MAX_FREQS_PER_CLUSTER) { + int freq = atoi(token); + if (freq > 0) { + cluster->frequencies[count++] = freq; + } + token = strtok(NULL, " \t\n"); + } + } + (void)fclose(fp); + + // Sort frequencies ascending + if (count > 1) { + qsort(cluster->frequencies, count, sizeof(int), compare_int_asc); + } + + cluster->freq_count = count; + return count; +} + +/** + * Parses related_cpus file to get CPU mask and count. + * + * Format can be: "0 1 2 3" or "0-3" or "0-3 5 7-8" + * + * @param path Path to related_cpus file + * @param cpu_mask Output: bitmask of CPUs + * @param cpu_count Output: number of CPUs + * @return 1 on success, 0 on failure + */ +static int parse_related_cpus(const char* path, int* cpu_mask, int* cpu_count) { + FILE* fp = fopen(path, "r"); + if (!fp) + return 0; + + char buffer[128]; + *cpu_mask = 0; + *cpu_count = 0; + + if (fgets(buffer, sizeof(buffer), fp) != NULL) { + char* ptr = buffer; + while (*ptr) { + // Skip whitespace + while (*ptr == ' ' || *ptr == '\t' || *ptr == '\n') + ptr++; + if (!*ptr) + break; + + // Parse number + int start = atoi(ptr); + while (*ptr >= '0' && *ptr <= '9') + ptr++; + + int end = start; + if (*ptr == '-') { + // Range: "0-3" + ptr++; + end = atoi(ptr); + while (*ptr >= '0' && *ptr <= '9') + ptr++; + } + + // Add CPUs to mask + for (int cpu = start; cpu <= end && cpu < 32; cpu++) { + *cpu_mask |= (1 << cpu); + (*cpu_count)++; + } + + // Skip comma if present + if (*ptr == ',') + ptr++; + } + } + (void)fclose(fp); + return (*cpu_count > 0) ? 
1 : 0; +} + +int PWR_detectCPUTopology(struct PlayerCPUTopology* topology) { + PlayerCPUTopology* topo = topology; // Use typedef for internal code + if (!topo) { + return 0; + } + + // Initialize topology + memset(topo, 0, sizeof(*topo)); + + // Enumerate policies (0, 1, 2, ... up to 15) + // Policies may not be contiguous (e.g., policy0, policy4, policy7) + char path[256]; + int cluster_count = 0; + + for (int policy_id = 0; policy_id < 16 && cluster_count < PLAYER_CPU_MAX_CLUSTERS; + policy_id++) { + (void)snprintf(path, sizeof(path), "%s/policy%d", CPUFREQ_BASE_PATH, policy_id); + + // Check if policy directory exists by trying to read cpuinfo_max_freq + char max_freq_path[256]; + (void)snprintf(max_freq_path, sizeof(max_freq_path), "%s/cpuinfo_max_freq", path); + int max_khz = read_sysfs_int(max_freq_path); + if (max_khz <= 0) { + continue; // Policy doesn't exist + } + + PlayerCPUCluster* cluster = &topo->clusters[cluster_count]; + cluster->policy_id = policy_id; + cluster->max_khz = max_khz; + + // Read min freq + char min_freq_path[256]; + (void)snprintf(min_freq_path, sizeof(min_freq_path), "%s/cpuinfo_min_freq", path); + cluster->min_khz = read_sysfs_int(min_freq_path); + + // Read related_cpus + char cpus_path[256]; + (void)snprintf(cpus_path, sizeof(cpus_path), "%s/related_cpus", path); + if (!parse_related_cpus(cpus_path, &cluster->cpu_mask, &cluster->cpu_count)) { + LOG_warn("PWR_detectCPUTopology: failed to parse related_cpus for policy%d\n", + policy_id); + continue; + } + + // Read available frequencies + char freqs_path[256]; + (void)snprintf(freqs_path, sizeof(freqs_path), "%s/scaling_available_frequencies", path); + read_cluster_frequencies(freqs_path, cluster); + + // If no frequencies available, use min/max as fallback + if (cluster->freq_count == 0 && cluster->min_khz > 0 && cluster->max_khz > 0) { + cluster->frequencies[0] = cluster->min_khz; + cluster->frequencies[1] = (cluster->min_khz + cluster->max_khz) / 2; + cluster->frequencies[2] = 
cluster->max_khz; + cluster->freq_count = 3; + } + + LOG_debug("PWR_detectCPUTopology: policy%d: cpus=%d (mask=0x%x), %d-%d kHz, %d freqs\n", + policy_id, cluster->cpu_count, cluster->cpu_mask, cluster->min_khz, + cluster->max_khz, cluster->freq_count); + + cluster_count++; + } + + if (cluster_count == 0) { + LOG_info("PWR_detectCPUTopology: no clusters detected\n"); + return 0; + } + + // Sort clusters by max_khz ascending (LITTLE → BIG → PRIME) + if (cluster_count > 1) { + qsort(topo->clusters, cluster_count, sizeof(PlayerCPUCluster), compare_cluster_by_max_khz); + } + + // Classify clusters using shared logic from player_cpu.c + PlayerCPU_classifyClusters(topo->clusters, cluster_count); + + // Log classification results + const char* type_names[] = {"LITTLE", "BIG", "PRIME"}; + for (int i = 0; i < cluster_count; i++) { + PlayerCPUCluster* cluster = &topo->clusters[i]; + LOG_info("PWR_detectCPUTopology: cluster %d (policy%d): %s, %d CPUs, %d-%d kHz\n", i, + cluster->policy_id, type_names[cluster->type], cluster->cpu_count, + cluster->min_khz, cluster->max_khz); + } + + topo->cluster_count = cluster_count; + topo->topology_detected = 1; + + LOG_info("PWR_detectCPUTopology: detected %d cluster(s), multi-cluster=%s\n", cluster_count, + (cluster_count > 1) ? 
"yes" : "no"); + + return cluster_count; +} + +int PWR_setCPUClusterBounds(int policy_id, int min_khz, int max_khz) { + char path[256]; + int result = 0; + + // Write min_freq if specified + if (min_khz > 0) { + (void)snprintf(path, sizeof(path), "%s/policy%d/scaling_min_freq", CPUFREQ_BASE_PATH, + policy_id); + FILE* fp = fopen(path, "w"); + if (fp) { + (void)fprintf(fp, "%d\n", min_khz); + (void)fclose(fp); + } else { + LOG_warn("PWR_setCPUClusterBounds: failed to write min_freq for policy%d\n", policy_id); + result = -1; + } + } + + // Write max_freq if specified + if (max_khz > 0) { + (void)snprintf(path, sizeof(path), "%s/policy%d/scaling_max_freq", CPUFREQ_BASE_PATH, + policy_id); + FILE* fp = fopen(path, "w"); + if (fp) { + (void)fprintf(fp, "%d\n", max_khz); + (void)fclose(fp); + } else { + LOG_warn("PWR_setCPUClusterBounds: failed to write max_freq for policy%d\n", policy_id); + result = -1; + } + } + + return result; +} + +int PWR_setCPUGovernor(int policy_id, const char* governor) { + if (!governor) { + return -1; + } + + char path[256]; + (void)snprintf(path, sizeof(path), "%s/policy%d/scaling_governor", CPUFREQ_BASE_PATH, + policy_id); + + FILE* fp = fopen(path, "w"); + if (!fp) { + LOG_warn("PWR_setCPUGovernor: failed to open %s\n", path); + return -1; + } + + int written = fprintf(fp, "%s\n", governor); + int close_result = fclose(fp); + + if (written < 0 || close_result != 0) { + LOG_warn("PWR_setCPUGovernor: write failed for policy%d governor %s\n", policy_id, + governor); + return -1; + } + + LOG_debug("PWR_setCPUGovernor: set policy%d governor to %s\n", policy_id, governor); + return 0; +} + +#if defined(__linux__) +#include + +int PWR_setThreadAffinity(int cpu_mask) { + if (cpu_mask <= 0) { + return -1; + } + + cpu_set_t set; + CPU_ZERO(&set); + + for (int cpu = 0; cpu < 32; cpu++) { + if (cpu_mask & (1 << cpu)) { + CPU_SET(cpu, &set); + } + } + + // Set affinity for current thread + if (sched_setaffinity(0, sizeof(set), &set) != 0) { + 
LOG_warn("PWR_setThreadAffinity: sched_setaffinity failed: %s\n", strerror(errno)); + return -1; + } + + LOG_debug("PWR_setThreadAffinity: set affinity mask to 0x%x\n", cpu_mask); + return 0; +} +#else +// Non-Linux platforms: no-op +int PWR_setThreadAffinity(int cpu_mask) { + (void)cpu_mask; + return 0; +} +#endif + /////////////////////////////// // Platform utility functions /////////////////////////////// diff --git a/workspace/all/common/api.h b/workspace/all/common/api.h index de916940..702c1afc 100644 --- a/workspace/all/common/api.h +++ b/workspace/all/common/api.h @@ -1521,6 +1521,61 @@ int PWR_getAvailableCPUFrequencies_sysfs(int* frequencies, int max_count); */ int PWR_setCPUFrequency_sysfs(int freq_khz); +/////////////////////////////// +// Multi-cluster CPU topology support +/////////////////////////////// + +// Forward declarations from player_cpu.h (avoid circular include) +struct PlayerCPUTopology; + +/** + * Detects CPU topology from sysfs. + * + * Enumerates /sys/devices/system/cpu/cpufreq/policy{0,1,...} and reads: + * - related_cpus: Which CPUs belong to this cluster + * - cpuinfo_min_freq / cpuinfo_max_freq: Frequency bounds + * - scaling_available_frequencies: Available frequency steps + * + * Clusters are sorted by max_khz ascending (LITTLE → BIG → PRIME). + * + * @param topology Output structure to populate + * @return Number of clusters found (0 on failure, 1 for single-cluster) + */ +int PWR_detectCPUTopology(struct PlayerCPUTopology* topology); + +/** + * Sets frequency bounds for a CPU cluster. + * + * For multi-cluster mode with schedutil governor, writes to: + * - /sys/devices/system/cpu/cpufreq/policy{N}/scaling_min_freq + * - /sys/devices/system/cpu/cpufreq/policy{N}/scaling_max_freq + * + * @param policy_id Policy number (0, 4, 7, etc.) 
+ * @param min_khz Minimum frequency in kHz (0 = don't change) + * @param max_khz Maximum frequency in kHz (0 = don't change) + * @return 0 on success, -1 on failure + */ +int PWR_setCPUClusterBounds(int policy_id, int min_khz, int max_khz); + +/** + * Sets CPU governor for a cluster. + * + * @param policy_id Policy number (0, 4, 7, etc.) + * @param governor Governor name ("userspace", "schedutil", etc.) + * @return 0 on success, -1 on failure + */ +int PWR_setCPUGovernor(int policy_id, const char* governor); + +/** + * Sets CPU affinity for the current thread. + * + * Uses sched_setaffinity() to restrict the calling thread to specific CPUs (no-op on non-Linux platforms). + * + * @param cpu_mask Bitmask of allowed CPUs (bit 0 = CPU0, bit 1 = CPU1, etc.) + * @return 0 on success, -1 on failure + */ +int PWR_setThreadAffinity(int cpu_mask); + /** * Platform-specific rumble/vibration control. * diff --git a/workspace/all/player/player.c b/workspace/all/player/player.c index f8c7d728..99bfcb65 100644 --- a/workspace/all/player/player.c +++ b/workspace/all/player/player.c @@ -843,11 +843,37 @@ static struct Config config = * Thread safety: Uses auto_cpu_mutex to protect shared state. 
*/ static void* auto_cpu_scaling_thread(void* arg) { - LOG_debug("Auto CPU thread: started (granular=%d, freq_count=%d)\n", - auto_cpu_state.use_granular, auto_cpu_state.freq_count); + LOG_debug("Auto CPU thread: started (topology=%d, granular=%d, freq_count=%d)\n", + auto_cpu_state.use_topology, auto_cpu_state.use_granular, auto_cpu_state.freq_count); while (auto_cpu_thread_running) { - if (auto_cpu_state.use_granular) { + if (auto_cpu_state.use_topology) { + // Multi-cluster topology mode: apply PerfState changes + pthread_mutex_lock(&auto_cpu_mutex); + int target_state = auto_cpu_state.target_state; + int current_state = auto_cpu_state.current_state; + pthread_mutex_unlock(&auto_cpu_mutex); + + if (target_state != current_state && target_state >= 0 && + target_state < auto_cpu_state.topology.state_count) { + LOG_debug("Auto CPU: applying PerfState %d/%d\n", target_state, + auto_cpu_state.topology.state_count - 1); + + int result = PlayerCPU_applyPerfState(&auto_cpu_state); + if (result != 0) { + LOG_warn("Auto CPU: failed to apply PerfState %d\n", target_state); + } + + // Set pending_affinity under mutex (main thread will apply it) + // This avoids race condition with main thread reading pending_affinity + PlayerCPUPerfState* ps = &auto_cpu_state.topology.states[target_state]; + pthread_mutex_lock(&auto_cpu_mutex); + if (ps->cpu_affinity_mask > 0) { + auto_cpu_state.pending_affinity = ps->cpu_affinity_mask; + } + pthread_mutex_unlock(&auto_cpu_mutex); + } + } else if (auto_cpu_state.use_granular) { // Granular frequency mode pthread_mutex_lock(&auto_cpu_mutex); int target_idx = auto_cpu_state.target_index; @@ -980,6 +1006,23 @@ static void auto_cpu_setTargetIndex(int index) { pthread_mutex_unlock(&auto_cpu_mutex); } +/** + * Requests a PerfState change (non-blocking, topology mode). 
+ * + * @param state Target PerfState index + */ +static void auto_cpu_setTargetState(int state) { + int max_state = auto_cpu_state.topology.state_count - 1; + if (state < 0) + state = 0; + if (state > max_state) + state = max_state; + + pthread_mutex_lock(&auto_cpu_mutex); + auto_cpu_state.target_state = state; + pthread_mutex_unlock(&auto_cpu_mutex); +} + /** * Gets the current frequency index (thread-safe). */ @@ -1022,6 +1065,54 @@ static int auto_cpu_findNearestIndex(int target_khz) { * - PERFORMANCE: 100% (max frequency) */ static void auto_cpu_detectFrequencies(void) { + // First, try topology detection for multi-cluster SoCs + int cluster_count = PWR_detectCPUTopology(&auto_cpu_state.topology); + + if (cluster_count >= 2) { + // Multi-cluster detected - use topology mode + auto_cpu_state.use_topology = 1; + auto_cpu_state.use_granular = 0; + + // Build the PerfState ladder (3 governor levels per cluster tier) + PlayerCPU_buildPerfStates(&auto_cpu_state, &auto_cpu_config); + + // Note: governors are now set by applyPerfState(), not upfront + // This lets each PerfState control its own governor configuration + + LOG_info("Auto CPU: topology mode enabled, %d clusters, %d PerfStates\n", cluster_count, + auto_cpu_state.topology.state_count); + + // Log cluster info + for (int c = 0; c < cluster_count; c++) { + PlayerCPUCluster* cluster = &auto_cpu_state.topology.clusters[c]; + const char* type_str = cluster->type == PLAYER_CPU_CLUSTER_PRIME ? "PRIME" + : cluster->type == PLAYER_CPU_CLUSTER_BIG ? "BIG" + : cluster->type == PLAYER_CPU_CLUSTER_LITTLE ? 
"LITTLE" + : "?"; + LOG_debug("Auto CPU: cluster %d (policy%d): %s, %d CPUs, %d-%d MHz\n", c, + cluster->policy_id, type_str, cluster->cpu_count, cluster->min_khz / 1000, + cluster->max_khz / 1000); + } + + // Log PerfState ladder (governor-based) + static const char* gov_names[] = {"powersave", "schedutil", "performance"}; + for (int s = 0; s < auto_cpu_state.topology.state_count; s++) { + PlayerCPUPerfState* ps = &auto_cpu_state.topology.states[s]; + LOG_debug("Auto CPU: PerfState %d: cluster %d, affinity=0x%x\n", s, + ps->active_cluster_idx, ps->cpu_affinity_mask); + for (int c = 0; c < cluster_count; c++) { + int gov = ps->cluster_governor[c]; + const char* gov_str = (gov >= 0 && gov <= 2) ? gov_names[gov] : "?"; + LOG_debug(" cluster %d: %s\n", c, gov_str); + } + } + + return; + } + + // Single-cluster or no topology - fall back to traditional mode + auto_cpu_state.use_topology = 0; + int raw_count = PLAT_getAvailableCPUFrequencies(auto_cpu_state.frequencies, CPU_MAX_FREQUENCIES); @@ -1103,9 +1194,16 @@ static void resetAutoCPUState(void) { // Note: target/current frequency set by setOverclock() after this call - LOG_info("Auto CPU: enabled, frame budget=%lluus (%.2f fps), granular=%d\n", - (unsigned long long)auto_cpu_state.frame_budget_us, core.fps, - auto_cpu_state.use_granular); + if (auto_cpu_state.use_topology) { + LOG_info("Auto CPU: enabled (topology mode), frame budget=%lluus (%.2f fps), " + "clusters=%d, states=%d\n", + (unsigned long long)auto_cpu_state.frame_budget_us, core.fps, + auto_cpu_state.topology.cluster_count, auto_cpu_state.topology.state_count); + } else { + LOG_info("Auto CPU: enabled, frame budget=%lluus (%.2f fps), granular=%d\n", + (unsigned long long)auto_cpu_state.frame_budget_us, core.fps, + auto_cpu_state.use_granular); + } LOG_debug( "Auto CPU: util thresholds high=%d%% low=%d%%, windows boost=%d reduce=%d, grace=%d\n", auto_cpu_config.util_high, auto_cpu_config.util_low, auto_cpu_config.boost_windows, @@ -1133,7 +1231,21 @@ 
void setOverclock(int i) { resetAutoCPUState(); // Start at max frequency to avoid startup stutter during grace period // Background thread will scale down as needed after grace period - if (auto_cpu_state.use_granular) { + if (auto_cpu_state.use_topology) { + // Multi-cluster mode: start at highest PerfState + int start_state = auto_cpu_state.topology.state_count - 1; + pthread_mutex_lock(&auto_cpu_mutex); + auto_cpu_state.target_state = start_state; + auto_cpu_state.current_state = -1; // Force apply on first thread iteration + pthread_mutex_unlock(&auto_cpu_mutex); + // Apply initial state immediately (thread will maintain it) + PlayerCPU_applyPerfState(&auto_cpu_state); + // Apply affinity directly since we're on the main (emulation) thread + PlayerCPUPerfState* ps = &auto_cpu_state.topology.states[start_state]; + if (ps->cpu_affinity_mask > 0) { + PWR_setThreadAffinity(ps->cpu_affinity_mask); + } + } else if (auto_cpu_state.use_granular) { int start_idx = auto_cpu_state.preset_indices[2]; // PERFORMANCE - start high int start_freq = auto_cpu_state.frequencies[start_idx]; PLAT_setCPUFrequency(start_freq); @@ -1192,12 +1304,28 @@ static void updateAutoCPU(void) { pthread_mutex_lock(&auto_cpu_mutex); int current_idx = auto_cpu_state.target_index; int current_level = auto_cpu_state.target_level; + int current_state = auto_cpu_state.target_state; + int pending_affinity = auto_cpu_state.pending_affinity; + auto_cpu_state.pending_affinity = 0; // Clear after reading pthread_mutex_unlock(&auto_cpu_mutex); + // Apply pending affinity from background thread (must be done from main thread) + if (pending_affinity > 0) { + PWR_setThreadAffinity(pending_affinity); + } + // Emergency: check for actual underruns (panic path) unsigned underruns = SND_getUnderrunCount(); int max_idx = auto_cpu_state.freq_count - 1; - int at_max = auto_cpu_state.use_granular ? 
(current_idx >= max_idx) : (current_level >= 2); + int max_state = auto_cpu_state.topology.state_count - 1; + int at_max; + if (auto_cpu_state.use_topology) { + at_max = (current_state >= max_state); + } else if (auto_cpu_state.use_granular) { + at_max = (current_idx >= max_idx); + } else { + at_max = (current_level >= 2); + } if (underruns > auto_cpu_state.last_underrun && !at_max) { // Underrun detected - track panic and boost @@ -1222,7 +1350,14 @@ static void updateAutoCPU(void) { } } - if (auto_cpu_state.use_granular) { + if (auto_cpu_state.use_topology) { + int new_state = current_state + auto_cpu_config.panic_step_up; + if (new_state > max_state) + new_state = max_state; + auto_cpu_setTargetState(new_state); + LOG_warn("Auto CPU: PANIC - underrun, boosting state %d→%d (audio=%u%%)\n", + current_state, new_state, audio_fill); + } else if (auto_cpu_state.use_granular) { int new_idx = current_idx + auto_cpu_config.panic_step_up; if (new_idx > max_idx) new_idx = max_idx; @@ -1276,7 +1411,64 @@ static void updateAutoCPU(void) { util = 200; // Cap at 200% for sanity } - if (auto_cpu_state.use_granular) { + if (auto_cpu_state.use_topology) { + // Topology mode: step through PerfStates one at a time + // Unlike granular mode, we don't predict - just step conservatively + + // Decrement panic cooldown each window + if (auto_cpu_state.panic_cooldown > 0) { + auto_cpu_state.panic_cooldown--; + } + + if (util > auto_cpu_config.util_high) { + // Need more performance - step up + auto_cpu_state.high_util_windows++; + auto_cpu_state.low_util_windows = 0; + + if (auto_cpu_state.high_util_windows >= auto_cpu_config.boost_windows && + current_state < max_state) { + int new_state = current_state + 1; + auto_cpu_setTargetState(new_state); + auto_cpu_state.high_util_windows = 0; + LOG_debug("Auto CPU: BOOST state %d→%d (util=%u%%)\n", current_state, new_state, + util); + } + } else if (util < auto_cpu_config.util_low) { + // Can reduce power - step down + 
auto_cpu_state.low_util_windows++; + auto_cpu_state.high_util_windows = 0; + + // Only reduce if: enough consecutive low windows AND panic cooldown expired + int reduce_ok = + (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows) && + (auto_cpu_state.panic_cooldown == 0) && (current_state > 0); + + if (reduce_ok) { + // Step down by max_step_down (usually 1) + int new_state = current_state - auto_cpu_config.max_step_down; + if (new_state < 0) + new_state = 0; + auto_cpu_setTargetState(new_state); + auto_cpu_state.low_util_windows = 0; + LOG_debug("Auto CPU: REDUCE state %d→%d (util=%u%%)\n", current_state, + new_state, util); + } + } else { + // In sweet spot - reset counters + auto_cpu_state.high_util_windows = 0; + auto_cpu_state.low_util_windows = 0; + } + + // Sampled debug logging (every 4th window = ~2 seconds) + static int debug_window_count_topo = 0; + if (++debug_window_count_topo >= 4) { + debug_window_count_topo = 0; + SND_Snapshot snap = SND_getSnapshot(); + LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% state=%d/%d\n", + snap.fill_pct, snap.rate_integral, snap.total_adjust, util, current_state, + max_state); + } + } else if (auto_cpu_state.use_granular) { // Granular mode: use linear performance scaling to find optimal frequency // Performance scales linearly with frequency, so: // new_util = current_util * (current_freq / new_freq) diff --git a/workspace/all/player/player_cpu.c b/workspace/all/player/player_cpu.c index f6eaa6df..cdd72e9f 100644 --- a/workspace/all/player/player_cpu.c +++ b/workspace/all/player/player_cpu.c @@ -194,8 +194,8 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* result->p90_time = 0; } - // Skip if scaling is disabled (0 or 1 frequency available) - if (state->scaling_disabled) { + // Skip if scaling is disabled (0 or 1 frequency available) AND not using topology mode + if (state->scaling_disabled && !state->use_topology) { if (result) result->decision = 
PLAYER_CPU_DECISION_SKIP; return PLAYER_CPU_DECISION_SKIP; @@ -216,20 +216,40 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* return PLAYER_CPU_DECISION_SKIP; } - // Get current indices (target_index is always 0..freq_count-1) + // Get current indices based on mode int current_idx = state->target_index; int current_level = state->target_level; + int current_state_idx = state->target_state; int max_idx = state->freq_count - 1; if (max_idx < 0) max_idx = 0; - - // Check if at max - bool at_max = state->use_granular ? (current_idx >= max_idx) : (current_level >= 2); + int max_state = state->topology.state_count - 1; + if (max_state < 0) + max_state = 0; + + // Check if at max based on mode + bool at_max; + if (state->use_topology) { + at_max = (current_state_idx >= max_state); + } else if (state->use_granular) { + at_max = (current_idx >= max_idx); + } else { + at_max = (current_level >= 2); + } // Emergency: check for underruns (panic path) if (current_underruns > state->last_underrun && !at_max) { // Underrun detected - boost by panic_step_up - if (state->use_granular) { + if (state->use_topology) { + int new_state = current_state_idx + config->panic_step_up; + if (new_state > max_state) + new_state = max_state; + state->target_state = new_state; + if (result) { + result->decision = PLAYER_CPU_DECISION_PANIC; + result->new_index = new_state; // Use new_index for state index + } + } else if (state->use_granular) { int new_idx = current_idx + config->panic_step_up; if (new_idx > max_idx) new_idx = max_idx; @@ -298,7 +318,64 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* PlayerCPUDecision decision = PLAYER_CPU_DECISION_NONE; - if (state->use_granular) { + if (state->use_topology) { + // Topology mode: multi-cluster PerfState scaling + // Decrement panic cooldown + if (state->panic_cooldown > 0) { + state->panic_cooldown--; + } + + if (util > config->util_high) { + // Need more performance + 
state->high_util_windows++; + state->low_util_windows = 0; + + if (state->high_util_windows >= config->boost_windows && + current_state_idx < max_state) { + // Step up one state at a time (conservative approach for multi-cluster) + int new_state = current_state_idx + 1; + if (new_state > max_state) + new_state = max_state; + + state->target_state = new_state; + state->high_util_windows = 0; + decision = PLAYER_CPU_DECISION_BOOST; + + if (result) { + result->decision = PLAYER_CPU_DECISION_BOOST; + result->new_index = new_state; + } + } + } else if (util < config->util_low) { + // Can reduce power + state->low_util_windows++; + state->high_util_windows = 0; + + // Only reduce if enough windows AND panic cooldown expired + bool reduce_ok = (state->low_util_windows >= config->reduce_windows) && + (state->panic_cooldown == 0) && (current_state_idx > 0); + + if (reduce_ok) { + // Step down one state at a time + int new_state = current_state_idx - config->max_step_down; + if (new_state < 0) + new_state = 0; + + state->target_state = new_state; + state->low_util_windows = 0; + decision = PLAYER_CPU_DECISION_REDUCE; + + if (result) { + result->decision = PLAYER_CPU_DECISION_REDUCE; + result->new_index = new_state; + } + } + } else { + // In sweet spot - reset counters + state->high_util_windows = 0; + state->low_util_windows = 0; + } + } else if (state->use_granular) { // Granular mode: linear frequency scaling int current_freq = state->frequencies[current_idx]; @@ -420,3 +497,274 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* return decision; } + +/////////////////////////////// +// Multi-cluster topology functions +/////////////////////////////// + +// Forward declaration for PWR functions (defined in api.c) +extern int PWR_setCPUGovernor(int policy_id, const char* governor); +extern int PWR_setThreadAffinity(int cpu_mask); + +/** + * Returns the governor string for a given governor type. 
+ */ +static const char* governor_name(PlayerCPUGovernor gov) { + switch (gov) { + case PLAYER_CPU_GOV_POWERSAVE: + return "powersave"; + case PLAYER_CPU_GOV_SCHEDUTIL: + return "schedutil"; + case PLAYER_CPU_GOV_PERFORMANCE: + return "performance"; + default: + return "schedutil"; + } +} + +void PlayerCPU_initTopology(PlayerCPUTopology* topology) { + memset(topology, 0, sizeof(PlayerCPUTopology)); +} + +int PlayerCPU_parseCPUList(const char* str, int* cpu_count) { + if (!str || !cpu_count) { + if (cpu_count) + *cpu_count = 0; + return 0; + } + + int mask = 0; + *cpu_count = 0; + + const char* ptr = str; + while (*ptr) { + // Skip whitespace and commas + while (*ptr == ' ' || *ptr == '\t' || *ptr == '\n' || *ptr == ',') + ptr++; + if (!*ptr) + break; + + // Parse number + int start = 0; + while (*ptr >= '0' && *ptr <= '9') { + start = start * 10 + (*ptr - '0'); + ptr++; + } + + int end = start; + if (*ptr == '-') { + // Range: "0-3" + ptr++; + end = 0; + while (*ptr >= '0' && *ptr <= '9') { + end = end * 10 + (*ptr - '0'); + ptr++; + } + } + + // Add CPUs to mask + for (int cpu = start; cpu <= end && cpu < 32; cpu++) { + if (!(mask & (1 << cpu))) { + mask |= (1 << cpu); + (*cpu_count)++; + } + } + } + + return mask; +} + +void PlayerCPU_classifyClusters(PlayerCPUCluster* clusters, int count) { + if (!clusters || count <= 0) + return; + + for (int i = 0; i < count; i++) { + PlayerCPUCluster* cluster = &clusters[i]; + + if (i == 0) { + // First cluster (lowest max_khz) is always LITTLE + cluster->type = PLAYER_CPU_CLUSTER_LITTLE; + } else if (i == count - 1) { + // Last cluster might be PRIME if single CPU or significantly faster + int prev_max = clusters[i - 1].max_khz; + int freq_gap_percent = 0; + if (prev_max > 0) { + freq_gap_percent = ((cluster->max_khz - prev_max) * 100) / prev_max; + } + + if (cluster->cpu_count == 1 || freq_gap_percent > 10) { + cluster->type = PLAYER_CPU_CLUSTER_PRIME; + } else { + cluster->type = PLAYER_CPU_CLUSTER_BIG; + } + } else { + // 
Middle clusters are BIG + cluster->type = PLAYER_CPU_CLUSTER_BIG; + } + } +} + +void PlayerCPU_pickRepresentativeFreqs(const PlayerCPUCluster* cluster, int* low_khz, int* mid_khz, + int* high_khz) { + if (!cluster || cluster->freq_count <= 0) { + if (low_khz) + *low_khz = 0; + if (mid_khz) + *mid_khz = 0; + if (high_khz) + *high_khz = 0; + return; + } + + // Low: first frequency + if (low_khz) { + *low_khz = cluster->frequencies[0]; + } + + // Mid: middle frequency + if (mid_khz) { + int mid_idx = cluster->freq_count / 2; + *mid_khz = cluster->frequencies[mid_idx]; + } + + // High: last frequency + if (high_khz) { + *high_khz = cluster->frequencies[cluster->freq_count - 1]; + } +} + +/** + * Builds a single PerfState entry using governors instead of frequency bounds. + * + * @param state PerfState to populate + * @param cluster_count Number of clusters in topology + * @param active_cluster_idx Index of the active cluster for this state + * @param clusters Array of cluster info + * @param governor_level 0=powersave, 1=schedutil, 2=performance for active cluster + */ +static void build_perf_state(PlayerCPUPerfState* state, int cluster_count, int active_cluster_idx, + const PlayerCPUCluster* clusters, int governor_level) { + memset(state, 0, sizeof(*state)); + + state->active_cluster_idx = active_cluster_idx; + state->cpu_affinity_mask = 0; + + // Set governors for all clusters + for (int i = 0; i < cluster_count && i < PLAYER_CPU_MAX_CLUSTERS; i++) { + const PlayerCPUCluster* cluster = &clusters[i]; + + if (i == active_cluster_idx) { + // Active cluster: use the specified governor level + switch (governor_level) { + case 0: + state->cluster_governor[i] = PLAYER_CPU_GOV_POWERSAVE; + break; + case 1: + state->cluster_governor[i] = PLAYER_CPU_GOV_SCHEDUTIL; + break; + case 2: + default: + state->cluster_governor[i] = PLAYER_CPU_GOV_PERFORMANCE; + break; + } + // Add active cluster to affinity + state->cpu_affinity_mask |= cluster->cpu_mask; + } else { + // Inactive 
clusters: powersave (let them idle/sleep) + state->cluster_governor[i] = PLAYER_CPU_GOV_POWERSAVE; + } + } +} + +void PlayerCPU_buildPerfStates(PlayerCPUState* state, const PlayerCPUConfig* config) { + (void)config; // Reserved for future configuration + + PlayerCPUTopology* topo = &state->topology; + + if (!topo->topology_detected || topo->cluster_count <= 1) { + // Single-cluster or no topology: don't use PerfState mode + topo->state_count = 0; + state->use_topology = 0; + return; + } + + int cluster_count = topo->cluster_count; + int state_idx = 0; + + // Build states for each cluster tier using governors + // Structure: 3 governor levels per cluster (powersave/schedutil/performance) + // + // Dual-cluster (LITTLE + BIG): + // 0: LITTLE powersave, BIG powersave - lightest workloads + // 1: LITTLE schedutil, BIG powersave - light workloads (kernel finds sweet spot) + // 2: LITTLE performance, BIG powersave - moderate workloads + // 3: BIG powersave, LITTLE powersave - heavier workloads (conserve power) + // 4: BIG schedutil, LITTLE powersave - heavy workloads (kernel scales) + // 5: BIG performance, LITTLE powersave - demanding workloads + // + // Tri-cluster adds 3 more states for PRIME (6-8) + + for (int cluster_idx = 0; cluster_idx < cluster_count && state_idx < PLAYER_CPU_MAX_PERF_STATES; + cluster_idx++) { + // 3 governor levels per cluster + for (int gov_level = 0; gov_level < 3 && state_idx < PLAYER_CPU_MAX_PERF_STATES; + gov_level++) { + PlayerCPUPerfState* ps = &topo->states[state_idx]; + build_perf_state(ps, cluster_count, cluster_idx, topo->clusters, gov_level); + + // For PRIME cluster, include BIG in affinity (allow scheduler some flexibility) + if (cluster_idx == cluster_count - 1 && cluster_count >= 3 && + topo->clusters[cluster_idx].type == PLAYER_CPU_CLUSTER_PRIME) { + // Add BIG cluster(s) to affinity + for (int i = 1; i < cluster_idx; i++) { + if (topo->clusters[i].type == PLAYER_CPU_CLUSTER_BIG) { + ps->cpu_affinity_mask |= 
topo->clusters[i].cpu_mask; + } + } + } + + state_idx++; + } + } + + topo->state_count = state_idx; + state->use_topology = 1; + state->target_state = state_idx - 1; // Start at highest (performance on fastest cluster) + state->current_state = -1; // Not yet applied +} + +int PlayerCPU_applyPerfState(PlayerCPUState* state) { + PlayerCPUTopology* topo = &state->topology; + + if (!state->use_topology || topo->state_count <= 0) { + return -1; + } + + int target = state->target_state; + if (target < 0) + target = 0; + if (target >= topo->state_count) + target = topo->state_count - 1; + + PlayerCPUPerfState* ps = &topo->states[target]; + int result = 0; + + // Apply governors to each cluster + for (int i = 0; i < topo->cluster_count; i++) { + int policy_id = topo->clusters[i].policy_id; + const char* gov = governor_name(ps->cluster_governor[i]); + + if (PWR_setCPUGovernor(policy_id, gov) != 0) { + result = -1; + } + } + + // Note: pending_affinity is NOT set here to avoid race conditions. + // The caller is responsible for setting pending_affinity under mutex + // after this function returns. See auto_cpu_scaling_thread(). + + // Update current state + state->current_state = target; + + return result; +} diff --git a/workspace/all/player/player_cpu.h b/workspace/all/player/player_cpu.h index 7b7f1583..50d2065e 100644 --- a/workspace/all/player/player_cpu.h +++ b/workspace/all/player/player_cpu.h @@ -5,9 +5,18 @@ * performance. Uses frame timing (core.run() execution time) to determine * optimal CPU frequency. * - * Two modes are supported: - * - Granular mode: Uses all available CPU frequencies (linear scaling) - * - Fallback mode: Uses 3 fixed levels (powersave/normal/performance) + * Three modes are supported: + * - Topology mode: Multi-cluster SoCs (big.LITTLE, etc.) 
using PerfState ladder + * - Granular mode: Single-cluster with all available frequencies (linear scaling) + * - Fallback mode: 3 fixed levels (powersave/normal/performance) + * + * Topology mode: + * - Detects CPU clusters via sysfs and builds a performance state ladder + * - Uses GOVERNORS (powersave/schedutil/performance) rather than frequency bounds + * - Works WITH the kernel's frequency scaling instead of fighting it + * - Creates a gradient: 3 states per cluster tier (powersave/schedutil/performance) + * - Progresses: LITTLE tier → BIG tier → PRIME tier (if available) + * - Uses CPU affinity to guide which cluster the emulation thread runs on * * Designed for testability with injectable state and callbacks. * Extracted from player.c. @@ -45,6 +54,13 @@ #define PLAYER_CPU_DEFAULT_PANIC_STEP_UP 2 // Frequency steps on panic (underrun) #define PLAYER_CPU_PANIC_THRESHOLD 3 // Block frequency after this many panics +/** + * Multi-cluster topology constants. + */ +#define PLAYER_CPU_MAX_CLUSTERS 8 // Maximum CPU clusters (policies) +#define PLAYER_CPU_MAX_PERF_STATES 16 // Maximum performance states in ladder +#define PLAYER_CPU_MAX_FREQS_PER_CLUSTER 16 // Maximum frequencies per cluster + /** * Preset level indices. */ @@ -54,6 +70,75 @@ typedef enum { PLAYER_CPU_LEVEL_PERFORMANCE = 2 } PlayerCPULevel; +/** + * Cluster type classification based on relative performance. + * Determined by sorting clusters by max_khz and analyzing the distribution. + */ +typedef enum { + PLAYER_CPU_CLUSTER_LITTLE = 0, // Efficiency cores (lowest max_khz) + PLAYER_CPU_CLUSTER_BIG = 1, // Performance cores (middle) + PLAYER_CPU_CLUSTER_PRIME = 2, // Premium core (highest max_khz, often single) +} PlayerCPUClusterType; + +/** + * Governor types for PerfState ladder. 
+ * + * Instead of manipulating frequency bounds, we use governors to create + * a gradient of performance levels within each cluster tier: + * - POWERSAVE: runs at minimum frequency (very efficient) + * - SCHEDUTIL: dynamic scaling based on load (balanced) + * - PERFORMANCE: runs at maximum frequency (full power) + */ +typedef enum { + PLAYER_CPU_GOV_POWERSAVE = 0, // Min frequency - for light workloads + PLAYER_CPU_GOV_SCHEDUTIL = 1, // Dynamic scaling - kernel finds sweet spot + PLAYER_CPU_GOV_PERFORMANCE = 2, // Max frequency - for demanding workloads +} PlayerCPUGovernor; + +/** + * Information about a single CPU cluster (cpufreq policy). + * Each cluster represents a group of CPUs that share a frequency. + */ +typedef struct { + int policy_id; // Policy number (0, 4, 7, etc. from policyN) + int cpu_mask; // Bitmask of CPUs in this cluster + int cpu_count; // Number of CPUs in cluster + int frequencies + [PLAYER_CPU_MAX_FREQS_PER_CLUSTER]; // Available frequencies (kHz, sorted ascending) + int freq_count; // Number of frequencies + int min_khz; // cpuinfo_min_freq + int max_khz; // cpuinfo_max_freq + PlayerCPUClusterType type; // LITTLE/BIG/PRIME classification +} PlayerCPUCluster; + +/** + * A performance state represents one step in the autoscaler's ladder. + * + * Instead of manipulating frequency bounds, each state specifies: + * - Which cluster is "active" (where the emulation thread should run) + * - What governor to use on each cluster + * - CPU affinity to guide the scheduler + * + * This works WITH the kernel's frequency scaling rather than against it. + */ +typedef struct { + PlayerCPUGovernor cluster_governor[PLAYER_CPU_MAX_CLUSTERS]; // Governor per cluster + int cpu_affinity_mask; // Bitmask of CPUs for emulation thread + int active_cluster_idx; // Which cluster is the "active" one +} PlayerCPUPerfState; + +/** + * Complete CPU topology information detected from sysfs. + * Populated by PWR_detectCPUTopology() at initialization. 
+ */ +typedef struct { + PlayerCPUCluster clusters[PLAYER_CPU_MAX_CLUSTERS]; // Detected clusters (sorted by max_khz) + int cluster_count; // Number of clusters detected + PlayerCPUPerfState states[PLAYER_CPU_MAX_PERF_STATES]; // Performance state ladder + int state_count; // Number of states in ladder + int topology_detected; // 1 if detection completed successfully +} PlayerCPUTopology; + /** * Decision type returned by PlayerCPU_update(). */ @@ -119,6 +204,13 @@ typedef struct { // Per-frequency panic tracking (failsafe for problematic frequencies) int panic_count[PLAYER_CPU_MAX_FREQUENCIES]; // Count of panics at each frequency + + // Multi-cluster topology support + PlayerCPUTopology topology; // Detected CPU topology + int target_state; // Target PerfState index (multi-cluster mode) + int current_state; // Currently applied PerfState index + int use_topology; // 1 = multi-cluster mode active + int pending_affinity; // CPU mask to apply from main thread (0 = none pending) } PlayerCPUState; /** @@ -240,4 +332,81 @@ int PlayerCPU_getPresetPercentage(PlayerCPULevel level); */ uint64_t PlayerCPU_percentile90(const uint64_t* frame_times, int count); +/////////////////////////////// +// Multi-cluster topology functions +/////////////////////////////// + +/** + * Initializes topology structure to empty state. + * + * @param topology Topology to initialize + */ +void PlayerCPU_initTopology(PlayerCPUTopology* topology); + +/** + * Builds the PerfState ladder from detected topology. 
+ * + * Creates a progression of performance states using governors: + * - Single-cluster: No states built (use existing frequency array) + * - Dual-cluster: 6 states (LITTLE powersave/schedutil/performance, + * BIG powersave/schedutil/performance) + * - Tri-cluster: 9 states (add PRIME powersave/schedutil/performance) + * + * Each state sets: + * - Active cluster's governor (powersave/schedutil/performance) + * - Inactive clusters to powersave (let them idle) + * - CPU affinity to guide emulation thread to active cluster + * + * @param state CPU state with populated topology.clusters + * @param config Configuration + */ +void PlayerCPU_buildPerfStates(PlayerCPUState* state, const PlayerCPUConfig* config); + +/** + * Applies a PerfState by setting the governor on every cluster. + * + * Called by background thread when target_state != current_state. + * Does not change thread affinity; the caller queues pending_affinity itself. + * + * @param state CPU state with target_state set + * @return 0 on success, -1 on failure + */ +int PlayerCPU_applyPerfState(PlayerCPUState* state); + +/** + * Parses a CPU list string (e.g., "0-3" or "0 1 2 3") into a bitmask. + * + * @param str CPU list string from sysfs (e.g., "0-3,5,7-8") + * @param cpu_count Output: number of CPUs in the list + * @return Bitmask of CPUs + */ +int PlayerCPU_parseCPUList(const char* str, int* cpu_count); + +/** + * Classifies clusters based on their relative performance. + * + * After clusters are sorted by max_khz, this assigns LITTLE/BIG/PRIME types: + * - clusters[0] = LITTLE + * - clusters[N-1] = PRIME if single CPU or >10% faster than clusters[N-2], else BIG + * - Middle clusters = BIG + * + * @param clusters Array of clusters (must be sorted by max_khz ascending) + * @param count Number of clusters + */ +void PlayerCPU_classifyClusters(PlayerCPUCluster* clusters, int count); + +/** + * Picks 3 representative frequencies from a cluster's available frequencies. 
+ * + * Selects low (min), mid (middle), and high (max) frequencies for building + * the PerfState ladder. + * + * @param cluster Cluster with populated frequencies + * @param low_khz Output: low frequency (freqs[0]) + * @param mid_khz Output: mid frequency (freqs[count/2]) + * @param high_khz Output: high frequency (freqs[count-1]) + */ +void PlayerCPU_pickRepresentativeFreqs(const PlayerCPUCluster* cluster, int* low_khz, int* mid_khz, + int* high_khz); + #endif // __PLAYER_CPU_H__ From f72126ca350ce4fb815914d3a1e32c2fb58118ea Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Mon, 5 Jan 2026 14:31:10 -0800 Subject: [PATCH 02/11] Add unified CPU scaling API and fix debug HUD for topology mode. Introduces two helper functions that abstract over the three CPU scaling modes (topology/granular/fallback): - PlayerCPU_getPerformancePercent(): returns 0-100% normalized level - PlayerCPU_getModeName(): returns mode string for logging/debugging Updates the debug HUD to properly display topology mode state. Previously it fell through to fallback mode display which showed incorrect info. New format: "T3/5 60% u:75% b:80%" showing state index, max state, performance %, utilization %, and buffer fill %. 
--- tests/unit/all/player/test_player_cpu.c | 109 ++++++++++++++++++++++++ workspace/all/player/player.c | 22 ++++- workspace/all/player/player_cpu.c | 46 ++++++++++ workspace/all/player/player_cpu.h | 21 +++++ 4 files changed, 195 insertions(+), 3 deletions(-) diff --git a/tests/unit/all/player/test_player_cpu.c b/tests/unit/all/player/test_player_cpu.c index 749b3123..d7a0bf1e 100644 --- a/tests/unit/all/player/test_player_cpu.c +++ b/tests/unit/all/player/test_player_cpu.c @@ -58,6 +58,9 @@ static void reset_stubs(void) { static PlayerCPUState state; static PlayerCPUConfig config; +// Forward declaration for helper function (defined later with topology tests) +static void setup_dual_cluster_topology(PlayerCPUState* s); + /////////////////////////////// // Test Setup/Teardown /////////////////////////////// @@ -353,6 +356,98 @@ void test_getPresetPercentage_performance(void) { TEST_ASSERT_EQUAL(100, PlayerCPU_getPresetPercentage(PLAYER_CPU_LEVEL_PERFORMANCE)); } +/////////////////////////////// +// Unified Performance Level Tests +/////////////////////////////// + +void test_getPerformancePercent_topology_mode(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + + // At state 0 of 5 (0%) + state.current_state = 0; + TEST_ASSERT_EQUAL(0, PlayerCPU_getPerformancePercent(&state)); + + // At state 3 of 5 (60%) + state.current_state = 3; + TEST_ASSERT_EQUAL(60, PlayerCPU_getPerformancePercent(&state)); + + // At state 5 of 5 (100%) + state.current_state = 5; + TEST_ASSERT_EQUAL(100, PlayerCPU_getPerformancePercent(&state)); +} + +void test_getPerformancePercent_granular_mode(void) { + int raw[] = {600000, 800000, 1000000, 1200000, 1400000}; + PlayerCPU_detectFrequencies(&state, &config, raw, 5); + + // At index 0 of 4 (0%) + state.current_index = 0; + TEST_ASSERT_EQUAL(0, PlayerCPU_getPerformancePercent(&state)); + + // At index 2 of 4 (50%) + state.current_index = 2; + TEST_ASSERT_EQUAL(50, 
PlayerCPU_getPerformancePercent(&state)); + + // At index 4 of 4 (100%) + state.current_index = 4; + TEST_ASSERT_EQUAL(100, PlayerCPU_getPerformancePercent(&state)); +} + +void test_getPerformancePercent_fallback_mode(void) { + state.use_topology = 0; + state.use_granular = 0; + state.scaling_disabled = 0; + + state.current_level = 0; + TEST_ASSERT_EQUAL(0, PlayerCPU_getPerformancePercent(&state)); + + state.current_level = 1; + TEST_ASSERT_EQUAL(50, PlayerCPU_getPerformancePercent(&state)); + + state.current_level = 2; + TEST_ASSERT_EQUAL(100, PlayerCPU_getPerformancePercent(&state)); +} + +void test_getPerformancePercent_disabled_returns_negative(void) { + state.scaling_disabled = 1; + state.use_topology = 0; + TEST_ASSERT_EQUAL(-1, PlayerCPU_getPerformancePercent(&state)); +} + +void test_getPerformancePercent_null_returns_negative(void) { + TEST_ASSERT_EQUAL(-1, PlayerCPU_getPerformancePercent(NULL)); +} + +void test_getModeName_topology(void) { + setup_dual_cluster_topology(&state); + PlayerCPU_buildPerfStates(&state, &config); + TEST_ASSERT_EQUAL_STRING("topology", PlayerCPU_getModeName(&state)); +} + +void test_getModeName_granular(void) { + int raw[] = {600000, 800000, 1000000}; + PlayerCPU_detectFrequencies(&state, &config, raw, 3); + TEST_ASSERT_EQUAL_STRING("granular", PlayerCPU_getModeName(&state)); +} + +void test_getModeName_fallback(void) { + state.use_topology = 0; + state.use_granular = 0; + state.scaling_disabled = 0; + TEST_ASSERT_EQUAL_STRING("fallback", PlayerCPU_getModeName(&state)); +} + +void test_getModeName_disabled(void) { + state.scaling_disabled = 1; + state.use_topology = 0; + TEST_ASSERT_EQUAL_STRING("disabled", PlayerCPU_getModeName(&state)); +} + +void test_getModeName_null(void) { + TEST_ASSERT_EQUAL_STRING("disabled", PlayerCPU_getModeName(NULL)); +} + /////////////////////////////// // update Tests - Skip Conditions /////////////////////////////// @@ -1018,6 +1113,20 @@ int main(void) { RUN_TEST(test_getPresetPercentage_normal); 
RUN_TEST(test_getPresetPercentage_performance); + // getPerformancePercent (unified) + RUN_TEST(test_getPerformancePercent_topology_mode); + RUN_TEST(test_getPerformancePercent_granular_mode); + RUN_TEST(test_getPerformancePercent_fallback_mode); + RUN_TEST(test_getPerformancePercent_disabled_returns_negative); + RUN_TEST(test_getPerformancePercent_null_returns_negative); + + // getModeName (unified) + RUN_TEST(test_getModeName_topology); + RUN_TEST(test_getModeName_granular); + RUN_TEST(test_getModeName_fallback); + RUN_TEST(test_getModeName_disabled); + RUN_TEST(test_getModeName_null); + // update - skip conditions RUN_TEST(test_update_skips_during_fast_forward); RUN_TEST(test_update_skips_during_menu); diff --git a/workspace/all/player/player.c b/workspace/all/player/player.c index 99bfcb65..34c33e68 100644 --- a/workspace/all/player/player.c +++ b/workspace/all/player/player.c @@ -3531,6 +3531,15 @@ static const char* bitmap_font[] = { " 1" "1 1" " 111 ", + ['T'] = "11111" + " 1 " + " 1 " + " 1 " + " 1 " + " 1 " + " 1 " + " 1 " + " 1 ", ['A'] = " 1 " " 1 1 " "1 1" @@ -4066,9 +4075,10 @@ static void video_refresh_callback_main(const void* data, unsigned width, unsign // Bottom-left: CPU info + buffer fill (always), plus utilization when auto if (overclock == 3) { - // Auto CPU mode: show frequency/level, utilization, and buffer fill + // Auto CPU mode: show mode-specific info, utilization, and buffer fill pthread_mutex_lock(&auto_cpu_mutex); int current_idx = auto_cpu_state.current_index; + int current_state = auto_cpu_state.current_state; int level = auto_cpu_state.current_level; pthread_mutex_unlock(&auto_cpu_mutex); @@ -4084,8 +4094,14 @@ static void video_refresh_callback_main(const void* data, unsigned width, unsign util = 200; } - if (auto_cpu_state.use_granular && current_idx >= 0 && - current_idx < auto_cpu_state.freq_count) { + if (auto_cpu_state.use_topology) { + // Topology mode: show state/max and performance % + int perf_pct = 
PlayerCPU_getPerformancePercent(&auto_cpu_state); + int max_state = auto_cpu_state.topology.state_count - 1; + (void)snprintf(debug_text, sizeof(debug_text), "T%i/%i %i%% u:%u%% b:%u%%", + current_state, max_state, perf_pct, util, fill_display); + } else if (auto_cpu_state.use_granular && current_idx >= 0 && + current_idx < auto_cpu_state.freq_count) { // Granular mode: show frequency in MHz (e.g., "1200" for 1200 MHz) int freq_mhz = auto_cpu_state.frequencies[current_idx] / 1000; (void)snprintf(debug_text, sizeof(debug_text), "%i u:%u%% b:%u%%", freq_mhz, util, diff --git a/workspace/all/player/player_cpu.c b/workspace/all/player/player_cpu.c index cdd72e9f..c3902de3 100644 --- a/workspace/all/player/player_cpu.c +++ b/workspace/all/player/player_cpu.c @@ -182,6 +182,52 @@ int PlayerCPU_getPresetPercentage(PlayerCPULevel level) { } } +int PlayerCPU_getPerformancePercent(const PlayerCPUState* state) { + if (!state) + return -1; + + if (state->scaling_disabled && !state->use_topology) { + return -1; + } + + if (state->use_topology) { + // Topology mode: normalize state index to 0-100 + int max_state = state->topology.state_count - 1; + if (max_state <= 0) + return 100; + int current = state->current_state; + if (current < 0) + current = state->target_state; + return (current * 100) / max_state; + } else if (state->use_granular) { + // Granular mode: normalize frequency index to 0-100 + int max_idx = state->freq_count - 1; + if (max_idx <= 0) + return 100; + return (state->current_index * 100) / max_idx; + } else { + // Fallback mode: 0=0%, 1=50%, 2=100% + return state->current_level * 50; + } +} + +const char* PlayerCPU_getModeName(const PlayerCPUState* state) { + if (!state) + return "disabled"; + + if (state->scaling_disabled && !state->use_topology) { + return "disabled"; + } + + if (state->use_topology) { + return "topology"; + } else if (state->use_granular) { + return "granular"; + } else { + return "fallback"; + } +} + PlayerCPUDecision 
PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* config, bool fast_forward, bool show_menu, unsigned current_underruns, PlayerCPUResult* result) { diff --git a/workspace/all/player/player_cpu.h b/workspace/all/player/player_cpu.h index 50d2065e..c2bcf137 100644 --- a/workspace/all/player/player_cpu.h +++ b/workspace/all/player/player_cpu.h @@ -323,6 +323,27 @@ int PlayerCPU_predictFrequency(int current_freq, int current_util, int target_ut */ int PlayerCPU_getPresetPercentage(PlayerCPULevel level); +/** + * Returns the current performance level as a normalized percentage (0-100). + * + * Provides a unified view of CPU scaling state regardless of mode: + * - Topology mode: (current_state / (state_count - 1)) * 100 + * - Granular mode: (current_index / (freq_count - 1)) * 100 + * - Fallback mode: current_level * 50 (0=0%, 1=50%, 2=100%) + * + * @param state CPU state to query + * @return Performance level 0-100, or -1 if scaling disabled + */ +int PlayerCPU_getPerformancePercent(const PlayerCPUState* state); + +/** + * Returns a string describing the current CPU scaling mode. + * + * @param state CPU state to query + * @return "topology", "granular", "fallback", or "disabled" + */ +const char* PlayerCPU_getModeName(const PlayerCPUState* state); + /** * Calculates the 90th percentile of frame times. * From b67543ce8b6b52a2ad5d931ed4377302e541f5c9 Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Mon, 5 Jan 2026 15:25:00 -0800 Subject: [PATCH 03/11] Update CPU scaling docs. --- docs/auto-cpu-scaling.md | 122 ++++++++++++++++++++++++++++++---- workspace/all/player/player.c | 13 +++- 2 files changed, 120 insertions(+), 15 deletions(-) diff --git a/docs/auto-cpu-scaling.md b/docs/auto-cpu-scaling.md index e419db08..38013392 100644 --- a/docs/auto-cpu-scaling.md +++ b/docs/auto-cpu-scaling.md @@ -8,7 +8,7 @@ Dynamic CPU frequency scaling for libretro emulation based on frame timing. 
Add an "Auto" CPU speed option that dynamically scales between existing power levels (POWERSAVE/NORMAL/PERFORMANCE) based on real-time emulation performance, saving battery when possible and boosting when needed. -**Status:** ✅ Granular frequency scaling implemented. Auto mode now uses all available CPU frequencies detected from the system. +**Status:** ✅ Topology-aware scaling implemented. Supports multi-cluster ARM SoCs (big.LITTLE, tri-cluster) with governor-based PerfState ladder, plus granular frequency scaling for single-cluster devices. ## Design Approach @@ -255,11 +255,17 @@ Auto CPU scaling uses a **two-thread design** to keep the main emulation loop re ### Background Thread (CPU Applier) - Polls every 50ms checking for target changes -- When target ≠ current, applies the change -- Calls `PWR_setCPUSpeed()` which may fork `system("overclock.elf")` -- Updates current level after successful application +- When target ≠ current, applies the change: + - **Topology mode**: Calls `PlayerCPU_applyPerfState()` to set governors on all clusters, queues affinity change for main thread + - **Granular mode**: Calls `PLAT_setCPUFrequency()` to set frequency via sysfs + - **Fallback mode**: Calls `PWR_setCPUSpeed()` which may fork `system("overclock.elf")` +- Updates current level/state after successful application - Stops cleanly when exiting auto mode +**Topology mode thread safety:** + +CPU affinity must be set from the emulation thread (not background thread) because `sched_setaffinity(0, ...)` affects the calling thread. The background thread sets `pending_affinity` under mutex, and the main thread applies it on next frame. 
+ ### Thread Safety ```c @@ -510,9 +516,11 @@ The discovered frequency steps and performance data come from a custom CPU bench - [Dynamic Rate Control for Retro Game Emulators](https://docs.libretro.com/guides/ratecontrol.pdf) - Hans-Kristian Arntzen, 2012 - [docs/audio-rate-control.md](audio-rate-control.md) - Our rate control implementation -- [workspace/all/common/api.c](../workspace/all/common/api.c) - `SND_calculateRateAdjust()`, `PWR_getAvailableCPUFrequencies_sysfs()`, `PWR_setCPUFrequency_sysfs()` +- [workspace/all/common/api.c](../workspace/all/common/api.c) - `SND_calculateRateAdjust()`, `PWR_getAvailableCPUFrequencies_sysfs()`, `PWR_setCPUFrequency_sysfs()`, `PWR_detectCPUTopology()`, `PWR_setCPUGovernor()`, `PWR_setThreadAffinity()` - [workspace/all/common/api.h](../workspace/all/common/api.h) - `PLAT_getAvailableCPUFrequencies()`, `PLAT_setCPUFrequency()` API - [workspace/all/player/player.c](../workspace/all/player/player.c) - Main emulation loop, `updateAutoCPU()`, `auto_cpu_detectFrequencies()` +- [workspace/all/player/player_cpu.c](../workspace/all/player/player_cpu.c) - CPU scaling algorithm, `PlayerCPU_buildPerfStates()`, `PlayerCPU_applyPerfState()`, `PlayerCPU_getPerformancePercent()` +- [workspace/all/player/player_cpu.h](../workspace/all/player/player_cpu.h) - CPU scaling types and API - [workspace/all/paks/Benchmark/](../workspace/all/paks/Benchmark/) - CPU frequency benchmark tool ## Tuning Status @@ -528,7 +536,8 @@ The discovered frequency steps and performance data come from a custom CPU bench | Utilization high | 85% | Frame time >85% of budget = boost | | Utilization low | 55% | Frame time <55% of budget = reduce | | Target util | 70% | Target utilization after frequency change | -| Max step (reduce/panic) | 2 | Max frequency steps down (boost unlimited) | +| Max step down | 1 | Max frequency steps when reducing | +| Panic step up | 2 | Frequency steps on underrun emergency | | Min frequency | 400 MHz | Floor for frequency scaling | | 
Boost windows | 2 (~1s) | Fast response to performance issues | | Reduce windows | 4 (~2s) | Conservative to prevent oscillation | @@ -574,11 +583,14 @@ The debug overlay uses all 4 corners to show performance and scaling info: - Manual mode: `L1 b:48%` (level + buffer fill) - Auto mode (fallback): `L1 u:52% b:48%` (level + utilization + buffer fill) - Auto mode (granular): `1200 u:52% b:48%` (frequency in MHz + utilization + buffer fill) +- Auto mode (topology): `T3/5 60% u:52% b:48%` (state/max + perf% + utilization + buffer fill) **Key metrics:** - `L0/L1/L2` = CPU level (POWERSAVE/NORMAL/PERFORMANCE) - used in manual and fallback modes - `1200` = CPU frequency in MHz (e.g., 1200 = 1.2 GHz) - used in granular auto mode +- `T3/5` = PerfState index / max (e.g., state 3 of 5) - used in topology auto mode +- `60%` = Normalized performance level (0-100%) - topology mode only - `u:XX%` = Frame timing utilization (90th percentile, % of frame budget) - `b:XX%` = Audio buffer fill (should converge to ~50%) @@ -612,14 +624,81 @@ After implementing the unified RateMeter system with dual clock correction (disp | Rate control | Audio/video sync | Per-frame (~16ms) | Buffer fill | Resampler ratio adjustment | | CPU scaling | Performance headroom | Per-second (~1-2s) | Frame timing | CPU frequency | -### Granular Frequency Scaling (Implemented) +### Multi-Cluster Topology Mode (Implemented) + +Modern ARM SoCs use heterogeneous CPU clusters (big.LITTLE, tri-cluster) where different cores have different performance/power characteristics. Auto mode now detects and leverages this topology. + +**How it works:** + +1. **Detection**: Enumerates `/sys/devices/system/cpu/cpufreq/policy{0,1,...}` at startup +2. **Classification**: Sorts clusters by max frequency, assigns LITTLE/BIG/PRIME types +3. **PerfState Ladder**: Builds a progression of performance states using governors +4. 
**Application**: Sets governors and CPU affinity to guide the emulation thread + +**Governor-based approach (not frequency bounds):** + +Instead of manipulating `scaling_min_freq`/`scaling_max_freq`, we use governors: + +| Governor | Behavior | Use Case | +| ------------- | ------------------------------------------- | ----------------------- | +| `powersave` | Runs at minimum frequency | Inactive clusters, idle | +| `schedutil` | Kernel dynamically scales based on load | Balanced workloads | +| `performance` | Runs at maximum frequency | Demanding workloads | + +**Why governors instead of frequency bounds:** + +- Works WITH the kernel's frequency scaling intelligence +- `schedutil` finds optimal frequency automatically +- Inactive clusters truly idle at `powersave` (power savings) +- No fighting between our algorithm and the kernel + +**PerfState Ladder Structure:** + +``` +Dual-cluster (LITTLE + BIG): + State 0: LITTLE powersave (active), BIG powersave ← lightest + State 1: LITTLE schedutil (active), BIG powersave + State 2: LITTLE performance (active), BIG powersave + State 3: BIG powersave (active), LITTLE powersave + State 4: BIG schedutil (active), LITTLE powersave + State 5: BIG performance (active), LITTLE powersave ← heaviest + +Tri-cluster adds 3 more states for PRIME (6-8) +``` + +**CPU Affinity:** -Auto mode now uses **all available CPU frequencies** detected from the system via `scaling_available_frequencies` sysfs interface. 
+Each PerfState sets CPU affinity to guide the emulation thread to the active cluster: + +```c +// State 0-2: Run on LITTLE cores (mask 0x0F for CPUs 0-3) +// State 3-5: Run on BIG cores (mask 0xF0 for CPUs 4-7) +sched_setaffinity(0, sizeof(set), &set); +``` + +**Cluster Classification:** + +- `LITTLE`: First cluster (lowest max frequency) +- `BIG`: Middle clusters +- `PRIME`: Last cluster if single-core OR >10% faster than previous + +**Example SoC configurations:** + +| SoC | Clusters | PerfStates | +| ------------- | ------------------------- | ---------- | +| Allwinner A53 | 4×A53 (single) | 0 (granular mode) | +| Allwinner H700| 4×A53 (single) | 0 (granular mode) | +| Allwinner A523| 4×A55 + 4×A76 | 6 | +| SD865 | 4×A55 + 3×A77 + 1×A77 | 9 | + +### Granular Frequency Scaling (Single-Cluster Fallback) + +For single-cluster devices, auto mode uses **all available CPU frequencies** detected from the system via `scaling_available_frequencies` sysfs interface. **Key features:** - Runtime frequency detection via `PLAT_getAvailableCPUFrequencies()` -- Direct frequency setting via `PLAT_setCPUFrequency()` +- Direct frequency setting via `PLAT_setCPUFrequency()` with `userspace` governor - Linear performance scaling for intelligent frequency selection - Minimum frequency floor (400 MHz) filters out unusably slow frequencies - Automatic fallback to 3-level mode if detection fails @@ -629,14 +708,14 @@ Auto mode now uses **all available CPU frequencies** detected from the system vi - Performance scales linearly with frequency: `new_util = current_util × (current_freq / new_freq)` - Target 70% utilization after frequency changes - **Boost**: Uses linear prediction, no step limit (aggressive is safe) -- **Reduce**: Uses linear prediction, max 2 steps (conservative to avoid underruns) +- **Reduce**: Uses linear prediction, max 1 step (conservative to avoid underruns) - **Panic**: Boost by max 2 steps on underrun, 4s cooldown - **Startup**: Begin at max frequency during 5s 
grace period **Preset mapping for manual modes:** -- POWERSAVE: ~25% up from minimum frequency -- NORMAL: ~75% of max frequency +- POWERSAVE: ~55% of max frequency +- NORMAL: ~80% of max frequency - PERFORMANCE: max frequency **Example on miyoomini (6 frequencies detected: 400, 600, 800, 1000, 1100, 1600 kHz):** @@ -646,6 +725,25 @@ Old: POWERSAVE → NORMAL → PERFORMANCE (3 steps) New: 400 → 600 → 800 → 1000 → 1100 → 1600 (6 steps, granular) ``` +### Unified API + +Helper functions provide a consistent interface regardless of scaling mode: + +```c +// Get normalized performance level (0-100%) +int PlayerCPU_getPerformancePercent(const PlayerCPUState* state); +// - Topology: (current_state / max_state) * 100 +// - Granular: (current_index / max_index) * 100 +// - Fallback: level * 50 (0=0%, 1=50%, 2=100%) +// - Returns -1 if scaling disabled + +// Get mode name for logging/debugging +const char* PlayerCPU_getModeName(const PlayerCPUState* state); +// - Returns: "topology", "granular", "fallback", or "disabled" +``` + +These functions enable mode-agnostic debugging, logging, and potential future UI elements. + ### Frequency Band Analysis Comprehensive analysis of benchmark data from all platforms revealed optimization opportunities. 
diff --git a/workspace/all/player/player.c b/workspace/all/player/player.c index 34c33e68..c0465c66 100644 --- a/workspace/all/player/player.c +++ b/workspace/all/player/player.c @@ -3761,9 +3761,10 @@ static void renderHWDebugHUD(int src_w, int src_h, int screen_w, int screen_h) { // Bottom-left: CPU info + buffer fill if (overclock == 3) { - // Auto CPU mode: show frequency/level, utilization, and buffer fill + // Auto CPU mode: show mode-specific info, utilization, and buffer fill pthread_mutex_lock(&auto_cpu_mutex); int current_idx = auto_cpu_state.current_index; + int current_state = auto_cpu_state.current_state; int level = auto_cpu_state.current_level; pthread_mutex_unlock(&auto_cpu_mutex); @@ -3779,8 +3780,14 @@ static void renderHWDebugHUD(int src_w, int src_h, int screen_w, int screen_h) { util = 200; } - if (auto_cpu_state.use_granular && current_idx >= 0 && - current_idx < auto_cpu_state.freq_count) { + if (auto_cpu_state.use_topology) { + // Topology mode: show state/max and performance % + int perf_pct = PlayerCPU_getPerformancePercent(&auto_cpu_state); + int max_state = auto_cpu_state.topology.state_count - 1; + (void)snprintf(debug_text, sizeof(debug_text), "T%i/%i %i%% u:%u%% b:%u%%", + current_state, max_state, perf_pct, util, fill_display); + } else if (auto_cpu_state.use_granular && current_idx >= 0 && + current_idx < auto_cpu_state.freq_count) { // Granular mode: show frequency in MHz int freq_mhz = auto_cpu_state.frequencies[current_idx] / 1000; (void)snprintf(debug_text, sizeof(debug_text), "%i u:%u%% b:%u%%", freq_mhz, util, From ce868ebfb3831bd780d21b8b3955215f8e2ab9cb Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Mon, 5 Jan 2026 16:15:29 -0800 Subject: [PATCH 04/11] Add PWR_setLowPowerMode() for multi-cluster CPU power management. On multi-cluster ARM SoCs (tg5050, retroid), the existing PWR_setCPUSpeed() calls were no-ops, leaving CPUs at full power during menu browsing and in tools. 
PWR_setLowPowerMode() enumerates all cpufreq policies and sets each to "powersave" governor. On single-cluster devices, it uses PLAT_setCPUSpeed() instead. --- docs/auto-cpu-scaling.md | 58 +++++++-------- workspace/all/common/api.c | 81 ++++++++++++++++++--- workspace/all/common/api.h | 12 +++ workspace/all/launcher/launcher.c | 2 +- workspace/all/paks/Tools/Clock/src/clock.c | 2 +- workspace/all/paks/Tools/Input/src/minput.c | 2 +- workspace/all/player/player_cpu.h | 2 +- 7 files changed, 116 insertions(+), 43 deletions(-) diff --git a/docs/auto-cpu-scaling.md b/docs/auto-cpu-scaling.md index 38013392..4dd76e74 100644 --- a/docs/auto-cpu-scaling.md +++ b/docs/auto-cpu-scaling.md @@ -525,24 +525,24 @@ The discovered frequency steps and performance data come from a custom CPU bench ## Tuning Status -| Parameter | Current | Notes | -| ----------------------- | ------------------- | ------------------------------------------------- | -| Rate control d | 1.0% | Proportional gain - handles frame-to-frame jitter | -| Rate control ki | 0.00005 | Integral gain - learns persistent clock offset | -| Error smoothing α | 0.003 (~333 frames) | Separates P and I timescales | -| Integral clamp | ±2% | Max drift correction (handles hardware variance) | -| Audio buffer | 5 frames (~83ms) | Effective latency ~42ms at 50% fill | -| Window size | 30 frames (~500ms) | Filters noise, responsive to changes | -| Utilization high | 85% | Frame time >85% of budget = boost | -| Utilization low | 55% | Frame time <55% of budget = reduce | -| Target util | 70% | Target utilization after frequency change | -| Max step down | 1 | Max frequency steps when reducing | -| Panic step up | 2 | Frequency steps on underrun emergency | -| Min frequency | 400 MHz | Floor for frequency scaling | -| Boost windows | 2 (~1s) | Fast response to performance issues | -| Reduce windows | 4 (~2s) | Conservative to prevent oscillation | -| Startup grace | 300 frames (~5s) | Starts at max freq, then scales | -| 
Percentile | 90th | Ignores outliers (loading screens) | +| Parameter | Current | Notes | +| ----------------- | ------------------- | ------------------------------------------------- | +| Rate control d | 1.0% | Proportional gain - handles frame-to-frame jitter | +| Rate control ki | 0.00005 | Integral gain - learns persistent clock offset | +| Error smoothing α | 0.003 (~333 frames) | Separates P and I timescales | +| Integral clamp | ±2% | Max drift correction (handles hardware variance) | +| Audio buffer | 5 frames (~83ms) | Effective latency ~42ms at 50% fill | +| Window size | 30 frames (~500ms) | Filters noise, responsive to changes | +| Utilization high | 85% | Frame time >85% of budget = boost | +| Utilization low | 55% | Frame time <55% of budget = reduce | +| Target util | 70% | Target utilization after frequency change | +| Max step down | 1 | Max frequency steps when reducing | +| Panic step up | 2 | Frequency steps on underrun emergency | +| Min frequency | 400 MHz | Floor for frequency scaling | +| Boost windows | 2 (~1s) | Fast response to performance issues | +| Reduce windows | 4 (~2s) | Conservative to prevent oscillation | +| Startup grace | 300 frames (~5s) | Starts at max freq, then scales | +| Percentile | 90th | Ignores outliers (loading screens) | ### Display Rate Handling @@ -639,11 +639,11 @@ Modern ARM SoCs use heterogeneous CPU clusters (big.LITTLE, tri-cluster) where d Instead of manipulating `scaling_min_freq`/`scaling_max_freq`, we use governors: -| Governor | Behavior | Use Case | -| ------------- | ------------------------------------------- | ----------------------- | -| `powersave` | Runs at minimum frequency | Inactive clusters, idle | -| `schedutil` | Kernel dynamically scales based on load | Balanced workloads | -| `performance` | Runs at maximum frequency | Demanding workloads | +| Governor | Behavior | Use Case | +| ------------- | --------------------------------------- | ----------------------- | +| `powersave` | Runs at 
minimum frequency | Inactive clusters, idle | +| `schedutil` | Kernel dynamically scales based on load | Balanced workloads | +| `performance` | Runs at maximum frequency | Demanding workloads | **Why governors instead of frequency bounds:** @@ -684,12 +684,12 @@ sched_setaffinity(0, sizeof(set), &set); **Example SoC configurations:** -| SoC | Clusters | PerfStates | -| ------------- | ------------------------- | ---------- | -| Allwinner A53 | 4×A53 (single) | 0 (granular mode) | -| Allwinner H700| 4×A53 (single) | 0 (granular mode) | -| Allwinner A523| 4×A55 + 4×A76 | 6 | -| SD865 | 4×A55 + 3×A77 + 1×A77 | 9 | +| SoC | Clusters | PerfStates | +| -------------- | --------------------- | ----------------- | +| Allwinner A53 | 4×A53 (single) | 0 (granular mode) | +| Allwinner H700 | 4×A53 (single) | 0 (granular mode) | +| Allwinner A523 | 4×A55 + 4×A76 | 6 | +| SD865 | 4×A55 + 3×A77 + 1×A77 | 9 | ### Granular Frequency Scaling (Single-Cluster Fallback) diff --git a/workspace/all/common/api.c b/workspace/all/common/api.c index a443dd03..95eb026e 100644 --- a/workspace/all/common/api.c +++ b/workspace/all/common/api.c @@ -3350,6 +3350,38 @@ static int compare_cluster_by_max_khz(const void* a, const void* b) { return ca->max_khz - cb->max_khz; } +/** + * Classifies clusters based on their relative performance. + * Static helper for PWR_detectCPUTopology(). 
+ * (Public version available in player_cpu.c for testing) + */ +static void classify_clusters(PlayerCPUCluster* clusters, int count) { + if (!clusters || count <= 0) + return; + + for (int i = 0; i < count; i++) { + PlayerCPUCluster* cluster = &clusters[i]; + + if (i == 0) { + cluster->type = PLAYER_CPU_CLUSTER_LITTLE; + } else if (i == count - 1) { + int prev_max = clusters[i - 1].max_khz; + int freq_gap_percent = 0; + if (prev_max > 0) { + freq_gap_percent = ((cluster->max_khz - prev_max) * 100) / prev_max; + } + + if (cluster->cpu_count == 1 || freq_gap_percent > 10) { + cluster->type = PLAYER_CPU_CLUSTER_PRIME; + } else { + cluster->type = PLAYER_CPU_CLUSTER_BIG; + } + } else { + cluster->type = PLAYER_CPU_CLUSTER_BIG; + } + } +} + /** * Reads an integer from a sysfs file. * @@ -3463,13 +3495,12 @@ static int parse_related_cpus(const char* path, int* cpu_mask, int* cpu_count) { } int PWR_detectCPUTopology(struct PlayerCPUTopology* topology) { - PlayerCPUTopology* topo = topology; // Use typedef for internal code - if (!topo) { + if (!topology) { return 0; } // Initialize topology - memset(topo, 0, sizeof(*topo)); + memset(topology, 0, sizeof(*topology)); // Enumerate policies (0, 1, 2, ... 
up to 15) // Policies may not be contiguous (e.g., policy0, policy4, policy7) @@ -3488,7 +3519,7 @@ int PWR_detectCPUTopology(struct PlayerCPUTopology* topology) { continue; // Policy doesn't exist } - PlayerCPUCluster* cluster = &topo->clusters[cluster_count]; + PlayerCPUCluster* cluster = &topology->clusters[cluster_count]; cluster->policy_id = policy_id; cluster->max_khz = max_khz; @@ -3533,23 +3564,24 @@ int PWR_detectCPUTopology(struct PlayerCPUTopology* topology) { // Sort clusters by max_khz ascending (LITTLE → BIG → PRIME) if (cluster_count > 1) { - qsort(topo->clusters, cluster_count, sizeof(PlayerCPUCluster), compare_cluster_by_max_khz); + qsort(topology->clusters, cluster_count, sizeof(PlayerCPUCluster), + compare_cluster_by_max_khz); } - // Classify clusters using shared logic from player_cpu.c - PlayerCPU_classifyClusters(topo->clusters, cluster_count); + // Classify clusters (LITTLE/BIG/PRIME) + classify_clusters(topology->clusters, cluster_count); // Log classification results const char* type_names[] = {"LITTLE", "BIG", "PRIME"}; for (int i = 0; i < cluster_count; i++) { - PlayerCPUCluster* cluster = &topo->clusters[i]; + PlayerCPUCluster* cluster = &topology->clusters[i]; LOG_info("PWR_detectCPUTopology: cluster %d (policy%d): %s, %d CPUs, %d-%d kHz\n", i, cluster->policy_id, type_names[cluster->type], cluster->cpu_count, cluster->min_khz, cluster->max_khz); } - topo->cluster_count = cluster_count; - topo->topology_detected = 1; + topology->cluster_count = cluster_count; + topology->topology_detected = 1; LOG_info("PWR_detectCPUTopology: detected %d cluster(s), multi-cluster=%s\n", cluster_count, (cluster_count > 1) ? 
"yes" : "no"); @@ -3620,6 +3652,35 @@ int PWR_setCPUGovernor(int policy_id, const char* governor) { return 0; } +int PWR_setLowPowerMode(void) { + int clusters_configured = 0; + + // Enumerate all cpufreq policies and set to powersave + for (int policy_id = 0; policy_id < 16; policy_id++) { + char path[256]; + (void)snprintf(path, sizeof(path), "%s/policy%d/scaling_governor", CPUFREQ_BASE_PATH, + policy_id); + + // Check if policy exists + if (access(path, F_OK) != 0) + continue; + + // Set to powersave governor + if (PWR_setCPUGovernor(policy_id, "powersave") == 0) { + clusters_configured++; + } + } + + if (clusters_configured > 0) { + LOG_info("PWR_setLowPowerMode: set %d cluster(s) to powersave\n", clusters_configured); + } else { + // Single-cluster device without cpufreq policies + PLAT_setCPUSpeed(CPU_SPEED_POWERSAVE); + } + + return clusters_configured; +} + #if defined(__linux__) #include diff --git a/workspace/all/common/api.h b/workspace/all/common/api.h index 702c1afc..5c2f3072 100644 --- a/workspace/all/common/api.h +++ b/workspace/all/common/api.h @@ -1576,6 +1576,18 @@ int PWR_setCPUGovernor(int policy_id, const char* governor); */ int PWR_setThreadAffinity(int cpu_mask); +/** + * Sets all CPU clusters to low-power mode. + * + * On multi-cluster devices: sets all cpufreq policies to "powersave" governor. + * On single-cluster devices: uses PLAT_setCPUSpeed(CPU_SPEED_POWERSAVE). + * + * Use this for non-gaming contexts (menus, tools) to save power and reduce heat. + * + * @return Number of clusters configured (0 for single-cluster devices) + */ +int PWR_setLowPowerMode(void); + /** * Platform-specific rumble/vibration control. 
* diff --git a/workspace/all/launcher/launcher.c b/workspace/all/launcher/launcher.c index fcfe1a2f..e7b9e814 100644 --- a/workspace/all/launcher/launcher.c +++ b/workspace/all/launcher/launcher.c @@ -1772,7 +1772,7 @@ int main(int argc, char* argv[]) { Menu_init(); // Reduce CPU speed for menu browsing (saves power and heat) - PWR_setCPUSpeed(CPU_SPEED_POWERSAVE); + PWR_setLowPowerMode(); PAD_reset(); int dirty = 1; // Set to 1 when screen needs redraw diff --git a/workspace/all/paks/Tools/Clock/src/clock.c b/workspace/all/paks/Tools/Clock/src/clock.c index 8d200f84..043419ae 100644 --- a/workspace/all/paks/Tools/Clock/src/clock.c +++ b/workspace/all/paks/Tools/Clock/src/clock.c @@ -51,7 +51,7 @@ enum { */ int main(int argc, char* argv[]) { Paths_init(); - PWR_setCPUSpeed(CPU_SPEED_IDLE); + PWR_setLowPowerMode(); SDL_Surface* screen = GFX_init(MODE_MAIN); if (screen == NULL) { diff --git a/workspace/all/paks/Tools/Input/src/minput.c b/workspace/all/paks/Tools/Input/src/minput.c index 78dd7236..4deab503 100644 --- a/workspace/all/paks/Tools/Input/src/minput.c +++ b/workspace/all/paks/Tools/Input/src/minput.c @@ -106,7 +106,7 @@ static void blitButton(char* label, SDL_Surface* dst, int pressed, int x, int y, * @return EXIT_SUCCESS on normal exit */ int main(int argc, char* argv[]) { - PWR_setCPUSpeed(CPU_SPEED_IDLE); + PWR_setLowPowerMode(); SDL_Surface* screen = GFX_init(MODE_MAIN); if (screen == NULL) { diff --git a/workspace/all/player/player_cpu.h b/workspace/all/player/player_cpu.h index c2bcf137..8d7d113c 100644 --- a/workspace/all/player/player_cpu.h +++ b/workspace/all/player/player_cpu.h @@ -131,7 +131,7 @@ typedef struct { * Complete CPU topology information detected from sysfs. * Populated by PWR_detectCPUTopology() at initialization. 
*/ -typedef struct { +typedef struct PlayerCPUTopology { PlayerCPUCluster clusters[PLAYER_CPU_MAX_CLUSTERS]; // Detected clusters (sorted by max_khz) int cluster_count; // Number of clusters detected PlayerCPUPerfState states[PLAYER_CPU_MAX_PERF_STATES]; // Performance state ladder From 5186b6fd09ef0e478b5e6d7476eb76963acf0d97 Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Mon, 5 Jan 2026 16:47:56 -0800 Subject: [PATCH 05/11] Refactor player_cpu to common/cpu with CPU_ prefix. Moved player_cpu.c/h to common/cpu.c/h and renamed all PlayerCPU* identifiers to CPU* since the code is now shared infrastructure used by both launcher (for topology detection) and player (for autoscaling). --- CLAUDE.md | 12 +- Makefile.qa | 8 +- docs/auto-cpu-scaling.md | 10 +- docs/changes.md | 2 +- docs/minarch-refactoring.md | 12 +- scripts/run-coverage.sh | 2 +- tests/README.md | 2 +- .../test_player_cpu.c => common/test_cpu.c} | 382 +++++++++--------- tests/unit/all/player/test_player_utils.c | 2 +- workspace/all/common/api.c | 58 +-- workspace/all/common/api.h | 6 +- workspace/all/common/build.mk | 3 +- .../all/{player/player_cpu.c => common/cpu.c} | 225 +++++------ .../all/{player/player_cpu.h => common/cpu.h} | 166 ++++---- workspace/all/launcher/Makefile | 3 +- workspace/all/player/Makefile | 2 +- workspace/all/player/player.c | 50 ++- workspace/all/player/player_context.h | 6 +- .../all/player/player_loop_audioclock.inc | 2 +- .../all/player/player_loop_audioclock.inc.bak | 117 ------ workspace/all/player/player_loop_vsync.inc | 2 +- .../all/player/player_loop_vsync.inc.bak | 133 ------ workspace/all/player/player_utils.c | 2 +- workspace/all/player/player_utils.h | 2 +- 24 files changed, 456 insertions(+), 753 deletions(-) rename tests/unit/all/{player/test_player_cpu.c => common/test_cpu.c} (72%) rename workspace/all/{player/player_cpu.c => common/cpu.c} (72%) rename workspace/all/{player/player_cpu.h => common/cpu.h} (68%) delete mode 100644 
workspace/all/player/player_loop_audioclock.inc.bak delete mode 100644 workspace/all/player/player_loop_vsync.inc.bak diff --git a/CLAUDE.md b/CLAUDE.md index b9613eb1..4ad50282 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -430,20 +430,20 @@ All extracted Player modules follow a standardized naming pattern where the modu | player_core | `PlayerCore_` | `PlayerCore_buildGameInfo()`, `PlayerCore_processAVInfo()` | | player_menu | `PlayerMenu_` | `PlayerMenu_init()`, `PlayerMenuNav_navigate()` | | player_env | `PlayerEnv_` | `PlayerEnv_setRotation()`, `PlayerEnv_handleGeometry()` | -| player_cpu | `PlayerCPU_` | `PlayerCPU_update()`, `PlayerCPU_detectFrequencies()` | +| cpu | `CPU_` | `CPU_update()`, `CPU_detectFrequencies()` | | player_game | `PlayerGame_` | `PlayerGame_parseExtensions()`, `PlayerGame_detectM3uPath()` | | player_scaler | `PlayerScaler_` | `PlayerScaler_calculate()` | **Type naming:** Types follow the same pattern with `Player[Module]TypeName`: -- `PlayerCPUState`, `PlayerCPUConfig`, `PlayerCPUDecision` +- `CPUState`, `CPUConfig`, `CPUDecision` - `PlayerOption`, `PlayerOptionList` - `PlayerMemoryResult`, `PlayerStateResult` **Constants:** Module-specific constants use `PLAYER_MODULE_` prefix: -- `PLAYER_CPU_MAX_FREQUENCIES` -- `PLAYER_CPU_DEFAULT_WINDOW_FRAMES` +- `CPU_MAX_FREQUENCIES` +- `CPU_DEFAULT_WINDOW_FRAMES` - `PLAYER_MEM_OK`, `PLAYER_STATE_OK` This standardization makes it immediately clear which module owns each function and prevents naming collisions as the codebase grows. @@ -517,7 +517,7 @@ See `.clang-format` for complete style definition. 
| Player core AV processing | `workspace/all/player/player_core.c` | | Player memory persistence | `workspace/all/player/player_memory.c` | | Player save states | `workspace/all/player/player_state.c` | -| Player CPU scaling | `workspace/all/player/player_cpu.c` | +| CPU scaling | `workspace/all/common/cpu.c` | | Player input handling | `workspace/all/player/player_input.c` | | Player save paths | `workspace/all/player/player_paths.c` | | Launcher Entry type | `workspace/all/launcher/launcher_entry.c` | @@ -569,7 +569,7 @@ To enable comprehensive testing, complex logic has been extracted from large fil | player_scaler.c | 26 | player.c | Video scaling geometry calculations | | player_core.c | 23 | player.c | Core AV info processing, aspect ratio calculation | | effect_system.c | 43 | platform files | Visual effect state management | -| player_cpu.c | 42 | player.c | Auto CPU scaling algorithm | +| cpu.c | 42 | player.c | CPU topology + auto scaling algorithm | | player_utils.c | 41 | player.c | Core name extraction, string utilities | | player_menu.c | 41 | player.c | In-game menu, context pattern validation | | nointro_parser.c | 39 | (original) | No-Intro ROM naming conventions | diff --git a/Makefile.qa b/Makefile.qa index 4ec03689..9e8740e3 100644 --- a/Makefile.qa +++ b/Makefile.qa @@ -236,7 +236,7 @@ TEST_UNITY = tests/vendor/unity/unity.c PATHS_STUB = tests/support/paths_stub.c # All test executables (built from tests/unit/ and tests/integration/) -TEST_EXECUTABLES = tests/utils_test tests/nointro_parser_test tests/pad_test tests/gfx_text_test tests/audio_resampler_test tests/player_paths_test tests/launcher_utils_test tests/m3u_parser_test tests/launcher_file_utils_test tests/map_parser_test tests/collection_parser_test tests/recent_parser_test tests/recent_writer_test tests/recent_runtime_test tests/directory_utils_test tests/binary_file_utils_test tests/ui_layout_test tests/str_compare_test tests/effect_system_test tests/effect_generate_test 
tests/player_utils_test tests/player_config_test tests/player_options_test tests/platform_variant_test tests/launcher_entry_test tests/directory_index_test tests/player_archive_test tests/player_memory_test tests/player_state_test tests/launcher_launcher_test tests/player_cpu_test tests/player_input_test tests/launcher_state_test tests/player_menu_test tests/player_env_test tests/player_game_test tests/player_scaler_test tests/player_core_test tests/launcher_directory_test tests/launcher_navigation_test tests/launcher_thumbnail_test tests/launcher_context_test tests/emu_cache_test tests/res_cache_test tests/render_common_test tests/integration_workflows_test tests/log_test tests/frame_pacer_test +TEST_EXECUTABLES = tests/utils_test tests/nointro_parser_test tests/pad_test tests/gfx_text_test tests/audio_resampler_test tests/player_paths_test tests/launcher_utils_test tests/m3u_parser_test tests/launcher_file_utils_test tests/map_parser_test tests/collection_parser_test tests/recent_parser_test tests/recent_writer_test tests/recent_runtime_test tests/directory_utils_test tests/binary_file_utils_test tests/ui_layout_test tests/str_compare_test tests/effect_system_test tests/effect_generate_test tests/player_utils_test tests/player_config_test tests/player_options_test tests/platform_variant_test tests/launcher_entry_test tests/directory_index_test tests/player_archive_test tests/player_memory_test tests/player_state_test tests/launcher_launcher_test tests/cpu_test tests/player_input_test tests/launcher_state_test tests/player_menu_test tests/player_env_test tests/player_game_test tests/player_scaler_test tests/player_core_test tests/launcher_directory_test tests/launcher_navigation_test tests/launcher_thumbnail_test tests/launcher_context_test tests/emu_cache_test tests/res_cache_test tests/render_common_test tests/integration_workflows_test tests/log_test tests/frame_pacer_test # Default targets: use Docker for consistency test: docker-test @@ -408,9 +408,9 @@ 
tests/launcher_launcher_test: tests/unit/all/launcher/test_launcher_launcher.c w @echo "Building launcher command tests..." @$(CC) -o $@ $^ $(TEST_INCLUDES) $(TEST_CFLAGS) -# Build auto CPU scaling tests (pure algorithm, no external dependencies) -tests/player_cpu_test: tests/unit/all/player/test_player_cpu.c workspace/all/player/player_cpu.c $(TEST_UNITY) - @echo "Building auto CPU scaling tests..." +# Build CPU scaling tests (pure algorithm, no external dependencies) +tests/cpu_test: tests/unit/all/common/test_cpu.c workspace/all/common/cpu.c $(TEST_UNITY) + @echo "Building CPU scaling tests..." @$(CC) -o $@ $^ $(TEST_INCLUDES) $(TEST_CFLAGS) # Build frame pacing tests (pure algorithm, no external dependencies) diff --git a/docs/auto-cpu-scaling.md b/docs/auto-cpu-scaling.md index 4dd76e74..715b1332 100644 --- a/docs/auto-cpu-scaling.md +++ b/docs/auto-cpu-scaling.md @@ -256,7 +256,7 @@ Auto CPU scaling uses a **two-thread design** to keep the main emulation loop re - Polls every 50ms checking for target changes - When target ≠ current, applies the change: - - **Topology mode**: Calls `PlayerCPU_applyPerfState()` to set governors on all clusters, queues affinity change for main thread + - **Topology mode**: Calls `CPU_applyPerfState()` to set governors on all clusters, queues affinity change for main thread - **Granular mode**: Calls `PLAT_setCPUFrequency()` to set frequency via sysfs - **Fallback mode**: Calls `PWR_setCPUSpeed()` which may fork `system("overclock.elf")` - Updates current level/state after successful application @@ -519,8 +519,8 @@ The discovered frequency steps and performance data come from a custom CPU bench - [workspace/all/common/api.c](../workspace/all/common/api.c) - `SND_calculateRateAdjust()`, `PWR_getAvailableCPUFrequencies_sysfs()`, `PWR_setCPUFrequency_sysfs()`, `PWR_detectCPUTopology()`, `PWR_setCPUGovernor()`, `PWR_setThreadAffinity()` - [workspace/all/common/api.h](../workspace/all/common/api.h) - 
`PLAT_getAvailableCPUFrequencies()`, `PLAT_setCPUFrequency()` API - [workspace/all/player/player.c](../workspace/all/player/player.c) - Main emulation loop, `updateAutoCPU()`, `auto_cpu_detectFrequencies()` -- [workspace/all/player/player_cpu.c](../workspace/all/player/player_cpu.c) - CPU scaling algorithm, `PlayerCPU_buildPerfStates()`, `PlayerCPU_applyPerfState()`, `PlayerCPU_getPerformancePercent()` -- [workspace/all/player/player_cpu.h](../workspace/all/player/player_cpu.h) - CPU scaling types and API +- [workspace/all/common/cpu.c](../workspace/all/common/cpu.c) - CPU scaling algorithm, `CPU_buildPerfStates()`, `CPU_applyPerfState()`, `CPU_getPerformancePercent()` +- [workspace/all/common/cpu.h](../workspace/all/common/cpu.h) - CPU scaling types and API - [workspace/all/paks/Benchmark/](../workspace/all/paks/Benchmark/) - CPU frequency benchmark tool ## Tuning Status @@ -731,14 +731,14 @@ Helper functions provide a consistent interface regardless of scaling mode: ```c // Get normalized performance level (0-100%) -int PlayerCPU_getPerformancePercent(const PlayerCPUState* state); +int CPU_getPerformancePercent(const CPUState* state); // - Topology: (current_state / max_state) * 100 // - Granular: (current_index / max_index) * 100 // - Fallback: level * 50 (0=0%, 1=50%, 2=100%) // - Returns -1 if scaling disabled // Get mode name for logging/debugging -const char* PlayerCPU_getModeName(const PlayerCPUState* state); +const char* CPU_getModeName(const CPUState* state); // - Returns: "topology", "granular", "fallback", or "disabled" ``` diff --git a/docs/changes.md b/docs/changes.md index 7c0e69f8..4438f827 100644 --- a/docs/changes.md +++ b/docs/changes.md @@ -56,7 +56,7 @@ The main files `player.c` (peaked at ~7200 lines) and `launcher.c` (peaked at ~2 | player_core | Core AV info processing, aspect ratio | | player_menu | In-game menu system | | player_env | Libretro environment callback handlers | -| player_cpu | Auto CPU scaling algorithm | +| cpu (common) | 
Auto CPU scaling algorithm | | player_game | ZIP parsing, extension matching, M3U detection | | player_scaler | Video scaling geometry calculations | | player_archive | 7z/ZIP archive extraction | diff --git a/docs/minarch-refactoring.md b/docs/minarch-refactoring.md index a9b6458d..34ecc7b9 100644 --- a/docs/minarch-refactoring.md +++ b/docs/minarch-refactoring.md @@ -54,7 +54,7 @@ player.c (orchestration, main loop, SDL integration) │ ├── player_core.c ─ AV info processing, aspect ratio │ ├── player_env.c ─ Libretro environment callbacks │ ├── player_game.c ─ Game file handling, ZIP parsing - │ └── player_cpu.c ─ Auto CPU frequency scaling + │ └── cpu.c (common) ─ Auto CPU frequency scaling │ └── UI ├── player_menu.c ─ In-game menu system @@ -127,7 +127,7 @@ player.c (orchestration, main loop, SDL integration) | `player_core.c` | ~150 | Build game info, calculate aspect ratio, process AV info | 23 | | `player_env.c` | ~400 | Handle 30+ libretro environment callbacks | 51 | | `player_game.c` | ~300 | Extension parsing, ZIP headers, M3U detection | 46 | -| `player_cpu.c` | ~350 | Auto CPU frequency scaling algorithm | 42 | +| `cpu.c` (common)| ~350 | Auto CPU frequency scaling algorithm | 42 | **Key decisions:** @@ -340,7 +340,7 @@ workspace/all/player/ # Emulator frontend ├── player_core.h/c # Core AV processing ├── player_env.h/c # Environment callbacks ├── player_game.h/c # Game file handling -├── player_cpu.h/c # CPU scaling +├── (cpu.h/c in common/) # CPU scaling (moved to common/) ├── player_menu.h/c # Menu system └── player_menu_types.h # Menu types @@ -356,7 +356,7 @@ tests/unit/all/common/ # Unit tests ├── test_player_core.c ├── test_player_env.c ├── test_player_game.c -├── test_player_cpu.c +├── test_cpu.c # (moved to common/) └── test_player_menu.c ``` @@ -476,7 +476,7 @@ Renamed **405 function references** across 8 modules to follow consistent `Playe | player_state | 5 | `Player_readState` → `PlayerState_read` | | player_utils | 3 | `Player_getCoreName` 
→ `PlayerUtils_getCoreName` | | player_zip | 2 | `Player_zipCopy` → `PlayerZip_copy` | -| player_cpu | (earlier) | `AutoCPU_update` → `PlayerCPU_update` | +| cpu (common) | (earlier) | `AutoCPU_update` → `CPU_update` | **Benefits:** @@ -490,7 +490,7 @@ Renamed **405 function references** across 8 modules to follow consistent `Playe - Added comprehensive naming convention guide to CLAUDE.md - Includes table of all module prefixes and example functions -- Documents type naming (`PlayerCPUState`) and constant naming (`PLAYER_CPU_MAX`) +- Documents type naming (`CPUState`) and constant naming (`CPU_MAX_*`) --- diff --git a/scripts/run-coverage.sh b/scripts/run-coverage.sh index d3a51600..63ba48fe 100755 --- a/scripts/run-coverage.sh +++ b/scripts/run-coverage.sh @@ -121,7 +121,7 @@ declare -a TEST_BUILDS=( "player_memory_test:tests/unit/all/player/test_player_memory.c workspace/all/player/player_memory.c tests/support/libretro_mocks.c tests/support/test_temp.c:-D_GNU_SOURCE" "player_state_test:tests/unit/all/player/test_player_state.c workspace/all/player/player_state.c workspace/all/player/player_paths.c workspace/all/common/utils.c workspace/all/common/nointro_parser.c workspace/all/common/log.c tests/support/libretro_mocks.c:-D_DEFAULT_SOURCE" "launcher_launcher_test:tests/unit/all/launcher/test_launcher_launcher.c workspace/all/launcher/launcher_launcher.c:" - "player_cpu_test:tests/unit/all/player/test_player_cpu.c workspace/all/player/player_cpu.c:" + "cpu_test:tests/unit/all/common/test_cpu.c workspace/all/common/cpu.c:" "frame_pacer_test:tests/unit/all/player/test_frame_pacer.c workspace/all/player/frame_pacer.c:-lm" "player_input_test:tests/unit/all/player/test_player_input.c workspace/all/player/player_input.c:" "launcher_state_test:tests/unit/all/launcher/test_launcher_state.c workspace/all/launcher/launcher_state.c workspace/all/common/stb_ds_impl.c:" diff --git a/tests/README.md b/tests/README.md index 3bfa06a8..4aff3d0a 100644 --- a/tests/README.md +++ 
b/tests/README.md @@ -58,7 +58,7 @@ tests/ │ ├── test_player_utils.c # Player utilities │ ├── test_player_config.c # Config path generation │ ├── test_player_options.c # Option management -│ ├── test_player_cpu.c # CPU scaling algorithm +│ ├── (see common/test_cpu.c) # CPU scaling (now in common/) │ ├── test_player_menu.c # Menu system │ ├── test_player_env.c # Environment callbacks │ └── ... # Other player module tests diff --git a/tests/unit/all/player/test_player_cpu.c b/tests/unit/all/common/test_cpu.c similarity index 72% rename from tests/unit/all/player/test_player_cpu.c rename to tests/unit/all/common/test_cpu.c index d7a0bf1e..d64ac7b2 100644 --- a/tests/unit/all/player/test_player_cpu.c +++ b/tests/unit/all/common/test_cpu.c @@ -1,5 +1,5 @@ /** - * test_player_cpu.c - Unit tests for auto CPU scaling + * test_cpu.c - Unit tests for auto CPU scaling * * Tests the CPU frequency scaling algorithm including: * - Frequency detection and preset calculation @@ -14,12 +14,12 @@ */ #include "unity.h" -#include "player_cpu.h" +#include "cpu.h" #include /////////////////////////////// -// Stubs for API functions called by player_cpu.c +// Stubs for API functions called by cpu.c // These allow unit testing without linking api.c /////////////////////////////// @@ -55,19 +55,19 @@ static void reset_stubs(void) { } // Test state and config -static PlayerCPUState state; -static PlayerCPUConfig config; +static CPUState state; +static CPUConfig config; // Forward declaration for helper function (defined later with topology tests) -static void setup_dual_cluster_topology(PlayerCPUState* s); +static void setup_dual_cluster_topology(CPUState* s); /////////////////////////////// // Test Setup/Teardown /////////////////////////////// void setUp(void) { - PlayerCPU_initState(&state); - PlayerCPU_initConfig(&config); + CPU_initState(&state); + CPU_initConfig(&config); reset_stubs(); } @@ -80,8 +80,8 @@ void tearDown(void) { /////////////////////////////// void 
test_initConfig_sets_defaults(void) { - PlayerCPUConfig c; - PlayerCPU_initConfig(&c); + CPUConfig c; + CPU_initConfig(&c); // Verify values are sensible (not testing exact defaults) TEST_ASSERT_GREATER_THAN(0, c.window_frames); @@ -99,9 +99,9 @@ void test_initConfig_sets_defaults(void) { } void test_initState_zeros_state(void) { - PlayerCPUState s; + CPUState s; memset(&s, 0xFF, sizeof(s)); // Fill with garbage - PlayerCPU_initState(&s); + CPU_initState(&s); TEST_ASSERT_EQUAL(0, s.freq_count); TEST_ASSERT_EQUAL(0, s.target_index); @@ -115,39 +115,39 @@ void test_initState_zeros_state(void) { /////////////////////////////// void test_findNearestIndex_empty_array(void) { - int result = PlayerCPU_findNearestIndex(NULL, 0, 1000000); + int result = CPU_findNearestIndex(NULL, 0, 1000000); TEST_ASSERT_EQUAL(0, result); } void test_findNearestIndex_exact_match(void) { int freqs[] = {400000, 600000, 800000, 1000000}; - int result = PlayerCPU_findNearestIndex(freqs, 4, 800000); + int result = CPU_findNearestIndex(freqs, 4, 800000); TEST_ASSERT_EQUAL(2, result); } void test_findNearestIndex_nearest_lower(void) { int freqs[] = {400000, 600000, 800000, 1000000}; // 750000 is closer to 800000 than 600000 - int result = PlayerCPU_findNearestIndex(freqs, 4, 750000); + int result = CPU_findNearestIndex(freqs, 4, 750000); TEST_ASSERT_EQUAL(2, result); } void test_findNearestIndex_nearest_higher(void) { int freqs[] = {400000, 600000, 800000, 1000000}; // 650000 is closer to 600000 than 800000 - int result = PlayerCPU_findNearestIndex(freqs, 4, 650000); + int result = CPU_findNearestIndex(freqs, 4, 650000); TEST_ASSERT_EQUAL(1, result); } void test_findNearestIndex_below_min(void) { int freqs[] = {400000, 600000, 800000}; - int result = PlayerCPU_findNearestIndex(freqs, 3, 100000); + int result = CPU_findNearestIndex(freqs, 3, 100000); TEST_ASSERT_EQUAL(0, result); } void test_findNearestIndex_above_max(void) { int freqs[] = {400000, 600000, 800000}; - int result = 
PlayerCPU_findNearestIndex(freqs, 3, 2000000); + int result = CPU_findNearestIndex(freqs, 3, 2000000); TEST_ASSERT_EQUAL(2, result); } @@ -159,7 +159,7 @@ void test_detectFrequencies_filters_below_minimum(void) { // Set explicit min_freq_khz to test filtering behavior config.min_freq_khz = 400000; int raw[] = {100000, 200000, 300000, 400000, 600000, 800000}; - PlayerCPU_detectFrequencies(&state, &config, raw, 6); + CPU_detectFrequencies(&state, &config, raw, 6); // Should only keep 400000, 600000, 800000 (at or above min_freq_khz) TEST_ASSERT_EQUAL(3, state.freq_count); @@ -170,7 +170,7 @@ void test_detectFrequencies_filters_below_minimum(void) { void test_detectFrequencies_enables_granular_mode(void) { int raw[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, raw, 4); + CPU_detectFrequencies(&state, &config, raw, 4); TEST_ASSERT_EQUAL(1, state.use_granular); TEST_ASSERT_EQUAL(1, state.frequencies_detected); @@ -178,7 +178,7 @@ void test_detectFrequencies_enables_granular_mode(void) { void test_detectFrequencies_disables_scaling_with_one_freq(void) { int raw[] = {800000}; // Only one frequency - PlayerCPU_detectFrequencies(&state, &config, raw, 1); + CPU_detectFrequencies(&state, &config, raw, 1); TEST_ASSERT_EQUAL(1, state.scaling_disabled); // Scaling disabled TEST_ASSERT_EQUAL(0, state.use_granular); @@ -187,7 +187,7 @@ void test_detectFrequencies_disables_scaling_with_one_freq(void) { } void test_detectFrequencies_disables_scaling_with_zero_freqs(void) { - PlayerCPU_detectFrequencies(&state, &config, NULL, 0); + CPU_detectFrequencies(&state, &config, NULL, 0); TEST_ASSERT_EQUAL(1, state.scaling_disabled); // Scaling disabled TEST_ASSERT_EQUAL(0, state.use_granular); @@ -197,7 +197,7 @@ void test_detectFrequencies_disables_scaling_with_zero_freqs(void) { void test_detectFrequencies_enables_scaling_with_multiple_freqs(void) { int raw[] = {400000, 600000, 800000}; - PlayerCPU_detectFrequencies(&state, &config, raw, 3); + 
CPU_detectFrequencies(&state, &config, raw, 3); TEST_ASSERT_EQUAL(0, state.scaling_disabled); // Scaling enabled TEST_ASSERT_EQUAL(1, state.use_granular); @@ -211,11 +211,11 @@ void test_detectFrequencies_calculates_preset_indices(void) { // NORMAL (80%): 800000 -> exact match (index 2) // PERFORMANCE (100%): 1000000 (index 3) int raw[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, raw, 4); + CPU_detectFrequencies(&state, &config, raw, 4); - TEST_ASSERT_EQUAL(1, state.preset_indices[PLAYER_CPU_LEVEL_POWERSAVE]); - TEST_ASSERT_EQUAL(2, state.preset_indices[PLAYER_CPU_LEVEL_NORMAL]); - TEST_ASSERT_EQUAL(3, state.preset_indices[PLAYER_CPU_LEVEL_PERFORMANCE]); + TEST_ASSERT_EQUAL(1, state.preset_indices[CPU_LEVEL_POWERSAVE]); + TEST_ASSERT_EQUAL(2, state.preset_indices[CPU_LEVEL_NORMAL]); + TEST_ASSERT_EQUAL(3, state.preset_indices[CPU_LEVEL_PERFORMANCE]); } /////////////////////////////// @@ -228,7 +228,7 @@ void test_reset_clears_monitoring_state(void) { state.low_util_windows = 3; state.panic_cooldown = 8; - PlayerCPU_reset(&state, &config, 60.0, 0); + CPU_reset(&state, &config, 60.0, 0); TEST_ASSERT_EQUAL(0, state.frame_count); TEST_ASSERT_EQUAL(0, state.high_util_windows); @@ -238,22 +238,22 @@ void test_reset_clears_monitoring_state(void) { } void test_reset_calculates_frame_budget_60fps(void) { - PlayerCPU_reset(&state, &config, 60.0, 0); + CPU_reset(&state, &config, 60.0, 0); TEST_ASSERT_EQUAL(16666, state.frame_budget_us); // 1000000/60 } void test_reset_calculates_frame_budget_50fps(void) { - PlayerCPU_reset(&state, &config, 50.0, 0); + CPU_reset(&state, &config, 50.0, 0); TEST_ASSERT_EQUAL(20000, state.frame_budget_us); // 1000000/50 } void test_reset_defaults_to_60fps_on_zero(void) { - PlayerCPU_reset(&state, &config, 0.0, 0); + CPU_reset(&state, &config, 0.0, 0); TEST_ASSERT_EQUAL(16667, state.frame_budget_us); } void test_reset_stores_initial_underruns(void) { - PlayerCPU_reset(&state, &config, 60.0, 42); + 
CPU_reset(&state, &config, 60.0, 42); TEST_ASSERT_EQUAL(42, state.last_underrun); } @@ -262,9 +262,9 @@ void test_reset_stores_initial_underruns(void) { /////////////////////////////// void test_recordFrameTime_stores_in_ring_buffer(void) { - PlayerCPU_recordFrameTime(&state, 15000); - PlayerCPU_recordFrameTime(&state, 16000); - PlayerCPU_recordFrameTime(&state, 17000); + CPU_recordFrameTime(&state, 15000); + CPU_recordFrameTime(&state, 16000); + CPU_recordFrameTime(&state, 17000); TEST_ASSERT_EQUAL(15000, state.frame_times[0]); TEST_ASSERT_EQUAL(16000, state.frame_times[1]); @@ -274,14 +274,14 @@ void test_recordFrameTime_stores_in_ring_buffer(void) { void test_recordFrameTime_wraps_at_buffer_size(void) { // Fill buffer - for (int i = 0; i < PLAYER_CPU_FRAME_BUFFER_SIZE; i++) { - PlayerCPU_recordFrameTime(&state, 10000 + i); + for (int i = 0; i < CPU_FRAME_BUFFER_SIZE; i++) { + CPU_recordFrameTime(&state, 10000 + i); } // Add one more - should wrap to index 0 - PlayerCPU_recordFrameTime(&state, 99999); + CPU_recordFrameTime(&state, 99999); TEST_ASSERT_EQUAL(99999, state.frame_times[0]); - TEST_ASSERT_EQUAL(PLAYER_CPU_FRAME_BUFFER_SIZE + 1, state.frame_time_index); + TEST_ASSERT_EQUAL(CPU_FRAME_BUFFER_SIZE + 1, state.frame_time_index); } /////////////////////////////// @@ -289,20 +289,20 @@ void test_recordFrameTime_wraps_at_buffer_size(void) { /////////////////////////////// void test_percentile90_empty_returns_zero(void) { - uint64_t result = PlayerCPU_percentile90(NULL, 0); + uint64_t result = CPU_percentile90(NULL, 0); TEST_ASSERT_EQUAL(0, result); } void test_percentile90_single_value(void) { uint64_t times[] = {12345}; - uint64_t result = PlayerCPU_percentile90(times, 1); + uint64_t result = CPU_percentile90(times, 1); TEST_ASSERT_EQUAL(12345, result); } void test_percentile90_ten_values(void) { // Values 1-10, 90th percentile index = (10 * 90) / 100 = 9, sorted[9] = 10 uint64_t times[] = {5, 3, 8, 1, 9, 2, 7, 4, 10, 6}; - uint64_t result = 
PlayerCPU_percentile90(times, 10); + uint64_t result = CPU_percentile90(times, 10); TEST_ASSERT_EQUAL(10, result); } @@ -312,7 +312,7 @@ void test_percentile90_ignores_outliers(void) { // 90% of 10 = 9, so index 9 = 1000000 // But we want the frame times to show typical load, not spikes uint64_t times[] = {10, 11, 12, 13, 14, 15, 16, 17, 18, 1000000}; - uint64_t result = PlayerCPU_percentile90(times, 10); + uint64_t result = CPU_percentile90(times, 10); // Index 9 (90%) is the outlier TEST_ASSERT_EQUAL(1000000, result); } @@ -324,19 +324,19 @@ void test_percentile90_ignores_outliers(void) { void test_predictFrequency_boost_case(void) { // At 1000MHz with 90% util, want 70% util // new_freq = 1000 * 90 / 70 = 1285 - int result = PlayerCPU_predictFrequency(1000000, 90, 70); + int result = CPU_predictFrequency(1000000, 90, 70); TEST_ASSERT_EQUAL(1285714, result); } void test_predictFrequency_reduce_case(void) { // At 1000MHz with 40% util, want 70% util // new_freq = 1000 * 40 / 70 = 571 - int result = PlayerCPU_predictFrequency(1000000, 40, 70); + int result = CPU_predictFrequency(1000000, 40, 70); TEST_ASSERT_EQUAL(571428, result); } void test_predictFrequency_zero_target_returns_current(void) { - int result = PlayerCPU_predictFrequency(1000000, 50, 0); + int result = CPU_predictFrequency(1000000, 50, 0); TEST_ASSERT_EQUAL(1000000, result); } @@ -345,15 +345,15 @@ void test_predictFrequency_zero_target_returns_current(void) { /////////////////////////////// void test_getPresetPercentage_powersave(void) { - TEST_ASSERT_EQUAL(55, PlayerCPU_getPresetPercentage(PLAYER_CPU_LEVEL_POWERSAVE)); + TEST_ASSERT_EQUAL(55, CPU_getPresetPercentage(CPU_LEVEL_POWERSAVE)); } void test_getPresetPercentage_normal(void) { - TEST_ASSERT_EQUAL(80, PlayerCPU_getPresetPercentage(PLAYER_CPU_LEVEL_NORMAL)); + TEST_ASSERT_EQUAL(80, CPU_getPresetPercentage(CPU_LEVEL_NORMAL)); } void test_getPresetPercentage_performance(void) { - TEST_ASSERT_EQUAL(100, 
PlayerCPU_getPresetPercentage(PLAYER_CPU_LEVEL_PERFORMANCE)); + TEST_ASSERT_EQUAL(100, CPU_getPresetPercentage(CPU_LEVEL_PERFORMANCE)); } /////////////////////////////// @@ -362,36 +362,36 @@ void test_getPresetPercentage_performance(void) { void test_getPerformancePercent_topology_mode(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); // At state 0 of 5 (0%) state.current_state = 0; - TEST_ASSERT_EQUAL(0, PlayerCPU_getPerformancePercent(&state)); + TEST_ASSERT_EQUAL(0, CPU_getPerformancePercent(&state)); // At state 3 of 5 (60%) state.current_state = 3; - TEST_ASSERT_EQUAL(60, PlayerCPU_getPerformancePercent(&state)); + TEST_ASSERT_EQUAL(60, CPU_getPerformancePercent(&state)); // At state 5 of 5 (100%) state.current_state = 5; - TEST_ASSERT_EQUAL(100, PlayerCPU_getPerformancePercent(&state)); + TEST_ASSERT_EQUAL(100, CPU_getPerformancePercent(&state)); } void test_getPerformancePercent_granular_mode(void) { int raw[] = {600000, 800000, 1000000, 1200000, 1400000}; - PlayerCPU_detectFrequencies(&state, &config, raw, 5); + CPU_detectFrequencies(&state, &config, raw, 5); // At index 0 of 4 (0%) state.current_index = 0; - TEST_ASSERT_EQUAL(0, PlayerCPU_getPerformancePercent(&state)); + TEST_ASSERT_EQUAL(0, CPU_getPerformancePercent(&state)); // At index 2 of 4 (50%) state.current_index = 2; - TEST_ASSERT_EQUAL(50, PlayerCPU_getPerformancePercent(&state)); + TEST_ASSERT_EQUAL(50, CPU_getPerformancePercent(&state)); // At index 4 of 4 (100%) state.current_index = 4; - TEST_ASSERT_EQUAL(100, PlayerCPU_getPerformancePercent(&state)); + TEST_ASSERT_EQUAL(100, CPU_getPerformancePercent(&state)); } void test_getPerformancePercent_fallback_mode(void) { @@ -400,52 +400,52 @@ void test_getPerformancePercent_fallback_mode(void) { state.scaling_disabled = 0; state.current_level = 0; - TEST_ASSERT_EQUAL(0, PlayerCPU_getPerformancePercent(&state)); + TEST_ASSERT_EQUAL(0, 
CPU_getPerformancePercent(&state)); state.current_level = 1; - TEST_ASSERT_EQUAL(50, PlayerCPU_getPerformancePercent(&state)); + TEST_ASSERT_EQUAL(50, CPU_getPerformancePercent(&state)); state.current_level = 2; - TEST_ASSERT_EQUAL(100, PlayerCPU_getPerformancePercent(&state)); + TEST_ASSERT_EQUAL(100, CPU_getPerformancePercent(&state)); } void test_getPerformancePercent_disabled_returns_negative(void) { state.scaling_disabled = 1; state.use_topology = 0; - TEST_ASSERT_EQUAL(-1, PlayerCPU_getPerformancePercent(&state)); + TEST_ASSERT_EQUAL(-1, CPU_getPerformancePercent(&state)); } void test_getPerformancePercent_null_returns_negative(void) { - TEST_ASSERT_EQUAL(-1, PlayerCPU_getPerformancePercent(NULL)); + TEST_ASSERT_EQUAL(-1, CPU_getPerformancePercent(NULL)); } void test_getModeName_topology(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); - TEST_ASSERT_EQUAL_STRING("topology", PlayerCPU_getModeName(&state)); + CPU_buildPerfStates(&state, &config); + TEST_ASSERT_EQUAL_STRING("topology", CPU_getModeName(&state)); } void test_getModeName_granular(void) { int raw[] = {600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, raw, 3); - TEST_ASSERT_EQUAL_STRING("granular", PlayerCPU_getModeName(&state)); + CPU_detectFrequencies(&state, &config, raw, 3); + TEST_ASSERT_EQUAL_STRING("granular", CPU_getModeName(&state)); } void test_getModeName_fallback(void) { state.use_topology = 0; state.use_granular = 0; state.scaling_disabled = 0; - TEST_ASSERT_EQUAL_STRING("fallback", PlayerCPU_getModeName(&state)); + TEST_ASSERT_EQUAL_STRING("fallback", CPU_getModeName(&state)); } void test_getModeName_disabled(void) { state.scaling_disabled = 1; state.use_topology = 0; - TEST_ASSERT_EQUAL_STRING("disabled", PlayerCPU_getModeName(&state)); + TEST_ASSERT_EQUAL_STRING("disabled", CPU_getModeName(&state)); } void test_getModeName_null(void) { - TEST_ASSERT_EQUAL_STRING("disabled", PlayerCPU_getModeName(NULL)); + 
TEST_ASSERT_EQUAL_STRING("disabled", CPU_getModeName(NULL)); } /////////////////////////////// @@ -453,34 +453,34 @@ void test_getModeName_null(void) { /////////////////////////////// void test_update_skips_during_fast_forward(void) { - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, true, false, 0, &result); + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, true, false, 0, &result); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, decision); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, result.decision); + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, result.decision); } void test_update_skips_during_menu(void) { - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, true, 0, &result); + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, true, 0, &result); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); } void test_update_skips_during_grace_period(void) { config.startup_grace = 300; state.startup_frames = 100; // Not yet at grace period - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); TEST_ASSERT_EQUAL(101, state.startup_frames); // Incremented } void test_update_skips_when_scaling_disabled(void) { // Simulate M17-like single-frequency device int raw[] = {1200000}; // Only one frequency (like M17) - PlayerCPU_detectFrequencies(&state, &config, raw, 1); + CPU_detectFrequencies(&state, &config, raw, 1); TEST_ASSERT_EQUAL(1, state.scaling_disabled); // Pre-condition: scaling disabled @@ -489,26 +489,26 @@ void test_update_skips_when_scaling_disabled(void) { state.frame_count = config.window_frames - 1; 
state.frame_budget_us = 16667; for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 15000); // High utilization + CPU_recordFrameTime(&state, 15000); // High utilization } - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, &result); + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, &result); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, decision); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, result.decision); + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, result.decision); } void test_update_skips_when_no_frequencies(void) { // Edge case: no frequencies at all - PlayerCPU_detectFrequencies(&state, &config, NULL, 0); + CPU_detectFrequencies(&state, &config, NULL, 0); TEST_ASSERT_EQUAL(1, state.scaling_disabled); - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, &result); + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, &result); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_SKIP, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); } /////////////////////////////// @@ -518,15 +518,15 @@ void test_update_skips_when_no_frequencies(void) { void test_update_panic_on_underrun_granular(void) { // Setup: granular mode, not at max int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); + CPU_detectFrequencies(&state, &config, freqs, 4); state.startup_frames = config.startup_grace; // Past grace state.target_index = 1; // At 600MHz state.last_underrun = 0; - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 1, &result); + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 1, &result); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, 
decision); TEST_ASSERT_EQUAL(3, state.target_index); // Boosted by panic_step_up=2 (1+2=3) TEST_ASSERT_EQUAL(8, state.panic_cooldown); } @@ -538,24 +538,24 @@ void test_update_panic_on_underrun_fallback(void) { state.target_level = 0; // At powersave state.last_underrun = 0; - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 1, &result); + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 1, &result); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); TEST_ASSERT_EQUAL(2, state.target_level); // Boosted to max } void test_update_no_panic_when_at_max(void) { int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); + CPU_detectFrequencies(&state, &config, freqs, 4); state.startup_frames = config.startup_grace; state.target_index = 3; // Already at max state.last_underrun = 0; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 1, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 1, NULL); // Should not panic, just update underrun tracking - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); TEST_ASSERT_EQUAL(3, state.target_index); // Still at max } @@ -565,13 +565,13 @@ void test_update_no_panic_when_at_max(void) { void test_update_waits_for_full_window(void) { int freqs[] = {400000, 600000, 800000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 3); + CPU_detectFrequencies(&state, &config, freqs, 3); state.startup_frames = config.startup_grace; state.frame_count = 10; // Not yet at window_frames (30) - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, 
decision); TEST_ASSERT_EQUAL(11, state.frame_count); // Incremented } @@ -581,7 +581,7 @@ void test_update_waits_for_full_window(void) { void test_update_boost_on_high_util_granular(void) { int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); + CPU_detectFrequencies(&state, &config, freqs, 4); state.startup_frames = config.startup_grace; state.target_index = 1; // 600MHz state.frame_count = config.window_frames - 1; @@ -590,19 +590,19 @@ void test_update_boost_on_high_util_granular(void) { // Add frame times that result in high utilization (~90%) state.frame_budget_us = 16667; // 60fps for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 15000); // 90% of 16667 + CPU_recordFrameTime(&state, 15000); // 90% of 16667 } - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, &result); + CPUResult result; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, &result); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_BOOST, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_BOOST, decision); TEST_ASSERT_TRUE(state.target_index > 1); // Moved up } void test_update_reduce_on_low_util_granular(void) { int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); + CPU_detectFrequencies(&state, &config, freqs, 4); state.startup_frames = config.startup_grace; state.target_index = 3; // 1000MHz state.frame_count = config.window_frames - 1; @@ -612,19 +612,19 @@ void test_update_reduce_on_low_util_granular(void) { // Add frame times that result in low utilization (~40%) state.frame_budget_us = 16667; for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 6667); // 40% of 16667 + CPU_recordFrameTime(&state, 6667); // 40% of 16667 } - PlayerCPUResult result; - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, &result); + CPUResult result; + CPUDecision decision = 
CPU_update(&state, &config, false, false, 0, &result); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_REDUCE, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); TEST_ASSERT_TRUE(state.target_index < 3); // Moved down } void test_update_no_reduce_during_cooldown(void) { int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); + CPU_detectFrequencies(&state, &config, freqs, 4); state.startup_frames = config.startup_grace; state.target_index = 3; state.frame_count = config.window_frames - 1; @@ -633,13 +633,13 @@ void test_update_no_reduce_during_cooldown(void) { state.frame_budget_us = 16667; for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 6667); // Low util + CPU_recordFrameTime(&state, 6667); // Low util } - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); // Should NOT reduce due to cooldown - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); TEST_ASSERT_EQUAL(3, state.target_index); TEST_ASSERT_EQUAL(4, state.panic_cooldown); // Decremented } @@ -653,12 +653,12 @@ void test_update_boost_fallback_mode(void) { state.frame_budget_us = 16667; for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 15000); + CPU_recordFrameTime(&state, 15000); } - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_BOOST, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_BOOST, decision); TEST_ASSERT_EQUAL(1, state.target_level); } @@ -671,18 +671,18 @@ void test_update_reduce_fallback_mode(void) { state.frame_budget_us = 16667; for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 6667); + CPU_recordFrameTime(&state, 6667); } - PlayerCPUDecision decision = 
PlayerCPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_REDUCE, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); TEST_ASSERT_EQUAL(1, state.target_level); } void test_update_sweet_spot_resets_counters(void) { int freqs[] = {400000, 600000, 800000, 1000000}; - PlayerCPU_detectFrequencies(&state, &config, freqs, 4); + CPU_detectFrequencies(&state, &config, freqs, 4); state.startup_frames = config.startup_grace; state.target_index = 2; state.frame_count = config.window_frames - 1; @@ -692,10 +692,10 @@ void test_update_sweet_spot_resets_counters(void) { // Add frame times that result in sweet spot utilization (~70%) state.frame_budget_us = 16667; for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 11667); // ~70% of 16667 + CPU_recordFrameTime(&state, 11667); // ~70% of 16667 } - PlayerCPU_update(&state, &config, false, false, 0, NULL); + CPU_update(&state, &config, false, false, 0, NULL); // Counters should be reset TEST_ASSERT_EQUAL(0, state.high_util_windows); @@ -707,9 +707,9 @@ void test_update_sweet_spot_resets_counters(void) { /////////////////////////////// void test_initTopology_zeros_topology(void) { - PlayerCPUTopology t; + CPUTopology t; memset(&t, 0xFF, sizeof(t)); // Fill with garbage - PlayerCPU_initTopology(&t); + CPU_initTopology(&t); TEST_ASSERT_EQUAL(0, t.cluster_count); TEST_ASSERT_EQUAL(0, t.state_count); @@ -718,51 +718,51 @@ void test_initTopology_zeros_topology(void) { void test_parseCPUList_single_cpu(void) { int count = 0; - int mask = PlayerCPU_parseCPUList("0", &count); + int mask = CPU_parseCPUList("0", &count); TEST_ASSERT_EQUAL(1, count); TEST_ASSERT_EQUAL(0x1, mask); // CPU 0 } void test_parseCPUList_range(void) { int count = 0; - int mask = PlayerCPU_parseCPUList("0-3", &count); + int mask = CPU_parseCPUList("0-3", &count); TEST_ASSERT_EQUAL(4, count); TEST_ASSERT_EQUAL(0xF, mask); // CPUs 
0-3 } void test_parseCPUList_mixed(void) { int count = 0; - int mask = PlayerCPU_parseCPUList("0-3,7", &count); + int mask = CPU_parseCPUList("0-3,7", &count); TEST_ASSERT_EQUAL(5, count); TEST_ASSERT_EQUAL(0x8F, mask); // CPUs 0-3 and 7 } void test_parseCPUList_single_high_cpu(void) { int count = 0; - int mask = PlayerCPU_parseCPUList("7", &count); + int mask = CPU_parseCPUList("7", &count); TEST_ASSERT_EQUAL(1, count); TEST_ASSERT_EQUAL(0x80, mask); // CPU 7 } void test_parseCPUList_empty_string(void) { int count = 0; - int mask = PlayerCPU_parseCPUList("", &count); + int mask = CPU_parseCPUList("", &count); TEST_ASSERT_EQUAL(0, count); TEST_ASSERT_EQUAL(0, mask); } void test_classifyClusters_single_is_little(void) { - PlayerCPUCluster clusters[1]; + CPUCluster clusters[1]; clusters[0].max_khz = 1800000; clusters[0].cpu_count = 4; - PlayerCPU_classifyClusters(clusters, 1); + CPU_classifyClusters(clusters, 1); - TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_LITTLE, clusters[0].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_LITTLE, clusters[0].type); } void test_classifyClusters_dual_little_big(void) { - PlayerCPUCluster clusters[2]; + CPUCluster clusters[2]; // Sorted by max_khz ascending // Use frequencies with <10% gap to get BIG (not PRIME) classification clusters[0].max_khz = 1800000; @@ -770,14 +770,14 @@ void test_classifyClusters_dual_little_big(void) { clusters[1].max_khz = 1900000; // ~5.5% higher, should be BIG clusters[1].cpu_count = 4; - PlayerCPU_classifyClusters(clusters, 2); + CPU_classifyClusters(clusters, 2); - TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_LITTLE, clusters[0].type); - TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_BIG, clusters[1].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_LITTLE, clusters[0].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_BIG, clusters[1].type); } void test_classifyClusters_tri_little_big_prime(void) { - PlayerCPUCluster clusters[3]; + CPUCluster clusters[3]; // SD865-like: Silver, Gold, Prime clusters[0].max_khz = 1800000; clusters[0].cpu_count = 4; @@ -786,34 
+786,34 @@ void test_classifyClusters_tri_little_big_prime(void) { clusters[2].max_khz = 2840000; clusters[2].cpu_count = 1; // Prime is single-core - PlayerCPU_classifyClusters(clusters, 3); + CPU_classifyClusters(clusters, 3); - TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_LITTLE, clusters[0].type); - TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_BIG, clusters[1].type); - TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_PRIME, clusters[2].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_LITTLE, clusters[0].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_BIG, clusters[1].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_PRIME, clusters[2].type); } void test_classifyClusters_dual_prime_by_frequency_gap(void) { - PlayerCPUCluster clusters[2]; + CPUCluster clusters[2]; // >10% frequency gap makes highest PRIME even with multiple cores clusters[0].max_khz = 1800000; clusters[0].cpu_count = 4; clusters[1].max_khz = 2200000; // >10% higher clusters[1].cpu_count = 4; - PlayerCPU_classifyClusters(clusters, 2); + CPU_classifyClusters(clusters, 2); - TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_LITTLE, clusters[0].type); - TEST_ASSERT_EQUAL(PLAYER_CPU_CLUSTER_PRIME, clusters[1].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_LITTLE, clusters[0].type); + TEST_ASSERT_EQUAL(CPU_CLUSTER_PRIME, clusters[1].type); } void test_pickRepresentativeFreqs_single_freq(void) { - PlayerCPUCluster c; + CPUCluster c; c.frequencies[0] = 1800000; c.freq_count = 1; int low, mid, high; - PlayerCPU_pickRepresentativeFreqs(&c, &low, &mid, &high); + CPU_pickRepresentativeFreqs(&c, &low, &mid, &high); TEST_ASSERT_EQUAL(1800000, low); TEST_ASSERT_EQUAL(1800000, mid); @@ -821,7 +821,7 @@ void test_pickRepresentativeFreqs_single_freq(void) { } void test_pickRepresentativeFreqs_multiple_freqs(void) { - PlayerCPUCluster c; + CPUCluster c; c.frequencies[0] = 400000; c.frequencies[1] = 800000; c.frequencies[2] = 1200000; @@ -830,7 +830,7 @@ void test_pickRepresentativeFreqs_multiple_freqs(void) { c.freq_count = 5; int low, mid, high; - PlayerCPU_pickRepresentativeFreqs(&c, &low, 
&mid, &high); + CPU_pickRepresentativeFreqs(&c, &low, &mid, &high); TEST_ASSERT_EQUAL(400000, low); TEST_ASSERT_EQUAL(1200000, mid); // freqs[5/2] = freqs[2] @@ -838,7 +838,7 @@ void test_pickRepresentativeFreqs_multiple_freqs(void) { } // Helper to set up a dual-cluster topology -static void setup_dual_cluster_topology(PlayerCPUState* s) { +static void setup_dual_cluster_topology(CPUState* s) { s->topology.cluster_count = 2; s->topology.topology_detected = 1; // Mark as detected so buildPerfStates works @@ -852,7 +852,7 @@ static void setup_dual_cluster_topology(PlayerCPUState* s) { s->topology.clusters[0].freq_count = 3; s->topology.clusters[0].min_khz = 600000; s->topology.clusters[0].max_khz = 1800000; - s->topology.clusters[0].type = PLAYER_CPU_CLUSTER_LITTLE; + s->topology.clusters[0].type = CPU_CLUSTER_LITTLE; // BIG cluster (policy 4, CPUs 4-7) s->topology.clusters[1].policy_id = 4; @@ -864,13 +864,13 @@ static void setup_dual_cluster_topology(PlayerCPUState* s) { s->topology.clusters[1].freq_count = 3; s->topology.clusters[1].min_khz = 800000; s->topology.clusters[1].max_khz = 2400000; - s->topology.clusters[1].type = PLAYER_CPU_CLUSTER_BIG; + s->topology.clusters[1].type = CPU_CLUSTER_BIG; } void test_buildPerfStates_dual_cluster_creates_six_states(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); TEST_ASSERT_EQUAL(6, state.topology.state_count); TEST_ASSERT_EQUAL(1, state.use_topology); @@ -878,36 +878,36 @@ void test_buildPerfStates_dual_cluster_creates_six_states(void) { void test_buildPerfStates_dual_cluster_state_progression(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); // State 0: LITTLE powersave, BIG powersave, affinity = LITTLE - TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[0].cluster_governor[0]); - TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, 
state.topology.states[0].cluster_governor[1]); + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[0].cluster_governor[0]); + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[0].cluster_governor[1]); TEST_ASSERT_EQUAL(0, state.topology.states[0].active_cluster_idx); TEST_ASSERT_EQUAL(0x0F, state.topology.states[0].cpu_affinity_mask); // LITTLE CPUs // State 1: LITTLE schedutil, BIG powersave - TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_SCHEDUTIL, state.topology.states[1].cluster_governor[0]); - TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[1].cluster_governor[1]); + TEST_ASSERT_EQUAL(CPU_GOV_SCHEDUTIL, state.topology.states[1].cluster_governor[0]); + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[1].cluster_governor[1]); // State 2: LITTLE performance, BIG powersave - TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_PERFORMANCE, state.topology.states[2].cluster_governor[0]); - TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[2].cluster_governor[1]); + TEST_ASSERT_EQUAL(CPU_GOV_PERFORMANCE, state.topology.states[2].cluster_governor[0]); + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[2].cluster_governor[1]); // State 3: BIG powersave, LITTLE powersave, affinity = BIG - TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[3].cluster_governor[0]); - TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_POWERSAVE, state.topology.states[3].cluster_governor[1]); + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[3].cluster_governor[0]); + TEST_ASSERT_EQUAL(CPU_GOV_POWERSAVE, state.topology.states[3].cluster_governor[1]); TEST_ASSERT_EQUAL(1, state.topology.states[3].active_cluster_idx); TEST_ASSERT_EQUAL(0xF0, state.topology.states[3].cpu_affinity_mask); // BIG CPUs // State 5: BIG performance (highest state) - TEST_ASSERT_EQUAL(PLAYER_CPU_GOV_PERFORMANCE, state.topology.states[5].cluster_governor[1]); + TEST_ASSERT_EQUAL(CPU_GOV_PERFORMANCE, state.topology.states[5].cluster_governor[1]); } void 
test_buildPerfStates_single_cluster_skips_topology(void) { state.topology.cluster_count = 1; - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); TEST_ASSERT_EQUAL(0, state.topology.state_count); TEST_ASSERT_EQUAL(0, state.use_topology); @@ -915,12 +915,12 @@ void test_buildPerfStates_single_cluster_skips_topology(void) { void test_applyPerfState_calls_governors(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); state.target_state = 0; state.current_state = -1; - int result = PlayerCPU_applyPerfState(&state); + int result = CPU_applyPerfState(&state); TEST_ASSERT_EQUAL(0, result); // Should call governor for each cluster (2 clusters = 2 calls) @@ -929,13 +929,13 @@ void test_applyPerfState_calls_governors(void) { void test_applyPerfState_does_not_set_affinity_directly(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); state.target_state = 0; state.current_state = -1; state.pending_affinity = 0; - PlayerCPU_applyPerfState(&state); + CPU_applyPerfState(&state); // applyPerfState should NOT set pending_affinity or call PWR_setThreadAffinity // The caller is responsible for setting pending_affinity under mutex @@ -945,19 +945,19 @@ void test_applyPerfState_does_not_set_affinity_directly(void) { void test_applyPerfState_updates_current_state(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); state.target_state = 3; state.current_state = -1; - PlayerCPU_applyPerfState(&state); + CPU_applyPerfState(&state); TEST_ASSERT_EQUAL(3, state.current_state); } void test_update_topology_boost_increments_state(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); state.startup_frames = config.startup_grace; state.target_state = 2; @@ 
-968,18 +968,18 @@ void test_update_topology_boost_increments_state(void) { // High utilization frames (>85%) state.frame_budget_us = 16667; for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 15000); // ~90% + CPU_recordFrameTime(&state, 15000); // ~90% } - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_BOOST, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_BOOST, decision); TEST_ASSERT_EQUAL(3, state.target_state); } void test_update_topology_reduce_decrements_state(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); state.startup_frames = config.startup_grace; state.target_state = 4; @@ -990,18 +990,18 @@ void test_update_topology_reduce_decrements_state(void) { // Low utilization frames (<55%) state.frame_budget_us = 16667; for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 6667); // ~40% + CPU_recordFrameTime(&state, 6667); // ~40% } - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_REDUCE, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); TEST_ASSERT_LESS_THAN(4, state.target_state); } void test_update_topology_panic_jumps_states(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); state.startup_frames = config.startup_grace; state.target_state = 1; @@ -1009,15 +1009,15 @@ void test_update_topology_panic_jumps_states(void) { state.last_underrun = 0; // Underrun detected - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 1, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 1, NULL); - 
TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); TEST_ASSERT_GREATER_THAN(1, state.target_state); } void test_update_topology_no_boost_at_max_state(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); state.startup_frames = config.startup_grace; state.target_state = 5; // Already at max @@ -1028,18 +1028,18 @@ void test_update_topology_no_boost_at_max_state(void) { // High utilization frames state.frame_budget_us = 16667; for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 15000); + CPU_recordFrameTime(&state, 15000); } - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); TEST_ASSERT_EQUAL(5, state.target_state); } void test_update_topology_no_reduce_at_min_state(void) { setup_dual_cluster_topology(&state); - PlayerCPU_buildPerfStates(&state, &config); + CPU_buildPerfStates(&state, &config); state.startup_frames = config.startup_grace; state.target_state = 0; // Already at min @@ -1050,12 +1050,12 @@ void test_update_topology_no_reduce_at_min_state(void) { // Low utilization frames state.frame_budget_us = 16667; for (int i = 0; i < 30; i++) { - PlayerCPU_recordFrameTime(&state, 6667); + CPU_recordFrameTime(&state, 6667); } - PlayerCPUDecision decision = PlayerCPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); - TEST_ASSERT_EQUAL(PLAYER_CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); TEST_ASSERT_EQUAL(0, state.target_state); } diff --git a/tests/unit/all/player/test_player_utils.c b/tests/unit/all/player/test_player_utils.c index f624ac7b..97624e8c 100644 --- 
a/tests/unit/all/player/test_player_utils.c +++ b/tests/unit/all/player/test_player_utils.c @@ -10,7 +10,7 @@ * - PlayerUtils_escapeSingleQuotes - Shell quote escaping * * For option-related tests, see test_player_options.c - * For CPU frequency tests, see test_player_cpu.c + * For CPU frequency tests, see test_cpu.c (in common/) */ #include "unity.h" diff --git a/workspace/all/common/api.c b/workspace/all/common/api.c index 95eb026e..e0207db2 100644 --- a/workspace/all/common/api.c +++ b/workspace/all/common/api.c @@ -3335,8 +3335,8 @@ int PWR_setCPUFrequency_sysfs(int freq_khz) { // Multi-cluster CPU topology support /////////////////////////////// -// Include player_cpu.h for topology types -#include "../player/player_cpu.h" +// Include cpu.h for topology types +#include "cpu.h" // Base path for cpufreq policies #define CPUFREQ_BASE_PATH "/sys/devices/system/cpu/cpufreq" @@ -3345,43 +3345,11 @@ int PWR_setCPUFrequency_sysfs(int freq_khz) { * Comparison function for sorting clusters by max_khz ascending. */ static int compare_cluster_by_max_khz(const void* a, const void* b) { - const PlayerCPUCluster* ca = (const PlayerCPUCluster*)a; - const PlayerCPUCluster* cb = (const PlayerCPUCluster*)b; + const CPUCluster* ca = (const CPUCluster*)a; + const CPUCluster* cb = (const CPUCluster*)b; return ca->max_khz - cb->max_khz; } -/** - * Classifies clusters based on their relative performance. - * Static helper for PWR_detectCPUTopology(). 
- * (Public version available in player_cpu.c for testing) - */ -static void classify_clusters(PlayerCPUCluster* clusters, int count) { - if (!clusters || count <= 0) - return; - - for (int i = 0; i < count; i++) { - PlayerCPUCluster* cluster = &clusters[i]; - - if (i == 0) { - cluster->type = PLAYER_CPU_CLUSTER_LITTLE; - } else if (i == count - 1) { - int prev_max = clusters[i - 1].max_khz; - int freq_gap_percent = 0; - if (prev_max > 0) { - freq_gap_percent = ((cluster->max_khz - prev_max) * 100) / prev_max; - } - - if (cluster->cpu_count == 1 || freq_gap_percent > 10) { - cluster->type = PLAYER_CPU_CLUSTER_PRIME; - } else { - cluster->type = PLAYER_CPU_CLUSTER_BIG; - } - } else { - cluster->type = PLAYER_CPU_CLUSTER_BIG; - } - } -} - /** * Reads an integer from a sysfs file. * @@ -3408,7 +3376,7 @@ static int read_sysfs_int(const char* path) { * @param cluster Cluster to populate * @return Number of frequencies read */ -static int read_cluster_frequencies(const char* path, PlayerCPUCluster* cluster) { +static int read_cluster_frequencies(const char* path, CPUCluster* cluster) { FILE* fp = fopen(path, "r"); if (!fp) return 0; @@ -3418,7 +3386,7 @@ static int read_cluster_frequencies(const char* path, PlayerCPUCluster* cluster) if (fgets(buffer, sizeof(buffer), fp) != NULL) { char* token = strtok(buffer, " \t\n"); - while (token != NULL && count < PLAYER_CPU_MAX_FREQS_PER_CLUSTER) { + while (token != NULL && count < CPU_MAX_FREQS_PER_CLUSTER) { int freq = atoi(token); if (freq > 0) { cluster->frequencies[count++] = freq; @@ -3494,7 +3462,7 @@ static int parse_related_cpus(const char* path, int* cpu_mask, int* cpu_count) { return (*cpu_count > 0) ? 
1 : 0; } -int PWR_detectCPUTopology(struct PlayerCPUTopology* topology) { +int PWR_detectCPUTopology(struct CPUTopology* topology) { if (!topology) { return 0; } @@ -3507,8 +3475,7 @@ int PWR_detectCPUTopology(struct PlayerCPUTopology* topology) { char path[256]; int cluster_count = 0; - for (int policy_id = 0; policy_id < 16 && cluster_count < PLAYER_CPU_MAX_CLUSTERS; - policy_id++) { + for (int policy_id = 0; policy_id < 16 && cluster_count < CPU_MAX_CLUSTERS; policy_id++) { (void)snprintf(path, sizeof(path), "%s/policy%d", CPUFREQ_BASE_PATH, policy_id); // Check if policy directory exists by trying to read cpuinfo_max_freq @@ -3519,7 +3486,7 @@ int PWR_detectCPUTopology(struct PlayerCPUTopology* topology) { continue; // Policy doesn't exist } - PlayerCPUCluster* cluster = &topology->clusters[cluster_count]; + CPUCluster* cluster = &topology->clusters[cluster_count]; cluster->policy_id = policy_id; cluster->max_khz = max_khz; @@ -3564,17 +3531,16 @@ int PWR_detectCPUTopology(struct PlayerCPUTopology* topology) { // Sort clusters by max_khz ascending (LITTLE → BIG → PRIME) if (cluster_count > 1) { - qsort(topology->clusters, cluster_count, sizeof(PlayerCPUCluster), - compare_cluster_by_max_khz); + qsort(topology->clusters, cluster_count, sizeof(CPUCluster), compare_cluster_by_max_khz); } // Classify clusters (LITTLE/BIG/PRIME) - classify_clusters(topology->clusters, cluster_count); + CPU_classifyClusters(topology->clusters, cluster_count); // Log classification results const char* type_names[] = {"LITTLE", "BIG", "PRIME"}; for (int i = 0; i < cluster_count; i++) { - PlayerCPUCluster* cluster = &topology->clusters[i]; + CPUCluster* cluster = &topology->clusters[i]; LOG_info("PWR_detectCPUTopology: cluster %d (policy%d): %s, %d CPUs, %d-%d kHz\n", i, cluster->policy_id, type_names[cluster->type], cluster->cpu_count, cluster->min_khz, cluster->max_khz); diff --git a/workspace/all/common/api.h b/workspace/all/common/api.h index 5c2f3072..a831cbbc 100644 --- 
a/workspace/all/common/api.h +++ b/workspace/all/common/api.h @@ -1525,8 +1525,8 @@ int PWR_setCPUFrequency_sysfs(int freq_khz); // Multi-cluster CPU topology support /////////////////////////////// -// Forward declarations from player_cpu.h (avoid circular include) -struct PlayerCPUTopology; +// Forward declarations from cpu.h (avoid circular include) +struct CPUTopology; /** * Detects CPU topology from sysfs. @@ -1541,7 +1541,7 @@ struct PlayerCPUTopology; * @param topology Output structure to populate * @return Number of clusters found (0 on failure, 1 for single-cluster) */ -int PWR_detectCPUTopology(struct PlayerCPUTopology* topology); +int PWR_detectCPUTopology(struct CPUTopology* topology); /** * Sets frequency bounds for a CPU cluster. diff --git a/workspace/all/common/build.mk b/workspace/all/common/build.mk index 5a29dc99..5e9774fd 100644 --- a/workspace/all/common/build.mk +++ b/workspace/all/common/build.mk @@ -67,7 +67,7 @@ include $(COMMON_DIR)/cflags.mk ########################################################### # Paths and sources -INCDIR = -I. -I$(COMMON_DIR)/ -I$(PLATFORM_DIR)/ -I$(PLATFORM_DEPTH)all/player/libretro-common/include -isystem $(PLATFORM_DEPTH)all/vendor/stb $(EXTRA_INCDIR) +INCDIR = -I. 
-I$(COMMON_DIR)/ -I$(PLATFORM_DIR)/ -I$(PLATFORM_DEPTH)all/player/ -I$(PLATFORM_DEPTH)all/player/libretro-common/include -isystem $(PLATFORM_DEPTH)all/vendor/stb $(EXTRA_INCDIR) COMMON_SOURCE = \ $(COMMON_DIR)/utils.c \ @@ -81,6 +81,7 @@ COMMON_SOURCE = \ $(COMMON_DIR)/scaler.c \ $(COMMON_DIR)/platform_variant.c \ $(COMMON_DIR)/paths.c \ + $(COMMON_DIR)/cpu.c \ $(PLATFORM_DIR)/platform.c # Add shared rendering modules diff --git a/workspace/all/player/player_cpu.c b/workspace/all/common/cpu.c similarity index 72% rename from workspace/all/player/player_cpu.c rename to workspace/all/common/cpu.c index c3902de3..e9683e85 100644 --- a/workspace/all/player/player_cpu.c +++ b/workspace/all/common/cpu.c @@ -1,11 +1,11 @@ /** - * player_cpu.c - Auto CPU scaling utilities + * cpu.c - CPU scaling and topology utilities * - * Implements dynamic CPU frequency scaling based on emulation performance. - * Extracted from player.c for testability. + * Implements CPU topology detection and dynamic frequency scaling. + * Used by both launcher (topology detection) and player (autoscaling). * - * The algorithm uses frame execution time (90th percentile) to determine - * CPU utilization, then adjusts frequency to maintain target utilization. + * The autoscaling algorithm uses frame execution time (90th percentile) to + * determine CPU utilization, then adjusts frequency to maintain target. 
* * Key concepts: * - Performance scales linearly with frequency @@ -14,7 +14,7 @@ * - Panic path on audio underrun with cooldown */ -#include "player_cpu.h" +#include "cpu.h" #include #include @@ -37,26 +37,26 @@ static int compare_uint64(const void* a, const void* b) { // Public Functions /////////////////////////////// -void PlayerCPU_initConfig(PlayerCPUConfig* config) { - config->window_frames = PLAYER_CPU_DEFAULT_WINDOW_FRAMES; - config->util_high = PLAYER_CPU_DEFAULT_UTIL_HIGH; - config->util_low = PLAYER_CPU_DEFAULT_UTIL_LOW; - config->boost_windows = PLAYER_CPU_DEFAULT_BOOST_WINDOWS; - config->reduce_windows = PLAYER_CPU_DEFAULT_REDUCE_WINDOWS; - config->startup_grace = PLAYER_CPU_DEFAULT_STARTUP_GRACE; - config->min_freq_khz = PLAYER_CPU_DEFAULT_MIN_FREQ_KHZ; - config->target_util = PLAYER_CPU_DEFAULT_TARGET_UTIL; - config->max_step_down = PLAYER_CPU_DEFAULT_MAX_STEP_DOWN; - config->panic_step_up = PLAYER_CPU_DEFAULT_PANIC_STEP_UP; +void CPU_initConfig(CPUConfig* config) { + config->window_frames = CPU_DEFAULT_WINDOW_FRAMES; + config->util_high = CPU_DEFAULT_UTIL_HIGH; + config->util_low = CPU_DEFAULT_UTIL_LOW; + config->boost_windows = CPU_DEFAULT_BOOST_WINDOWS; + config->reduce_windows = CPU_DEFAULT_REDUCE_WINDOWS; + config->startup_grace = CPU_DEFAULT_STARTUP_GRACE; + config->min_freq_khz = CPU_DEFAULT_MIN_FREQ_KHZ; + config->target_util = CPU_DEFAULT_TARGET_UTIL; + config->max_step_down = CPU_DEFAULT_MAX_STEP_DOWN; + config->panic_step_up = CPU_DEFAULT_PANIC_STEP_UP; } -void PlayerCPU_initState(PlayerCPUState* state) { - memset(state, 0, sizeof(PlayerCPUState)); +void CPU_initState(CPUState* state) { + memset(state, 0, sizeof(CPUState)); // Set sensible defaults state->frame_budget_us = 16667; // 60fps default } -int PlayerCPU_findNearestIndex(const int* frequencies, int count, int target_khz) { +int CPU_findNearestIndex(const int* frequencies, int count, int target_khz) { if (count <= 0) return 0; @@ -73,11 +73,11 @@ int 
PlayerCPU_findNearestIndex(const int* frequencies, int count, int target_khz return best_idx; } -void PlayerCPU_detectFrequencies(PlayerCPUState* state, const PlayerCPUConfig* config, - const int* raw_frequencies, int raw_count) { +void CPU_detectFrequencies(CPUState* state, const CPUConfig* config, const int* raw_frequencies, + int raw_count) { // Filter frequencies below minimum threshold state->freq_count = 0; - for (int i = 0; i < raw_count && state->freq_count < PLAYER_CPU_MAX_FREQUENCIES; i++) { + for (int i = 0; i < raw_count && state->freq_count < CPU_MAX_FREQUENCIES; i++) { if (raw_frequencies[i] >= config->min_freq_khz) { state->frequencies[state->freq_count++] = raw_frequencies[i]; } @@ -99,22 +99,21 @@ void PlayerCPU_detectFrequencies(PlayerCPUState* state, const PlayerCPUConfig* c // POWERSAVE: 55% of max int ps_target = max_freq * 55 / 100; - state->preset_indices[PLAYER_CPU_LEVEL_POWERSAVE] = - PlayerCPU_findNearestIndex(state->frequencies, state->freq_count, ps_target); + state->preset_indices[CPU_LEVEL_POWERSAVE] = + CPU_findNearestIndex(state->frequencies, state->freq_count, ps_target); // NORMAL: 80% of max int normal_target = max_freq * 80 / 100; - state->preset_indices[PLAYER_CPU_LEVEL_NORMAL] = - PlayerCPU_findNearestIndex(state->frequencies, state->freq_count, normal_target); + state->preset_indices[CPU_LEVEL_NORMAL] = + CPU_findNearestIndex(state->frequencies, state->freq_count, normal_target); // PERFORMANCE: max frequency - state->preset_indices[PLAYER_CPU_LEVEL_PERFORMANCE] = state->freq_count - 1; + state->preset_indices[CPU_LEVEL_PERFORMANCE] = state->freq_count - 1; state->frequencies_detected = 1; } -void PlayerCPU_reset(PlayerCPUState* state, const PlayerCPUConfig* config, double fps, - unsigned current_underruns) { +void CPU_reset(CPUState* state, const CPUConfig* config, double fps, unsigned current_underruns) { (void)config; // May be used in future for configurable grace period state->frame_count = 0; @@ -136,21 +135,21 @@ void 
PlayerCPU_reset(PlayerCPUState* state, const PlayerCPUConfig* config, doubl memset(state->frame_times, 0, sizeof(state->frame_times)); } -void PlayerCPU_recordFrameTime(PlayerCPUState* state, uint64_t frame_time_us) { - state->frame_times[state->frame_time_index % PLAYER_CPU_FRAME_BUFFER_SIZE] = frame_time_us; +void CPU_recordFrameTime(CPUState* state, uint64_t frame_time_us) { + state->frame_times[state->frame_time_index % CPU_FRAME_BUFFER_SIZE] = frame_time_us; state->frame_time_index++; } -uint64_t PlayerCPU_percentile90(const uint64_t* frame_times, int count) { +uint64_t CPU_percentile90(const uint64_t* frame_times, int count) { if (count <= 0) return 0; // Limit to buffer size - if (count > PLAYER_CPU_FRAME_BUFFER_SIZE) - count = PLAYER_CPU_FRAME_BUFFER_SIZE; + if (count > CPU_FRAME_BUFFER_SIZE) + count = CPU_FRAME_BUFFER_SIZE; // Copy and sort - uint64_t sorted[PLAYER_CPU_FRAME_BUFFER_SIZE]; + uint64_t sorted[CPU_FRAME_BUFFER_SIZE]; memcpy(sorted, frame_times, count * sizeof(uint64_t)); qsort(sorted, count, sizeof(uint64_t), compare_uint64); @@ -162,7 +161,7 @@ uint64_t PlayerCPU_percentile90(const uint64_t* frame_times, int count) { return sorted[p90_idx]; } -int PlayerCPU_predictFrequency(int current_freq, int current_util, int target_util) { +int CPU_predictFrequency(int current_freq, int current_util, int target_util) { if (target_util <= 0) return current_freq; @@ -170,19 +169,19 @@ int PlayerCPU_predictFrequency(int current_freq, int current_util, int target_ut return current_freq * current_util / target_util; } -int PlayerCPU_getPresetPercentage(PlayerCPULevel level) { +int CPU_getPresetPercentage(CPULevel level) { switch (level) { - case PLAYER_CPU_LEVEL_POWERSAVE: + case CPU_LEVEL_POWERSAVE: return 55; - case PLAYER_CPU_LEVEL_NORMAL: + case CPU_LEVEL_NORMAL: return 80; - case PLAYER_CPU_LEVEL_PERFORMANCE: + case CPU_LEVEL_PERFORMANCE: default: return 100; } } -int PlayerCPU_getPerformancePercent(const PlayerCPUState* state) { +int 
CPU_getPerformancePercent(const CPUState* state) { if (!state) return -1; @@ -211,7 +210,7 @@ int PlayerCPU_getPerformancePercent(const PlayerCPUState* state) { } } -const char* PlayerCPU_getModeName(const PlayerCPUState* state) { +const char* CPU_getModeName(const CPUState* state) { if (!state) return "disabled"; @@ -228,12 +227,11 @@ const char* PlayerCPU_getModeName(const PlayerCPUState* state) { } } -PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* config, - bool fast_forward, bool show_menu, unsigned current_underruns, - PlayerCPUResult* result) { +CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forward, bool show_menu, + unsigned current_underruns, CPUResult* result) { // Initialize result if provided if (result) { - result->decision = PLAYER_CPU_DECISION_NONE; + result->decision = CPU_DECISION_NONE; result->new_index = state->target_index; result->new_level = state->target_level; result->utilization = 0; @@ -243,23 +241,23 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* // Skip if scaling is disabled (0 or 1 frequency available) AND not using topology mode if (state->scaling_disabled && !state->use_topology) { if (result) - result->decision = PLAYER_CPU_DECISION_SKIP; - return PLAYER_CPU_DECISION_SKIP; + result->decision = CPU_DECISION_SKIP; + return CPU_DECISION_SKIP; } // Skip during special states if (fast_forward || show_menu) { if (result) - result->decision = PLAYER_CPU_DECISION_SKIP; - return PLAYER_CPU_DECISION_SKIP; + result->decision = CPU_DECISION_SKIP; + return CPU_DECISION_SKIP; } // Startup grace period if (state->startup_frames < config->startup_grace) { state->startup_frames++; if (result) - result->decision = PLAYER_CPU_DECISION_SKIP; - return PLAYER_CPU_DECISION_SKIP; + result->decision = CPU_DECISION_SKIP; + return CPU_DECISION_SKIP; } // Get current indices based on mode @@ -292,7 +290,7 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* 
state, const PlayerCPUConfig* new_state = max_state; state->target_state = new_state; if (result) { - result->decision = PLAYER_CPU_DECISION_PANIC; + result->decision = CPU_DECISION_PANIC; result->new_index = new_state; // Use new_index for state index } } else if (state->use_granular) { @@ -301,7 +299,7 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* new_idx = max_idx; state->target_index = new_idx; if (result) { - result->decision = PLAYER_CPU_DECISION_PANIC; + result->decision = CPU_DECISION_PANIC; result->new_index = new_idx; } } else { @@ -310,7 +308,7 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* new_level = 2; state->target_level = new_level; if (result) { - result->decision = PLAYER_CPU_DECISION_PANIC; + result->decision = CPU_DECISION_PANIC; result->new_level = new_level; } } @@ -320,7 +318,7 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* state->panic_cooldown = 8; // ~4 seconds before allowing reduction state->last_underrun = 0; // Reset after handling - return PLAYER_CPU_DECISION_PANIC; + return CPU_DECISION_PANIC; } // Update underrun tracking (even if at max) @@ -333,21 +331,21 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* // Check if window is complete if (state->frame_count < config->window_frames) { - return PLAYER_CPU_DECISION_NONE; + return CPU_DECISION_NONE; } // Calculate 90th percentile frame time int samples = state->frame_time_index; - if (samples > PLAYER_CPU_FRAME_BUFFER_SIZE) - samples = PLAYER_CPU_FRAME_BUFFER_SIZE; + if (samples > CPU_FRAME_BUFFER_SIZE) + samples = CPU_FRAME_BUFFER_SIZE; if (samples < 5) { // Not enough samples - reset and wait state->frame_count = 0; - return PLAYER_CPU_DECISION_NONE; + return CPU_DECISION_NONE; } - uint64_t p90_time = PlayerCPU_percentile90(state->frame_times, samples); + uint64_t p90_time = CPU_percentile90(state->frame_times, samples); // Calculate 
utilization as percentage of frame budget unsigned util = 0; @@ -362,7 +360,7 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* result->p90_time = p90_time; } - PlayerCPUDecision decision = PLAYER_CPU_DECISION_NONE; + CPUDecision decision = CPU_DECISION_NONE; if (state->use_topology) { // Topology mode: multi-cluster PerfState scaling @@ -385,10 +383,10 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* state->target_state = new_state; state->high_util_windows = 0; - decision = PLAYER_CPU_DECISION_BOOST; + decision = CPU_DECISION_BOOST; if (result) { - result->decision = PLAYER_CPU_DECISION_BOOST; + result->decision = CPU_DECISION_BOOST; result->new_index = new_state; } } @@ -409,10 +407,10 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* state->target_state = new_state; state->low_util_windows = 0; - decision = PLAYER_CPU_DECISION_REDUCE; + decision = CPU_DECISION_REDUCE; if (result) { - result->decision = PLAYER_CPU_DECISION_REDUCE; + result->decision = CPU_DECISION_REDUCE; result->new_index = new_state; } } @@ -437,10 +435,9 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* if (state->high_util_windows >= config->boost_windows && current_idx < max_idx) { // Predict optimal frequency using linear scaling - int needed_freq = - PlayerCPU_predictFrequency(current_freq, util, config->target_util); + int needed_freq = CPU_predictFrequency(current_freq, util, config->target_util); int new_idx = - PlayerCPU_findNearestIndex(state->frequencies, state->freq_count, needed_freq); + CPU_findNearestIndex(state->frequencies, state->freq_count, needed_freq); // Ensure we actually go higher if (new_idx <= current_idx) @@ -450,10 +447,10 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* state->target_index = new_idx; state->high_util_windows = 0; - decision = PLAYER_CPU_DECISION_BOOST; + decision = 
CPU_DECISION_BOOST; if (result) { - result->decision = PLAYER_CPU_DECISION_BOOST; + result->decision = CPU_DECISION_BOOST; result->new_index = new_idx; } } @@ -468,10 +465,9 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* if (reduce_ok) { // Predict lower frequency - int needed_freq = - PlayerCPU_predictFrequency(current_freq, util, config->target_util); + int needed_freq = CPU_predictFrequency(current_freq, util, config->target_util); int new_idx = - PlayerCPU_findNearestIndex(state->frequencies, state->freq_count, needed_freq); + CPU_findNearestIndex(state->frequencies, state->freq_count, needed_freq); // Ensure we actually go lower if (new_idx >= current_idx) @@ -486,10 +482,10 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* state->target_index = new_idx; state->low_util_windows = 0; - decision = PLAYER_CPU_DECISION_REDUCE; + decision = CPU_DECISION_REDUCE; if (result) { - result->decision = PLAYER_CPU_DECISION_REDUCE; + result->decision = CPU_DECISION_REDUCE; result->new_index = new_idx; } } @@ -516,10 +512,10 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* int new_level = current_level + 1; state->target_level = new_level; state->high_util_windows = 0; - decision = PLAYER_CPU_DECISION_BOOST; + decision = CPU_DECISION_BOOST; if (result) { - result->decision = PLAYER_CPU_DECISION_BOOST; + result->decision = CPU_DECISION_BOOST; result->new_level = new_level; } } @@ -529,10 +525,10 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* int new_level = current_level - 1; state->target_level = new_level; state->low_util_windows = 0; - decision = PLAYER_CPU_DECISION_REDUCE; + decision = CPU_DECISION_REDUCE; if (result) { - result->decision = PLAYER_CPU_DECISION_REDUCE; + result->decision = CPU_DECISION_REDUCE; result->new_level = new_level; } } @@ -555,24 +551,24 @@ extern int PWR_setThreadAffinity(int cpu_mask); /** * Returns the 
governor string for a given governor type. */ -static const char* governor_name(PlayerCPUGovernor gov) { +static const char* governor_name(CPUGovernor gov) { switch (gov) { - case PLAYER_CPU_GOV_POWERSAVE: + case CPU_GOV_POWERSAVE: return "powersave"; - case PLAYER_CPU_GOV_SCHEDUTIL: + case CPU_GOV_SCHEDUTIL: return "schedutil"; - case PLAYER_CPU_GOV_PERFORMANCE: + case CPU_GOV_PERFORMANCE: return "performance"; default: return "schedutil"; } } -void PlayerCPU_initTopology(PlayerCPUTopology* topology) { - memset(topology, 0, sizeof(PlayerCPUTopology)); +void CPU_initTopology(CPUTopology* topology) { + memset(topology, 0, sizeof(CPUTopology)); } -int PlayerCPU_parseCPUList(const char* str, int* cpu_count) { +int CPU_parseCPUList(const char* str, int* cpu_count) { if (!str || !cpu_count) { if (cpu_count) *cpu_count = 0; @@ -620,16 +616,16 @@ int PlayerCPU_parseCPUList(const char* str, int* cpu_count) { return mask; } -void PlayerCPU_classifyClusters(PlayerCPUCluster* clusters, int count) { +void CPU_classifyClusters(CPUCluster* clusters, int count) { if (!clusters || count <= 0) return; for (int i = 0; i < count; i++) { - PlayerCPUCluster* cluster = &clusters[i]; + CPUCluster* cluster = &clusters[i]; if (i == 0) { // First cluster (lowest max_khz) is always LITTLE - cluster->type = PLAYER_CPU_CLUSTER_LITTLE; + cluster->type = CPU_CLUSTER_LITTLE; } else if (i == count - 1) { // Last cluster might be PRIME if single CPU or significantly faster int prev_max = clusters[i - 1].max_khz; @@ -639,19 +635,19 @@ void PlayerCPU_classifyClusters(PlayerCPUCluster* clusters, int count) { } if (cluster->cpu_count == 1 || freq_gap_percent > 10) { - cluster->type = PLAYER_CPU_CLUSTER_PRIME; + cluster->type = CPU_CLUSTER_PRIME; } else { - cluster->type = PLAYER_CPU_CLUSTER_BIG; + cluster->type = CPU_CLUSTER_BIG; } } else { // Middle clusters are BIG - cluster->type = PLAYER_CPU_CLUSTER_BIG; + cluster->type = CPU_CLUSTER_BIG; } } } -void PlayerCPU_pickRepresentativeFreqs(const 
PlayerCPUCluster* cluster, int* low_khz, int* mid_khz, - int* high_khz) { +void CPU_pickRepresentativeFreqs(const CPUCluster* cluster, int* low_khz, int* mid_khz, + int* high_khz) { if (!cluster || cluster->freq_count <= 0) { if (low_khz) *low_khz = 0; @@ -688,44 +684,44 @@ void PlayerCPU_pickRepresentativeFreqs(const PlayerCPUCluster* cluster, int* low * @param clusters Array of cluster info * @param governor_level 0=powersave, 1=schedutil, 2=performance for active cluster */ -static void build_perf_state(PlayerCPUPerfState* state, int cluster_count, int active_cluster_idx, - const PlayerCPUCluster* clusters, int governor_level) { +static void build_perf_state(CPUPerfState* state, int cluster_count, int active_cluster_idx, + const CPUCluster* clusters, int governor_level) { memset(state, 0, sizeof(*state)); state->active_cluster_idx = active_cluster_idx; state->cpu_affinity_mask = 0; // Set governors for all clusters - for (int i = 0; i < cluster_count && i < PLAYER_CPU_MAX_CLUSTERS; i++) { - const PlayerCPUCluster* cluster = &clusters[i]; + for (int i = 0; i < cluster_count && i < CPU_MAX_CLUSTERS; i++) { + const CPUCluster* cluster = &clusters[i]; if (i == active_cluster_idx) { // Active cluster: use the specified governor level switch (governor_level) { case 0: - state->cluster_governor[i] = PLAYER_CPU_GOV_POWERSAVE; + state->cluster_governor[i] = CPU_GOV_POWERSAVE; break; case 1: - state->cluster_governor[i] = PLAYER_CPU_GOV_SCHEDUTIL; + state->cluster_governor[i] = CPU_GOV_SCHEDUTIL; break; case 2: default: - state->cluster_governor[i] = PLAYER_CPU_GOV_PERFORMANCE; + state->cluster_governor[i] = CPU_GOV_PERFORMANCE; break; } // Add active cluster to affinity state->cpu_affinity_mask |= cluster->cpu_mask; } else { // Inactive clusters: powersave (let them idle/sleep) - state->cluster_governor[i] = PLAYER_CPU_GOV_POWERSAVE; + state->cluster_governor[i] = CPU_GOV_POWERSAVE; } } } -void PlayerCPU_buildPerfStates(PlayerCPUState* state, const PlayerCPUConfig* 
config) { +void CPU_buildPerfStates(CPUState* state, const CPUConfig* config) { (void)config; // Reserved for future configuration - PlayerCPUTopology* topo = &state->topology; + CPUTopology* topo = &state->topology; if (!topo->topology_detected || topo->cluster_count <= 1) { // Single-cluster or no topology: don't use PerfState mode @@ -750,20 +746,19 @@ void PlayerCPU_buildPerfStates(PlayerCPUState* state, const PlayerCPUConfig* con // // Tri-cluster adds 3 more states for PRIME (6-8) - for (int cluster_idx = 0; cluster_idx < cluster_count && state_idx < PLAYER_CPU_MAX_PERF_STATES; + for (int cluster_idx = 0; cluster_idx < cluster_count && state_idx < CPU_MAX_PERF_STATES; cluster_idx++) { // 3 governor levels per cluster - for (int gov_level = 0; gov_level < 3 && state_idx < PLAYER_CPU_MAX_PERF_STATES; - gov_level++) { - PlayerCPUPerfState* ps = &topo->states[state_idx]; + for (int gov_level = 0; gov_level < 3 && state_idx < CPU_MAX_PERF_STATES; gov_level++) { + CPUPerfState* ps = &topo->states[state_idx]; build_perf_state(ps, cluster_count, cluster_idx, topo->clusters, gov_level); // For PRIME cluster, include BIG in affinity (allow scheduler some flexibility) if (cluster_idx == cluster_count - 1 && cluster_count >= 3 && - topo->clusters[cluster_idx].type == PLAYER_CPU_CLUSTER_PRIME) { + topo->clusters[cluster_idx].type == CPU_CLUSTER_PRIME) { // Add BIG cluster(s) to affinity for (int i = 1; i < cluster_idx; i++) { - if (topo->clusters[i].type == PLAYER_CPU_CLUSTER_BIG) { + if (topo->clusters[i].type == CPU_CLUSTER_BIG) { ps->cpu_affinity_mask |= topo->clusters[i].cpu_mask; } } @@ -779,8 +774,8 @@ void PlayerCPU_buildPerfStates(PlayerCPUState* state, const PlayerCPUConfig* con state->current_state = -1; // Not yet applied } -int PlayerCPU_applyPerfState(PlayerCPUState* state) { - PlayerCPUTopology* topo = &state->topology; +int CPU_applyPerfState(CPUState* state) { + CPUTopology* topo = &state->topology; if (!state->use_topology || topo->state_count <= 0) { 
return -1; @@ -792,7 +787,7 @@ int PlayerCPU_applyPerfState(PlayerCPUState* state) { if (target >= topo->state_count) target = topo->state_count - 1; - PlayerCPUPerfState* ps = &topo->states[target]; + CPUPerfState* ps = &topo->states[target]; int result = 0; // Apply governors to each cluster diff --git a/workspace/all/player/player_cpu.h b/workspace/all/common/cpu.h similarity index 68% rename from workspace/all/player/player_cpu.h rename to workspace/all/common/cpu.h index 8d7d113c..eb16298a 100644 --- a/workspace/all/player/player_cpu.h +++ b/workspace/all/common/cpu.h @@ -1,11 +1,11 @@ /** - * player_cpu.h - Auto CPU scaling utilities + * cpu.h - CPU scaling and topology utilities * - * Provides functions for dynamic CPU frequency scaling based on emulation - * performance. Uses frame timing (core.run() execution time) to determine - * optimal CPU frequency. + * Provides types and functions for CPU topology detection and dynamic + * frequency scaling. Used by both the launcher (for topology detection) + * and player (for performance-based autoscaling). * - * Three modes are supported: + * Three scaling modes are supported: * - Topology mode: Multi-cluster SoCs (big.LITTLE, etc.) using PerfState ladder * - Granular mode: Single-cluster with all available frequencies (linear scaling) * - Fallback mode: 3 fixed levels (powersave/normal/performance) @@ -19,11 +19,10 @@ * - Uses CPU affinity to guide which cluster the emulation thread runs on * * Designed for testability with injectable state and callbacks. - * Extracted from player.c. */ -#ifndef __PLAYER_CPU_H__ -#define __PLAYER_CPU_H__ +#ifndef __CPU_H__ +#define __CPU_H__ #include #include @@ -31,54 +30,50 @@ /** * Maximum number of CPU frequencies that can be detected. */ -#define PLAYER_CPU_MAX_FREQUENCIES 32 +#define CPU_MAX_FREQUENCIES 32 /** * Ring buffer size for frame timing samples. */ -#define PLAYER_CPU_FRAME_BUFFER_SIZE 64 +#define CPU_FRAME_BUFFER_SIZE 64 /** * Default tuning constants. 
- * These can be overridden via PlayerCPUConfig. + * These can be overridden via CPUConfig. */ -#define PLAYER_CPU_DEFAULT_WINDOW_FRAMES 30 // ~500ms at 60fps -#define PLAYER_CPU_DEFAULT_UTIL_HIGH 85 // Boost threshold (%) -#define PLAYER_CPU_DEFAULT_UTIL_LOW 55 // Reduce threshold (%) -#define PLAYER_CPU_DEFAULT_BOOST_WINDOWS 2 // Windows before boost (~1s) -#define PLAYER_CPU_DEFAULT_REDUCE_WINDOWS 4 // Windows before reduce (~2s) -#define PLAYER_CPU_DEFAULT_STARTUP_GRACE 300 // Frames to skip (~5s at 60fps) -#define PLAYER_CPU_DEFAULT_MIN_FREQ_KHZ 0 // No minimum (panic failsafe handles problematic freqs) -#define PLAYER_CPU_DEFAULT_TARGET_UTIL 70 // Target utilization after change -#define PLAYER_CPU_DEFAULT_MAX_STEP_DOWN 1 // Max frequency steps when reducing -#define PLAYER_CPU_DEFAULT_PANIC_STEP_UP 2 // Frequency steps on panic (underrun) -#define PLAYER_CPU_PANIC_THRESHOLD 3 // Block frequency after this many panics +#define CPU_DEFAULT_WINDOW_FRAMES 30 // ~500ms at 60fps +#define CPU_DEFAULT_UTIL_HIGH 85 // Boost threshold (%) +#define CPU_DEFAULT_UTIL_LOW 55 // Reduce threshold (%) +#define CPU_DEFAULT_BOOST_WINDOWS 2 // Windows before boost (~1s) +#define CPU_DEFAULT_REDUCE_WINDOWS 4 // Windows before reduce (~2s) +#define CPU_DEFAULT_STARTUP_GRACE 300 // Frames to skip (~5s at 60fps) +#define CPU_DEFAULT_MIN_FREQ_KHZ 0 // No minimum (panic failsafe handles problematic freqs) +#define CPU_DEFAULT_TARGET_UTIL 70 // Target utilization after change +#define CPU_DEFAULT_MAX_STEP_DOWN 1 // Max frequency steps when reducing +#define CPU_DEFAULT_PANIC_STEP_UP 2 // Frequency steps on panic (underrun) +#define CPU_PANIC_THRESHOLD 3 // Block frequency after this many panics /** * Multi-cluster topology constants. 
*/ -#define PLAYER_CPU_MAX_CLUSTERS 8 // Maximum CPU clusters (policies) -#define PLAYER_CPU_MAX_PERF_STATES 16 // Maximum performance states in ladder -#define PLAYER_CPU_MAX_FREQS_PER_CLUSTER 16 // Maximum frequencies per cluster +#define CPU_MAX_CLUSTERS 8 // Maximum CPU clusters (policies) +#define CPU_MAX_PERF_STATES 16 // Maximum performance states in ladder +#define CPU_MAX_FREQS_PER_CLUSTER 16 // Maximum frequencies per cluster /** * Preset level indices. */ -typedef enum { - PLAYER_CPU_LEVEL_POWERSAVE = 0, - PLAYER_CPU_LEVEL_NORMAL = 1, - PLAYER_CPU_LEVEL_PERFORMANCE = 2 -} PlayerCPULevel; +typedef enum { CPU_LEVEL_POWERSAVE = 0, CPU_LEVEL_NORMAL = 1, CPU_LEVEL_PERFORMANCE = 2 } CPULevel; /** * Cluster type classification based on relative performance. * Determined by sorting clusters by max_khz and analyzing the distribution. */ typedef enum { - PLAYER_CPU_CLUSTER_LITTLE = 0, // Efficiency cores (lowest max_khz) - PLAYER_CPU_CLUSTER_BIG = 1, // Performance cores (middle) - PLAYER_CPU_CLUSTER_PRIME = 2, // Premium core (highest max_khz, often single) -} PlayerCPUClusterType; + CPU_CLUSTER_LITTLE = 0, // Efficiency cores (lowest max_khz) + CPU_CLUSTER_BIG = 1, // Performance cores (middle) + CPU_CLUSTER_PRIME = 2, // Premium core (highest max_khz, often single) +} CPUClusterType; /** * Governor types for PerfState ladder. @@ -90,10 +85,10 @@ typedef enum { * - PERFORMANCE: runs at maximum frequency (full power) */ typedef enum { - PLAYER_CPU_GOV_POWERSAVE = 0, // Min frequency - for light workloads - PLAYER_CPU_GOV_SCHEDUTIL = 1, // Dynamic scaling - kernel finds sweet spot - PLAYER_CPU_GOV_PERFORMANCE = 2, // Max frequency - for demanding workloads -} PlayerCPUGovernor; + CPU_GOV_POWERSAVE = 0, // Min frequency - for light workloads + CPU_GOV_SCHEDUTIL = 1, // Dynamic scaling - kernel finds sweet spot + CPU_GOV_PERFORMANCE = 2, // Max frequency - for demanding workloads +} CPUGovernor; /** * Information about a single CPU cluster (cpufreq policy). 
@@ -103,13 +98,12 @@ typedef struct { int policy_id; // Policy number (0, 4, 7, etc. from policyN) int cpu_mask; // Bitmask of CPUs in this cluster int cpu_count; // Number of CPUs in cluster - int frequencies - [PLAYER_CPU_MAX_FREQS_PER_CLUSTER]; // Available frequencies (kHz, sorted ascending) + int frequencies[CPU_MAX_FREQS_PER_CLUSTER]; // Available frequencies (kHz, sorted ascending) int freq_count; // Number of frequencies int min_khz; // cpuinfo_min_freq int max_khz; // cpuinfo_max_freq - PlayerCPUClusterType type; // LITTLE/BIG/PRIME classification -} PlayerCPUCluster; + CPUClusterType type; // LITTLE/BIG/PRIME classification +} CPUCluster; /** * A performance state represents one step in the autoscaler's ladder. @@ -122,33 +116,33 @@ typedef struct { * This works WITH the kernel's frequency scaling rather than against it. */ typedef struct { - PlayerCPUGovernor cluster_governor[PLAYER_CPU_MAX_CLUSTERS]; // Governor per cluster + CPUGovernor cluster_governor[CPU_MAX_CLUSTERS]; // Governor per cluster int cpu_affinity_mask; // Bitmask of CPUs for emulation thread int active_cluster_idx; // Which cluster is the "active" one -} PlayerCPUPerfState; +} CPUPerfState; /** * Complete CPU topology information detected from sysfs. * Populated by PWR_detectCPUTopology() at initialization. */ -typedef struct PlayerCPUTopology { - PlayerCPUCluster clusters[PLAYER_CPU_MAX_CLUSTERS]; // Detected clusters (sorted by max_khz) +typedef struct CPUTopology { + CPUCluster clusters[CPU_MAX_CLUSTERS]; // Detected clusters (sorted by max_khz) int cluster_count; // Number of clusters detected - PlayerCPUPerfState states[PLAYER_CPU_MAX_PERF_STATES]; // Performance state ladder + CPUPerfState states[CPU_MAX_PERF_STATES]; // Performance state ladder int state_count; // Number of states in ladder int topology_detected; // 1 if detection completed successfully -} PlayerCPUTopology; +} CPUTopology; /** - * Decision type returned by PlayerCPU_update(). 
+ * Decision type returned by CPU_update(). */ typedef enum { - PLAYER_CPU_DECISION_NONE = 0, // No change needed - PLAYER_CPU_DECISION_BOOST, // Increase frequency/level - PLAYER_CPU_DECISION_REDUCE, // Decrease frequency/level - PLAYER_CPU_DECISION_PANIC, // Emergency boost (underrun detected) - PLAYER_CPU_DECISION_SKIP // Skipped (grace period, menu, etc.) -} PlayerCPUDecision; + CPU_DECISION_NONE = 0, // No change needed + CPU_DECISION_BOOST, // Increase frequency/level + CPU_DECISION_REDUCE, // Decrease frequency/level + CPU_DECISION_PANIC, // Emergency boost (underrun detected) + CPU_DECISION_SKIP // Skipped (grace period, menu, etc.) +} CPUDecision; /** * Configuration constants for auto CPU scaling. @@ -164,7 +158,7 @@ typedef struct { unsigned int target_util; // Target utilization after frequency change int max_step_down; // Max frequency steps when reducing int panic_step_up; // Frequency steps on panic (underrun) -} PlayerCPUConfig; +} CPUConfig; /** * State for auto CPU scaling. 
@@ -172,7 +166,7 @@ typedef struct { */ typedef struct { // Frequency array (populated by detectFrequencies) - int frequencies[PLAYER_CPU_MAX_FREQUENCIES]; // Available frequencies (kHz, sorted low→high) + int frequencies[CPU_MAX_FREQUENCIES]; // Available frequencies (kHz, sorted low→high) int freq_count; // Number of valid frequencies // Granular mode state @@ -194,7 +188,7 @@ typedef struct { int panic_cooldown; // Windows to wait after panic // Frame timing data - uint64_t frame_times[PLAYER_CPU_FRAME_BUFFER_SIZE]; // Ring buffer of frame times (us) + uint64_t frame_times[CPU_FRAME_BUFFER_SIZE]; // Ring buffer of frame times (us) int frame_time_index; // Current ring buffer position uint64_t frame_budget_us; // Target frame time (from fps) @@ -203,40 +197,40 @@ typedef struct { int scaling_disabled; // 1 if scaling is disabled (0 or 1 frequency available) // Per-frequency panic tracking (failsafe for problematic frequencies) - int panic_count[PLAYER_CPU_MAX_FREQUENCIES]; // Count of panics at each frequency + int panic_count[CPU_MAX_FREQUENCIES]; // Count of panics at each frequency // Multi-cluster topology support - PlayerCPUTopology topology; // Detected CPU topology + CPUTopology topology; // Detected CPU topology int target_state; // Target PerfState index (multi-cluster mode) int current_state; // Currently applied PerfState index int use_topology; // 1 = multi-cluster mode active int pending_affinity; // CPU mask to apply from main thread (0 = none pending) -} PlayerCPUState; +} CPUState; /** * Result of an update operation (for detailed testing). */ typedef struct { - PlayerCPUDecision decision; // What decision was made + CPUDecision decision; // What decision was made int new_index; // New frequency index (if granular) int new_level; // New level (if fallback) unsigned utilization; // Calculated utilization (%) uint64_t p90_time; // 90th percentile frame time -} PlayerCPUResult; +} CPUResult; /** * Initializes config with default values. 
* * @param config Config to initialize */ -void PlayerCPU_initConfig(PlayerCPUConfig* config); +void CPU_initConfig(CPUConfig* config); /** * Initializes state to empty/zero state. * * @param state State to initialize */ -void PlayerCPU_initState(PlayerCPUState* state); +void CPU_initState(CPUState* state); /** * Finds the index of the nearest frequency to the target. @@ -246,7 +240,7 @@ void PlayerCPU_initState(PlayerCPUState* state); * @param target_khz Target frequency to find * @return Index of nearest frequency (0 if count <= 0) */ -int PlayerCPU_findNearestIndex(const int* frequencies, int count, int target_khz); +int CPU_findNearestIndex(const int* frequencies, int count, int target_khz); /** * Detects available CPU frequencies and initializes granular scaling. @@ -259,8 +253,8 @@ int PlayerCPU_findNearestIndex(const int* frequencies, int count, int target_khz * @param raw_frequencies Array of frequencies from platform * @param raw_count Number of frequencies from platform */ -void PlayerCPU_detectFrequencies(PlayerCPUState* state, const PlayerCPUConfig* config, - const int* raw_frequencies, int raw_count); +void CPU_detectFrequencies(CPUState* state, const CPUConfig* config, const int* raw_frequencies, + int raw_count); /** * Resets auto CPU state for a new session. @@ -272,8 +266,7 @@ void PlayerCPU_detectFrequencies(PlayerCPUState* state, const PlayerCPUConfig* c * @param fps Game's target FPS (for frame budget calculation) * @param current_underruns Current underrun count from audio system */ -void PlayerCPU_reset(PlayerCPUState* state, const PlayerCPUConfig* config, double fps, - unsigned current_underruns); +void CPU_reset(CPUState* state, const CPUConfig* config, double fps, unsigned current_underruns); /** * Records a frame time sample. 
@@ -283,7 +276,7 @@ void PlayerCPU_reset(PlayerCPUState* state, const PlayerCPUConfig* config, doubl * @param state State to update * @param frame_time_us Frame execution time in microseconds */ -void PlayerCPU_recordFrameTime(PlayerCPUState* state, uint64_t frame_time_us); +void CPU_recordFrameTime(CPUState* state, uint64_t frame_time_us); /** * Main update function - determines if CPU frequency should change. @@ -299,9 +292,8 @@ void PlayerCPU_recordFrameTime(PlayerCPUState* state, uint64_t frame_time_us); * @param result Optional output for detailed result info * @return Decision type (NONE, BOOST, REDUCE, PANIC, SKIP) */ -PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* config, - bool fast_forward, bool show_menu, unsigned current_underruns, - PlayerCPUResult* result); +CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forward, bool show_menu, + unsigned current_underruns, CPUResult* result); /** * Calculates the recommended frequency for a target utilization. @@ -313,7 +305,7 @@ PlayerCPUDecision PlayerCPU_update(PlayerCPUState* state, const PlayerCPUConfig* * @param target_util Target utilization percentage * @return Recommended frequency in kHz */ -int PlayerCPU_predictFrequency(int current_freq, int current_util, int target_util); +int CPU_predictFrequency(int current_freq, int current_util, int target_util); /** * Returns the percentage of max frequency for a preset level. @@ -321,7 +313,7 @@ int PlayerCPU_predictFrequency(int current_freq, int current_util, int target_ut * @param level Preset level (0=POWERSAVE, 1=NORMAL, 2=PERFORMANCE) * @return Percentage of max frequency (55, 80, or 100) */ -int PlayerCPU_getPresetPercentage(PlayerCPULevel level); +int CPU_getPresetPercentage(CPULevel level); /** * Returns the current performance level as a normalized percentage (0-100). 
@@ -334,7 +326,7 @@ int PlayerCPU_getPresetPercentage(PlayerCPULevel level); * @param state CPU state to query * @return Performance level 0-100, or -1 if scaling disabled */ -int PlayerCPU_getPerformancePercent(const PlayerCPUState* state); +int CPU_getPerformancePercent(const CPUState* state); /** * Returns a string describing the current CPU scaling mode. @@ -342,7 +334,7 @@ int PlayerCPU_getPerformancePercent(const PlayerCPUState* state); * @param state CPU state to query * @return "topology", "granular", "fallback", or "disabled" */ -const char* PlayerCPU_getModeName(const PlayerCPUState* state); +const char* CPU_getModeName(const CPUState* state); /** * Calculates the 90th percentile of frame times. @@ -351,7 +343,7 @@ const char* PlayerCPU_getModeName(const PlayerCPUState* state); * @param count Number of samples (uses min of count and buffer size) * @return 90th percentile value */ -uint64_t PlayerCPU_percentile90(const uint64_t* frame_times, int count); +uint64_t CPU_percentile90(const uint64_t* frame_times, int count); /////////////////////////////// // Multi-cluster topology functions @@ -362,7 +354,7 @@ uint64_t PlayerCPU_percentile90(const uint64_t* frame_times, int count); * * @param topology Topology to initialize */ -void PlayerCPU_initTopology(PlayerCPUTopology* topology); +void CPU_initTopology(CPUTopology* topology); /** * Builds the PerfState ladder from detected topology. @@ -381,7 +373,7 @@ void PlayerCPU_initTopology(PlayerCPUTopology* topology); * @param state CPU state with populated topology.clusters * @param config Configuration */ -void PlayerCPU_buildPerfStates(PlayerCPUState* state, const PlayerCPUConfig* config); +void CPU_buildPerfStates(CPUState* state, const CPUConfig* config); /** * Applies a PerfState by setting cluster governors and thread affinity. 
@@ -392,7 +384,7 @@ void PlayerCPU_buildPerfStates(PlayerCPUState* state, const PlayerCPUConfig* con * @param state CPU state with target_state set * @return 0 on success, -1 on failure */ -int PlayerCPU_applyPerfState(PlayerCPUState* state); +int CPU_applyPerfState(CPUState* state); /** * Parses a CPU list string (e.g., "0-3" or "0 1 2 3") into a bitmask. @@ -401,7 +393,7 @@ int PlayerCPU_applyPerfState(PlayerCPUState* state); * @param cpu_count Output: number of CPUs in the list * @return Bitmask of CPUs */ -int PlayerCPU_parseCPUList(const char* str, int* cpu_count); +int CPU_parseCPUList(const char* str, int* cpu_count); /** * Classifies clusters based on their relative performance. @@ -414,7 +406,7 @@ int PlayerCPU_parseCPUList(const char* str, int* cpu_count); * @param clusters Array of clusters (must be sorted by max_khz ascending) * @param count Number of clusters */ -void PlayerCPU_classifyClusters(PlayerCPUCluster* clusters, int count); +void CPU_classifyClusters(CPUCluster* clusters, int count); /** * Picks 3 representative frequencies from a cluster's available frequencies. @@ -427,7 +419,7 @@ void PlayerCPU_classifyClusters(PlayerCPUCluster* clusters, int count); * @param mid_khz Output: mid frequency (freqs[count/2]) * @param high_khz Output: high frequency (freqs[count-1]) */ -void PlayerCPU_pickRepresentativeFreqs(const PlayerCPUCluster* cluster, int* low_khz, int* mid_khz, - int* high_khz); +void CPU_pickRepresentativeFreqs(const CPUCluster* cluster, int* low_khz, int* mid_khz, + int* high_khz); -#endif // __PLAYER_CPU_H__ +#endif // __CPU_H__ diff --git a/workspace/all/launcher/Makefile b/workspace/all/launcher/Makefile index b00c467a..b9ae3563 100644 --- a/workspace/all/launcher/Makefile +++ b/workspace/all/launcher/Makefile @@ -21,10 +21,11 @@ SDL ?= SDL ########################################################### TARGET = launcher -INCDIR = -I. 
-I../common/ -I../player/libretro-common/include/ -isystem ../vendor/stb/ -I../../$(PLATFORM)/platform/ +INCDIR = -I. -I../common/ -I../player/ -I../player/libretro-common/include/ -isystem ../vendor/stb/ -I../../$(PLATFORM)/platform/ SOURCE = $(TARGET).c ../common/scaler.c ../common/utils.c ../common/nointro_parser.c \ ../common/api.c ../common/ui_layout.c ../common/log.c ../common/pad.c ../common/paths.c \ ../common/gfx_text.c ../common/platform_variant.c ../common/stb_ds_impl.c \ + ../common/cpu.c \ launcher_entry.c launcher_launcher.c directory_index.c launcher_str_compare.c \ launcher_state.c launcher_m3u.c launcher_map.c launcher_file_utils.c launcher_directory.c \ launcher_context.c launcher_navigation.c launcher_thumbnail.c recent_file.c \ diff --git a/workspace/all/player/Makefile b/workspace/all/player/Makefile index be17ebfb..45356f5f 100644 --- a/workspace/all/player/Makefile +++ b/workspace/all/player/Makefile @@ -27,7 +27,7 @@ SOURCE = $(TARGET).c ../common/scaler.c ../common/utils.c ../common/nointro_pars ../common/gfx_text.c ../launcher/launcher_file_utils.c ../common/platform_variant.c \ ../common/paths.c \ player_archive.c player_memory.c player_state.c \ - player_paths.c player_cpu.c player_input.c player_mappings.c \ + player_paths.c ../common/cpu.c player_input.c player_mappings.c \ player_video_convert.c player_rotation.c player_config.c player_context.c \ player_menu.c player_env.c player_game.c player_scaler.c player_core.c \ ../common/gl_video.c \ diff --git a/workspace/all/player/player.c b/workspace/all/player/player.c index c0465c66..104816eb 100644 --- a/workspace/all/player/player.c +++ b/workspace/all/player/player.c @@ -54,6 +54,7 @@ #include #include +#include "../common/cpu.h" #include "api.h" #include "defines.h" #include "frame_pacer.h" @@ -66,7 +67,6 @@ #include "player_config.h" #include "player_context.h" #include "player_core.h" -#include "player_cpu.h" #include "player_env.h" #include "player_game.h" #include 
"player_input.h" @@ -151,9 +151,9 @@ static int overclock = 3; // CPU speed (0=powersave, 1=normal, 2=performance, 3= // Auto CPU Scaling State (when overclock == 3) // Uses frame timing (core.run() execution time) to dynamically adjust CPU speed. -// State and config are managed via player_cpu.h structs for testability. -static PlayerCPUState auto_cpu_state; -static PlayerCPUConfig auto_cpu_config; +// State and config are managed via cpu.h structs for testability. +static CPUState auto_cpu_state; +static CPUConfig auto_cpu_config; static uint64_t auto_cpu_last_frame_start = 0; // For measuring core.run() time // Frame Pacing State @@ -859,14 +859,14 @@ static void* auto_cpu_scaling_thread(void* arg) { LOG_debug("Auto CPU: applying PerfState %d/%d\n", target_state, auto_cpu_state.topology.state_count - 1); - int result = PlayerCPU_applyPerfState(&auto_cpu_state); + int result = CPU_applyPerfState(&auto_cpu_state); if (result != 0) { LOG_warn("Auto CPU: failed to apply PerfState %d\n", target_state); } // Set pending_affinity under mutex (main thread will apply it) // This avoids race condition with main thread reading pending_affinity - PlayerCPUPerfState* ps = &auto_cpu_state.topology.states[target_state]; + CPUPerfState* ps = &auto_cpu_state.topology.states[target_state]; pthread_mutex_lock(&auto_cpu_mutex); if (ps->cpu_affinity_mask > 0) { auto_cpu_state.pending_affinity = ps->cpu_affinity_mask; @@ -1049,8 +1049,7 @@ static int auto_cpu_getCurrentFrequency(void) { * Wrapper around module function for convenience. 
*/ static int auto_cpu_findNearestIndex(int target_khz) { - return PlayerCPU_findNearestIndex(auto_cpu_state.frequencies, auto_cpu_state.freq_count, - target_khz); + return CPU_findNearestIndex(auto_cpu_state.frequencies, auto_cpu_state.freq_count, target_khz); } /** @@ -1074,7 +1073,7 @@ static void auto_cpu_detectFrequencies(void) { auto_cpu_state.use_granular = 0; // Build the PerfState ladder (3 governor levels per cluster tier) - PlayerCPU_buildPerfStates(&auto_cpu_state, &auto_cpu_config); + CPU_buildPerfStates(&auto_cpu_state, &auto_cpu_config); // Note: governors are now set by applyPerfState(), not upfront // This lets each PerfState control its own governor configuration @@ -1084,11 +1083,11 @@ static void auto_cpu_detectFrequencies(void) { // Log cluster info for (int c = 0; c < cluster_count; c++) { - PlayerCPUCluster* cluster = &auto_cpu_state.topology.clusters[c]; - const char* type_str = cluster->type == PLAYER_CPU_CLUSTER_PRIME ? "PRIME" - : cluster->type == PLAYER_CPU_CLUSTER_BIG ? "BIG" - : cluster->type == PLAYER_CPU_CLUSTER_LITTLE ? "LITTLE" - : "?"; + CPUCluster* cluster = &auto_cpu_state.topology.clusters[c]; + const char* type_str = cluster->type == CPU_CLUSTER_PRIME ? "PRIME" + : cluster->type == CPU_CLUSTER_BIG ? "BIG" + : cluster->type == CPU_CLUSTER_LITTLE ? 
"LITTLE" + : "?"; LOG_debug("Auto CPU: cluster %d (policy%d): %s, %d CPUs, %d-%d MHz\n", c, cluster->policy_id, type_str, cluster->cpu_count, cluster->min_khz / 1000, cluster->max_khz / 1000); @@ -1097,7 +1096,7 @@ static void auto_cpu_detectFrequencies(void) { // Log PerfState ladder (governor-based) static const char* gov_names[] = {"powersave", "schedutil", "performance"}; for (int s = 0; s < auto_cpu_state.topology.state_count; s++) { - PlayerCPUPerfState* ps = &auto_cpu_state.topology.states[s]; + CPUPerfState* ps = &auto_cpu_state.topology.states[s]; LOG_debug("Auto CPU: PerfState %d: cluster %d, affinity=0x%x\n", s, ps->active_cluster_idx, ps->cpu_affinity_mask); for (int c = 0; c < cluster_count; c++) { @@ -1239,9 +1238,9 @@ void setOverclock(int i) { auto_cpu_state.current_state = -1; // Force apply on first thread iteration pthread_mutex_unlock(&auto_cpu_mutex); // Apply initial state immediately (thread will maintain it) - PlayerCPU_applyPerfState(&auto_cpu_state); + CPU_applyPerfState(&auto_cpu_state); // Apply affinity directly since we're on the main (emulation) thread - PlayerCPUPerfState* ps = &auto_cpu_state.topology.states[start_state]; + CPUPerfState* ps = &auto_cpu_state.topology.states[start_state]; if (ps->cpu_affinity_mask > 0) { PWR_setThreadAffinity(ps->cpu_affinity_mask); } @@ -1334,18 +1333,17 @@ static void updateAutoCPU(void) { // Track panic at current frequency (for failsafe blocking). // If a frequency can't keep up, all lower frequencies are also blocked // because lower freq = less CPU throughput = guaranteed worse performance. 
- if (auto_cpu_state.use_granular && current_idx >= 0 && - current_idx < PLAYER_CPU_MAX_FREQUENCIES) { + if (auto_cpu_state.use_granular && current_idx >= 0 && current_idx < CPU_MAX_FREQUENCIES) { auto_cpu_state.panic_count[current_idx]++; - if (auto_cpu_state.panic_count[current_idx] >= PLAYER_CPU_PANIC_THRESHOLD) { + if (auto_cpu_state.panic_count[current_idx] >= CPU_PANIC_THRESHOLD) { LOG_warn("Auto CPU: BLOCKING %d kHz and below after %d panics (audio=%u%%)\n", auto_cpu_state.frequencies[current_idx], auto_cpu_state.panic_count[current_idx], audio_fill); // Block this frequency and all below - they can't possibly work // if this one failed (lower freq = strictly less performance) for (int i = 0; i <= current_idx; i++) { - auto_cpu_state.panic_count[i] = PLAYER_CPU_PANIC_THRESHOLD; + auto_cpu_state.panic_count[i] = CPU_PANIC_THRESHOLD; } } } @@ -1536,7 +1534,7 @@ static void updateAutoCPU(void) { // Skip blocked frequencies - find first unblocked one above new_idx. // Frequencies get blocked when they cause repeated panics. 
while (new_idx >= 0 && - auto_cpu_state.panic_count[new_idx] >= PLAYER_CPU_PANIC_THRESHOLD) { + auto_cpu_state.panic_count[new_idx] >= CPU_PANIC_THRESHOLD) { new_idx++; if (new_idx >= current_idx) { // All lower frequencies blocked - stay at current @@ -3782,7 +3780,7 @@ static void renderHWDebugHUD(int src_w, int src_h, int screen_w, int screen_h) { if (auto_cpu_state.use_topology) { // Topology mode: show state/max and performance % - int perf_pct = PlayerCPU_getPerformancePercent(&auto_cpu_state); + int perf_pct = CPU_getPerformancePercent(&auto_cpu_state); int max_state = auto_cpu_state.topology.state_count - 1; (void)snprintf(debug_text, sizeof(debug_text), "T%i/%i %i%% u:%u%% b:%u%%", current_state, max_state, perf_pct, util, fill_display); @@ -4103,7 +4101,7 @@ static void video_refresh_callback_main(const void* data, unsigned width, unsign if (auto_cpu_state.use_topology) { // Topology mode: show state/max and performance % - int perf_pct = PlayerCPU_getPerformancePercent(&auto_cpu_state); + int perf_pct = CPU_getPerformancePercent(&auto_cpu_state); int max_state = auto_cpu_state.topology.state_count - 1; (void)snprintf(debug_text, sizeof(debug_text), "T%i/%i %i%% u:%u%% b:%u%%", current_state, max_state, perf_pct, util, fill_display); @@ -6006,8 +6004,8 @@ int main(int argc, char* argv[]) { PlayerContext_initCallbacks(ctx, &callbacks); // Initialize auto CPU scaling config with defaults - PlayerCPU_initConfig(&auto_cpu_config); - PlayerCPU_initState(&auto_cpu_state); + CPU_initConfig(&auto_cpu_config); + CPU_initState(&auto_cpu_state); setOverclock(overclock); // default to normal // force a stack overflow to ensure asan is linked and actually working diff --git a/workspace/all/player/player_context.h b/workspace/all/player/player_context.h index ac48439c..58cbed09 100644 --- a/workspace/all/player/player_context.h +++ b/workspace/all/player/player_context.h @@ -29,7 +29,7 @@ #include #include -#include "player_cpu.h" +#include "../common/cpu.h" #include 
"player_internal.h" // Forward declaration for SDL_Surface (avoids pulling in SDL headers) @@ -196,8 +196,8 @@ typedef struct PlayerContext { //---------------------------------- // Auto CPU scaling //---------------------------------- - PlayerCPUState* auto_cpu_state; - PlayerCPUConfig* auto_cpu_config; + CPUState* auto_cpu_state; + CPUConfig* auto_cpu_config; //---------------------------------- // Multi-disc support diff --git a/workspace/all/player/player_loop_audioclock.inc b/workspace/all/player/player_loop_audioclock.inc index aeabd063..c2eafb2b 100644 --- a/workspace/all/player/player_loop_audioclock.inc +++ b/workspace/all/player/player_loop_audioclock.inc @@ -81,7 +81,7 @@ static void run_main_loop(void) { // Store frame time for auto CPU scaling analysis if (overclock == 3 && !fast_forward && !show_menu) { auto_cpu_state - .frame_times[auto_cpu_state.frame_time_index % PLAYER_CPU_FRAME_BUFFER_SIZE] = + .frame_times[auto_cpu_state.frame_time_index % CPU_FRAME_BUFFER_SIZE] = frame_time; auto_cpu_state.frame_time_index++; } diff --git a/workspace/all/player/player_loop_audioclock.inc.bak b/workspace/all/player/player_loop_audioclock.inc.bak deleted file mode 100644 index 6cfdd5f1..00000000 --- a/workspace/all/player/player_loop_audioclock.inc.bak +++ /dev/null @@ -1,117 +0,0 @@ -/** - * Audio-driven main loop with blocking audio writes. - * - * This loop is included by player.c when SYNC_MODE_AUDIOCLOCK is defined. - * - * Timing approach: - * - Audio hardware clock is the timing source - * - Core runs every loop iteration (no frame pacing) - * - SND_batchSamples() blocks when buffer is full (up to 10ms) - * - Audio callback drains buffer at hardware audio rate - * - Natural backpressure from audio blocking rate-limits emulation - * - * For devices with unstable vsync (e.g., M17). 
- */ - -static void run_main_loop(void) { - LOG_info("Using audioclock sync mode (audio-driven timing)\n"); - - PWR_warn(1); - PWR_disableAutosleep(); - - LOG_debug("Special_init"); - Special_init(); // after config - - LOG_debug("Entering main loop (audioclock mode)"); - sec_start = SDL_GetTicks(); - uint32_t last_ff_flip = 0; - while (!quit) { - GFX_startFrame(); - input_polled_this_frame = 0; // Reset at start of frame - - // Always run core - audio blocking in SND_batchSamples() handles timing. - // When audio buffer is full, the core will block (up to 10ms) waiting for - // the audio callback to consume samples. This naturally rate-limits emulation - // to match the audio hardware clock. - // - // During fast-forward: audio is skipped (no blocking), limitFF() controls speed. - - // Call frame time callback if registered (per libretro spec) - if (video_state.frame_time_cb) { - retro_usec_t frame_now = getMicroseconds(); - retro_usec_t delta; - if (fast_forward) { - // Use reference time during FF, don't update frame_time_last - // to avoid timing discontinuity when FF ends - delta = video_state.frame_time_ref; - } else { - if (video_state.frame_time_last == 0) { - delta = video_state.frame_time_ref; - } else { - delta = frame_now - video_state.frame_time_last; - } - video_state.frame_time_last = frame_now; - } - video_state.frame_time_cb(delta); - } - - // Report audio buffer status to core for frameskip decisions - // During FF, report audio inactive (no output during FF) - if (core.audio_buffer_status) { - if (fast_forward) { - core.audio_buffer_status(false, 0, false); - } else { - unsigned occupancy = SND_getBufferOccupancy(); - core.audio_buffer_status(true, occupancy, occupancy < 25); - } - } - - // Note: SND_newFrame() is not called here because audioclock mode uses - // blocking audio writes for timing, not the PI rate controller. 
- - // Measure frame execution time for auto CPU scaling - uint64_t frame_start = getMicroseconds(); - core.run(); - uint64_t frame_time = getMicroseconds() - frame_start; - - // Store frame time for auto CPU scaling analysis - if (overclock == 3 && !fast_forward && !show_menu) { - auto_cpu_state - .frame_times[auto_cpu_state.frame_time_index % PLAYER_CPU_FRAME_BUFFER_SIZE] = - frame_time; - auto_cpu_state.frame_time_index++; - } - - // Present frame - // During FF: throttle vsync to avoid blocking (limitFF controls speed instead) - // Normal: present every frame (may or may not wait for vsync depending on platform) - if (fast_forward) { - uint32_t now = SDL_GetTicks(); - if (now - last_ff_flip >= 30) { // Visual update every 30ms - GFX_present(&renderer); - frame_ready_for_flip = 0; - last_ff_flip = now; - } - } else { - GFX_present(&renderer); - frame_ready_for_flip = 0; - } - - limitFF(); - trackFPS(); - updateAutoCPU(); - - // Fallback input poll - ensures MENU button and shortcuts work even when - // core doesn't call input_poll_callback (e.g., showing error screens). - // Guard inside callback prevents double execution. 
- input_poll_callback(); - - if (show_menu) { - LOG_debug("Main loop: show_menu=1, entering Menu_loop"); - Menu_loop(); - LOG_debug("Main loop: returned from Menu_loop"); - } - - hdmimon(); - } -} diff --git a/workspace/all/player/player_loop_vsync.inc b/workspace/all/player/player_loop_vsync.inc index 743bef4f..717410c7 100644 --- a/workspace/all/player/player_loop_vsync.inc +++ b/workspace/all/player/player_loop_vsync.inc @@ -104,7 +104,7 @@ static void run_main_loop(void) { // Store frame time for auto CPU scaling analysis if (overclock == 3 && !fast_forward && !show_menu) { auto_cpu_state.frame_times[auto_cpu_state.frame_time_index % - PLAYER_CPU_FRAME_BUFFER_SIZE] = frame_time; + CPU_FRAME_BUFFER_SIZE] = frame_time; auto_cpu_state.frame_time_index++; } } diff --git a/workspace/all/player/player_loop_vsync.inc.bak b/workspace/all/player/player_loop_vsync.inc.bak deleted file mode 100644 index f58fc2bb..00000000 --- a/workspace/all/player/player_loop_vsync.inc.bak +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Vsync-driven main loop with frame pacing and audio rate control. - * - * This loop is included by player.c when SYNC_MODE_AUDIOCLOCK is not defined. - * - * Timing approach: - * - Display vsync is the timing source (GFX_present blocks until vsync) - * - Frame pacer uses Bresenham accumulator to decide when to step emulation - * - Audio rate control maintains buffer at 50% using dual-timescale PI controller - * - Works with any display Hz / game fps mismatch - * - * For devices with stable vsync. - */ - -static void run_main_loop(void) { - // Initialize frame pacer with display Hz - double display_hz = FramePacer_getDisplayHz(); - FramePacer_init(&frame_pacer, core.fps, display_hz); - LOG_info("Frame pacer: %.2ffps @ %.2fHz (%s) [Q16: %d/%d]\n", core.fps, display_hz, - FramePacer_isDirectMode(&frame_pacer) ? "direct" : "vsync-driven Bresenham", - frame_pacer.game_fps_q16, frame_pacer.display_hz_q16); - - // Keep audio rate control ENABLED with frame pacing. 
- // The integral term is slow enough (300-frame average) that it won't fight - // the per-frame step/repeat pattern. It learns the average vsync rate and - // compensates for hardware drift (e.g., 58.7Hz vs 60Hz nominal). - // Without rate control, vsync variance causes continuous audio underruns. - - PWR_warn(1); - PWR_disableAutosleep(); - - // force a vsync immediately before loop - // for better frame pacing? - GFX_clearAll(); - GFX_present(NULL); - - LOG_debug("Special_init"); - Special_init(); // after config - - LOG_debug("Entering main loop (vsync mode)"); - sec_start = SDL_GetTicks(); - while (!quit) { - GFX_startFrame(); - input_polled_this_frame = 0; // Reset at start of frame - - // Frame pacing: Bresenham accumulator decides whether to run core this vsync. - // Vsync (from GFX_present) is the timing source - each loop iteration = one display refresh. - // Core runs at its natural rate (e.g., 60fps), display refreshes at panel Hz (e.g., 72Hz). - // When display Hz > game fps: some frames are repeated (re-presented). - // - // During fast-forward: run core multiple times per vsync to achieve speedup. - // max_ff_speed: 0=2x, 1=3x, 2=4x, 3=5x - int runs_this_vsync = fast_forward ? (max_ff_speed + 2) : 1; - - for (int run = 0; run < runs_this_vsync; run++) { - // First run uses frame pacer, subsequent FF runs always execute - bool should_run_core = (run == 0) ? 
(fast_forward || FramePacer_step(&frame_pacer)) - : fast_forward; - - if (should_run_core) { - // Call frame time callback if registered (per libretro spec) - if (video_state.frame_time_cb) { - retro_usec_t frame_now = getMicroseconds(); - retro_usec_t delta; - if (fast_forward) { - // Use reference time during FF, don't update frame_time_last - // to avoid timing discontinuity when FF ends - delta = video_state.frame_time_ref; - } else { - if (video_state.frame_time_last == 0) { - delta = video_state.frame_time_ref; - } else { - delta = frame_now - video_state.frame_time_last; - } - video_state.frame_time_last = frame_now; - } - video_state.frame_time_cb(delta); - } - - // Report audio buffer status to core for frameskip decisions - // During FF, report audio inactive (no output during FF) - if (core.audio_buffer_status) { - if (fast_forward) { - core.audio_buffer_status(false, 0, false); - } else { - unsigned occupancy = SND_getBufferOccupancy(); - core.audio_buffer_status(true, occupancy, occupancy < 25); - } - } - - // Update audio rate control integral (once per frame) - // Skip during FF to prevent integral windup (audio is skipped) - if (!fast_forward) { - SND_newFrame(); - } - - // Measure frame execution time for auto CPU scaling - uint64_t frame_start = getMicroseconds(); - core.run(); - uint64_t frame_time = getMicroseconds() - frame_start; - - // Store frame time for auto CPU scaling analysis - if (overclock == 3 && !fast_forward && !show_menu) { - auto_cpu_state.frame_times[auto_cpu_state.frame_time_index % - PLAYER_CPU_FRAME_BUFFER_SIZE] = frame_time; - auto_cpu_state.frame_time_index++; - } - } - } - - // Always present for vsync timing - when !should_run_core, re-presents previous frame - GFX_present(&renderer); - frame_ready_for_flip = 0; - - // Track performance (only once per vsync, not per FF run) - limitFF(); - trackFPS(); - updateAutoCPU(); - - // Fallback input poll - ensures MENU button and shortcuts work even when - // core doesn't call 
input_poll_callback (e.g., showing error screens). - // Guard inside callback prevents double execution. - input_poll_callback(); - - if (show_menu) { - LOG_debug("Main loop: show_menu=1, entering Menu_loop"); - Menu_loop(); - LOG_debug("Main loop: returned from Menu_loop"); - } - - hdmimon(); - } -} diff --git a/workspace/all/player/player_utils.c b/workspace/all/player/player_utils.c index def5d3a2..bfc05369 100644 --- a/workspace/all/player/player_utils.c +++ b/workspace/all/player/player_utils.c @@ -4,7 +4,7 @@ * These functions have no external dependencies and can be tested in isolation. * * For option-related functions, see player_options.c - * For CPU frequency functions, see player_cpu.c + * For CPU frequency functions, see cpu.c */ #include "player_utils.h" diff --git a/workspace/all/player/player_utils.h b/workspace/all/player/player_utils.h index dcfac086..32ce90a8 100644 --- a/workspace/all/player/player_utils.h +++ b/workspace/all/player/player_utils.h @@ -5,7 +5,7 @@ * They perform string manipulation and other pure computations. * * For option-related functions, see player_options.h - * For CPU frequency functions, see player_cpu.h + * For CPU frequency functions, see cpu.h */ #ifndef PLAYER_UTILS_H From a95774cd9f19c28bc9bdfdd28b7882376ee09ce4 Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Tue, 6 Jan 2026 13:42:48 -0800 Subject: [PATCH 06/11] Refactor debug HUD to use consistent scaling. Extracts debug overlay rendering from player.c into reusable platform hooks, eliminating duplication between software and hardware render paths and enabling future platform-specific optimizations. 
--- CLAUDE.md | 2 +- docs/minarch-refactoring.md | 12 +- workspace/all/common/api.c | 22 + workspace/all/common/api.h | 26 ++ workspace/all/common/render_sdl2.c | 39 ++ workspace/all/common/render_sdl2.h | 2 + workspace/all/player/player.c | 576 +++++++++++++++++------- workspace/miyoomini/platform/platform.c | 3 + 8 files changed, 502 insertions(+), 180 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 4ad50282..bb8a98b3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -517,7 +517,7 @@ See `.clang-format` for complete style definition. | Player core AV processing | `workspace/all/player/player_core.c` | | Player memory persistence | `workspace/all/player/player_memory.c` | | Player save states | `workspace/all/player/player_state.c` | -| CPU scaling | `workspace/all/common/cpu.c` | +| CPU scaling | `workspace/all/common/cpu.c` | | Player input handling | `workspace/all/player/player_input.c` | | Player save paths | `workspace/all/player/player_paths.c` | | Launcher Entry type | `workspace/all/launcher/launcher_entry.c` | diff --git a/docs/minarch-refactoring.md b/docs/minarch-refactoring.md index 34ecc7b9..2ef5580d 100644 --- a/docs/minarch-refactoring.md +++ b/docs/minarch-refactoring.md @@ -122,12 +122,12 @@ player.c (orchestration, main loop, SDL integration) ### Core Integration Layer -| Module | Lines | Responsibility | Tests | -| --------------- | ----- | -------------------------------------------------------- | ----- | -| `player_core.c` | ~150 | Build game info, calculate aspect ratio, process AV info | 23 | -| `player_env.c` | ~400 | Handle 30+ libretro environment callbacks | 51 | -| `player_game.c` | ~300 | Extension parsing, ZIP headers, M3U detection | 46 | -| `cpu.c` (common)| ~350 | Auto CPU frequency scaling algorithm | 42 | +| Module | Lines | Responsibility | Tests | +| ---------------- | ----- | -------------------------------------------------------- | ----- | +| `player_core.c` | ~150 | Build game info, calculate aspect ratio, process AV info | 
23 | +| `player_env.c` | ~400 | Handle 30+ libretro environment callbacks | 51 | +| `player_game.c` | ~300 | Extension parsing, ZIP headers, M3U detection | 46 | +| `cpu.c` (common) | ~350 | Auto CPU frequency scaling algorithm | 42 | **Key decisions:** diff --git a/workspace/all/common/api.c b/workspace/all/common/api.c index e0207db2..33814c7b 100644 --- a/workspace/all/common/api.c +++ b/workspace/all/common/api.c @@ -713,6 +713,28 @@ void GFX_present(GFX_Renderer* renderer) { PLAT_present(renderer); } +/** + * Default (weak) implementation of debug HUD rendering (software). + * Player overrides this to render debug overlay before flip. + */ +FALLBACK_IMPLEMENTATION void PLAT_renderDebugHUD(SDL_Surface* surface) { + (void)surface; + // No-op by default +} + +/** + * Default (weak) implementation of debug HUD buffer (hardware/GL). + * Player overrides this to provide RGBA buffer for GL compositing. + */ +FALLBACK_IMPLEMENTATION uint32_t* PLAT_getDebugHUDBuffer(int src_w, int src_h, int screen_w, + int screen_h) { + (void)src_w; + (void)src_h; + (void)screen_w; + (void)screen_h; + return NULL; // No HUD by default +} + /** * Waits for vsync without presenting new content. * diff --git a/workspace/all/common/api.h b/workspace/all/common/api.h index a831cbbc..8f85990d 100644 --- a/workspace/all/common/api.h +++ b/workspace/all/common/api.h @@ -1386,6 +1386,32 @@ scaler_t PLAT_getScaler(GFX_Renderer* renderer); */ void PLAT_present(GFX_Renderer* renderer); +/** + * Render debug HUD overlay to display surface (software rendering). + * + * Called by PLAT_present() implementations before buffer flip. + * Weak default does nothing; player provides implementation. + * + * @param surface Final display surface to render HUD onto (RGB565) + */ +FALLBACK_IMPLEMENTATION void PLAT_renderDebugHUD(SDL_Surface* surface); + +/** + * Get debug HUD buffer for GL compositing (hardware rendering). + * + * Called by SDL2_present() GLES path before swap buffers. 
+ * Returns an RGBA8888 buffer that will be composited over the game frame. + * Weak default returns NULL (no HUD); player provides implementation. + * + * @param src_w Source (game) width for HUD text generation + * @param src_h Source (game) height for HUD text generation + * @param screen_w Screen width in pixels + * @param screen_h Screen height in pixels + * @return RGBA8888 pixel buffer (screen_w x screen_h) or NULL if no HUD + */ +FALLBACK_IMPLEMENTATION uint32_t* PLAT_getDebugHUDBuffer(int src_w, int src_h, int screen_w, + int screen_h); + /** * Platform-specific overscan support check. * diff --git a/workspace/all/common/render_sdl2.c b/workspace/all/common/render_sdl2.c index cd37c555..d14670b2 100644 --- a/workspace/all/common/render_sdl2.c +++ b/workspace/all/common/render_sdl2.c @@ -243,6 +243,20 @@ SDL_Surface* SDL2_initVideo(SDL2_RenderContext* ctx, int width, int height, SDL_DestroyWindow(ctx->window); return NULL; } + +#if !HAS_OPENGLES + // Create HUD texture for debug overlay. HUD pixels are uint32_t with alpha in the top byte (0xFF000000 = opaque black), i.e. ABGR8888; RGBA8888 would read that as alpha 0 + ctx->hud_texture = SDL_CreateTexture(ctx->renderer, SDL_PIXELFORMAT_ABGR8888, + SDL_TEXTUREACCESS_STREAMING, w, h); + if (!ctx->hud_texture) { + LOG_warn("SDL2_initVideo: Failed to create HUD texture: %s", SDL_GetError()); + // Non-fatal, continue without HUD support + } else { + SDL_SetTextureBlendMode(ctx->hud_texture, SDL_BLENDMODE_BLEND); + } + ctx->hud_surface = NULL; // Not used - PLAT_getDebugHUDBuffer provides RGBA buffer +#endif + LOG_debug("SDL2_initVideo: Surfaces created successfully"); // Store dimensions @@ -298,6 +312,9 @@ void SDL2_quitVideo(SDL2_RenderContext* ctx) { SDL_DestroyTexture(ctx->target); if (ctx->effect) SDL_DestroyTexture(ctx->effect); + if (ctx->hud_texture) + SDL_DestroyTexture(ctx->hud_texture); + // Note: hud_surface is unused (set to NULL) - PLAT_getDebugHUDBuffer provides RGBA buffer SDL_DestroyTexture(ctx->texture); // Destroy renderer and window @@ -392,6 +409,15 @@ void SDL2_present(SDL2_RenderContext* ctx, 
GFX_Renderer* renderer) { // Draw software frame (with effect support) GLVideo_drawSoftwareFrame(&src_rect, &dst_rect, rotation, sharpness, renderer->visual_scale); + + // Render debug HUD overlay if available (player provides implementation) + uint32_t* hud_buf = PLAT_getDebugHUDBuffer(renderer->src_w, renderer->src_h, ctx->device_width, + ctx->device_height); + if (hud_buf) { + GLVideo_renderHUD(hud_buf, ctx->device_width, ctx->device_height, ctx->device_width, + ctx->device_height); + } + GLVideo_swapBuffers(); #else @@ -474,6 +500,19 @@ void SDL2_present(SDL2_RenderContext* ctx, GFX_Renderer* renderer) { } } + // Render debug HUD overlay if available (player provides implementation) + if (ctx->hud_texture) { + uint32_t* hud_buf = PLAT_getDebugHUDBuffer(renderer->src_w, renderer->src_h, + ctx->device_width, ctx->device_height); + if (hud_buf) { + // Upload RGBA buffer to texture + SDL_UpdateTexture(ctx->hud_texture, NULL, hud_buf, + ctx->device_width * (int)sizeof(uint32_t)); + // Composite HUD texture over game (fullscreen, no rotation) + SDL_RenderCopy(ctx->renderer, ctx->hud_texture, NULL, NULL); + } + } + SDL_RenderPresent(ctx->renderer); #endif } diff --git a/workspace/all/common/render_sdl2.h b/workspace/all/common/render_sdl2.h index d65c1426..7e88491d 100644 --- a/workspace/all/common/render_sdl2.h +++ b/workspace/all/common/render_sdl2.h @@ -68,10 +68,12 @@ typedef struct SDL2_RenderContext { SDL_Texture* texture; // Main texture (source resolution) SDL_Texture* target; // Intermediate texture for crisp scaling SDL_Texture* effect; // Effect overlay texture + SDL_Texture* hud_texture; // Debug HUD overlay texture // Surfaces SDL_Surface* buffer; // Wrapper for texture lock (unused pixels pointer) SDL_Surface* screen; // Main screen surface for UI rendering + SDL_Surface* hud_surface; // Debug HUD surface for rendering text // Video dimensions int width; // Current source width diff --git a/workspace/all/player/player.c b/workspace/all/player/player.c 
index 104816eb..3cc62b7e 100644 --- a/workspace/all/player/player.c +++ b/workspace/all/player/player.c @@ -80,6 +80,7 @@ #include "player_scaler.h" #include "player_state.h" #include "player_video_convert.h" +#include "render_common.h" #include "scaler.h" #include "utils.h" @@ -3556,6 +3557,24 @@ static const char* bitmap_font[] = { "1 " "1 1" " 111 ", + ['V'] = "1 1" + "1 1" + "1 1" + "1 1" + "1 1" + "1 1" + " 1 1 " + " 1 1 " + " 1 ", + ['+'] = " " + " " + " 1 " + " 1 " + "11111" + " 1 " + " 1 " + " " + " ", }; static void blitBitmapText(char* text, int ox, int oy, uint16_t* data, int stride, int width, int height) { @@ -3673,6 +3692,193 @@ static void blitBitmapTextRGBA(char* text, int ox, int oy, uint32_t* data, int s } } +/** + * Render bitmap text to an RGBA buffer with DP-based scaling. + * + * Similar to blitBitmapTextScaled but for RGBA format used by HW rendering. + * White text with black outline, transparent background, scaled for consistent sizing. + * + * @param text Text to render + * @param ox X position in pixels (negative = right-align from edge) + * @param oy Y position in pixels (negative = bottom-align from edge) + * @param data RGBA8888 pixel buffer + * @param stride Buffer width in pixels (not bytes) + * @param width Total buffer width in pixels + * @param height Total buffer height in pixels + * @param scale Scale factor (e.g., 2 = double size) + */ +static void blitBitmapTextRGBAScaled(char* text, int ox, int oy, uint32_t* data, int stride, + int width, int height, int scale) { + if (scale < 1) + scale = 1; + + const uint32_t RGBA_WHITE = 0xFFFFFFFF; + const uint32_t RGBA_BLACK = 0xFF000000; + + int len = strlen(text); + int base_w = ((CHAR_WIDTH + LETTERSPACING) * len) - 1; + int base_h = CHAR_HEIGHT; + int w = base_w * scale; + int h = base_h * scale; + + // Handle negative offsets (right/bottom alignment) + if (ox < 0) + ox = width - w + ox; + if (oy < 0) + oy = height - h + oy; + + // Bounds check - need scale px margin for outline + if 
(ox < scale || oy < scale || ox + w + scale > width || oy + h + scale > height) + return; + + data += oy * stride + ox; + + // Top outline rows + for (int outline_y = -scale; outline_y < 0; outline_y++) { + uint32_t* row = data + (ptrdiff_t)outline_y * stride; + for (int x = -scale; x < w + scale; x++) { + row[x] = RGBA_BLACK; + } + } + + // Main text rows with side outlines + for (int y = 0; y < base_h; y++) { + for (int sy = 0; sy < scale; sy++) { + uint32_t* row = data + (ptrdiff_t)(y * scale + sy) * stride; + + // Left outline + for (int x = -scale; x < 0; x++) { + row[x] = RGBA_BLACK; + } + + // Character pixels + int col = 0; + for (int i = 0; i < len; i++) { + const char* c = bitmap_font[(unsigned char)text[i]]; + if (!c) + c = bitmap_font[' ']; + for (int x = 0; x < CHAR_WIDTH; x++) { + int j = y * CHAR_WIDTH + x; + uint32_t color = (c[j] == '1') ? RGBA_WHITE : RGBA_BLACK; + for (int sx = 0; sx < scale; sx++) { + row[col * scale + sx] = color; + } + col++; + } + // Letter spacing + for (int s = 0; s < LETTERSPACING; s++) { + for (int sx = 0; sx < scale; sx++) { + row[col * scale + sx] = RGBA_BLACK; + } + col++; + } + } + + // Right outline + for (int x = 0; x < scale; x++) { + row[w + x] = RGBA_BLACK; + } + } + } + + // Bottom outline rows + for (int outline_y = 0; outline_y < scale; outline_y++) { + uint32_t* row = data + (ptrdiff_t)(h + outline_y) * stride; + for (int x = -scale; x < w + scale; x++) { + row[x] = RGBA_BLACK; + } + } +} + +/** + * Render bitmap text to an RGB565 buffer with DP-based scaling. + * + * Renders text at a scale factor determined by display points, ensuring + * consistent visual size across different screen resolutions. Each character + * is upscaled by repeating pixels. 
+ * + * @param text Text to render + * @param ox X position in pixels (negative = right-align from edge) + * @param oy Y position in pixels (negative = bottom-align from edge) + * @param data RGB565 pixel buffer + * @param stride Buffer width in pixels (not bytes) + * @param width Total buffer width in pixels + * @param height Total buffer height in pixels + * @param scale Scale factor (e.g., 2 = double size) + */ +static void blitBitmapTextScaled(char* text, int ox, int oy, uint16_t* data, int stride, int width, + int height, int scale) { + if (scale < 1) + scale = 1; + + int len = strlen(text); + int base_w = ((CHAR_WIDTH + LETTERSPACING) * len) - 1; + int base_h = CHAR_HEIGHT; + int w = base_w * scale; + int h = base_h * scale; + + // Handle negative offsets (right/bottom alignment) + if (ox < 0) + ox = width - w + ox; + if (oy < 0) + oy = height - h + oy; + + // Bounds check - need scale px margin for outline + if (ox < scale || oy < scale || ox + w + scale > width || oy + h + scale > height) + return; + + // Draw black outline (scale pixels around text) + data += oy * stride + ox; + + // Top outline rows + for (int outline_y = -scale; outline_y < 0; outline_y++) { + uint16_t* row = data + (ptrdiff_t)outline_y * stride; + memset(row - scale, 0, (size_t)(w + 2 * scale) * 2); + } + + // Main text rows with side outlines + for (int y = 0; y < base_h; y++) { + for (int sy = 0; sy < scale; sy++) { + uint16_t* row = data + (ptrdiff_t)(y * scale + sy) * stride; + + // Left outline + memset(row - scale, 0, (size_t)scale * 2); + + // Character pixels + int col = 0; + for (int i = 0; i < len; i++) { + const char* c = bitmap_font[(unsigned char)text[i]]; + if (!c) + c = bitmap_font[' ']; + for (int x = 0; x < CHAR_WIDTH; x++) { + int j = y * CHAR_WIDTH + x; + uint16_t color = (c[j] == '1') ? 
0xffff : 0x0000; + for (int sx = 0; sx < scale; sx++) { + row[col * scale + sx] = color; + } + col++; + } + // Letter spacing + for (int s = 0; s < LETTERSPACING; s++) { + for (int sx = 0; sx < scale; sx++) { + row[col * scale + sx] = 0x0000; + } + col++; + } + } + + // Right outline + memset(row + w, 0, (size_t)scale * 2); + } + } + + // Bottom outline rows + for (int outline_y = 0; outline_y < scale; outline_y++) { + uint16_t* row = data + (ptrdiff_t)(h + outline_y) * stride; + memset(row - scale, 0, (size_t)(w + 2 * scale) * 2); + } +} + /////////////////////////////////////// // Performance Counters (needed by HW HUD before video processing section) /////////////////////////////////////// @@ -3687,75 +3893,59 @@ static double use_double = 0; // System CPU usage percentage static uint32_t sec_start = 0; /////////////////////////////////////// -// HW Debug HUD +// Shared Debug HUD Logic /////////////////////////////////////// -// HUD buffer for HW rendering (allocated once, reused) -static uint32_t* hw_hud_buffer = NULL; -static int hw_hud_width = 0; -static int hw_hud_height = 0; +/** + * Debug text strings for HUD display. + * Generated once per frame, used by both SW and HW rendering paths. + */ +typedef struct DebugHUDText { + char top_left[128]; // FPS and CPU % + char top_right[128]; // Source/output resolution + char bottom_left[128]; // CPU mode and buffer fill + char bottom_right[128]; // Output/source resolution +} DebugHUDText; /** - * Render debug HUD overlay for hardware-rendered frames. + * Generate debug HUD text strings. * - * Creates an RGBA surface with the same debug info as the software path - * (FPS, CPU usage, resolution, etc.) and passes it to the HW render module - * for compositing over the game frame. + * Consolidates all debug metric formatting logic used by both software + * and hardware rendering paths. Samples audio buffer fill every 15 frames. 
* + * @param text Output structure for formatted debug strings * @param src_w Source (game) width in pixels * @param src_h Source (game) height in pixels * @param screen_w Screen width in pixels * @param screen_h Screen height in pixels */ -static void renderHWDebugHUD(int src_w, int src_h, int screen_w, int screen_h) { - // Allocate or resize HUD buffer if needed - if (!hw_hud_buffer || hw_hud_width != screen_w || hw_hud_height != screen_h) { - free(hw_hud_buffer); - hw_hud_buffer = malloc((size_t)screen_w * (size_t)screen_h * sizeof(uint32_t)); - if (!hw_hud_buffer) { - LOG_error("Failed to allocate HW HUD buffer"); - return; - } - hw_hud_width = screen_w; - hw_hud_height = screen_h; - } - - // Clear to fully transparent - memset(hw_hud_buffer, 0, (size_t)screen_w * (size_t)screen_h * sizeof(uint32_t)); - - int x = 2; - int y = 2; - char debug_text[128]; - - // Calculate scale factor for HW rendering (approximate) - int scale = 1; - if (src_w > 0 && src_h > 0) { - int scale_x = screen_w / src_w; - int scale_y = screen_h / src_h; - scale = (scale_x < scale_y) ? 
scale_x : scale_y; - if (scale < 1) - scale = 1; - } - - // Get buffer fill (sampled every 15 frames for readability) +static void generateDebugHUDText(DebugHUDText* text, int src_w, int src_h, int screen_w, + int screen_h) { + // Get buffer fill and rate adjustment (sampled every 15 frames for readability) static unsigned fill_display = 0; + static float rate_adj_display = 1.0f; static int sample_count = 0; if (++sample_count >= 15) { sample_count = 0; - fill_display = SND_getBufferOccupancy(); + SND_Snapshot snap = SND_getSnapshot(); + fill_display = snap.fill_pct; + rate_adj_display = snap.rate_adjust; } - // Top-left: FPS and system CPU % + // Top-left: FPS, sync mode, and rate control adjustment + // Modes: AC = audio clock, VS = vsync direct (fps≈hz), FP = frame paced (fps≠hz) + // Rate adjustment shows audio stretch: >1.0 = running fast, <1.0 = running slow + float rate_pct = (rate_adj_display - 1.0f) * 100.0f; #ifdef SYNC_MODE_AUDIOCLOCK - (void)snprintf(debug_text, sizeof(debug_text), "%.0f FPS %i%% AC", fps_double, (int)use_double); + (void)snprintf(text->top_left, sizeof(text->top_left), "%.1f AC", fps_double); #else - (void)snprintf(debug_text, sizeof(debug_text), "%.0f FPS %i%%", fps_double, (int)use_double); + const char* sync_mode = FramePacer_isDirectMode(&frame_pacer) ? 
"VS" : "FP"; + (void)snprintf(text->top_left, sizeof(text->top_left), "%.1f %s %+.1f%%", fps_double, sync_mode, + rate_pct); #endif - blitBitmapTextRGBA(debug_text, x, y, hw_hud_buffer, screen_w, screen_w, screen_h); - // Top-right: Source resolution and scale factor - (void)snprintf(debug_text, sizeof(debug_text), "%ix%i %ix", src_w, src_h, scale); - blitBitmapTextRGBA(debug_text, -x, y, hw_hud_buffer, screen_w, screen_w, screen_h); + // Top-right: Source resolution + (void)snprintf(text->top_right, sizeof(text->top_right), "%ix%i", src_w, src_h); // Bottom-left: CPU info + buffer fill if (overclock == 3) { @@ -3782,31 +3972,120 @@ static void renderHWDebugHUD(int src_w, int src_h, int screen_w, int screen_h) { // Topology mode: show state/max and performance % int perf_pct = CPU_getPerformancePercent(&auto_cpu_state); int max_state = auto_cpu_state.topology.state_count - 1; - (void)snprintf(debug_text, sizeof(debug_text), "T%i/%i %i%% u:%u%% b:%u%%", - current_state, max_state, perf_pct, util, fill_display); + (void)snprintf(text->bottom_left, sizeof(text->bottom_left), + "T%i/%i %i%% u:%u%% b:%u%%", current_state, max_state, perf_pct, util, + fill_display); } else if (auto_cpu_state.use_granular && current_idx >= 0 && current_idx < auto_cpu_state.freq_count) { // Granular mode: show frequency in MHz int freq_mhz = auto_cpu_state.frequencies[current_idx] / 1000; - (void)snprintf(debug_text, sizeof(debug_text), "%i u:%u%% b:%u%%", freq_mhz, util, - fill_display); + (void)snprintf(text->bottom_left, sizeof(text->bottom_left), "%i u:%u%% b:%u%%", + freq_mhz, util, fill_display); } else { // Fallback mode: show level - (void)snprintf(debug_text, sizeof(debug_text), "L%i u:%u%% b:%u%%", level, util, - fill_display); + (void)snprintf(text->bottom_left, sizeof(text->bottom_left), "L%i u:%u%% b:%u%%", level, + util, fill_display); } } else { // Manual mode: show level and buffer fill - (void)snprintf(debug_text, sizeof(debug_text), "L%i b:%u%%", overclock, fill_display); 
+ (void)snprintf(text->bottom_left, sizeof(text->bottom_left), "L%i b:%u%%", overclock, + fill_display); } - blitBitmapTextRGBA(debug_text, x, -y, hw_hud_buffer, screen_w, screen_w, screen_h); // Bottom-right: Output resolution - (void)snprintf(debug_text, sizeof(debug_text), "%ix%i", screen_w, screen_h); - blitBitmapTextRGBA(debug_text, -x, -y, hw_hud_buffer, screen_w, screen_w, screen_h); + (void)snprintf(text->bottom_right, sizeof(text->bottom_right), "%ix%i", screen_w, screen_h); +} + +/////////////////////////////////////// +// HW Debug HUD +/////////////////////////////////////// + +// HUD buffer for HW rendering (allocated once, reused) +static uint32_t* hw_hud_buffer = NULL; +static int hw_hud_width = 0; +static int hw_hud_height = 0; + +/** + * Build debug HUD into RGBA buffer for GL compositing. + * + * Allocates/resizes buffer as needed and renders debug text. + * Does NOT call GLVideo_renderHUD - caller handles compositing. + * + * @param src_w Source (game) width in pixels + * @param src_h Source (game) height in pixels + * @param screen_w Screen width in pixels + * @param screen_h Screen height in pixels + * @return RGBA8888 buffer or NULL on allocation failure + */ +static uint32_t* buildDebugHUDBuffer(int src_w, int src_h, int screen_w, int screen_h) { + // Allocate or resize HUD buffer if needed + if (!hw_hud_buffer || hw_hud_width != screen_w || hw_hud_height != screen_h) { + free(hw_hud_buffer); + hw_hud_buffer = malloc((size_t)screen_w * (size_t)screen_h * sizeof(uint32_t)); + if (!hw_hud_buffer) { + LOG_error("Failed to allocate HW HUD buffer"); + hw_hud_width = 0; + hw_hud_height = 0; + return NULL; + } + hw_hud_width = screen_w; + hw_hud_height = screen_h; + } + + // Clear to fully transparent + memset(hw_hud_buffer, 0, (size_t)screen_w * (size_t)screen_h * sizeof(uint32_t)); + + // Generate debug text using shared logic + DebugHUDText text; + generateDebugHUDText(&text, src_w, src_h, screen_w, screen_h); + + // Calculate text scale based on 
screen height for consistent proportions + // Using screen_h/180 gives ~3-4% of screen height across devices: + // - 480px: 2x (16px = 3.3%) + // - 560px: 3x (24px = 4.3%) + // - 720px: 4x (32px = 4.4%) + // This avoids integer quantization issues with DP-based calculation + int text_scale = screen_h / 180; + if (text_scale < 1) + text_scale = 1; + if (text_scale > 6) + text_scale = 6; + + // Debug: log HUD rendering parameters (GL path, once) + static int logged_gl = 0; + if (!logged_gl) { + LOG_info("Debug HUD GL: buffer=%dx%d, text_scale=%dx (%dpx, %.1f%% of screen)\n", screen_w, + screen_h, text_scale, text_scale * CHAR_HEIGHT, + 100.0f * text_scale * CHAR_HEIGHT / screen_h); + logged_gl = 1; + } + + // Offset from screen edges (proportional to text size) + int margin = text_scale * 2; + + // Render all four corners + blitBitmapTextRGBAScaled(text.top_left, margin, margin, hw_hud_buffer, screen_w, screen_w, + screen_h, text_scale); + blitBitmapTextRGBAScaled(text.top_right, -margin, margin, hw_hud_buffer, screen_w, screen_w, + screen_h, text_scale); + blitBitmapTextRGBAScaled(text.bottom_left, margin, -margin, hw_hud_buffer, screen_w, screen_w, + screen_h, text_scale); + blitBitmapTextRGBAScaled(text.bottom_right, -margin, -margin, hw_hud_buffer, screen_w, screen_w, + screen_h, text_scale); + + return hw_hud_buffer; +} - // Pass HUD to HW renderer for compositing - GLVideo_renderHUD(hw_hud_buffer, screen_w, screen_h, screen_w, screen_h); +/** + * Get debug HUD buffer for GL compositing (PLAT hook implementation). + * + * Called by SDL2_present() GLES path. Returns RGBA buffer to be + * composited over the game frame via GLVideo_renderHUD(). 
+ */ +uint32_t* PLAT_getDebugHUDBuffer(int src_w, int src_h, int screen_w, int screen_h) { + if (!show_debug) + return NULL; + return buildDebugHUDBuffer(src_w, src_h, screen_w, screen_h); } /** @@ -3819,6 +4098,66 @@ static void cleanupHWDebugHUD(void) { hw_hud_height = 0; } +/////////////////////////////////////// +// SW Debug HUD +/////////////////////////////////////// + +/** + * Render debug HUD to an RGB565 surface (PLAT hook implementation). + * + * Called by PLAT_present() implementations before buffer flip. + * Uses screen-proportional scaling for consistent sizing across platforms. + * + * @param surface SDL surface to render to (must be RGB565 format) + */ +void PLAT_renderDebugHUD(SDL_Surface* surface) { + if (!show_debug || !surface || !surface->pixels) + return; + + int screen_w = surface->w; + int screen_h = surface->h; + int pitch_in_pixels = surface->pitch / sizeof(uint16_t); + uint16_t* pixels = (uint16_t*)surface->pixels; + + // Generate debug text using shared logic + DebugHUDText text; + generateDebugHUDText(&text, renderer.src_w, renderer.src_h, screen_w, screen_h); + + // Calculate text scale based on screen height for consistent proportions + // Using screen_h/180 gives ~3-4% of screen height across devices: + // - 480px: 2x (16px = 3.3%) + // - 560px: 3x (24px = 4.3%) + // - 720px: 4x (32px = 4.4%) + // This avoids integer quantization issues with DP-based calculation + int text_scale = screen_h / 180; + if (text_scale < 1) + text_scale = 1; + if (text_scale > 6) + text_scale = 6; + + // Debug: log HUD rendering parameters (SW path, once) + static int logged_sw = 0; + if (!logged_sw) { + LOG_info("Debug HUD SW: surface=%dx%d, text_scale=%dx (%dpx, %.1f%% of screen)\n", screen_w, + screen_h, text_scale, text_scale * CHAR_HEIGHT, + 100.0f * text_scale * CHAR_HEIGHT / screen_h); + logged_sw = 1; + } + + // Offset from screen edges (proportional to text size) + int margin = text_scale * 2; + + // Render all four corners + 
blitBitmapTextScaled(text.top_left, margin, margin, pixels, pitch_in_pixels, screen_w, screen_h, + text_scale); + blitBitmapTextScaled(text.top_right, -margin, margin, pixels, pitch_in_pixels, screen_w, + screen_h, text_scale); + blitBitmapTextScaled(text.bottom_left, margin, -margin, pixels, pitch_in_pixels, screen_w, + screen_h, text_scale); + blitBitmapTextScaled(text.bottom_right, -margin, -margin, pixels, pitch_in_pixels, screen_w, + screen_h, text_scale); +} + /////////////////////////////////////// // Video Processing /////////////////////////////////////// @@ -4017,117 +4356,6 @@ static void video_refresh_callback_main(const void* data, unsigned width, unsign } renderer.src = rotated_data; - - // debug - render after pixel conversion so we write to RGB565 buffer - if (show_debug) { - int x = 2 + renderer.src_x; - int y = 2 + renderer.src_y; - char debug_text[128]; - int scale = renderer.scale; - if (scale == -1) - scale = 1; // nearest neighbor flag - - // Debug text rendering needs correct buffer dimensions and pitch. - // blitBitmapText expects pitch in pixels (uint16_t), not bytes. - // - // After 90°/270° rotation, the buffer dimensions are swapped (width becomes height - // and vice versa) because the image has been rotated. We detect this by checking if - // rotated_data != frame_data (indicating rotation was actually applied). - // - // blitBitmapText needs the post-rotation dimensions to correctly bounds-check text - // rendering, and the rotation buffer's pitch instead of the original pitch. 
- int pitch_in_pixels; - int debug_width = width; - int debug_height = height; - - if (rotated_data != frame_data) { - // Use rotation buffer pitch when rotation was applied - pitch_in_pixels = PlayerRotation_getBuffer()->pitch / sizeof(uint16_t); - if (video_state.rotation == ROTATION_90 || video_state.rotation == ROTATION_270) { - // Swap dimensions for 90°/270° rotations - debug_width = height; - debug_height = width; - } - } else { - // Use original pitch when rotation was skipped - pitch_in_pixels = rgb565_pitch / sizeof(uint16_t); - } - - // Get buffer fill (sampled every 15 frames for readability) - static unsigned fill_display = 0; - static int sample_count = 0; - if (++sample_count >= 15) { - sample_count = 0; - fill_display = SND_getBufferOccupancy(); - } - - // Top-left: FPS and system CPU % -#ifdef SYNC_MODE_AUDIOCLOCK - (void)snprintf(debug_text, sizeof(debug_text), "%.0f FPS %i%% AC", fps_double, - (int)use_double); -#else - (void)snprintf(debug_text, sizeof(debug_text), "%.0f FPS %i%%", fps_double, - (int)use_double); -#endif - blitBitmapText(debug_text, x, y, (uint16_t*)renderer.src, pitch_in_pixels, debug_width, - debug_height); - - // Top-right: Source resolution and scale factor - (void)snprintf(debug_text, sizeof(debug_text), "%ix%i %ix", renderer.src_w, renderer.src_h, - scale); - blitBitmapText(debug_text, -x, y, (uint16_t*)renderer.src, pitch_in_pixels, debug_width, - debug_height); - - // Bottom-left: CPU info + buffer fill (always), plus utilization when auto - if (overclock == 3) { - // Auto CPU mode: show mode-specific info, utilization, and buffer fill - pthread_mutex_lock(&auto_cpu_mutex); - int current_idx = auto_cpu_state.current_index; - int current_state = auto_cpu_state.current_state; - int level = auto_cpu_state.current_level; - pthread_mutex_unlock(&auto_cpu_mutex); - - // Calculate current utilization from most recent frame times - unsigned util = 0; - int samples = (auto_cpu_state.frame_time_index < 
auto_cpu_config.window_frames) - ? auto_cpu_state.frame_time_index - : auto_cpu_config.window_frames; - if (samples >= 5 && auto_cpu_state.frame_budget_us > 0) { - uint64_t p90 = percentileUint64(auto_cpu_state.frame_times, samples, 0.90f); - util = (unsigned)((p90 * 100) / auto_cpu_state.frame_budget_us); - if (util > 200) - util = 200; - } - - if (auto_cpu_state.use_topology) { - // Topology mode: show state/max and performance % - int perf_pct = CPU_getPerformancePercent(&auto_cpu_state); - int max_state = auto_cpu_state.topology.state_count - 1; - (void)snprintf(debug_text, sizeof(debug_text), "T%i/%i %i%% u:%u%% b:%u%%", - current_state, max_state, perf_pct, util, fill_display); - } else if (auto_cpu_state.use_granular && current_idx >= 0 && - current_idx < auto_cpu_state.freq_count) { - // Granular mode: show frequency in MHz (e.g., "1200" for 1200 MHz) - int freq_mhz = auto_cpu_state.frequencies[current_idx] / 1000; - (void)snprintf(debug_text, sizeof(debug_text), "%i u:%u%% b:%u%%", freq_mhz, util, - fill_display); - } else { - // Fallback mode: show level - (void)snprintf(debug_text, sizeof(debug_text), "L%i u:%u%% b:%u%%", level, util, - fill_display); - } - } else { - // Manual mode: show level and buffer fill (overclock 0/1/2 maps to L0/L1/L2) - (void)snprintf(debug_text, sizeof(debug_text), "L%i b:%u%%", overclock, fill_display); - } - blitBitmapText(debug_text, x, -y, (uint16_t*)renderer.src, pitch_in_pixels, debug_width, - debug_height); - - // Bottom-right: Output resolution - (void)snprintf(debug_text, sizeof(debug_text), "%ix%i", renderer.dst_w, renderer.dst_h); - blitBitmapText(debug_text, -x, -y, (uint16_t*)renderer.src, pitch_in_pixels, debug_width, - debug_height); - } renderer.dst = screen->pixels; // LOG_info("video_refresh_callback: %ix%i@%i %ix%i@%i",width,height,pitch,screen->w,screen->h,screen->pitch); @@ -4162,8 +4390,10 @@ void video_refresh_callback(const void* data, unsigned width, unsigned height, s core.aspect_ratio, 
renderer.visual_scale); // Render debug HUD overlay if enabled - if (show_debug) { - renderHWDebugHUD((int)width, (int)height, DEVICE_WIDTH, DEVICE_HEIGHT); + uint32_t* hud = + PLAT_getDebugHUDBuffer((int)width, (int)height, DEVICE_WIDTH, DEVICE_HEIGHT); + if (hud) { + GLVideo_renderHUD(hud, DEVICE_WIDTH, DEVICE_HEIGHT, DEVICE_WIDTH, DEVICE_HEIGHT); } // Swap buffers to display the frame diff --git a/workspace/miyoomini/platform/platform.c b/workspace/miyoomini/platform/platform.c index 1367ece7..5c917488 100644 --- a/workspace/miyoomini/platform/platform.c +++ b/workspace/miyoomini/platform/platform.c @@ -700,6 +700,9 @@ void PLAT_present(GFX_Renderer* renderer) { GFX_BlitSurfaceExec(vid.effect, &src_rect, vid.video, &dst_rect, 0, 0, 0); } } + + // Render debug HUD overlay (game mode only) + PLAT_renderDebugHUD(vid.video); } else { // UI mode: blit screen to video if (!vid.direct) { From af378b0935fb3dcd2d08073faccb68143d367170 Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Tue, 6 Jan 2026 21:33:03 -0800 Subject: [PATCH 07/11] Add cubic safety boost to audio rate control and improve CPU scaling. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audio rate control improvements: - Add cubic boost to proportional term (1× at center, 4× at limits) - Prevents buffer hitting 0%/100% during integral learning - Only boosts P term, not integral (avoids over-correction) - Improves tg5040 stability: 40.5% mean fill, 704 warnings (down from 8000+) CPU scaling improvements: - Add panic grace period (60 frames after frequency change) - Ignore underruns during grace to prevent cascade panics - Add buffer threshold for reduce (min 40% to allow scaling down) - Add stability decay (reduce panic counts after stable operation) - Reduce panic step from 2 to 1 (less aggressive on panic) --- tests/unit/all/common/test_cpu.c | 608 +++++++++++++++++++++++++++++-- workspace/all/common/api.c | 19 +- workspace/all/common/api.h | 1 + workspace/all/common/cpu.c | 108 ++++-- workspace/all/common/cpu.h | 15 +- workspace/all/player/player.c | 151 +++++--- 6 files changed, 780 insertions(+), 122 deletions(-) diff --git a/tests/unit/all/common/test_cpu.c b/tests/unit/all/common/test_cpu.c index d64ac7b2..c4d01a9b 100644 --- a/tests/unit/all/common/test_cpu.c +++ b/tests/unit/all/common/test_cpu.c @@ -9,8 +9,11 @@ * - Granular vs fallback modes * - Frame timing percentile calculation * - Single-frequency/scaling-disabled scenarios (M17-like devices) + * - Grace period after frequency changes (prevents cascade panics) + * - Stability decay (earns back blocked frequencies over time) + * - Step-by-1 behavior for predictable scaling * - * 46 tests organized by functionality. + * 100 tests organized by functionality. 
*/ #include "unity.h" @@ -96,6 +99,8 @@ void test_initConfig_sets_defaults(void) { TEST_ASSERT_LESS_OR_EQUAL(100, c.target_util); TEST_ASSERT_GREATER_THAN(0, c.max_step_down); TEST_ASSERT_GREATER_THAN(0, c.panic_step_up); + TEST_ASSERT_GREATER_THAN(0, c.min_buffer_for_reduce); // Must have a minimum buffer level + TEST_ASSERT_LESS_OR_EQUAL(100, c.min_buffer_for_reduce); } void test_initState_zeros_state(void) { @@ -454,7 +459,7 @@ void test_getModeName_null(void) { void test_update_skips_during_fast_forward(void) { CPUResult result; - CPUDecision decision = CPU_update(&state, &config, true, false, 0, &result); + CPUDecision decision = CPU_update(&state, &config, true, false, 0, 100, &result); TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, result.decision); @@ -462,7 +467,7 @@ void test_update_skips_during_fast_forward(void) { void test_update_skips_during_menu(void) { CPUResult result; - CPUDecision decision = CPU_update(&state, &config, false, true, 0, &result); + CPUDecision decision = CPU_update(&state, &config, false, true, 0, 100, &result); TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); } @@ -471,7 +476,7 @@ void test_update_skips_during_grace_period(void) { config.startup_grace = 300; state.startup_frames = 100; // Not yet at grace period - CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); TEST_ASSERT_EQUAL(101, state.startup_frames); // Incremented @@ -493,7 +498,7 @@ void test_update_skips_when_scaling_disabled(void) { } CPUResult result; - CPUDecision decision = CPU_update(&state, &config, false, false, 0, &result); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, &result); TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, result.decision); @@ -506,7 +511,7 @@ void 
test_update_skips_when_no_frequencies(void) { TEST_ASSERT_EQUAL(1, state.scaling_disabled); CPUResult result; - CPUDecision decision = CPU_update(&state, &config, false, false, 0, &result); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, &result); TEST_ASSERT_EQUAL(CPU_DECISION_SKIP, decision); } @@ -524,10 +529,10 @@ void test_update_panic_on_underrun_granular(void) { state.last_underrun = 0; CPUResult result; - CPUDecision decision = CPU_update(&state, &config, false, false, 1, &result); + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, &result); TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); - TEST_ASSERT_EQUAL(3, state.target_index); // Boosted by panic_step_up=2 (1+2=3) + TEST_ASSERT_EQUAL(2, state.target_index); // Boosted by panic_step_up=1 (1+1=2) TEST_ASSERT_EQUAL(8, state.panic_cooldown); } @@ -539,10 +544,10 @@ void test_update_panic_on_underrun_fallback(void) { state.last_underrun = 0; CPUResult result; - CPUDecision decision = CPU_update(&state, &config, false, false, 1, &result); + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, &result); TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); - TEST_ASSERT_EQUAL(2, state.target_level); // Boosted to max + TEST_ASSERT_EQUAL(1, state.target_level); // Boosted by panic_step_up=1 (0+1=1) } void test_update_no_panic_when_at_max(void) { @@ -552,7 +557,7 @@ void test_update_no_panic_when_at_max(void) { state.target_index = 3; // Already at max state.last_underrun = 0; - CPUDecision decision = CPU_update(&state, &config, false, false, 1, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); // Should not panic, just update underrun tracking TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); @@ -569,7 +574,7 @@ void test_update_waits_for_full_window(void) { state.startup_frames = config.startup_grace; state.frame_count = 10; // Not yet at window_frames (30) - CPUDecision decision = CPU_update(&state, 
&config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); TEST_ASSERT_EQUAL(11, state.frame_count); // Incremented @@ -594,7 +599,7 @@ void test_update_boost_on_high_util_granular(void) { } CPUResult result; - CPUDecision decision = CPU_update(&state, &config, false, false, 0, &result); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, &result); TEST_ASSERT_EQUAL(CPU_DECISION_BOOST, decision); TEST_ASSERT_TRUE(state.target_index > 1); // Moved up @@ -616,7 +621,7 @@ void test_update_reduce_on_low_util_granular(void) { } CPUResult result; - CPUDecision decision = CPU_update(&state, &config, false, false, 0, &result); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, &result); TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); TEST_ASSERT_TRUE(state.target_index < 3); // Moved down @@ -636,7 +641,7 @@ void test_update_no_reduce_during_cooldown(void) { CPU_recordFrameTime(&state, 6667); // Low util } - CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); // Should NOT reduce due to cooldown TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); @@ -656,7 +661,7 @@ void test_update_boost_fallback_mode(void) { CPU_recordFrameTime(&state, 15000); } - CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); TEST_ASSERT_EQUAL(CPU_DECISION_BOOST, decision); TEST_ASSERT_EQUAL(1, state.target_level); @@ -674,7 +679,7 @@ void test_update_reduce_fallback_mode(void) { CPU_recordFrameTime(&state, 6667); } - CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); TEST_ASSERT_EQUAL(1, 
state.target_level); @@ -695,13 +700,89 @@ void test_update_sweet_spot_resets_counters(void) { CPU_recordFrameTime(&state, 11667); // ~70% of 16667 } - CPU_update(&state, &config, false, false, 0, NULL); + CPU_update(&state, &config, false, false, 0, 100, NULL); // Counters should be reset TEST_ASSERT_EQUAL(0, state.high_util_windows); TEST_ASSERT_EQUAL(0, state.low_util_windows); } +void test_update_reduce_blocked_by_low_buffer(void) { + // Setup: granular mode at high frequency, ready to reduce + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.current_index = 3; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Add low utilization frame times (would normally trigger reduce) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // Low util + } + + // Pass buffer_fill below threshold (default is 40) + unsigned low_buffer = config.min_buffer_for_reduce - 1; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, low_buffer, NULL); + + // Should NOT reduce because buffer is too low + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(3, state.target_index); // Still at max frequency + + // low_util_windows should have incremented but no reduce happened + TEST_ASSERT_EQUAL(config.reduce_windows, state.low_util_windows); +} + +void test_update_reduce_allowed_with_healthy_buffer(void) { + // Same setup as above + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.current_index = 3; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + 
CPU_recordFrameTime(&state, 6667); // Low util + } + + // Pass buffer_fill at threshold (default is 40) + unsigned healthy_buffer = config.min_buffer_for_reduce; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, healthy_buffer, NULL); + + // Should reduce because buffer is healthy + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); + TEST_ASSERT_EQUAL(2, state.target_index); // Reduced from 3 to 2 +} + +void test_update_reduce_no_grace_period(void) { + // Setup: ready to reduce + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.current_index = 3; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // Low util + } + + // Healthy buffer so reduce should happen + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); + + // Verify NO grace period was set (unlike boost which sets grace) + TEST_ASSERT_EQUAL(0, state.panic_grace); +} + /////////////////////////////// // Topology Tests /////////////////////////////// @@ -971,7 +1052,7 @@ void test_update_topology_boost_increments_state(void) { CPU_recordFrameTime(&state, 15000); // ~90% } - CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); TEST_ASSERT_EQUAL(CPU_DECISION_BOOST, decision); TEST_ASSERT_EQUAL(3, state.target_state); @@ -993,8 +1074,58 @@ void test_update_topology_reduce_decrements_state(void) { CPU_recordFrameTime(&state, 6667); // ~40% } - CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); + + 
TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); + TEST_ASSERT_LESS_THAN(4, state.target_state); +} + +void test_update_topology_reduce_blocked_by_low_buffer(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 4; + state.current_state = 4; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Low utilization frames (<55%) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // ~40% + } + + // Buffer below threshold + unsigned low_buffer = config.min_buffer_for_reduce - 1; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, low_buffer, NULL); + + // Should NOT reduce because buffer is too low + TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); + TEST_ASSERT_EQUAL(4, state.target_state); // Still at original state +} + +void test_update_topology_reduce_allowed_with_healthy_buffer(void) { + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + + state.startup_frames = config.startup_grace; + state.target_state = 4; + state.current_state = 4; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Low utilization frames (<55%) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 6667); // ~40% + } + + // Buffer at threshold + unsigned healthy_buffer = config.min_buffer_for_reduce; + CPUDecision decision = CPU_update(&state, &config, false, false, 0, healthy_buffer, NULL); + // Should reduce because buffer is healthy TEST_ASSERT_EQUAL(CPU_DECISION_REDUCE, decision); TEST_ASSERT_LESS_THAN(4, state.target_state); } @@ -1009,7 +1140,7 @@ void test_update_topology_panic_jumps_states(void) { state.last_underrun = 0; // Underrun detected - CPUDecision decision = CPU_update(&state, &config, false, false, 1, NULL); + CPUDecision 
decision = CPU_update(&state, &config, false, false, 1, 100, NULL); TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); TEST_ASSERT_GREATER_THAN(1, state.target_state); @@ -1031,7 +1162,7 @@ void test_update_topology_no_boost_at_max_state(void) { CPU_recordFrameTime(&state, 15000); } - CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); TEST_ASSERT_EQUAL(5, state.target_state); @@ -1053,12 +1184,412 @@ void test_update_topology_no_reduce_at_min_state(void) { CPU_recordFrameTime(&state, 6667); } - CPUDecision decision = CPU_update(&state, &config, false, false, 0, NULL); + CPUDecision decision = CPU_update(&state, &config, false, false, 0, 100, NULL); TEST_ASSERT_EQUAL(CPU_DECISION_NONE, decision); TEST_ASSERT_EQUAL(0, state.target_state); } +/////////////////////////////// +// Grace Period Tests +/////////////////////////////// + +void test_panic_grace_ignores_underruns(void) { + // Setup: granular mode with grace period active + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 30; // Grace period active + + // Underrun occurs during grace period + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Should NOT panic - grace period protects + TEST_ASSERT_NOT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(1, state.target_index); // Unchanged +} + +void test_panic_grace_allows_panic_when_expired(void) { + // Setup: granular mode with grace period expired + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 0; // Grace period expired + + 
// Underrun occurs after grace period + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Should panic normally + TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(2, state.target_index); // Boosted by 1 +} + +void test_panic_sets_grace_period(void) { + // Setup: granular mode + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 0; + + // Trigger panic + CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Grace period should be set + TEST_ASSERT_EQUAL(CPU_PANIC_GRACE_FRAMES, state.panic_grace); +} + +void test_panic_resets_stability_streak(void) { + // Setup: granular mode with stability streak + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 0; + state.stability_streak = 5; // Had some stability + + // Trigger panic + CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Stability streak should be reset + TEST_ASSERT_EQUAL(0, state.stability_streak); +} + +void test_panic_grace_decrements_each_update(void) { + // Setup: granular mode with grace period + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.panic_grace = 10; + + // Call update (no underrun, not completing a window) + state.frame_count = 0; + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Grace should decrement + TEST_ASSERT_EQUAL(9, state.panic_grace); +} + +void test_panic_grace_topology_mode(void) { + // Setup: topology mode with grace period active + setup_dual_cluster_topology(&state); + CPU_buildPerfStates(&state, &config); + 
state.startup_frames = config.startup_grace; + state.target_state = 1; + state.current_state = 1; + state.last_underrun = 0; + state.panic_grace = 30; // Grace period active + + // Underrun occurs during grace period + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Should NOT panic + TEST_ASSERT_NOT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(1, state.target_state); // Unchanged +} + +void test_grace_underruns_tracked_during_grace(void) { + // Setup: granular mode with grace period active + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 30; + state.grace_underruns = 0; + + // Underrun occurs during grace period + CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Grace underruns should be tracked + TEST_ASSERT_EQUAL(1, state.grace_underruns); +} + +void test_grace_period_override_on_max_underruns(void) { + // Setup: granular mode with grace period active but near max underruns + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 30; // Grace period still active + state.grace_underruns = CPU_PANIC_GRACE_MAX_UNDERRUNS - 1; // One more triggers override + + // Underrun occurs - should exceed max and trigger panic despite grace + CPUDecision decision = CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Should PANIC despite grace period (catastrophic failure override) + TEST_ASSERT_EQUAL(CPU_DECISION_PANIC, decision); + TEST_ASSERT_EQUAL(2, state.target_index); // Boosted +} + +void test_grace_underruns_reset_on_panic(void) { + // Setup: granular mode, trigger a panic + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, 
&config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 0; // No grace period + state.grace_underruns = 3; // Some accumulated + + // Underrun occurs - triggers panic + CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Grace underruns should be reset (along with grace period being set) + TEST_ASSERT_EQUAL(0, state.grace_underruns); + TEST_ASSERT_EQUAL(CPU_PANIC_GRACE_FRAMES, state.panic_grace); +} + +void test_stability_decay_does_not_affect_lower_frequencies(void) { + // Setup: stable at 800MHz (index 2), 400MHz (index 0) is blocked + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 2; // At 800MHz + + // Block 400MHz + state.panic_count[0] = CPU_PANIC_THRESHOLD; + state.stability_streak = CPU_STABILITY_DECAY_WINDOWS - 1; + state.frame_count = config.window_frames - 1; + + // Complete a stable window + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 10000); + } + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // 400MHz should still be blocked (below current, not decayed) + TEST_ASSERT_EQUAL(CPU_PANIC_THRESHOLD, state.panic_count[0]); +} + +/////////////////////////////// +// Stability Decay Tests +/////////////////////////////// + +void test_stability_streak_increments_on_stable_window(void) { + // Setup: granular mode, complete a window without panic + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 3; + state.frame_count = config.window_frames - 1; + state.stability_streak = 0; + + // Add frame times for a complete window (low util, sweet spot) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 10000); // ~60% - in sweet 
spot + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Stability streak should increment + TEST_ASSERT_EQUAL(1, state.stability_streak); +} + +void test_stability_decay_after_threshold_windows(void) { + // Setup: granular mode with panic counts at index 1 (600MHz) + // Stability at 600MHz should decay 600/800/1000 but NOT 400MHz + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; // At 600MHz + state.frame_count = config.window_frames - 1; + state.stability_streak = CPU_STABILITY_DECAY_WINDOWS - 1; // One more for decay + + // Set panic counts: below, at, and above current index + state.panic_count[0] = 2; // Below current - should NOT decay + state.panic_count[1] = 2; // At current - should decay + state.panic_count[2] = 1; // Above current - should decay + state.panic_count[3] = 0; // Above current - stays 0 + + // Add frame times for stable window + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 10000); // ~60% + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Only current index and above should decay + TEST_ASSERT_EQUAL(2, state.panic_count[0]); // Below - unchanged + TEST_ASSERT_EQUAL(1, state.panic_count[1]); // At current: 2 -> 1 + TEST_ASSERT_EQUAL(0, state.panic_count[2]); // Above: 1 -> 0 + TEST_ASSERT_EQUAL(0, state.panic_count[3]); // Above: stays 0 + // Stability streak should reset after decay + TEST_ASSERT_EQUAL(0, state.stability_streak); +} + +void test_stability_decay_unblocks_frequency(void) { + // Setup: frequency 1 (600MHz) is blocked, we're stable at that frequency + // Only being stable AT a frequency can unblock it (not being stable above it) + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; // At 600MHz - 
same as blocked frequency + + // Block frequency 1 (panic_count at threshold) + state.panic_count[1] = CPU_PANIC_THRESHOLD; + + // Run enough stable windows to decay + state.frame_budget_us = 16667; + for (int w = 0; w < CPU_PANIC_THRESHOLD; w++) { + // Each iteration: reach decay threshold, then check + for (int s = 0; s < CPU_STABILITY_DECAY_WINDOWS; s++) { + state.frame_count = config.window_frames - 1; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 10000); + } + CPU_update(&state, &config, false, false, 0, 100, NULL); + } + } + + // After enough decays, frequency should be unblocked + TEST_ASSERT_LESS_THAN(CPU_PANIC_THRESHOLD, state.panic_count[1]); +} + +void test_no_stability_increment_during_panic(void) { + // Setup: a panic happens this frame + int freqs[] = {400000, 600000, 800000, 1000000}; + CPU_detectFrequencies(&state, &config, freqs, 4); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.last_underrun = 0; + state.panic_grace = 0; + state.stability_streak = 3; + + // Panic happens + CPU_update(&state, &config, false, false, 1, 100, NULL); + + // Stability streak should be reset, not incremented + TEST_ASSERT_EQUAL(0, state.stability_streak); +} + +/////////////////////////////// +// Step-by-1 Tests +/////////////////////////////// + +void test_panic_step_default_is_one(void) { + CPUConfig cfg; + CPU_initConfig(&cfg); + + TEST_ASSERT_EQUAL(1, cfg.panic_step_up); +} + +void test_granular_boost_steps_by_one(void) { + // Setup: granular mode, ready to boost + int freqs[] = {400000, 600000, 800000, 1000000, 1200000}; + CPU_detectFrequencies(&state, &config, freqs, 5); + state.startup_frames = config.startup_grace; + state.target_index = 1; // At 600MHz + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + + // High utilization (would predict big jump with old algorithm) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + 
CPU_recordFrameTime(&state, 16000); // ~96% - would have jumped more before + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Should only step by 1 + TEST_ASSERT_EQUAL(2, state.target_index); // 1 -> 2, not 1 -> 4 +} + +void test_granular_boost_extreme_util_still_steps_by_one(void) { + // Setup: granular mode at lowest freq, ready to boost + int freqs[] = {400000, 600000, 800000, 1000000, 1200000}; + CPU_detectFrequencies(&state, &config, freqs, 5); + state.startup_frames = config.startup_grace; + state.target_index = 0; // At 400MHz (lowest) + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + + // Extreme utilization - 200% would predict 400*200/70 = 1142MHz (index 4) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 33334); // 200% utilization + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Should only step by 1 even with extreme utilization + TEST_ASSERT_EQUAL(1, state.target_index); // 0 -> 1, NOT 0 -> 4 +} + +void test_granular_boost_sets_grace_period(void) { + // Setup: granular mode, ready to boost + int freqs[] = {400000, 600000, 800000, 1000000, 1200000}; + CPU_detectFrequencies(&state, &config, freqs, 5); + state.startup_frames = config.startup_grace; + state.target_index = 1; + state.frame_count = config.window_frames - 1; + state.high_util_windows = config.boost_windows - 1; + state.panic_grace = 0; + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 15000); // 90% utilization + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Boost should set grace period + TEST_ASSERT_EQUAL(CPU_PANIC_GRACE_FRAMES, state.panic_grace); +} + +void test_granular_reduce_no_grace_period(void) { + // Setup: granular mode, ready to reduce + int freqs[] = {400000, 600000, 800000, 1000000, 1200000}; + CPU_detectFrequencies(&state, &config, freqs, 5); + 
state.startup_frames = config.startup_grace; + state.target_index = 4; + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + state.panic_grace = 0; + + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 5000); // 30% utilization + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Reduce should NOT set grace period (if we underrun, frequency is too slow) + TEST_ASSERT_EQUAL(0, state.panic_grace); +} + +void test_granular_reduce_steps_by_one(void) { + // Setup: granular mode, ready to reduce + int freqs[] = {400000, 600000, 800000, 1000000, 1200000}; + CPU_detectFrequencies(&state, &config, freqs, 5); + state.startup_frames = config.startup_grace; + state.target_index = 4; // At 1200MHz + state.frame_count = config.window_frames - 1; + state.low_util_windows = config.reduce_windows - 1; + + // Low utilization (would predict big drop with old algorithm) + state.frame_budget_us = 16667; + for (int i = 0; i < 30; i++) { + CPU_recordFrameTime(&state, 3333); // ~20% - would have dropped more before + } + + CPU_update(&state, &config, false, false, 0, 100, NULL); + + // Should only step by 1 + TEST_ASSERT_EQUAL(3, state.target_index); // 4 -> 3, not 4 -> 0 +} + /////////////////////////////// // Test Runner /////////////////////////////// @@ -1149,6 +1680,9 @@ int main(void) { RUN_TEST(test_update_boost_fallback_mode); RUN_TEST(test_update_reduce_fallback_mode); RUN_TEST(test_update_sweet_spot_resets_counters); + RUN_TEST(test_update_reduce_blocked_by_low_buffer); + RUN_TEST(test_update_reduce_allowed_with_healthy_buffer); + RUN_TEST(test_update_reduce_no_grace_period); // Topology - initialization RUN_TEST(test_initTopology_zeros_topology); @@ -1183,9 +1717,37 @@ int main(void) { // Topology - update decisions RUN_TEST(test_update_topology_boost_increments_state); RUN_TEST(test_update_topology_reduce_decrements_state); + 
RUN_TEST(test_update_topology_reduce_blocked_by_low_buffer); + RUN_TEST(test_update_topology_reduce_allowed_with_healthy_buffer); RUN_TEST(test_update_topology_panic_jumps_states); RUN_TEST(test_update_topology_no_boost_at_max_state); RUN_TEST(test_update_topology_no_reduce_at_min_state); + // Grace period + RUN_TEST(test_panic_grace_ignores_underruns); + RUN_TEST(test_panic_grace_allows_panic_when_expired); + RUN_TEST(test_panic_sets_grace_period); + RUN_TEST(test_panic_resets_stability_streak); + RUN_TEST(test_panic_grace_decrements_each_update); + RUN_TEST(test_panic_grace_topology_mode); + RUN_TEST(test_grace_underruns_tracked_during_grace); + RUN_TEST(test_grace_period_override_on_max_underruns); + RUN_TEST(test_grace_underruns_reset_on_panic); + + // Stability decay + RUN_TEST(test_stability_streak_increments_on_stable_window); + RUN_TEST(test_stability_decay_after_threshold_windows); + RUN_TEST(test_stability_decay_unblocks_frequency); + RUN_TEST(test_no_stability_increment_during_panic); + RUN_TEST(test_stability_decay_does_not_affect_lower_frequencies); + + // Step-by-1 behavior + RUN_TEST(test_panic_step_default_is_one); + RUN_TEST(test_granular_boost_steps_by_one); + RUN_TEST(test_granular_boost_extreme_util_still_steps_by_one); + RUN_TEST(test_granular_boost_sets_grace_period); + RUN_TEST(test_granular_reduce_no_grace_period); + RUN_TEST(test_granular_reduce_steps_by_one); + return UNITY_END(); } diff --git a/workspace/all/common/api.c b/workspace/all/common/api.c index 33814c7b..d09066aa 100644 --- a/workspace/all/common/api.c +++ b/workspace/all/common/api.c @@ -1723,6 +1723,7 @@ static struct SND_Context { float rate_integral; // PI integral term (accumulates from smoothed error) float error_avg; // Smoothed error for slow integral timescale float last_rate_adjust; // Last computed adjustment (for snapshot without side effects) + float last_rate_boost; // Last computed boost (for snapshot diagnostics) // SDL callback timing diagnostics uint64_t 
callback_count; // Total callbacks @@ -1901,9 +1902,22 @@ static float SND_calculateRateAdjust(void) { // Fast timescale (proportional): immediate response to buffer level changes float p_term = error * SND_RATE_CONTROL_D; + // Cubic safety boost: ~1.0× at center, 4× at limits + // Only boost proportional term - integral is a learned steady-state offset + // Prevents hitting 0% or 100% during integral learning or after CPU frequency resets + // More responsive in middle range than quartic, catches drift earlier + float distance = fill - 0.5f; + float normalized = distance * 2.0f; // -1 to +1 + float abs_norm = normalized < 0 ? -normalized : normalized; // 0 to 1 + float boost = 1.0f + 3.0f * (abs_norm * abs_norm * abs_norm); // 1.0 to 4.0 + float boosted_p = p_term * boost; + // Slow timescale (integral): persistent offset learned in SND_newFrame() - // Integral is updated once per frame, not here (avoids N updates for N audio batches) - float adjustment = p_term + snd.rate_integral; + // Integral is NOT boosted - it's a steady-state correction, not emergency response + float adjustment = boosted_p + snd.rate_integral; + + // Store for diagnostics + snd.last_rate_boost = boost; // Invert for our resampler convention (larger ratio = fewer outputs) snd.last_rate_adjust = 1.0f - adjustment; @@ -2213,6 +2227,7 @@ SND_Snapshot SND_getSnapshot(void) { snap.rate_adjust = snd.last_rate_adjust; snap.total_adjust = snd.last_rate_adjust; snap.rate_integral = snd.rate_integral; + snap.rate_boost = snd.last_rate_boost; snap.rate_control_d = SND_RATE_CONTROL_D; snap.rate_control_ki = SND_RATE_CONTROL_KI; snap.error_avg = snd.error_avg; diff --git a/workspace/all/common/api.h b/workspace/all/common/api.h index 8f85990d..4bfee1df 100644 --- a/workspace/all/common/api.h +++ b/workspace/all/common/api.h @@ -896,6 +896,7 @@ typedef struct { float rate_adjust; // Dynamic rate control adjustment (1.0 ± d) float total_adjust; // Same as rate_adjust (no separate corrections) float 
rate_integral; // PI controller integral term (drift correction) + float rate_boost; // Cubic safety boost multiplier (1.0 at center, up to 4.0 at limits) float rate_control_d; // Proportional gain float rate_control_ki; // Integral gain float error_avg; // Smoothed error (for debugging integral behavior) diff --git a/workspace/all/common/cpu.c b/workspace/all/common/cpu.c index e9683e85..0d428c5b 100644 --- a/workspace/all/common/cpu.c +++ b/workspace/all/common/cpu.c @@ -48,6 +48,7 @@ void CPU_initConfig(CPUConfig* config) { config->target_util = CPU_DEFAULT_TARGET_UTIL; config->max_step_down = CPU_DEFAULT_MAX_STEP_DOWN; config->panic_step_up = CPU_DEFAULT_PANIC_STEP_UP; + config->min_buffer_for_reduce = CPU_DEFAULT_MIN_BUFFER_FOR_REDUCE; } void CPU_initState(CPUState* state) { @@ -228,7 +229,8 @@ const char* CPU_getModeName(const CPUState* state) { } CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forward, bool show_menu, - unsigned current_underruns, CPUResult* result) { + unsigned current_underruns, unsigned buffer_fill_percent, + CPUResult* result) { // Initialize result if provided if (result) { result->decision = CPU_DECISION_NONE; @@ -260,6 +262,11 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa return CPU_DECISION_SKIP; } + // Decrement panic grace period (ignore underruns after frequency change) + if (state->panic_grace > 0) { + state->panic_grace--; + } + // Get current indices based on mode int current_idx = state->target_index; int current_level = state->target_level; @@ -281,8 +288,16 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa at_max = (current_level >= 2); } + // Track underruns during grace period + bool underrun_detected = (current_underruns > state->last_underrun); + if (underrun_detected && state->panic_grace > 0) { + state->grace_underruns++; + } + // Emergency: check for underruns (panic path) - if (current_underruns > state->last_underrun 
&& !at_max) { + // Skip if in grace period UNLESS too many underruns (catastrophic failure) + bool grace_exceeded = (state->grace_underruns >= CPU_PANIC_GRACE_MAX_UNDERRUNS); + if (underrun_detected && !at_max && (state->panic_grace == 0 || grace_exceeded)) { // Underrun detected - boost by panic_step_up if (state->use_topology) { int new_state = current_state_idx + config->panic_step_up; @@ -315,7 +330,10 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa state->high_util_windows = 0; state->low_util_windows = 0; + state->stability_streak = 0; state->panic_cooldown = 8; // ~4 seconds before allowing reduction + state->panic_grace = CPU_PANIC_GRACE_FRAMES; // Ignore underruns while new freq settles + state->grace_underruns = 0; state->last_underrun = 0; // Reset after handling return CPU_DECISION_PANIC; @@ -395,9 +413,10 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa state->low_util_windows++; state->high_util_windows = 0; - // Only reduce if enough windows AND panic cooldown expired + // Only reduce if: enough windows, cooldown expired, buffer healthy bool reduce_ok = (state->low_util_windows >= config->reduce_windows) && - (state->panic_cooldown == 0) && (current_state_idx > 0); + (state->panic_cooldown == 0) && (current_state_idx > 0) && + (buffer_fill_percent >= config->min_buffer_for_reduce); if (reduce_ok) { // Step down one state at a time @@ -421,7 +440,6 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa } } else if (state->use_granular) { // Granular mode: linear frequency scaling - int current_freq = state->frequencies[current_idx]; // Decrement panic cooldown if (state->panic_cooldown > 0) { @@ -434,19 +452,15 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa state->low_util_windows = 0; if (state->high_util_windows >= config->boost_windows && current_idx < max_idx) { - // Predict optimal frequency using linear scaling 
- int needed_freq = CPU_predictFrequency(current_freq, util, config->target_util); - int new_idx = - CPU_findNearestIndex(state->frequencies, state->freq_count, needed_freq); - - // Ensure we actually go higher - if (new_idx <= current_idx) - new_idx = current_idx + 1; + // Step up by 1 - simple and predictable + int new_idx = current_idx + 1; if (new_idx > max_idx) new_idx = max_idx; state->target_index = new_idx; state->high_util_windows = 0; + state->panic_grace = CPU_PANIC_GRACE_FRAMES; + state->grace_underruns = 0; decision = CPU_DECISION_BOOST; if (result) { @@ -459,34 +473,30 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa state->low_util_windows++; state->high_util_windows = 0; - // Only reduce if enough windows AND panic cooldown expired + // Only reduce if: enough windows, panic cooldown expired, buffer healthy bool reduce_ok = (state->low_util_windows >= config->reduce_windows) && - (state->panic_cooldown == 0) && (current_idx > 0); + (state->panic_cooldown == 0) && (current_idx > 0) && + (buffer_fill_percent >= config->min_buffer_for_reduce); if (reduce_ok) { - // Predict lower frequency - int needed_freq = CPU_predictFrequency(current_freq, util, config->target_util); - int new_idx = - CPU_findNearestIndex(state->frequencies, state->freq_count, needed_freq); - - // Ensure we actually go lower - if (new_idx >= current_idx) - new_idx = current_idx - 1; - if (new_idx < 0) - new_idx = 0; - - // Limit reduction to max_step_down - if (current_idx - new_idx > config->max_step_down) { - new_idx = current_idx - config->max_step_down; + // Step down by 1 - simple and predictable + int new_idx = current_idx - 1; + + // Skip blocked frequencies + while (new_idx >= 0 && state->panic_count[new_idx] >= CPU_PANIC_THRESHOLD) { + new_idx--; } - state->target_index = new_idx; - state->low_util_windows = 0; - decision = CPU_DECISION_REDUCE; + if (new_idx >= 0) { + state->target_index = new_idx; + state->low_util_windows = 0; + // No 
grace period on reduce - if we underrun, frequency is too slow + decision = CPU_DECISION_REDUCE; - if (result) { - result->decision = CPU_DECISION_REDUCE; - result->new_index = new_idx; + if (result) { + result->decision = CPU_DECISION_REDUCE; + result->new_index = new_idx; + } } } } else { @@ -496,6 +506,12 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa } } else { // Fallback mode: 3-level scaling + + // Decrement panic cooldown + if (state->panic_cooldown > 0) { + state->panic_cooldown--; + } + if (util > config->util_high) { state->high_util_windows++; state->low_util_windows = 0; @@ -512,6 +528,8 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa int new_level = current_level + 1; state->target_level = new_level; state->high_util_windows = 0; + state->panic_grace = CPU_PANIC_GRACE_FRAMES; + state->grace_underruns = 0; decision = CPU_DECISION_BOOST; if (result) { @@ -520,11 +538,13 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa } } - // Reduce if sustained low utilization - if (state->low_util_windows >= config->reduce_windows && current_level > 0) { + // Reduce if sustained low utilization (and panic cooldown expired, buffer healthy) + if (state->low_util_windows >= config->reduce_windows && current_level > 0 && + state->panic_cooldown == 0 && buffer_fill_percent >= config->min_buffer_for_reduce) { int new_level = current_level - 1; state->target_level = new_level; state->low_util_windows = 0; + // No grace period on reduce - if we underrun, frequency is too slow decision = CPU_DECISION_REDUCE; if (result) { @@ -534,6 +554,20 @@ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forwa } } + // Track stability for panic count decay + // If we reached here, no panic happened during this window + state->stability_streak++; + if (state->stability_streak >= CPU_STABILITY_DECAY_WINDOWS) { + // Earned stability - decay panic counts for 
current freq and above only + // Being stable at 600MHz proves 800/1000/1200 are fine too, but not 400MHz + for (int i = current_idx; i < state->freq_count; i++) { + if (state->panic_count[i] > 0) { + state->panic_count[i]--; + } + } + state->stability_streak = 0; + } + // Reset window counter state->frame_count = 0; diff --git a/workspace/all/common/cpu.h b/workspace/all/common/cpu.h index eb16298a..36f059d1 100644 --- a/workspace/all/common/cpu.h +++ b/workspace/all/common/cpu.h @@ -50,8 +50,12 @@ #define CPU_DEFAULT_MIN_FREQ_KHZ 0 // No minimum (panic failsafe handles problematic freqs) #define CPU_DEFAULT_TARGET_UTIL 70 // Target utilization after change #define CPU_DEFAULT_MAX_STEP_DOWN 1 // Max frequency steps when reducing -#define CPU_DEFAULT_PANIC_STEP_UP 2 // Frequency steps on panic (underrun) +#define CPU_DEFAULT_PANIC_STEP_UP 1 // Frequency steps on panic (underrun) #define CPU_PANIC_THRESHOLD 3 // Block frequency after this many panics +#define CPU_PANIC_GRACE_FRAMES 60 // Frames to ignore underruns after freq change (~1s at 60fps) +#define CPU_PANIC_GRACE_MAX_UNDERRUNS 5 // Max underruns during grace before panic anyway +#define CPU_STABILITY_DECAY_WINDOWS 8 // Stable windows before decaying panic counts (~4s) +#define CPU_DEFAULT_MIN_BUFFER_FOR_REDUCE 40 // Min audio buffer % to allow reduce /** * Multi-cluster topology constants. 
@@ -158,6 +162,7 @@ typedef struct { unsigned int target_util; // Target utilization after frequency change int max_step_down; // Max frequency steps when reducing int panic_step_up; // Frequency steps on panic (underrun) + unsigned int min_buffer_for_reduce; // Min audio buffer % to allow reduce } CPUConfig; /** @@ -199,6 +204,11 @@ typedef struct { // Per-frequency panic tracking (failsafe for problematic frequencies) int panic_count[CPU_MAX_FREQUENCIES]; // Count of panics at each frequency + // Grace period and stability tracking + int panic_grace; // Frames remaining where underruns are ignored after freq change + int grace_underruns; // Underruns accumulated during grace period + int stability_streak; // Consecutive windows without panic (for decay) + // Multi-cluster topology support CPUTopology topology; // Detected CPU topology int target_state; // Target PerfState index (multi-cluster mode) @@ -289,11 +299,12 @@ void CPU_recordFrameTime(CPUState* state, uint64_t frame_time_us); * @param fast_forward True if fast-forwarding (skip scaling) * @param show_menu True if menu is showing (skip scaling) * @param current_underruns Current underrun count from audio + * @param buffer_fill_percent Current audio buffer fill (0-100) * @param result Optional output for detailed result info * @return Decision type (NONE, BOOST, REDUCE, PANIC, SKIP) */ CPUDecision CPU_update(CPUState* state, const CPUConfig* config, bool fast_forward, bool show_menu, - unsigned current_underruns, CPUResult* result); + unsigned current_underruns, unsigned buffer_fill_percent, CPUResult* result); /** * Calculates the recommended frequency for a target utilization. 
diff --git a/workspace/all/player/player.c b/workspace/all/player/player.c index 3cc62b7e..302880f3 100644 --- a/workspace/all/player/player.c +++ b/workspace/all/player/player.c @@ -1171,6 +1171,9 @@ static void resetAutoCPUState(void) { auto_cpu_state.startup_frames = 0; auto_cpu_state.frame_time_index = 0; auto_cpu_state.panic_cooldown = 0; + auto_cpu_state.panic_grace = 0; + auto_cpu_state.grace_underruns = 0; + auto_cpu_state.stability_streak = 0; // Reset panic tracking (menu changes may allow lower frequencies to work) memset(auto_cpu_state.panic_count, 0, sizeof(auto_cpu_state.panic_count)); @@ -1314,7 +1317,13 @@ static void updateAutoCPU(void) { PWR_setThreadAffinity(pending_affinity); } + // Decrement panic grace period (ignore underruns after frequency change) + if (auto_cpu_state.panic_grace > 0) { + auto_cpu_state.panic_grace--; + } + // Emergency: check for actual underruns (panic path) + // Skip if in grace period - new frequency needs time to refill audio buffer unsigned underruns = SND_getUnderrunCount(); int max_idx = auto_cpu_state.freq_count - 1; int max_state = auto_cpu_state.topology.state_count - 1; @@ -1327,7 +1336,16 @@ static void updateAutoCPU(void) { at_max = (current_level >= 2); } - if (underruns > auto_cpu_state.last_underrun && !at_max) { + // Track underruns during grace period + bool underrun_detected = (underruns > auto_cpu_state.last_underrun); + if (underrun_detected && auto_cpu_state.panic_grace > 0) { + auto_cpu_state.grace_underruns++; + } + + // Override grace period if too many underruns (catastrophic failure) + bool grace_exceeded = (auto_cpu_state.grace_underruns >= CPU_PANIC_GRACE_MAX_UNDERRUNS); + + if (underrun_detected && !at_max && (auto_cpu_state.panic_grace == 0 || grace_exceeded)) { // Underrun detected - track panic and boost unsigned audio_fill = SND_getBufferOccupancy(); @@ -1374,8 +1392,12 @@ static void updateAutoCPU(void) { } auto_cpu_state.high_util_windows = 0; auto_cpu_state.low_util_windows = 0; + 
auto_cpu_state.stability_streak = 0; // Cooldown: wait 8 windows (~4 seconds) before allowing reduction auto_cpu_state.panic_cooldown = 8; + // Grace period: ignore underruns while new frequency refills audio buffer + auto_cpu_state.panic_grace = CPU_PANIC_GRACE_FRAMES; + auto_cpu_state.grace_underruns = 0; SND_resetUnderrunCount(); auto_cpu_state.last_underrun = 0; return; @@ -1410,6 +1432,9 @@ static void updateAutoCPU(void) { util = 200; // Cap at 200% for sanity } + // Get buffer fill for reduce decisions + unsigned buffer_fill = SND_getBufferOccupancy(); + if (auto_cpu_state.use_topology) { // Topology mode: step through PerfStates one at a time // Unlike granular mode, we don't predict - just step conservatively @@ -1429,6 +1454,8 @@ static void updateAutoCPU(void) { int new_state = current_state + 1; auto_cpu_setTargetState(new_state); auto_cpu_state.high_util_windows = 0; + auto_cpu_state.panic_grace = CPU_PANIC_GRACE_FRAMES; + auto_cpu_state.grace_underruns = 0; LOG_debug("Auto CPU: BOOST state %d→%d (util=%u%%)\n", current_state, new_state, util); } @@ -1437,10 +1464,11 @@ static void updateAutoCPU(void) { auto_cpu_state.low_util_windows++; auto_cpu_state.high_util_windows = 0; - // Only reduce if: enough consecutive low windows AND panic cooldown expired + // Only reduce if: enough windows, cooldown expired, buffer healthy int reduce_ok = (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows) && - (auto_cpu_state.panic_cooldown == 0) && (current_state > 0); + (auto_cpu_state.panic_cooldown == 0) && (current_state > 0) && + (buffer_fill >= auto_cpu_config.min_buffer_for_reduce); if (reduce_ok) { // Step down by max_step_down (usually 1) @@ -1449,8 +1477,9 @@ static void updateAutoCPU(void) { new_state = 0; auto_cpu_setTargetState(new_state); auto_cpu_state.low_util_windows = 0; - LOG_debug("Auto CPU: REDUCE state %d→%d (util=%u%%)\n", current_state, - new_state, util); + // No grace period on reduce - if we underrun, frequency is too slow + 
LOG_debug("Auto CPU: REDUCE state %d→%d (util=%u%% buf=%u%%)\n", current_state, + new_state, util, buffer_fill); } } else { // In sweet spot - reset counters @@ -1463,9 +1492,10 @@ static void updateAutoCPU(void) { if (++debug_window_count_topo >= 4) { debug_window_count_topo = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% state=%d/%d\n", - snap.fill_pct, snap.rate_integral, snap.total_adjust, util, current_state, - max_state); + LOG_debug( + "Auto CPU: fill=%u%% int=%.4f boost=%.2f adj=%.4f util=%u%% state=%d/%d\n", + snap.fill_pct, snap.rate_integral, snap.rate_boost, snap.total_adjust, util, + current_state, max_state); } } else if (auto_cpu_state.use_granular) { // Granular mode: use linear performance scaling to find optimal frequency @@ -1486,21 +1516,12 @@ static void updateAutoCPU(void) { if (auto_cpu_state.high_util_windows >= auto_cpu_config.boost_windows && current_idx < max_idx) { - // Find next frequency that would bring util to target (sweet spot) - // Using: new_util = util * (current_freq / new_freq) - // So: new_freq = current_freq * util / target_util - // No step limit - linear scaling prediction is accurate, boost aggressively - int needed_freq = current_freq * (int)util / auto_cpu_config.target_util; - int new_idx = auto_cpu_findNearestIndex(needed_freq); - - // Ensure we actually go higher - if (new_idx <= current_idx) - new_idx = current_idx + 1; - if (new_idx > max_idx) - new_idx = max_idx; - + // Step up by 1 - simple and predictable + int new_idx = current_idx + 1; auto_cpu_setTargetIndex(new_idx); auto_cpu_state.high_util_windows = 0; + auto_cpu_state.panic_grace = CPU_PANIC_GRACE_FRAMES; + auto_cpu_state.grace_underruns = 0; LOG_debug("Auto CPU: BOOST %d→%d kHz (util=%u%%)\n", current_freq, auto_cpu_state.frequencies[new_idx], util); } @@ -1509,49 +1530,32 @@ static void updateAutoCPU(void) { auto_cpu_state.low_util_windows++; auto_cpu_state.high_util_windows = 0; - // Only reduce 
if: enough consecutive low windows AND panic cooldown expired + // Only reduce if: enough windows, cooldown expired, buffer healthy int reduce_ok = (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows) && - (auto_cpu_state.panic_cooldown == 0) && (current_idx > 0); + (auto_cpu_state.panic_cooldown == 0) && (current_idx > 0) && + (buffer_fill >= auto_cpu_config.min_buffer_for_reduce); if (reduce_ok) { - // Find frequency that would bring util up to target (sweet spot) - // new_util = util * (current_freq / new_freq) - // new_freq = current_freq * util / target_util - int needed_freq = current_freq * (int)util / auto_cpu_config.target_util; - int new_idx = auto_cpu_findNearestIndex(needed_freq); - - // Ensure we actually go lower - if (new_idx >= current_idx) - new_idx = current_idx - 1; - if (new_idx < 0) - new_idx = 0; - - // Limit reduction to max_step_down indices at once - if (current_idx - new_idx > auto_cpu_config.max_step_down) { - new_idx = current_idx - auto_cpu_config.max_step_down; - } + // Step down by 1 - simple and predictable + int new_idx = current_idx - 1; - // Skip blocked frequencies - find first unblocked one above new_idx. - // Frequencies get blocked when they cause repeated panics. 
+ // Skip blocked frequencies while (new_idx >= 0 && auto_cpu_state.panic_count[new_idx] >= CPU_PANIC_THRESHOLD) { - new_idx++; - if (new_idx >= current_idx) { - // All lower frequencies blocked - stay at current - break; - } + new_idx--; } // Don't reduce if no safe frequency found - if (new_idx >= current_idx) { + if (new_idx < 0) { auto_cpu_state.low_util_windows = 0; } else { int new_freq = auto_cpu_state.frequencies[new_idx]; auto_cpu_setTargetIndex(new_idx); auto_cpu_state.low_util_windows = 0; - LOG_debug("Auto CPU: REDUCE %d→%d kHz (util=%u%%)\n", current_freq, - new_freq, util); + // No grace period on reduce - if we underrun, frequency is too slow + LOG_debug("Auto CPU: REDUCE %d→%d kHz (util=%u%% buf=%u%%)\n", current_freq, + new_freq, util, buffer_fill); } } } else { @@ -1565,12 +1569,19 @@ static void updateAutoCPU(void) { if (++debug_window_count >= 4) { debug_window_count = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% freq=%dkHz idx=%d/%d\n", - snap.fill_pct, snap.rate_integral, snap.total_adjust, util, current_freq, - current_idx, max_idx); + LOG_debug("Auto CPU: fill=%u%% int=%.4f boost=%.2f adj=%.4f util=%u%% freq=%dkHz " + "idx=%d/%d\n", + snap.fill_pct, snap.rate_integral, snap.rate_boost, snap.total_adjust, + util, current_freq, current_idx, max_idx); } } else { // Fallback mode: 3-level scaling (original algorithm) + + // Decrement panic cooldown each window + if (auto_cpu_state.panic_cooldown > 0) { + auto_cpu_state.panic_cooldown--; + } + if (util > auto_cpu_config.util_high) { auto_cpu_state.high_util_windows++; auto_cpu_state.low_util_windows = 0; @@ -1587,9 +1598,9 @@ static void updateAutoCPU(void) { if (++debug_window_count_fallback >= 4) { debug_window_count_fallback = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% level=%d\n", - snap.fill_pct, snap.rate_integral, snap.total_adjust, util, - current_level); + LOG_debug("Auto 
CPU: fill=%u%% int=%.4f boost=%.2f adj=%.4f util=%u%% level=%d\n", + snap.fill_pct, snap.rate_integral, snap.rate_boost, snap.total_adjust, + util, current_level); } // Boost if sustained high utilization @@ -1598,17 +1609,41 @@ static void updateAutoCPU(void) { int new_level = current_level + 1; auto_cpu_setTargetLevel(new_level); auto_cpu_state.high_util_windows = 0; + auto_cpu_state.panic_grace = CPU_PANIC_GRACE_FRAMES; + auto_cpu_state.grace_underruns = 0; LOG_debug("Auto CPU: BOOST level %d (util=%u%%)\n", new_level, util); } - // Reduce if sustained low utilization + // Reduce if sustained low utilization, buffer healthy (respects panic cooldown) if (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows && - current_level > 0) { + auto_cpu_state.panic_cooldown == 0 && current_level > 0 && + buffer_fill >= auto_cpu_config.min_buffer_for_reduce) { int new_level = current_level - 1; auto_cpu_setTargetLevel(new_level); auto_cpu_state.low_util_windows = 0; - LOG_debug("Auto CPU: REDUCE level %d (util=%u%%)\n", new_level, util); + // No grace period on reduce - if we underrun, frequency is too slow + LOG_debug("Auto CPU: REDUCE level %d (util=%u%% buf=%u%%)\n", new_level, util, + buffer_fill); + } + } + + // Track stability for panic count decay + // If we reached here, no panic happened during this window + auto_cpu_state.stability_streak++; + if (auto_cpu_state.stability_streak >= CPU_STABILITY_DECAY_WINDOWS) { + // Earned stability - decay panic counts for current freq and above only + // Being stable at 600MHz proves 800/1000/1200 are fine too, but not 400MHz + int decayed = 0; + for (int i = current_idx; i < auto_cpu_state.freq_count; i++) { + if (auto_cpu_state.panic_count[i] > 0) { + auto_cpu_state.panic_count[i]--; + decayed++; + } + } + if (decayed > 0) { + LOG_debug("Auto CPU: stability earned, decayed %d panic counts\n", decayed); } + auto_cpu_state.stability_streak = 0; } // Reset window counter (frame times stay in ring buffer) From 
1309a35307359504519b8141dcd36601b600b924 Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Tue, 6 Jan 2026 21:33:25 -0800 Subject: [PATCH 08/11] Add vsync measurement to frame pacer for accurate refresh rate detection. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frame pacer improvements: - Measure actual vsync timing by recording SDL_RenderPresent() intervals - Use exponential moving average (α=0.01) for stable Hz measurement - Stable after 120 samples (~2 seconds of gameplay) - Continuously track drift, re-check every 300 frames (~5 seconds) - Automatically reinitialize when measured Hz differs >0.1% from current - Reset Bresenham accumulator on Hz change to prevent glitches Tolerance and cleanup: - Reduce FRAME_PACER_TOLERANCE from 2% to 1% (matches RetroArch guidance) - Remove FORCE_FRAME_PACING from tg5040/tg5050 (testing hack) Results on tg5050: - Discovered actual refresh rate: 62-68Hz (SDL reports 60Hz) - Screen drifts dynamically: starts 65-68Hz, settles to 62-63Hz - 14% mismatch explains audio buffer issues - Frame pacing insufficient at this mismatch (needs audio-clock mode) Tests: - Added 6 tests for vsync measurement and drift tracking - All 1578 tests passing --- TODO.md | 202 +++++++++++++++++++++ tests/unit/all/player/test_frame_pacer.c | 160 +++++++++++++++- workspace/all/player/frame_pacer.c | 97 ++++++++++ workspace/all/player/frame_pacer.h | 64 ++++++- workspace/all/player/player_loop_vsync.inc | 3 + workspace/miyoomini/platform/platform.h | 3 +- workspace/my355/platform/platform.h | 3 +- workspace/tg5040/platform/platform.h | 2 +- workspace/tg5050/platform/platform.h | 2 +- workspace/trimuismart/platform/platform.h | 3 +- 10 files changed, 513 insertions(+), 26 deletions(-) create mode 100644 TODO.md diff --git a/TODO.md b/TODO.md new file mode 100644 index 00000000..5ea45247 --- /dev/null +++ b/TODO.md @@ -0,0 +1,202 @@ +# TODO: Outstanding Issues + +## Audio/Video Sync on tg5050 (Trimui Smart Pro) 
+ +**Status:** Root cause identified, solution designed, implementation pending +**Priority:** High (severely degraded user experience) +**Platform:** tg5050 (tg5040 works fine with cubic boost) + +### Problem Summary + +tg5050 experiences severe audio buffer issues (2649 overruns + 103 underruns in single session) due to extreme display refresh rate mismatch that current vsync-driven timing cannot handle. + +### Root Cause: Dynamic Refresh Rate Mismatch + +**Discovered via vsync measurement (implemented 2026-01-06):** + +SDL reports: 60Hz (integer, via `SDL_GetCurrentDisplayMode()`) +**Actual measured behavior:** +- Starts at: 65-68Hz (14% faster than games!) +- Drifts to: 62-63Hz over 30 seconds +- Continues oscillating: ±1Hz throughout session + +**Example from GB.log:** +``` +[03:45:35] Measured: 68.510Hz (14.18% above SDL's 60Hz) +[03:45:38] Drift to: 63.860Hz (dropped 6.79%) +[03:45:43] Drift to: 62.991Hz (dropped 1.36%) +[03:46:23] Drift to: 61.526Hz (dropped 1.86%) +[03:46:28] Drift to: 62.873Hz (rose 2.19%) +``` + +The screen refresh rate is **both wrong and unstable**. + +### Why Current Approaches Fail + +**Frame pacing (Bresenham) fails:** +- At 14% mismatch (68Hz screen vs 60fps game), would skip 13% of frames +- Human perception threshold for stutter: 2-3% +- 13% frame skipping = very visible stuttering +- Dynamic drift (62→68Hz) requires constant re-adjustment +- Result: 2649 buffer overruns despite paced mode + +**Audio rate control fails:** +- PI controller tuned for 68Hz overshoots when screen drops to 62Hz +- Integral term chases a moving target +- Frame pacing + audio rate control fight each other (one skips frames, other adjusts pitch) +- Result: Oscillates between overrun (100% full) and underrun (0% empty) + +**Fundamental issue:** Vsync-driven timing assumes: +1. Display Hz ≈ game Hz (within 1-2%) ❌ tg5050: 14% +2. Display Hz is stable ❌ tg5050: drifts 6Hz +3. 
Audio pitch adjustment can compensate ❌ 14% is too much + +### Expert Analysis (Subject Matter Expert Agent, 2026-01-06) + +> **"The approach is fundamentally problematic for this hardware."** +> +> At 14% mismatch with dynamic drift, vsync-driven timing cannot work. The correct architecture is **audio-clock master timing** with vsync used only for tear-free presentation, not timing. + +**Key insight:** +- **Frame pacing (dropping frames):** Game *jumps* forward periodically → visible stutter +- **Audio-clock (duplicating frames):** Some frames *held slightly longer* → barely perceptible + +At 68Hz screen / 60fps game: +- Frame pacing: Skip every 8th frame → visible stutter +- Audio-clock: Show some frames twice → much less noticeable + +### Solution: Auto Sync Mode Selection + +Implement hybrid sync strategy based on measured Hz mismatch: + +```c +if (|measured_hz - game_fps| < 1%) → VSYNC_DIRECT // Current default +else if (|measured_hz - game_fps| < 5%) → VSYNC_PACED // Current fallback +else → AUDIO_MASTER // NEW for tg5050 (mismatch ≥ 5%) +``` + +**Audio-clock master mode:** +- Audio hardware is master clock (runs core at exact 60fps) +- Vsync only prevents tearing (not for timing) +- Frame duplication instead of frame skipping +- No audio rate control needed (buffer naturally stable) +- Already implemented: `workspace/all/player/player_loop_audioclock.inc` + +### Implementation Status + +**✅ Completed (2026-01-06):** +1. Vsync measurement system + - Measures actual Hz by timing `SDL_RenderPresent()` calls + - Uses exponential moving average (α=0.01) for stability + - Rejects outliers outside 50-120Hz range + - Stable after 120 samples (~2 seconds) + +2. Continuous drift tracking + - Re-checks every 300 frames (~5 seconds) + - Logs Hz changes >0.1% + - Resets Bresenham accumulator on Hz change (prevents glitches) + +3.
Tighter tolerance + - Changed `FRAME_PACER_TOLERANCE` from 2% to 1% + - Based on RetroArch paper: audio-only rate control works up to ~0.5% + +4. Removed `FORCE_FRAME_PACING` + - Was testing hack, didn't solve root cause + - Removed from tg5040/tg5050 platform.h + +**📋 TODO:** +1. Add sync mode selection logic in frame pacer + - Detect when measured Hz differs from game fps by more than 5% + - Switch to audio-clock mode automatically + - Keep vsync measurement running for dynamic detection + +2. Disable audio rate control in audio-clock mode + - Buffer naturally stable when audio drives timing + - Rate control fights audio-clock (causes oscillation) + +3. Test on tg5050 + - Verify audio-clock eliminates buffer warnings + - Confirm frame duplication is imperceptible + - Measure actual improvement vs current state + +4. Consider per-platform override + - Add `SYNC_MODE_AUDIO_CLOCK` define for platforms that need it + - tg5050 might always need audio-clock regardless of game + +### Files Modified (2026-01-06) + +**Vsync measurement implementation:** +- `workspace/all/player/frame_pacer.h` - Added measurement state, functions, constants +- `workspace/all/player/frame_pacer.c` - Implemented measurement + drift tracking +- `workspace/all/player/player_loop_vsync.inc` - Calls `FramePacer_recordVsync()` +- `tests/unit/all/player/test_frame_pacer.c` - Added 6 tests for measurement + +**Cleanup:** +- `workspace/tg5040/platform/platform.h` - Removed FORCE_FRAME_PACING +- `workspace/tg5050/platform/platform.h` - Removed FORCE_FRAME_PACING +- `workspace/all/common/cpu.c` - Fixed unused variable lint error + +### Related Context + +**tg5040 status:** ✅ Working well with cubic boost +- Mean fill: 40.5% (healthy) +- Buffer warnings: 704 (acceptable) +- System stable + +**Why tg5040 works but tg5050 doesn't:** +- Both have refresh rate mismatch, but tg5050's is more extreme +- tg5050's dynamic drift is more pronounced +- tg5040 likely closer to 60Hz or more stable + +### References + +**RetroArch
Dynamic Rate Control (Arntzen, 2012):** +- Recommends d = 0.2-0.5% for audio pitch adjustment +- States frame pacing "only useful when game frame rate is **close** to monitor frame rate" +- At >0.5% mismatch: "other methods should be employed" + +**Expert recommendation (SME agent a77771d):** +- Primary: Audio-clock master for |mismatch| > 5% +- Fallback: Hybrid sync mode selection +- Never: Frame pacing at >5% mismatch (causes perceptible stutter) + +### Test Logs Evidence + +**Before vsync measurement (old behavior):** +- System thinks 60Hz, uses direct vsync +- Result: 8000+ buffer overrun warnings + +**With vsync measurement + frame pacing (tested 2026-01-06):** +- Detects 68Hz, switches to paced mode +- Tracks drift: 68→63Hz over time +- Result: Still 2649 overruns + 103 underruns (frame pacing can't handle 14%) + +**Expected with audio-clock mode:** +- Audio drives timing at exact game fps +- Vsync just presents frames (duplicating when needed) +- Result: Zero buffer warnings (audio naturally stable) + +### Open Questions + +1. Does tg5040 also have dynamic refresh rate? (Not tested yet) +2. Should we keep vsync mode as default and auto-switch, or force audio-clock for some platforms? +3. Do other devices in the lineup have similar issues? +4. Does audio-clock mode affect input latency measurably? + +### Next Steps + +1. **Implement auto sync mode selection** - Priority: High + - Use measured Hz to pick VSYNC vs AUDIO_CLOCK + - Test threshold value (currently thinking 5%) + +2. **Test on tg5050** - Priority: High + - Verify it solves the buffer issue + - Measure subjective quality (frame duplication vs stutter) + +3. **Measure other devices** - Priority: Medium + - Run vsync measurement on all supported platforms + - Identify which need audio-clock mode + +4. 
**Document behavior** - Priority: Low + - Add platform notes about sync modes + - Explain why some devices use audio-clock diff --git a/tests/unit/all/player/test_frame_pacer.c b/tests/unit/all/player/test_frame_pacer.c index 7fbb5484..0ee007c3 100644 --- a/tests/unit/all/player/test_frame_pacer.c +++ b/tests/unit/all/player/test_frame_pacer.c @@ -12,12 +12,24 @@ #include "unity.h" #include "frame_pacer.h" +#include // Stub for PLAT_getDisplayHz - not tested here, just needed for linking double PLAT_getDisplayHz(void) { return 60.0; } +// Stub for getMicroseconds - returns incrementing time for vsync measurement tests +static uint64_t mock_time_us = 0; +uint64_t getMicroseconds(void) { + return mock_time_us; +} + +// Stub for LOG_info - just suppress output during tests +void LOG_info(const char* fmt, ...) { + (void)fmt; +} + // Q16.16 scale factor for test assertions #define Q16_SCALE 65536 @@ -31,6 +43,8 @@ static FramePacer pacer; void setUp(void) { // Fresh pacer for each test FramePacer_init(&pacer, 60.0, 60.0); + // Reset mock time to non-zero (recordVsync checks last_vsync_time > 0) + mock_time_us = 1000000; // Start at 1 second } void tearDown(void) { @@ -58,17 +72,17 @@ void test_init_5994fps_60hz_direct_mode(void) { TEST_ASSERT_TRUE(pacer.direct_mode); } -void test_init_60fps_61hz_direct_mode(void) { - // 60fps @ 61Hz = 1.6% diff → direct mode (within 2% tolerance) +void test_init_60fps_60_5hz_direct_mode(void) { + // 60fps @ 60.5Hz = 0.83% diff → direct mode (within 1% tolerance) // This is the kind of hardware variance audio rate control can handle - FramePacer_init(&pacer, 60.0, 61.0); + FramePacer_init(&pacer, 60.0, 60.5); TEST_ASSERT_TRUE(pacer.direct_mode); } -void test_init_60fps_63hz_paced_mode(void) { - // 60fps @ 63Hz = 4.8% diff → paced mode (outside 2% tolerance) - FramePacer_init(&pacer, 60.0, 63.0); +void test_init_60fps_61hz_paced_mode(void) { + // 60fps @ 61Hz = 1.6% diff → paced mode (outside 1% tolerance) + FramePacer_init(&pacer, 60.0, 
61.0); TEST_ASSERT_FALSE(pacer.direct_mode); } @@ -338,6 +352,128 @@ void test_reset_preserves_settings(void) { TEST_ASSERT_FALSE(pacer.direct_mode); } +/////////////////////////////// +// Vsync Measurement Tests +/////////////////////////////// + +void test_vsync_measurement_not_stable_initially(void) { + FramePacer_init(&pacer, 60.0, 60.0); + + TEST_ASSERT_FALSE(FramePacer_isMeasurementStable(&pacer)); + // getMeasuredHz returns 0 when not stable + double hz = FramePacer_getMeasuredHz(&pacer); + TEST_ASSERT_TRUE(hz == 0.0); +} + +void test_vsync_measurement_accumulates_samples(void) { + FramePacer_init(&pacer, 60.0, 60.0); + + // First call just sets baseline, doesn't count as sample + FramePacer_recordVsync(&pacer); + + // Simulate 60Hz vsync (16667µs intervals) + for (int i = 0; i < 50; i++) { + mock_time_us += 16667; // ~60Hz + FramePacer_recordVsync(&pacer); + } + + // Should have samples but not stable yet (need 120) + TEST_ASSERT_FALSE(FramePacer_isMeasurementStable(&pacer)); + TEST_ASSERT_EQUAL(50, pacer.vsync_samples); +} + +// Helper to check if a double is within tolerance +static int within_tolerance(double actual, double expected, double tolerance) { + double diff = actual - expected; + if (diff < 0) diff = -diff; + return diff <= tolerance; +} + +void test_vsync_measurement_becomes_stable(void) { + FramePacer_init(&pacer, 60.0, 60.0); + + // First call sets baseline + FramePacer_recordVsync(&pacer); + + // Simulate 60Hz vsync (16667µs intervals) for warmup period + for (int i = 0; i < FRAME_PACER_VSYNC_WARMUP + 10; i++) { + mock_time_us += 16667; + FramePacer_recordVsync(&pacer); + } + + TEST_ASSERT_TRUE(FramePacer_isMeasurementStable(&pacer)); + // Should be approximately 60Hz (within 0.5Hz) + double measured = FramePacer_getMeasuredHz(&pacer); + TEST_ASSERT_TRUE(within_tolerance(measured, 60.0, 0.5)); +} + +void test_vsync_measurement_detects_higher_hz(void) { + FramePacer_init(&pacer, 60.0, 60.0); + + // First call sets baseline + 
FramePacer_recordVsync(&pacer); + + // Simulate 60.05Hz vsync (16653µs intervals instead of 16667µs) + for (int i = 0; i < FRAME_PACER_VSYNC_WARMUP + 10; i++) { + mock_time_us += 16653; // ~60.05Hz + FramePacer_recordVsync(&pacer); + } + + TEST_ASSERT_TRUE(FramePacer_isMeasurementStable(&pacer)); + double measured = FramePacer_getMeasuredHz(&pacer); + // Should be approximately 60.05Hz (within 0.1Hz) + TEST_ASSERT_TRUE(within_tolerance(measured, 60.05, 0.1)); +} + +void test_vsync_measurement_rejects_outliers(void) { + FramePacer_init(&pacer, 60.0, 60.0); + + // First call sets baseline + FramePacer_recordVsync(&pacer); + + // Simulate normal 60Hz vsync + for (int i = 0; i < 50; i++) { + mock_time_us += 16667; + FramePacer_recordVsync(&pacer); + } + int samples_before = pacer.vsync_samples; + + // Simulate a frame drop (long interval = low Hz, rejected) + mock_time_us += 50000; // ~20Hz, should be rejected + FramePacer_recordVsync(&pacer); + + // Sample count should not have increased (outlier rejected) + TEST_ASSERT_EQUAL(samples_before, pacer.vsync_samples); + + // Simulate a fast frame (very short interval = high Hz, rejected) + mock_time_us += 5000; // ~200Hz, should be rejected + FramePacer_recordVsync(&pacer); + + // Sample count should still not have increased + TEST_ASSERT_EQUAL(samples_before, pacer.vsync_samples); +} + +void test_vsync_measurement_reinits_pacer_when_hz_differs(void) { + // Start with reported 60Hz but actual 60.05Hz + FramePacer_init(&pacer, 60.0, 60.0); + + // Originally in direct mode (60fps @ 60Hz) + TEST_ASSERT_TRUE(pacer.direct_mode); + + // First call sets baseline + FramePacer_recordVsync(&pacer); + + // Simulate 60.05Hz vsync for warmup period + for (int i = 0; i < FRAME_PACER_VSYNC_WARMUP + 10; i++) { + mock_time_us += 16653; // ~60.05Hz + FramePacer_recordVsync(&pacer); + } + + // After measurement, display_hz_q16 should be updated to ~60.05 (within 0.1Hz) + double updated_hz = pacer.display_hz_q16 / (double)Q16_SCALE; + 
TEST_ASSERT_TRUE(within_tolerance(updated_hz, 60.05, 0.1)); +} + /////////////////////////////// // Test Runner /////////////////////////////// @@ -348,8 +484,8 @@ int main(void) { // Initialization and tolerance tests RUN_TEST(test_init_60fps_60hz_direct_mode); RUN_TEST(test_init_5994fps_60hz_direct_mode); - RUN_TEST(test_init_60fps_61hz_direct_mode); // within 2% tolerance - RUN_TEST(test_init_60fps_63hz_paced_mode); // outside 2% tolerance + RUN_TEST(test_init_60fps_60_5hz_direct_mode); // within 1% tolerance + RUN_TEST(test_init_60fps_61hz_paced_mode); // outside 1% tolerance RUN_TEST(test_init_60fps_72hz_paced_mode); RUN_TEST(test_init_50fps_60hz_paced_mode); RUN_TEST(test_init_30fps_60hz_paced_mode); @@ -381,5 +517,13 @@ int main(void) { RUN_TEST(test_reset_ensures_next_step); RUN_TEST(test_reset_preserves_settings); + // Vsync measurement + RUN_TEST(test_vsync_measurement_not_stable_initially); + RUN_TEST(test_vsync_measurement_accumulates_samples); + RUN_TEST(test_vsync_measurement_becomes_stable); + RUN_TEST(test_vsync_measurement_detects_higher_hz); + RUN_TEST(test_vsync_measurement_rejects_outliers); + RUN_TEST(test_vsync_measurement_reinits_pacer_when_hz_differs); + return UNITY_END(); } diff --git a/workspace/all/player/frame_pacer.c b/workspace/all/player/frame_pacer.c index 73869664..726e6240 100644 --- a/workspace/all/player/frame_pacer.c +++ b/workspace/all/player/frame_pacer.c @@ -6,6 +6,8 @@ */ #include "frame_pacer.h" +#include "log.h" +#include "utils.h" // For getMicroseconds #include // Platform function we need - declared in api.h but we avoid including it @@ -22,6 +24,9 @@ void FramePacer_init(FramePacer* pacer, double game_fps, double display_hz) { display_hz = 60.0; } + // Store original game fps for potential reinit with measured Hz + pacer->game_fps = game_fps; + // Convert to Q16.16 fixed-point for precise integer math // 59.73fps becomes 3,913,359 (59.73 * 65536) pacer->game_fps_q16 = (int32_t)(game_fps * Q16_SCALE); @@ -31,6 +36,11 
@@ void FramePacer_init(FramePacer* pacer, double game_fps, double display_hz) { // This avoids showing a black/stale frame on startup pacer->accumulator = pacer->display_hz_q16; + // Initialize vsync measurement state + pacer->last_vsync_time = 0; + pacer->measured_hz = 0.0; + pacer->vsync_samples = 0; + // Direct mode if rates are within tolerance // This handles 59.94fps @ 60Hz, etc. double diff = fabs(game_fps - display_hz) / display_hz; @@ -71,3 +81,90 @@ double FramePacer_getDisplayHz(void) { // On SDL1 or platforms where SDL doesn't know, this returns a hardcoded value. return PLAT_getDisplayHz(); } + +// Smoothing factor for EMA: 0.01 = very smooth (100 frame time constant) +// Lower values = more stable but slower to converge +#define VSYNC_EMA_ALPHA 0.01 + +// Minimum Hz to accept (reject outliers from frame drops) +#define VSYNC_MIN_HZ 50.0 +// Maximum Hz to accept (reject outliers from fast presents) +#define VSYNC_MAX_HZ 120.0 + +void FramePacer_recordVsync(FramePacer* pacer) { + uint64_t now = getMicroseconds(); + + if (pacer->last_vsync_time > 0) { + // Calculate interval in seconds (getMicroseconds returns µs) + double interval = (double)(now - pacer->last_vsync_time) / 1000000.0; + + // Convert to Hz + double hz = 1.0 / interval; + + // Reject outliers (frame drops, fast presents, etc.) 
+ if (hz >= VSYNC_MIN_HZ && hz <= VSYNC_MAX_HZ) { + pacer->vsync_samples++; + + if (pacer->measured_hz == 0.0) { + // First sample: initialize directly + pacer->measured_hz = hz; + } else { + // Exponential moving average for stability + pacer->measured_hz = + pacer->measured_hz * (1.0 - VSYNC_EMA_ALPHA) + hz * VSYNC_EMA_ALPHA; + } + + // Log when measurement becomes stable + if (pacer->vsync_samples == FRAME_PACER_VSYNC_WARMUP) { + double reported_hz = PLAT_getDisplayHz(); + LOG_info("Vsync measurement stable: %.3fHz (reported: %.1fHz, diff: %.2f%%)\n", + pacer->measured_hz, reported_hz, + fabs(pacer->measured_hz - reported_hz) / reported_hz * 100.0); + } + + // Check for drift and reinit if needed (both at warmup and periodically after) + // Check every 300 samples after warmup to catch drift + if (pacer->vsync_samples >= FRAME_PACER_VSYNC_WARMUP && + (pacer->vsync_samples == FRAME_PACER_VSYNC_WARMUP || + pacer->vsync_samples % 300 == 0)) { + double current_hz = pacer->display_hz_q16 / (double)Q16_SCALE; // (double): keep fractional Hz if Q16_SCALE is an integer constant + double diff = fabs(pacer->measured_hz - current_hz) / current_hz; + if (diff > 0.001) { // >0.1% difference + LOG_info("Display Hz drift detected: %.3f -> %.3f (%.2f%% change)\n", + current_hz, pacer->measured_hz, diff * 100.0); + + // Update display Hz in Q16.16 + pacer->display_hz_q16 = (int32_t)(pacer->measured_hz * Q16_SCALE); + + // Reset accumulator to new display_hz to avoid frame skip glitches + // When Hz changes, the old accumulator state is invalid + pacer->accumulator = pacer->display_hz_q16; + + // Re-evaluate direct mode with new Hz + double fps_diff = + fabs(pacer->game_fps - pacer->measured_hz) / pacer->measured_hz; + bool should_be_direct = (fps_diff < FRAME_PACER_TOLERANCE); + if (pacer->direct_mode != should_be_direct) { + LOG_info("Frame pacer mode changed: %s -> %s\n", + pacer->direct_mode ? "direct" : "paced", + should_be_direct ?
"direct" : "paced"); + pacer->direct_mode = should_be_direct; + } + } + } + } + } + + pacer->last_vsync_time = now; +} + +double FramePacer_getMeasuredHz(const FramePacer* pacer) { + if (pacer->vsync_samples >= FRAME_PACER_VSYNC_WARMUP) { + return pacer->measured_hz; + } + return 0.0; // Not enough samples yet +} + +bool FramePacer_isMeasurementStable(const FramePacer* pacer) { + return pacer->vsync_samples >= FRAME_PACER_VSYNC_WARMUP; +} diff --git a/workspace/all/player/frame_pacer.h b/workspace/all/player/frame_pacer.h index 9f4e012f..c4585c08 100644 --- a/workspace/all/player/frame_pacer.h +++ b/workspace/all/player/frame_pacer.h @@ -28,21 +28,26 @@ * * Based on RetroArch's dynamic rate control research (Arntzen, 2012): * - Audio pitch changes ≤0.5% are inaudible to most listeners - * - RetroArch's audio rate control can compensate for ±2% drift - * - Speed changes ≤2% are imperceptible in gameplay + * - RetroArch recommends d = 0.2% to 0.5% for rate control + * - Beyond ~0.5% mismatch, "other methods should be employed" * - * Using 2% allows direct mode (no frame pacing overhead) when the mismatch - * is small enough for audio rate control to handle without perceptible - * pitch shift. Larger mismatches (e.g., 60fps @ 72Hz = 20%) use Bresenham - * pacing to maintain correct speed with frame repeats. + * Using 1% as a compromise - allows direct mode for well-matched displays + * while triggering frame pacing for displays with noticeable drift. + * After vsync measurement, the pacer may switch modes based on actual Hz. 
* - * Examples at 2% tolerance: + * Examples at 1% tolerance: * - 59.94fps @ 60Hz → direct mode (0.1% diff) ✓ - * - 60.0fps @ 61Hz → direct mode (1.6% diff) ✓ + * - 60.0fps @ 60.5Hz → direct mode (0.83% diff) ✓ + * - 60.0fps @ 61Hz → paced mode (1.6% diff) * - 50.0fps @ 60Hz → paced mode (16.7% diff) - * - 60.0fps @ 72Hz → paced mode (16.7% diff) */ -#define FRAME_PACER_TOLERANCE 0.02 +#define FRAME_PACER_TOLERANCE 0.01 + +/** + * Number of vsync samples before measurement is considered stable. + * At 60Hz, 120 samples = 2 seconds of measurement. + */ +#define FRAME_PACER_VSYNC_WARMUP 120 /** * Frame pacing state. @@ -55,6 +60,12 @@ typedef struct { int32_t display_hz_q16; // Display Hz in Q16.16 fixed-point int32_t accumulator; // Bresenham accumulator (Q16.16) bool direct_mode; // True if fps ~= hz (skip accumulator) + + // Vsync measurement state + uint64_t last_vsync_time; // Performance counter at last vsync + double measured_hz; // Exponential moving average of measured Hz + int vsync_samples; // Number of samples collected + double game_fps; // Original game fps (for reinit) } FramePacer; /** @@ -112,4 +123,37 @@ bool FramePacer_isDirectMode(const FramePacer* pacer); */ double FramePacer_getDisplayHz(void); +/** + * Record vsync timing after present. + * + * Call this immediately after GFX_present() or SDL_RenderPresent() returns. + * Measures time between vsyncs to determine actual display refresh rate. + * + * After FRAME_PACER_VSYNC_WARMUP samples, the measured Hz becomes stable. + * If measured Hz differs significantly from reported Hz, the pacer + * automatically reinitializes with the measured value. + * + * @param pacer Pacer state to update + */ +void FramePacer_recordVsync(FramePacer* pacer); + +/** + * Get measured display Hz. + * + * Returns the measured refresh rate based on vsync timing. + * Before enough samples are collected, returns 0.0. 
+ * + * @param pacer Pacer state + * @return Measured Hz, or 0.0 if not yet measured + */ +double FramePacer_getMeasuredHz(const FramePacer* pacer); + +/** + * Check if vsync measurement is stable. + * + * @param pacer Pacer state + * @return true if enough samples collected for stable measurement + */ +bool FramePacer_isMeasurementStable(const FramePacer* pacer); + #endif // __FRAME_PACER_H__ diff --git a/workspace/all/player/player_loop_vsync.inc b/workspace/all/player/player_loop_vsync.inc index 717410c7..07bfd253 100644 --- a/workspace/all/player/player_loop_vsync.inc +++ b/workspace/all/player/player_loop_vsync.inc @@ -117,6 +117,9 @@ static void run_main_loop(void) { frame_ready_for_flip = 0; } + // Record vsync timing for Hz measurement (must be right after present) + FramePacer_recordVsync(&frame_pacer); + // Track performance (only once per vsync, not per FF run) limitFF(); trackFPS(); diff --git a/workspace/miyoomini/platform/platform.h b/workspace/miyoomini/platform/platform.h index be0c048e..3c0e3e20 100644 --- a/workspace/miyoomini/platform/platform.h +++ b/workspace/miyoomini/platform/platform.h @@ -26,8 +26,7 @@ // Audio Configuration /////////////////////////////// -// More aggressive rate control for A7 device with high timing variance -#define SND_RATE_CONTROL_D 0.015f +// Uses default SND_RATE_CONTROL_D (0.012f) - cubic safety boost handles edge cases /////////////////////////////// // Video Buffer Scaling diff --git a/workspace/my355/platform/platform.h b/workspace/my355/platform/platform.h index c6d25e49..60b6be9c 100644 --- a/workspace/my355/platform/platform.h +++ b/workspace/my355/platform/platform.h @@ -36,8 +36,7 @@ // Audio Configuration /////////////////////////////// -// More aggressive rate control for device with timing variance -#define SND_RATE_CONTROL_D 0.015f +// Uses default SND_RATE_CONTROL_D (0.012f) - cubic safety boost handles edge cases /////////////////////////////// // Dependencies diff --git 
a/workspace/tg5040/platform/platform.h b/workspace/tg5040/platform/platform.h index 6d4c59dc..cec822ae 100644 --- a/workspace/tg5040/platform/platform.h +++ b/workspace/tg5040/platform/platform.h @@ -34,7 +34,7 @@ // Audio Configuration /////////////////////////////// -// Uses default SND_RATE_CONTROL_D (0.012f) for standard timing +// Uses default SND_RATE_CONTROL_D (0.012f) - cubic safety boost handles edge cases /////////////////////////////// // Video Buffer Scaling diff --git a/workspace/tg5050/platform/platform.h b/workspace/tg5050/platform/platform.h index 1f0dfdd2..b748dded 100644 --- a/workspace/tg5050/platform/platform.h +++ b/workspace/tg5050/platform/platform.h @@ -36,7 +36,7 @@ // Audio Configuration /////////////////////////////// -// Uses default SND_RATE_CONTROL_D (0.012f) for standard timing +// Uses default SND_RATE_CONTROL_D (0.012f) - cubic safety boost handles edge cases /////////////////////////////// // Video Buffer Scaling diff --git a/workspace/trimuismart/platform/platform.h b/workspace/trimuismart/platform/platform.h index 6c6a8f39..c8bb0a38 100644 --- a/workspace/trimuismart/platform/platform.h +++ b/workspace/trimuismart/platform/platform.h @@ -27,8 +27,7 @@ // Audio Configuration /////////////////////////////// -// More aggressive rate control for slower A7 device with high timing variance -#define SND_RATE_CONTROL_D 0.015f +// Uses default SND_RATE_CONTROL_D (0.012f) - cubic safety boost handles edge cases /////////////////////////////// // Video Buffer Scaling From 39936f479a3a19b00fcd12af48be79c2a2a54e12 Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Wed, 7 Jan 2026 11:16:18 -0800 Subject: [PATCH 09/11] Refactor to unified runtime-adaptive audio/video sync system. Replaces compile-time dual-loop architecture (vsync vs audio-clock) with single integrated loop that measures actual display refresh rate and adapts sync mode at runtime. 
Starts in audio-clock mode (safe, universal) and switches to vsync if measured Hz is compatible (<1% mismatch from game fps). Removes frame pacing (Bresenham), cubic boost, and 350 lines of code. Simplifies auto CPU scaling to use pure utilization-based decisions, with time-based reduction strategy for audio-clock mode where blocking makes utilization metrics unreliable. --- Makefile.qa | 9 +- TODO.md | 202 ------- docs/audio-rate-control.md | 84 ++- scripts/run-coverage.sh | 2 +- tests/unit/all/player/test_frame_pacer.c | 529 ------------------ tests/unit/all/player/test_sync_manager.c | 344 ++++++++++++ workspace/all/common/api.c | 263 ++++----- workspace/all/common/api.h | 22 +- workspace/all/common/api_types.h | 1 + workspace/all/common/cpu.h | 3 + workspace/all/player/Makefile | 2 +- workspace/all/player/frame_pacer.c | 170 ------ workspace/all/player/frame_pacer.h | 159 ------ workspace/all/player/player.c | 276 +++++++-- .../all/player/player_loop_audioclock.inc | 124 ---- workspace/all/player/player_loop_vsync.inc | 144 ----- workspace/all/player/sync_manager.c | 171 ++++++ workspace/all/player/sync_manager.h | 176 ++++++ 18 files changed, 1141 insertions(+), 1540 deletions(-) delete mode 100644 TODO.md delete mode 100644 tests/unit/all/player/test_frame_pacer.c create mode 100644 tests/unit/all/player/test_sync_manager.c delete mode 100644 workspace/all/player/frame_pacer.c delete mode 100644 workspace/all/player/frame_pacer.h delete mode 100644 workspace/all/player/player_loop_audioclock.inc delete mode 100644 workspace/all/player/player_loop_vsync.inc create mode 100644 workspace/all/player/sync_manager.c create mode 100644 workspace/all/player/sync_manager.h diff --git a/Makefile.qa b/Makefile.qa index 9e8740e3..ff06e450 100644 --- a/Makefile.qa +++ b/Makefile.qa @@ -236,7 +236,7 @@ TEST_UNITY = tests/vendor/unity/unity.c PATHS_STUB = tests/support/paths_stub.c # All test executables (built from tests/unit/ and tests/integration/) -TEST_EXECUTABLES = 
tests/utils_test tests/nointro_parser_test tests/pad_test tests/gfx_text_test tests/audio_resampler_test tests/player_paths_test tests/launcher_utils_test tests/m3u_parser_test tests/launcher_file_utils_test tests/map_parser_test tests/collection_parser_test tests/recent_parser_test tests/recent_writer_test tests/recent_runtime_test tests/directory_utils_test tests/binary_file_utils_test tests/ui_layout_test tests/str_compare_test tests/effect_system_test tests/effect_generate_test tests/player_utils_test tests/player_config_test tests/player_options_test tests/platform_variant_test tests/launcher_entry_test tests/directory_index_test tests/player_archive_test tests/player_memory_test tests/player_state_test tests/launcher_launcher_test tests/cpu_test tests/player_input_test tests/launcher_state_test tests/player_menu_test tests/player_env_test tests/player_game_test tests/player_scaler_test tests/player_core_test tests/launcher_directory_test tests/launcher_navigation_test tests/launcher_thumbnail_test tests/launcher_context_test tests/emu_cache_test tests/res_cache_test tests/render_common_test tests/integration_workflows_test tests/log_test tests/frame_pacer_test +TEST_EXECUTABLES = tests/utils_test tests/nointro_parser_test tests/pad_test tests/gfx_text_test tests/audio_resampler_test tests/player_paths_test tests/launcher_utils_test tests/m3u_parser_test tests/launcher_file_utils_test tests/map_parser_test tests/collection_parser_test tests/recent_parser_test tests/recent_writer_test tests/recent_runtime_test tests/directory_utils_test tests/binary_file_utils_test tests/ui_layout_test tests/str_compare_test tests/effect_system_test tests/effect_generate_test tests/player_utils_test tests/player_config_test tests/player_options_test tests/platform_variant_test tests/launcher_entry_test tests/directory_index_test tests/player_archive_test tests/player_memory_test tests/player_state_test tests/launcher_launcher_test tests/cpu_test tests/player_input_test 
tests/launcher_state_test tests/player_menu_test tests/player_env_test tests/player_game_test tests/player_scaler_test tests/player_core_test tests/launcher_directory_test tests/launcher_navigation_test tests/launcher_thumbnail_test tests/launcher_context_test tests/emu_cache_test tests/res_cache_test tests/render_common_test tests/integration_workflows_test tests/log_test tests/sync_manager_test # Default targets: use Docker for consistency test: docker-test @@ -413,9 +413,10 @@ tests/cpu_test: tests/unit/all/common/test_cpu.c workspace/all/common/cpu.c $(TE @echo "Building CPU scaling tests..." @$(CC) -o $@ $^ $(TEST_INCLUDES) $(TEST_CFLAGS) -# Build frame pacing tests (pure algorithm, no external dependencies) -tests/frame_pacer_test: tests/unit/all/player/test_frame_pacer.c workspace/all/player/frame_pacer.c $(TEST_UNITY) - @echo "Building frame pacer tests..." +# Build sync manager tests (vsync measurement and mode switching) +# Note: Uses test stub for getMicroseconds, not utils.c version +tests/sync_manager_test: tests/unit/all/player/test_sync_manager.c workspace/all/player/sync_manager.c workspace/all/common/log.c $(TEST_UNITY) + @echo "Building sync manager tests..." @$(CC) -o $@ $^ $(TEST_INCLUDES) $(TEST_CFLAGS) -lm # Build input handling tests (pure state queries and mapping lookups) diff --git a/TODO.md b/TODO.md deleted file mode 100644 index 5ea45247..00000000 --- a/TODO.md +++ /dev/null @@ -1,202 +0,0 @@ -# TODO: Outstanding Issues - -## Audio/Video Sync on tg5050 (Trimui Smart Pro) - -**Status:** Root cause identified, solution designed, implementation pending -**Priority:** High (severely degraded user experience) -**Platform:** tg5050 (tg5040 works fine with cubic boost) - -### Problem Summary - -tg5050 experiences severe audio buffer issues (2649 overruns + 103 underruns in single session) due to extreme display refresh rate mismatch that current vsync-driven timing cannot handle. 
- -### Root Cause: Dynamic Refresh Rate Mismatch - -**Discovered via vsync measurement (implemented 2026-01-06):** - -SDL reports: 60Hz (integer, via `SDL_GetCurrentDisplayMode()`) -**Actual measured behavior:** -- Starts at: 65-68Hz (14% faster than games!) -- Drifts to: 62-63Hz over 30 seconds -- Continues oscillating: ±1Hz throughout session - -**Example from GB.log:** -``` -[03:45:35] Measured: 68.510Hz (14.18% above SDL's 60Hz) -[03:45:38] Drift to: 63.860Hz (dropped 6.79%) -[03:45:43] Drift to: 62.991Hz (dropped 1.36%) -[03:46:23] Drift to: 61.526Hz (dropped 1.86%) -[03:46:28] Drift to: 62.873Hz (rose 2.19%) -``` - -The screen refresh rate is **both wrong and unstable**. - -### Why Current Approaches Fail - -**Frame pacing (Bresenham) fails:** -- At 14% mismatch (68Hz screen vs 60fps game), would skip 13% of frames -- Human perception threshold for stutter: 2-3% -- 13% frame skipping = very visible stuttering -- Dynamic drift (62→68Hz) requires constant re-adjustment -- Result: 2649 buffer overruns despite paced mode - -**Audio rate control fails:** -- PI controller tuned for 68Hz overshoots when screen drops to 62Hz -- Integral term chases a moving target -- Frame pacing + audio rate control fight each other (one skips frames, other adjusts pitch) -- Result: Oscillates between overrun (100% full) and underrun (0% empty) - -**Fundamental issue:** Vsync-driven timing assumes: -1. Display Hz ≈ game Hz (within 1-2%) ❌ tg5050: 14% -2. Display Hz is stable ❌ tg5050: drifts 6Hz -3. Audio pitch adjustment can compensate ❌ 14% is too much - -### Expert Analysis (Subject Matter Expert Agent, 2026-01-06) - -> **"The approach is fundamentally problematic for this hardware."** -> -> At 14% mismatch with dynamic drift, vsync-driven timing cannot work. The correct architecture is **audio-clock master timing** with vsync used only for tear-free presentation, not timing. 
- -**Key insight:** -- **Frame pacing (dropping frames):** Game *jumps* forward periodically → visible stutter -- **Audio-clock (duplicating frames):** Some frames *held slightly longer* → barely perceptible - -At 68Hz screen / 60fps game: -- Frame pacing: Skip every 8th frame → visible stutter -- Audio-clock: Show some frames twice → much less noticeable - -### Solution: Auto Sync Mode Selection - -Implement hybrid sync strategy based on measured Hz mismatch: - -```c -if (|measured_hz - game_fps| < 1%) → VSYNC_DIRECT // Current default -if (|measured_hz - game_fps| < 5%) → VSYNC_PACED // Current fallback -if (|measured_hz - game_fps| > 5%) → AUDIO_MASTER // NEW for tg5050 -``` - -**Audio-clock master mode:** -- Audio hardware is master clock (runs core at exact 60fps) -- Vsync only prevents tearing (not for timing) -- Frame duplication instead of frame skipping -- No audio rate control needed (buffer naturally stable) -- Already implemented: `workspace/all/player/player_loop_audioclock.inc` - -### Implementation Status - -**✅ Completed (2026-01-06):** -1. Vsync measurement system - - Measures actual Hz by timing `SDL_RenderPresent()` calls - - Uses exponential moving average (α=0.01) for stability - - Rejects outliers outside 50-120Hz range - - Stable after 120 samples (~2 seconds) - -2. Continuous drift tracking - - Re-checks every 300 frames (~5 seconds) - - Logs Hz changes >0.1% - - Resets Bresenham accumulator on Hz change (prevents glitches) - -3. Tighter tolerance - - Changed `FRAME_PACER_TOLERANCE` from 2% to 1% - - Based on RetroArch paper: audio-only rate control works up to ~0.5% - -4. Removed `FORCE_FRAME_PACING` - - Was testing hack, didn't solve root cause - - Removed from tg5040/tg5050 platform.h - -**📋 TODO:** -1. Add sync mode selection logic in frame pacer - - Detect when measured Hz > 5% from game fps - - Switch to audio-clock mode automatically - - Keep vsync measurement running for dynamic detection - -2. 
Disable audio rate control in audio-clock mode - - Buffer naturally stable when audio drives timing - - Rate control fights audio-clock (causes oscillation) - -3. Test on tg5050 - - Verify audio-clock eliminates buffer warnings - - Confirm frame duplication is imperceptible - - Measure actual improvement vs current state - -4. Consider per-platform override - - Add `SYNC_MODE_AUDIO_CLOCK` define for platforms that need it - - tg5050 might always need audio-clock regardless of game - -### Files Modified (2026-01-06) - -**Vsync measurement implementation:** -- `workspace/all/player/frame_pacer.h` - Added measurement state, functions, constants -- `workspace/all/player/frame_pacer.c` - Implemented measurement + drift tracking -- `workspace/all/player/player_loop_vsync.inc` - Calls `FramePacer_recordVsync()` -- `tests/unit/all/player/test_frame_pacer.c` - Added 6 tests for measurement - -**Cleanup:** -- `workspace/tg5040/platform/platform.h` - Removed FORCE_FRAME_PACING -- `workspace/tg5050/platform/platform.h` - Removed FORCE_FRAME_PACING -- `workspace/all/common/cpu.c` - Fixed unused variable lint error - -### Related Context - -**tg5040 status:** ✅ Working well with cubic boost -- Mean fill: 40.5% (healthy) -- Buffer warnings: 704 (acceptable) -- System stable - -**Why tg5040 works but tg5050 doesn't:** -- Both have refresh rate mismatch, but tg5050's is more extreme -- tg5050's dynamic drift is more pronounced -- tg5040 likely closer to 60Hz or more stable - -### References - -**RetroArch Dynamic Rate Control (Arntzen, 2012):** -- Recommends d = 0.2-0.5% for audio pitch adjustment -- States frame pacing "only useful when game frame rate is **close** to monitor frame rate" -- At >0.5% mismatch: "other methods should be employed" - -**Expert recommendation (SME agent a77771d):** -- Primary: Audio-clock master for |mismatch| > 5% -- Fallback: Hybrid sync mode selection -- Never: Frame pacing at >5% mismatch (causes perceptible stutter) - -### Test Logs Evidence - 
-**Before vsync measurement (old behavior):** -- System thinks 60Hz, uses direct vsync -- Result: 8000+ buffer overrun warnings - -**With vsync measurement + frame pacing (tested 2026-01-06):** -- Detects 68Hz, switches to paced mode -- Tracks drift: 68→63Hz over time -- Result: Still 2649 overruns + 103 underruns (frame pacing can't handle 14%) - -**Expected with audio-clock mode:** -- Audio drives timing at exact game fps -- Vsync just presents frames (duplicating when needed) -- Result: Zero buffer warnings (audio naturally stable) - -### Open Questions - -1. Does tg5040 also have dynamic refresh rate? (Not tested yet) -2. Should we keep vsync mode as default and auto-switch, or force audio-clock for some platforms? -3. Do other devices in the lineup have similar issues? -4. Does audio-clock mode affect input latency measurably? - -### Next Steps - -1. **Implement auto sync mode selection** - Priority: High - - Use measured Hz to pick VSYNC vs AUDIO_CLOCK - - Test threshold value (currently thinking 5%) - -2. **Test on tg5050** - Priority: High - - Verify it solves the buffer issue - - Measure subjective quality (frame duplication vs stutter) - -3. **Measure other devices** - Priority: Medium - - Run vsync measurement on all supported platforms - - Identify which need audio-clock mode - -4. **Document behavior** - Priority: Low - - Add platform notes about sync modes - - Explain why some devices use audio-clock diff --git a/docs/audio-rate-control.md b/docs/audio-rate-control.md index 88b1c5fa..36dbfdc2 100644 --- a/docs/audio-rate-control.md +++ b/docs/audio-rate-control.md @@ -13,7 +13,46 @@ Retro game consoles are highly synchronous - audio generation is locked to video **The fundamental challenge**: Synchronize to vsync (smooth video) while never underrunning or blocking on audio. 
-## The Algorithm +## Runtime-Adaptive Sync System + +LessUI uses a runtime-adaptive approach that measures the actual display refresh rate and selects the appropriate sync mode automatically. + +### Two Sync Modes + +| Mode | Timing Source | Audio Handling | When Used | +| --------------- | --------------------- | ---------------------------------- | -------------------------------- | +| **Audio Clock** | Blocking audio writes | Fixed ratio (no rate control) | Startup default, Hz mismatch >1% | +| **Vsync** | Display vsync | PI rate control (±0.5% adjustment) | Hz mismatch <1% from game fps | + +### Mode Selection Algorithm + +``` +1. Start in Audio Clock mode (safe default, works on all hardware) +2. Measure actual display Hz via vsync timing (~2 seconds warmup) +3. If measured Hz within 1% of game fps → switch to Vsync mode +4. Monitor for drift; fall back to Audio Clock if Hz becomes unstable +``` + +This eliminates compile-time mode selection and handles hardware variance automatically. + +## Audio Clock Mode + +When display Hz differs significantly from game fps (>1%), rate control cannot compensate without audible pitch changes. Instead: + +- Audio writes **block** when the buffer is full +- Audio hardware clock drives emulation timing +- Frame duplication occurs naturally (less visible than frame skipping) +- No rate control needed - the blocking provides natural backpressure + +**Benefits:** + +- Works with any display refresh rate +- Audio buffer stays naturally stable +- No PI controller oscillation or windup + +## Vsync Mode (Rate Control Active) + +When display Hz closely matches game fps (<1%), vsync provides timing and rate control keeps the audio buffer stable. 
### Arntzen's Core Formula @@ -67,6 +106,24 @@ float adjustment = p_term + integral; ## Implementation Details +### Sync Mode Callbacks + +The audio system queries the sync manager to determine behavior: + +```c +// Set by player at init +SND_setSyncCallbacks( + SyncManager_shouldUseRateControl, // true in Vsync mode + SyncManager_shouldBlockAudio // true in Audio Clock mode +); + +// In SND_batchSamples() +bool should_block = snd.should_block_audio(); +bool should_use_rate_control = !should_block && snd.should_use_rate_control(); +``` + +This decouples the audio system from sync mode decisions. + ### Per-Frame Integral Update The integral must update **once per frame**, not once per audio batch. Some cores (e.g., 64-bit snes9x) use per-sample audio callbacks, calling `SND_batchSamples()` ~535 times per frame. Without this fix, effective ki = 535× intended, causing wild oscillation. @@ -74,6 +131,10 @@ The integral must update **once per frame**, not once per audio batch. Some core ```c // Called once per frame from main loop, before core.run() void SND_newFrame(void) { + // Skip in audio-clock mode (no rate control) + if (!snd.should_use_rate_control || !snd.should_use_rate_control()) + return; + SDL_LockAudio(); float fill = SND_getBufferFillLevel(); @@ -104,19 +165,6 @@ int PLAT_pickSampleRate(int requested, int max) { Forcing a different rate (e.g., always 48kHz when core wants 32kHz) causes unnecessary resampling and wider buffer swings. -### Vsync Cadence - -When a libretro core skips rendering (passes NULL to video_refresh), we still flip to maintain vsync timing: - -```c -if (!data) { - frame_ready_for_flip = 1; // Still flip to maintain vsync cadence - return; -} -``` - -Without this, skipped frames cause: no vsync wait → 4ms frame → next frame waits 2 vblanks → 30ms frame. This creates 20% buffer oscillation even with perfect rate control. 
- ## Tuning Results Tested across three platforms with different timing characteristics: @@ -135,10 +183,12 @@ Tested across three platforms with different timing characteristics: ## Code References -- PI controller: `workspace/all/common/api.c` (SND_calculateRateAdjust, SND_newFrame) -- Parameters: `workspace/all/common/api.c` (lines 1640-1652) +- Sync manager: `workspace/all/player/sync_manager.c` (mode selection, Hz measurement) +- PI controller: `workspace/all/common/api.c` (`SND_calculateRateAdjust`, `SND_newFrame`) +- Sync callbacks: `workspace/all/common/api.c` (`SND_setSyncCallbacks`) +- Parameters: `workspace/all/common/api.c` (SND_RATE_CONTROL_D, SND_RATE_CONTROL_KI, etc.) - Resampler: `workspace/all/common/audio_resampler.c` -- Sample rate policy: `workspace//platform/platform.c` (PLAT_pickSampleRate) +- Sample rate policy: `workspace//platform/platform.c` (`PLAT_pickSampleRate`) ## References diff --git a/scripts/run-coverage.sh b/scripts/run-coverage.sh index 63ba48fe..cbb797ed 100755 --- a/scripts/run-coverage.sh +++ b/scripts/run-coverage.sh @@ -122,7 +122,7 @@ declare -a TEST_BUILDS=( "player_state_test:tests/unit/all/player/test_player_state.c workspace/all/player/player_state.c workspace/all/player/player_paths.c workspace/all/common/utils.c workspace/all/common/nointro_parser.c workspace/all/common/log.c tests/support/libretro_mocks.c:-D_DEFAULT_SOURCE" "launcher_launcher_test:tests/unit/all/launcher/test_launcher_launcher.c workspace/all/launcher/launcher_launcher.c:" "cpu_test:tests/unit/all/common/test_cpu.c workspace/all/common/cpu.c:" - "frame_pacer_test:tests/unit/all/player/test_frame_pacer.c workspace/all/player/frame_pacer.c:-lm" + "sync_manager_test:tests/unit/all/player/test_sync_manager.c workspace/all/player/sync_manager.c workspace/all/common/log.c:-lm" "player_input_test:tests/unit/all/player/test_player_input.c workspace/all/player/player_input.c:"
"launcher_state_test:tests/unit/all/launcher/test_launcher_state.c workspace/all/launcher/launcher_state.c workspace/all/common/stb_ds_impl.c:" "player_menu_test:tests/unit/all/player/test_player_menu.c workspace/all/player/player_context.c tests/support/menu_state_stub.c tests/support/sdl_fakes.c workspace/all/common/utils.c workspace/all/common/nointro_parser.c workspace/all/common/log.c:-I tests/support/SDL -I workspace/all/player/libretro-common/include -I tests/vendor/fff -D_DEFAULT_SOURCE" diff --git a/tests/unit/all/player/test_frame_pacer.c b/tests/unit/all/player/test_frame_pacer.c deleted file mode 100644 index 0ee007c3..00000000 --- a/tests/unit/all/player/test_frame_pacer.c +++ /dev/null @@ -1,529 +0,0 @@ -/** - * test_frame_pacer.c - Unit tests for frame pacing - * - * Tests the Bresenham-style frame pacing algorithm including: - * - Initialization with Q16.16 fixed-point - * - Direct mode detection - * - Paced mode accumulator behavior - * - First frame always steps - * - Long-run stability (no drift) - * - Reset functionality - */ - -#include "unity.h" -#include "frame_pacer.h" -#include - -// Stub for PLAT_getDisplayHz - not tested here, just needed for linking -double PLAT_getDisplayHz(void) { - return 60.0; -} - -// Stub for getMicroseconds - returns incrementing time for vsync measurement tests -static uint64_t mock_time_us = 0; -uint64_t getMicroseconds(void) { - return mock_time_us; -} - -// Stub for LOG_info - just suppress output during tests -void LOG_info(const char* fmt, ...) 
{ - (void)fmt; -} - -// Q16.16 scale factor for test assertions -#define Q16_SCALE 65536 - -// Test state -static FramePacer pacer; - -/////////////////////////////// -// Test Setup/Teardown -/////////////////////////////// - -void setUp(void) { - // Fresh pacer for each test - FramePacer_init(&pacer, 60.0, 60.0); - // Reset mock time to non-zero (recordVsync checks last_vsync_time > 0) - mock_time_us = 1000000; // Start at 1 second -} - -void tearDown(void) { - // Nothing to clean up -} - -/////////////////////////////// -// Initialization Tests -/////////////////////////////// - -void test_init_60fps_60hz_direct_mode(void) { - FramePacer_init(&pacer, 60.0, 60.0); - - TEST_ASSERT_EQUAL_INT32(60 * Q16_SCALE, pacer.game_fps_q16); - TEST_ASSERT_EQUAL_INT32(60 * Q16_SCALE, pacer.display_hz_q16); - TEST_ASSERT_TRUE(pacer.direct_mode); - // Accumulator initialized to display_hz for first-frame-steps - TEST_ASSERT_EQUAL_INT32(60 * Q16_SCALE, pacer.accumulator); -} - -void test_init_5994fps_60hz_direct_mode(void) { - // NTSC 59.94fps @ 60Hz = 0.1% diff → direct mode (within 2% tolerance) - FramePacer_init(&pacer, 59.94, 60.0); - - TEST_ASSERT_TRUE(pacer.direct_mode); -} - -void test_init_60fps_60_5hz_direct_mode(void) { - // 60fps @ 60.5Hz = 0.83% diff → direct mode (within 1% tolerance) - // This is the kind of hardware variance audio rate control can handle - FramePacer_init(&pacer, 60.0, 60.5); - - TEST_ASSERT_TRUE(pacer.direct_mode); -} - -void test_init_60fps_61hz_paced_mode(void) { - // 60fps @ 61Hz = 1.6% diff → paced mode (outside 1% tolerance) - FramePacer_init(&pacer, 60.0, 61.0); - - TEST_ASSERT_FALSE(pacer.direct_mode); -} - -void test_init_60fps_72hz_paced_mode(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - TEST_ASSERT_EQUAL_INT32(60 * Q16_SCALE, pacer.game_fps_q16); - TEST_ASSERT_EQUAL_INT32(72 * Q16_SCALE, pacer.display_hz_q16); - TEST_ASSERT_FALSE(pacer.direct_mode); - // Accumulator initialized to display_hz for first-frame-steps - 
TEST_ASSERT_EQUAL_INT32(72 * Q16_SCALE, pacer.accumulator); -} - -void test_init_50fps_60hz_paced_mode(void) { - // PAL games on NTSC display - FramePacer_init(&pacer, 50.0, 60.0); - - TEST_ASSERT_FALSE(pacer.direct_mode); -} - -void test_init_30fps_60hz_paced_mode(void) { - // Half-speed games - FramePacer_init(&pacer, 30.0, 60.0); - - TEST_ASSERT_FALSE(pacer.direct_mode); -} - -void test_init_preserves_fractional_fps(void) { - // 59.73fps (SNES) should preserve precision - FramePacer_init(&pacer, 59.73, 60.0); - - // 59.73 * 65536 = 3,913,359.28 -> 3,913,359 - int32_t expected = (int32_t)(59.73 * Q16_SCALE); - TEST_ASSERT_EQUAL_INT32(expected, pacer.game_fps_q16); -} - -/////////////////////////////// -// Direct Mode Tests -/////////////////////////////// - -void test_direct_mode_always_steps(void) { - FramePacer_init(&pacer, 60.0, 60.0); - TEST_ASSERT_TRUE(pacer.direct_mode); - - // Every call should return true - for (int i = 0; i < 100; i++) { - TEST_ASSERT_TRUE(FramePacer_step(&pacer)); - } -} - -void test_isDirectMode_returns_correct_value(void) { - FramePacer_init(&pacer, 60.0, 60.0); - TEST_ASSERT_TRUE(FramePacer_isDirectMode(&pacer)); - - FramePacer_init(&pacer, 60.0, 72.0); - TEST_ASSERT_FALSE(FramePacer_isDirectMode(&pacer)); -} - -/////////////////////////////// -// Paced Mode Tests (60fps @ 72Hz) -/////////////////////////////// - -void test_60fps_72hz_first_vsync_steps(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // First vsync: acc = 72, >= 72 -> step (first frame always steps) - TEST_ASSERT_TRUE(FramePacer_step(&pacer)); -} - -void test_60fps_72hz_second_vsync_repeats(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // First vsync: step - FramePacer_step(&pacer); - - // Second vsync: acc = 60, < 72 -> repeat - TEST_ASSERT_FALSE(FramePacer_step(&pacer)); -} - -void test_60fps_72hz_pattern_6_vsyncs(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // With acc starting at 72 (display_hz), pattern is: - // Vsync 1: acc=72, >=72 -> step, acc = 
72 - 72 + 60 = 60 - // Vsync 2: acc=60, <72 -> repeat, acc = 60 + 60 = 120 - // Vsync 3: acc=120, >=72 -> step, acc = 120 - 72 + 60 = 108 - // Vsync 4: acc=108, >=72 -> step, acc = 108 - 72 + 60 = 96 - // Vsync 5: acc=96, >=72 -> step, acc = 96 - 72 + 60 = 84 - // Vsync 6: acc=84, >=72 -> step, acc = 84 - 72 + 60 = 72 - // Result: 5 steps, 1 repeat in 6 vsyncs = 83.3% = 60/72 - - bool results[6]; - for (int i = 0; i < 6; i++) { - results[i] = FramePacer_step(&pacer); - } - - TEST_ASSERT_TRUE(results[0]); // step - TEST_ASSERT_FALSE(results[1]); // repeat - TEST_ASSERT_TRUE(results[2]); // step - TEST_ASSERT_TRUE(results[3]); // step - TEST_ASSERT_TRUE(results[4]); // step - TEST_ASSERT_TRUE(results[5]); // step - - // Count: 5 steps, 1 repeat - int steps = 0; - for (int i = 0; i < 6; i++) { - if (results[i]) steps++; - } - TEST_ASSERT_EQUAL(5, steps); -} - -/////////////////////////////// -// Paced Mode Tests (50fps @ 60Hz - PAL) -/////////////////////////////// - -void test_50fps_60hz_pattern_6_vsyncs(void) { - FramePacer_init(&pacer, 50.0, 60.0); - - // 50fps @ 60Hz = step 50/60 = 83.3% of vsyncs - // Pattern with acc starting at 60: - // Vsync 1: acc=60, >=60 -> step, acc = 60 - 60 + 50 = 50 - // Vsync 2: acc=50, <60 -> repeat, acc = 50 + 50 = 100 - // Vsync 3: acc=100, >=60 -> step, acc = 100 - 60 + 50 = 90 - // Vsync 4: acc=90, >=60 -> step, acc = 90 - 60 + 50 = 80 - // Vsync 5: acc=80, >=60 -> step, acc = 80 - 60 + 50 = 70 - // Vsync 6: acc=70, >=60 -> step, acc = 70 - 60 + 50 = 60 - - bool results[6]; - for (int i = 0; i < 6; i++) { - results[i] = FramePacer_step(&pacer); - } - - TEST_ASSERT_TRUE(results[0]); // step - TEST_ASSERT_FALSE(results[1]); // repeat - TEST_ASSERT_TRUE(results[2]); // step - TEST_ASSERT_TRUE(results[3]); // step - TEST_ASSERT_TRUE(results[4]); // step - TEST_ASSERT_TRUE(results[5]); // step -} - -/////////////////////////////// -// Paced Mode Tests (30fps @ 60Hz) -/////////////////////////////// - -void 
test_30fps_60hz_alternates(void) { - FramePacer_init(&pacer, 30.0, 60.0); - - // 30fps @ 60Hz = step every other frame - // Vsync 1: acc=60, >=60 -> step, acc = 60 - 60 + 30 = 30 - // Vsync 2: acc=30, <60 -> repeat, acc = 30 + 30 = 60 - // Vsync 3: acc=60, >=60 -> step, acc = 60 - 60 + 30 = 30 - // Vsync 4: acc=30, <60 -> repeat, acc = 30 + 30 = 60 - - TEST_ASSERT_TRUE(FramePacer_step(&pacer)); // step - TEST_ASSERT_FALSE(FramePacer_step(&pacer)); // repeat - TEST_ASSERT_TRUE(FramePacer_step(&pacer)); // step - TEST_ASSERT_FALSE(FramePacer_step(&pacer)); // repeat -} - -/////////////////////////////// -// Long-Run Stability Tests -/////////////////////////////// - -void test_60fps_72hz_long_run_correct_ratio(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - int steps = 0; - int total_vsyncs = 7200; // 100 seconds at 72Hz - - for (int i = 0; i < total_vsyncs; i++) { - if (FramePacer_step(&pacer)) { - steps++; - } - } - - // Expected: 60/72 * 7200 = 6000 steps exactly - TEST_ASSERT_EQUAL(6000, steps); -} - -void test_50fps_60hz_long_run_correct_ratio(void) { - FramePacer_init(&pacer, 50.0, 60.0); - - int steps = 0; - int total_vsyncs = 6000; // 100 seconds at 60Hz - - for (int i = 0; i < total_vsyncs; i++) { - if (FramePacer_step(&pacer)) { - steps++; - } - } - - // Expected: 50/60 * 6000 = 5000 steps exactly - TEST_ASSERT_EQUAL(5000, steps); -} - -void test_30fps_60hz_long_run_correct_ratio(void) { - FramePacer_init(&pacer, 30.0, 60.0); - - int steps = 0; - int total_vsyncs = 6000; - - for (int i = 0; i < total_vsyncs; i++) { - if (FramePacer_step(&pacer)) { - steps++; - } - } - - // Expected: 30/60 * 6000 = 3000 steps exactly - TEST_ASSERT_EQUAL(3000, steps); -} - -void test_accumulator_stays_bounded(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // Run for many iterations and verify accumulator never exceeds 2x display_hz - // (theoretical max is display_hz + game_fps - 1) - int32_t max_expected = pacer.display_hz_q16 + pacer.game_fps_q16; - for (int i = 0; i 
< 10000; i++) { - FramePacer_step(&pacer); - TEST_ASSERT_LESS_THAN(max_expected, pacer.accumulator); - } -} - -/////////////////////////////// -// Reset Tests -/////////////////////////////// - -void test_reset_to_display_hz(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // Build up some accumulator - FramePacer_step(&pacer); - FramePacer_step(&pacer); - - // Reset - FramePacer_reset(&pacer); - - // Should be back to display_hz - TEST_ASSERT_EQUAL_INT32(pacer.display_hz_q16, pacer.accumulator); -} - -void test_reset_ensures_next_step(void) { - FramePacer_init(&pacer, 60.0, 72.0); - - // Drain accumulator - for (int i = 0; i < 10; i++) { - FramePacer_step(&pacer); - } - - // Reset - FramePacer_reset(&pacer); - - // Next call should step (accumulator = display_hz) - TEST_ASSERT_TRUE(FramePacer_step(&pacer)); -} - -void test_reset_preserves_settings(void) { - FramePacer_init(&pacer, 60.0, 72.0); - FramePacer_step(&pacer); - - FramePacer_reset(&pacer); - - // Settings should be preserved - TEST_ASSERT_EQUAL_INT32(60 * Q16_SCALE, pacer.game_fps_q16); - TEST_ASSERT_EQUAL_INT32(72 * Q16_SCALE, pacer.display_hz_q16); - TEST_ASSERT_FALSE(pacer.direct_mode); -} - -/////////////////////////////// -// Vsync Measurement Tests -/////////////////////////////// - -void test_vsync_measurement_not_stable_initially(void) { - FramePacer_init(&pacer, 60.0, 60.0); - - TEST_ASSERT_FALSE(FramePacer_isMeasurementStable(&pacer)); - // getMeasuredHz returns 0 when not stable - double hz = FramePacer_getMeasuredHz(&pacer); - TEST_ASSERT_TRUE(hz == 0.0); -} - -void test_vsync_measurement_accumulates_samples(void) { - FramePacer_init(&pacer, 60.0, 60.0); - - // First call just sets baseline, doesn't count as sample - FramePacer_recordVsync(&pacer); - - // Simulate 60Hz vsync (16667µs intervals) - for (int i = 0; i < 50; i++) { - mock_time_us += 16667; // ~60Hz - FramePacer_recordVsync(&pacer); - } - - // Should have samples but not stable yet (need 120) - 
TEST_ASSERT_FALSE(FramePacer_isMeasurementStable(&pacer)); - TEST_ASSERT_EQUAL(50, pacer.vsync_samples); -} - -// Helper to check if a double is within tolerance -static int within_tolerance(double actual, double expected, double tolerance) { - double diff = actual - expected; - if (diff < 0) diff = -diff; - return diff <= tolerance; -} - -void test_vsync_measurement_becomes_stable(void) { - FramePacer_init(&pacer, 60.0, 60.0); - - // First call sets baseline - FramePacer_recordVsync(&pacer); - - // Simulate 60Hz vsync (16667µs intervals) for warmup period - for (int i = 0; i < FRAME_PACER_VSYNC_WARMUP + 10; i++) { - mock_time_us += 16667; - FramePacer_recordVsync(&pacer); - } - - TEST_ASSERT_TRUE(FramePacer_isMeasurementStable(&pacer)); - // Should be approximately 60Hz (within 0.5Hz) - double measured = FramePacer_getMeasuredHz(&pacer); - TEST_ASSERT_TRUE(within_tolerance(measured, 60.0, 0.5)); -} - -void test_vsync_measurement_detects_higher_hz(void) { - FramePacer_init(&pacer, 60.0, 60.0); - - // First call sets baseline - FramePacer_recordVsync(&pacer); - - // Simulate 60.05Hz vsync (16653µs intervals instead of 16667µs) - for (int i = 0; i < FRAME_PACER_VSYNC_WARMUP + 10; i++) { - mock_time_us += 16653; // ~60.05Hz - FramePacer_recordVsync(&pacer); - } - - TEST_ASSERT_TRUE(FramePacer_isMeasurementStable(&pacer)); - double measured = FramePacer_getMeasuredHz(&pacer); - // Should be approximately 60.05Hz (within 0.1Hz) - TEST_ASSERT_TRUE(within_tolerance(measured, 60.05, 0.1)); -} - -void test_vsync_measurement_rejects_outliers(void) { - FramePacer_init(&pacer, 60.0, 60.0); - - // First call sets baseline - FramePacer_recordVsync(&pacer); - - // Simulate normal 60Hz vsync - for (int i = 0; i < 50; i++) { - mock_time_us += 16667; - FramePacer_recordVsync(&pacer); - } - int samples_before = pacer.vsync_samples; - - // Simulate a frame drop (long interval = low Hz, rejected) - mock_time_us += 50000; // ~20Hz, should be rejected - FramePacer_recordVsync(&pacer); - 
- // Sample count should not have increased (outlier rejected) - TEST_ASSERT_EQUAL(samples_before, pacer.vsync_samples); - - // Simulate a fast frame (very short interval = high Hz, rejected) - mock_time_us += 5000; // ~200Hz, should be rejected - FramePacer_recordVsync(&pacer); - - // Sample count should still not have increased - TEST_ASSERT_EQUAL(samples_before, pacer.vsync_samples); -} - -void test_vsync_measurement_reinits_pacer_when_hz_differs(void) { - // Start with reported 60Hz but actual 60.05Hz - FramePacer_init(&pacer, 60.0, 60.0); - - // Originally in direct mode (60fps @ 60Hz) - TEST_ASSERT_TRUE(pacer.direct_mode); - - // First call sets baseline - FramePacer_recordVsync(&pacer); - - // Simulate 60.05Hz vsync for warmup period - for (int i = 0; i < FRAME_PACER_VSYNC_WARMUP + 10; i++) { - mock_time_us += 16653; // ~60.05Hz - FramePacer_recordVsync(&pacer); - } - - // After measurement, display_hz_q16 should be updated to ~60.05 (within 0.1Hz) - double updated_hz = pacer.display_hz_q16 / (double)Q16_SCALE; - TEST_ASSERT_TRUE(within_tolerance(updated_hz, 60.05, 0.1)); -} - -/////////////////////////////// -// Test Runner -/////////////////////////////// - -int main(void) { - UNITY_BEGIN(); - - // Initialization and tolerance tests - RUN_TEST(test_init_60fps_60hz_direct_mode); - RUN_TEST(test_init_5994fps_60hz_direct_mode); - RUN_TEST(test_init_60fps_60_5hz_direct_mode); // within 1% tolerance - RUN_TEST(test_init_60fps_61hz_paced_mode); // outside 1% tolerance - RUN_TEST(test_init_60fps_72hz_paced_mode); - RUN_TEST(test_init_50fps_60hz_paced_mode); - RUN_TEST(test_init_30fps_60hz_paced_mode); - RUN_TEST(test_init_preserves_fractional_fps); - - // Direct mode - RUN_TEST(test_direct_mode_always_steps); - RUN_TEST(test_isDirectMode_returns_correct_value); - - // 60fps @ 72Hz - RUN_TEST(test_60fps_72hz_first_vsync_steps); - RUN_TEST(test_60fps_72hz_second_vsync_repeats); - RUN_TEST(test_60fps_72hz_pattern_6_vsyncs); - - // 50fps @ 60Hz (PAL) - 
RUN_TEST(test_50fps_60hz_pattern_6_vsyncs); - - // 30fps @ 60Hz - RUN_TEST(test_30fps_60hz_alternates); - - // Long-run stability - RUN_TEST(test_60fps_72hz_long_run_correct_ratio); - RUN_TEST(test_50fps_60hz_long_run_correct_ratio); - RUN_TEST(test_30fps_60hz_long_run_correct_ratio); - RUN_TEST(test_accumulator_stays_bounded); - - // Reset - RUN_TEST(test_reset_to_display_hz); - RUN_TEST(test_reset_ensures_next_step); - RUN_TEST(test_reset_preserves_settings); - - // Vsync measurement - RUN_TEST(test_vsync_measurement_not_stable_initially); - RUN_TEST(test_vsync_measurement_accumulates_samples); - RUN_TEST(test_vsync_measurement_becomes_stable); - RUN_TEST(test_vsync_measurement_detects_higher_hz); - RUN_TEST(test_vsync_measurement_rejects_outliers); - RUN_TEST(test_vsync_measurement_reinits_pacer_when_hz_differs); - - return UNITY_END(); -} diff --git a/tests/unit/all/player/test_sync_manager.c b/tests/unit/all/player/test_sync_manager.c new file mode 100644 index 00000000..c66ea9a6 --- /dev/null +++ b/tests/unit/all/player/test_sync_manager.c @@ -0,0 +1,344 @@ +/** + * test_sync_manager.c - Unit tests for audio/video sync mode management + * + * Tests the runtime-adaptive sync mode switching including: + * - Initialization (starts in AUDIO_CLOCK mode) + * - Vsync measurement with EMA smoothing + * - Mode switching based on measured Hz + * - Drift detection and fallback to AUDIO_CLOCK + * - shouldRunCore (always returns true) + * - shouldUseRateControl/shouldBlockAudio based on mode + */ + +#include "unity.h" +#include "sync_manager.h" +#include +#include + +// Stub for getMicroseconds - returns controllable time for vsync measurement +static uint64_t mock_time_us = 0; +uint64_t getMicroseconds(void) { + return mock_time_us; +} + +// Stub for LOG_info - suppress output during tests +void LOG_info(const char* fmt, ...) 
{ + (void)fmt; +} + +// Test state +static SyncManager manager; + +/////////////////////////////// +// Test Setup/Teardown +/////////////////////////////// + +void setUp(void) { + SyncManager_init(&manager, 60.0, 60.0); + mock_time_us = 1000000; // Start at 1 second +} + +void tearDown(void) { + // No cleanup needed +} + +/////////////////////////////// +// Initialization Tests +/////////////////////////////// + +void test_init_starts_in_audio_clock_mode(void) { + TEST_ASSERT_EQUAL(SYNC_MODE_AUDIO_CLOCK, SyncManager_getMode(&manager)); +} + +void test_init_stores_game_fps(void) { + SyncManager_init(&manager, 59.94, 60.0); + TEST_ASSERT_EQUAL_FLOAT(59.94, manager.game_fps); +} + +void test_init_stores_display_hz(void) { + SyncManager_init(&manager, 60.0, 72.0); + TEST_ASSERT_EQUAL_FLOAT(72.0, manager.display_hz); +} + +void test_init_with_zero_hz_defaults_to_60(void) { + SyncManager_init(&manager, 60.0, 0.0); + TEST_ASSERT_EQUAL_FLOAT(60.0, manager.display_hz); +} + +void test_init_measurement_not_stable(void) { + TEST_ASSERT_FALSE(SyncManager_isMeasurementStable(&manager)); +} + +/////////////////////////////// +// Vsync Measurement Tests +/////////////////////////////// + +void test_first_vsync_just_records_timestamp(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + TEST_ASSERT_EQUAL_UINT64(1000000, manager.last_vsync_time); + TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); +} + +void test_second_vsync_calculates_hz(void) { + // First call + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Second call - 16.667ms later (60Hz) + mock_time_us = 1016667; + SyncManager_recordVsync(&manager); + + // Should have initial Hz measurement (not averaged yet, first sample) + TEST_ASSERT_FLOAT_WITHIN(0.1, 60.0, manager.measured_hz); +} + +void test_rejects_outlier_too_low(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // 30Hz (too low, < 50Hz threshold) + mock_time_us = 1033333; + 
SyncManager_recordVsync(&manager); + + // Should be rejected, no measurement + TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); + TEST_ASSERT_EQUAL(0, manager.measurement_samples); +} + +void test_rejects_outlier_too_high(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // 150Hz (too high, > 120Hz threshold) + mock_time_us = 1006667; + SyncManager_recordVsync(&manager); + + // Should be rejected + TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); +} + +void test_rejects_zero_interval(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Same timestamp (division by zero protection) + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Should be rejected + TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); +} + +/////////////////////////////// +// Mode Switching Tests +/////////////////////////////// + +void test_switches_to_vsync_when_compatible(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Simulate 120 samples at 60Hz (compatible with 60fps game) + for (int i = 0; i < 120; i++) { + mock_time_us += 16667; // 60Hz interval + SyncManager_recordVsync(&manager); + } + + // Should switch to VSYNC mode (< 1% mismatch) + TEST_ASSERT_EQUAL(SYNC_MODE_VSYNC, SyncManager_getMode(&manager)); + TEST_ASSERT_TRUE(SyncManager_isMeasurementStable(&manager)); +} + +void test_stays_in_audio_clock_when_incompatible(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Simulate 120 samples at 68Hz (incompatible with 60fps game, 13% mismatch) + for (int i = 0; i < 120; i++) { + mock_time_us += 14706; // 68Hz interval + SyncManager_recordVsync(&manager); + } + + // Should stay in AUDIO_CLOCK mode (> 1% mismatch) + TEST_ASSERT_EQUAL(SYNC_MODE_AUDIO_CLOCK, SyncManager_getMode(&manager)); + TEST_ASSERT_TRUE(SyncManager_isMeasurementStable(&manager)); +} + +void test_measurement_stable_after_120_samples(void) { + mock_time_us = 1000000; + 
SyncManager_recordVsync(&manager); + + // Need 120 samples after initial baseline + for (int i = 0; i < 120; i++) { + mock_time_us += 16667; + SyncManager_recordVsync(&manager); + if (i < 119) { + TEST_ASSERT_FALSE(SyncManager_isMeasurementStable(&manager)); + } + } + + // After 120 samples - now stable + TEST_ASSERT_TRUE(SyncManager_isMeasurementStable(&manager)); +} + +void test_drift_detection_switches_back_to_audio_clock(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Initial measurement at 60Hz - should switch to VSYNC + for (int i = 0; i < 120; i++) { + mock_time_us += 16667; + SyncManager_recordVsync(&manager); + } + TEST_ASSERT_EQUAL(SYNC_MODE_VSYNC, SyncManager_getMode(&manager)); + + // Drift to 65Hz over 300 frames (> 1% mismatch) + for (int i = 0; i < 300; i++) { + mock_time_us += 15385; // 65Hz interval + SyncManager_recordVsync(&manager); + } + + // Should detect drift and switch back to AUDIO_CLOCK + TEST_ASSERT_EQUAL(SYNC_MODE_AUDIO_CLOCK, SyncManager_getMode(&manager)); +} + +/////////////////////////////// +// API Tests +/////////////////////////////// + +void test_should_run_core_always_returns_true(void) { + TEST_ASSERT_TRUE(SyncManager_shouldRunCore(&manager)); + + // Even after switching to VSYNC + manager.mode = SYNC_MODE_VSYNC; + TEST_ASSERT_TRUE(SyncManager_shouldRunCore(&manager)); +} + +void test_should_use_rate_control_in_vsync_mode(void) { + manager.mode = SYNC_MODE_VSYNC; + TEST_ASSERT_TRUE(SyncManager_shouldUseRateControl(&manager)); +} + +void test_should_not_use_rate_control_in_audio_clock(void) { + manager.mode = SYNC_MODE_AUDIO_CLOCK; + TEST_ASSERT_FALSE(SyncManager_shouldUseRateControl(&manager)); +} + +void test_should_block_audio_in_audio_clock_mode(void) { + manager.mode = SYNC_MODE_AUDIO_CLOCK; + TEST_ASSERT_TRUE(SyncManager_shouldBlockAudio(&manager)); +} + +void test_should_not_block_audio_in_vsync_mode(void) { + manager.mode = SYNC_MODE_VSYNC; + 
TEST_ASSERT_FALSE(SyncManager_shouldBlockAudio(&manager)); +} + +void test_get_measured_hz_returns_zero_when_not_stable(void) { + TEST_ASSERT_EQUAL_FLOAT(0.0, SyncManager_getMeasuredHz(&manager)); +} + +void test_get_measured_hz_returns_value_when_stable(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // 120 samples at 60Hz + for (int i = 0; i < 120; i++) { + mock_time_us += 16667; + SyncManager_recordVsync(&manager); + } + + double measured = SyncManager_getMeasuredHz(&manager); + TEST_ASSERT_FLOAT_WITHIN(0.5, 60.0, measured); +} + +void test_mode_name_audio_clock(void) { + TEST_ASSERT_EQUAL_STRING("Audio Clock", SyncManager_getModeName(SYNC_MODE_AUDIO_CLOCK)); +} + +void test_mode_name_vsync(void) { + TEST_ASSERT_EQUAL_STRING("Vsync", SyncManager_getModeName(SYNC_MODE_VSYNC)); +} + +/////////////////////////////// +// Edge Cases +/////////////////////////////// + +void test_ema_smooths_noisy_measurements(void) { + mock_time_us = 1000000; + SyncManager_recordVsync(&manager); + + // Alternate between 59Hz and 61Hz (simulating jitter) + for (int i = 0; i < 120; i++) { + if (i % 2 == 0) { + mock_time_us += 16949; // 59Hz + } else { + mock_time_us += 16393; // 61Hz + } + SyncManager_recordVsync(&manager); + } + + // EMA should smooth to ~60Hz + double measured = SyncManager_getMeasuredHz(&manager); + TEST_ASSERT_FLOAT_WITHIN(1.0, 60.0, measured); +} + +void test_drift_check_only_after_stable(void) { + // Before stable, drift check shouldn't run + manager.measurement_stable = false; + manager.mode = SYNC_MODE_VSYNC; + + mock_time_us = 1000000; + for (int i = 0; i < 300; i++) { + mock_time_us += 15385; // 65Hz (should trigger drift) + SyncManager_recordVsync(&manager); + } + + // Mode shouldn't change (not stable yet) + TEST_ASSERT_EQUAL(SYNC_MODE_VSYNC, manager.mode); +} + +/////////////////////////////// +// Test Runner +/////////////////////////////// + +int main(void) { + UNITY_BEGIN(); + + // Initialization tests + 
RUN_TEST(test_init_starts_in_audio_clock_mode); + RUN_TEST(test_init_stores_game_fps); + RUN_TEST(test_init_stores_display_hz); + RUN_TEST(test_init_with_zero_hz_defaults_to_60); + RUN_TEST(test_init_measurement_not_stable); + + // Vsync measurement tests + RUN_TEST(test_first_vsync_just_records_timestamp); + RUN_TEST(test_second_vsync_calculates_hz); + RUN_TEST(test_rejects_outlier_too_low); + RUN_TEST(test_rejects_outlier_too_high); + RUN_TEST(test_rejects_zero_interval); + + // Mode switching tests + RUN_TEST(test_switches_to_vsync_when_compatible); + RUN_TEST(test_stays_in_audio_clock_when_incompatible); + RUN_TEST(test_measurement_stable_after_120_samples); + RUN_TEST(test_drift_detection_switches_back_to_audio_clock); + + // API tests + RUN_TEST(test_should_run_core_always_returns_true); + RUN_TEST(test_should_use_rate_control_in_vsync_mode); + RUN_TEST(test_should_not_use_rate_control_in_audio_clock); + RUN_TEST(test_should_block_audio_in_audio_clock_mode); + RUN_TEST(test_should_not_block_audio_in_vsync_mode); + RUN_TEST(test_get_measured_hz_returns_zero_when_not_stable); + RUN_TEST(test_get_measured_hz_returns_value_when_stable); + RUN_TEST(test_mode_name_audio_clock); + RUN_TEST(test_mode_name_vsync); + + // Edge cases + RUN_TEST(test_ema_smooths_noisy_measurements); + RUN_TEST(test_drift_check_only_after_stable); + + return UNITY_END(); +} diff --git a/workspace/all/common/api.c b/workspace/all/common/api.c index d09066aa..4e816c2d 100644 --- a/workspace/all/common/api.c +++ b/workspace/all/common/api.c @@ -1723,7 +1723,6 @@ static struct SND_Context { float rate_integral; // PI integral term (accumulates from smoothed error) float error_avg; // Smoothed error for slow integral timescale float last_rate_adjust; // Last computed adjustment (for snapshot without side effects) - float last_rate_boost; // Last computed boost (for snapshot diagnostics) // SDL callback timing diagnostics uint64_t callback_count; // Total callbacks @@ -1731,6 +1730,10 @@ static 
struct SND_Context { uint64_t callback_interval_sum; // Sum of intervals (for average) unsigned callback_samples_min; // Min samples requested unsigned callback_samples_max; // Max samples requested + + // Sync mode callbacks (set by player via SND_setSyncCallbacks) + SND_SyncCallback should_use_rate_control; + SND_SyncCallback should_block_audio; } snd = {0}; /** @@ -1865,26 +1868,26 @@ static float SND_getBufferFillLevel(void) { } /** - * Calculates dynamic rate adjustment using a dual-timescale PI controller. + * Calculates dynamic rate adjustment using a simple dual-timescale PI controller. * - * Extends the Arntzen algorithm with an integral term on a separate (slower) - * timescale to correct persistent hardware drift without fighting proportional. + * Based on RetroArch's dynamic rate control algorithm (Arntzen, 2012). + * Uses dual-timescale PI control: fast proportional term for immediate + * response, slow integral term for persistent drift compensation. * - * Dual-timescale PI: - * error = (1 - 2*fill) - * p_term = error * d // Fast: frame-to-frame jitter - * error_avg = α*error + (1-α)*error_avg // Smooth error (~100 frames) - * integral += error_avg * ki // Slow: learns persistent offset + * Algorithm: + * error = (1 - 2*fill) // Positive when buffer low + * p_term = error * d // Fast: frame-to-frame response + * error_avg = α*error + (1-α)*error_avg // Smooth over ~300 frames + * integral += error_avg * ki // Slow: learns steady offset * adjustment = p_term + integral * - * Key insight: Original PI failed because both terms operated on same timescale, - * causing them to fight. By smoothing error before integrating, the integral - * only sees persistent trends, not per-frame noise. + * Works well for < 1% mismatch between game fps and display Hz. + * Beyond 1%, audio-clock mode should be used instead. 
* * Tuning guide: - * d: Higher = faster jitter response, more pitch variation (0.005-0.025) - * ki: Integral gain, 100× slower than error averaging (0.00005) - * α: Error smoothing factor, ~100 frame average (0.01) + * d: Proportional gain (0.005-0.025) - higher = faster response, more pitch variation + * ki: Integral gain (0.00005) - very slow, learns persistent drift + * α: Error smoothing (0.003) - ~300 frame average * * Our resampler divides by ratio_adjust (larger = fewer outputs), so: * ratio_adjust = 1 - adjustment @@ -1902,22 +1905,8 @@ static float SND_calculateRateAdjust(void) { // Fast timescale (proportional): immediate response to buffer level changes float p_term = error * SND_RATE_CONTROL_D; - // Cubic safety boost: ~1.0× at center, 4× at limits - // Only boost proportional term - integral is a learned steady-state offset - // Prevents hitting 0% or 100% during integral learning or after CPU frequency resets - // More responsive in middle range than quartic, catches drift earlier - float distance = fill - 0.5f; - float normalized = distance * 2.0f; // -1 to +1 - float abs_norm = normalized < 0 ? -normalized : normalized; // 0 to 1 - float boost = 1.0f + 3.0f * (abs_norm * abs_norm * abs_norm); // 1.0 to 4.0 - float boosted_p = p_term * boost; - // Slow timescale (integral): persistent offset learned in SND_newFrame() - // Integral is NOT boosted - it's a steady-state correction, not emergency response - float adjustment = boosted_p + snd.rate_integral; - - // Store for diagnostics - snd.last_rate_boost = boost; + float adjustment = p_term + snd.rate_integral; // Invert for our resampler convention (larger ratio = fewer outputs) snd.last_rate_adjust = 1.0f - adjustment; @@ -1927,18 +1916,18 @@ static float SND_calculateRateAdjust(void) { /** * Writes a batch of audio samples to the ring buffer. 
* - * Two implementations based on sync mode: + * Runtime adaptive behavior based on sync mode: * - * SYNC_MODE_AUDIOCLOCK (audio-driven timing): + * Audio-clock mode (should_block_audio = true): * - Blocks when buffer is full (up to 10ms) * - Audio hardware clock drives emulation timing * - Fixed 1.0 resampling ratio (no dynamic rate control) - * - For devices with unstable vsync + * - For devices with unstable vsync (>1% mismatch) * - * Default (vsync-driven timing): + * Vsync mode (should_block_audio = false): * - Non-blocking with dynamic rate control * - Adjusts pitch ±0.5% to maintain buffer at 50% full - * - For devices with stable vsync + * - For devices with stable vsync (<1% mismatch) * * @param frames Array of audio frames to write * @param frame_count Number of frames in array @@ -1950,112 +1939,111 @@ size_t SND_batchSamples(const SND_Frame* frames, if (snd.frame_count == 0) return 0; -#ifdef SYNC_MODE_AUDIOCLOCK - // ======================================================================== - // AUDIOCLOCK MODE: Blocking writes with audio hardware timing - // ======================================================================== - - SDL_LockAudio(); - - size_t consumed = 0; - while (frame_count > 0) { - int tries = 0; + // Check sync mode via callback (defaults to vsync mode if not set) + bool should_block = snd.should_block_audio && snd.should_block_audio(); + bool should_use_rate_control = + !should_block && (!snd.should_use_rate_control || snd.should_use_rate_control()); - // Wait for audio callback to drain buffer (up to 10ms) - while (tries < 10 && snd.frame_in == snd.frame_filled) { - tries++; - SDL_UnlockAudio(); - SDL_Delay(1); - SDL_LockAudio(); - } + if (should_block) { + // ======================================================================== + // AUDIO-CLOCK MODE: Blocking writes with audio hardware timing + // ======================================================================== - // Write samples with fixed 1.0 ratio (no rate 
control) - AudioRingBuffer ring = { - .frames = snd.buffer, - .capacity = snd.frame_count, - .write_pos = snd.frame_in, - .read_pos = snd.frame_out, - }; + SDL_LockAudio(); - ResampleResult result = - AudioResampler_resample(&snd.resampler, &ring, frames, frame_count, 1.0f); + size_t consumed = 0; + while (frame_count > 0) { + int tries = 0; - snd.frame_in = ring.write_pos; - snd.samples_in += result.frames_consumed; - snd.samples_written += result.frames_written; + // Wait for audio callback to drain buffer (up to 10ms) + while (tries < 10 && snd.frame_in == snd.frame_filled) { + tries++; + SDL_UnlockAudio(); + SDL_Delay(1); + SDL_LockAudio(); + } - frames += result.frames_consumed; - frame_count -= result.frames_consumed; - consumed += result.frames_consumed; - } + // Write samples with fixed 1.0 ratio (no rate control) + AudioRingBuffer ring = { + .frames = snd.buffer, + .capacity = snd.frame_count, + .write_pos = snd.frame_in, + .read_pos = snd.frame_out, + }; - SDL_UnlockAudio(); - return consumed; + ResampleResult result = + AudioResampler_resample(&snd.resampler, &ring, frames, frame_count, 1.0f); -#else - // ======================================================================== - // VSYNC MODE: Non-blocking with dynamic rate control - // ======================================================================== + snd.frame_in = ring.write_pos; + snd.samples_in += result.frames_consumed; + snd.samples_written += result.frames_written; - SDL_LockAudio(); + frames += result.frames_consumed; + frame_count -= result.frames_consumed; + consumed += result.frames_consumed; + } - // Dynamic rate control per Arntzen paper: adjust resampling ratio based on buffer fill - // Buffer empty → produce more samples (fill up), buffer full → produce fewer (drain) - // The system naturally converges to a stable equilibrium point - float total_adjust = SND_calculateRateAdjust(); + SDL_UnlockAudio(); + return consumed; - // Note: Debug logging moved to player's unified snapshot 
logging (SND_getSnapshot) + } else { + // ======================================================================== + // VSYNC MODE: Non-blocking with dynamic rate control + // ======================================================================== - // Estimate how many OUTPUT frames we'll produce (may be more than input when upsampling) - int estimated_output = AudioResampler_estimateOutput(&snd.resampler, frame_count, total_adjust); + SDL_LockAudio(); - // Calculate how much space is available in the ring buffer (for diagnostics) - int available; - if (snd.frame_in >= snd.frame_out) { - available = snd.frame_count - (snd.frame_in - snd.frame_out) - 1; - } else { - available = snd.frame_out - snd.frame_in - 1; - } + // Determine resampling ratio + float total_adjust; + if (should_use_rate_control) { + // Dynamic rate control: adjust based on buffer fill + total_adjust = SND_calculateRateAdjust(); - // Warn if buffer is nearly full (indicates rate control failure) - // The resampler will handle buffer full gracefully (partial write + save state) - if (available < estimated_output) { - LOG_warn( - "Audio buffer nearly full: %d available, %d needed (fill=%.0f%%) - rate control may " - "be failing\n", - available, estimated_output, SND_getBufferFillLevel() * 100.0f); - } + // Track cumulative adjust for diagnostics + snd.cumulative_total_adjust += total_adjust; + snd.total_adjust_count++; + } else { + // No rate control: fixed 1.0 ratio + total_adjust = 1.0f; + } - // Set up ring buffer wrapper for the resampler - AudioRingBuffer ring = { - .frames = snd.buffer, - .capacity = snd.frame_count, - .write_pos = snd.frame_in, - .read_pos = snd.frame_out, - }; + // Estimate output size for diagnostics + int estimated_output = + AudioResampler_estimateOutput(&snd.resampler, frame_count, total_adjust); - // Resample with combined adjustment (base correction + dynamic rate control) - ResampleResult result = - AudioResampler_resample(&snd.resampler, &ring, frames, frame_count, 
total_adjust); + // Calculate available space + int available; + if (snd.frame_in >= snd.frame_out) { + available = snd.frame_count - (snd.frame_in - snd.frame_out) - 1; + } else { + available = snd.frame_out - snd.frame_in - 1; + } - // Update ring buffer write position - snd.frame_in = ring.write_pos; + // Warn if buffer nearly full + if (available < estimated_output) { + LOG_warn("Audio buffer nearly full: %d available, %d needed (fill=%.0f%%)\n", available, + estimated_output, SND_getBufferFillLevel() * 100.0f); + } - // Track sample flow for diagnostics - snd.samples_in += result.frames_consumed; // Input samples consumed by resampler - snd.samples_written += result.frames_written; // Output samples written to buffer + // Resample into ring buffer + AudioRingBuffer ring = { + .frames = snd.buffer, + .capacity = snd.frame_count, + .write_pos = snd.frame_in, + .read_pos = snd.frame_out, + }; - // Track cumulative total_adjust for window-averaged comparisons - snd.cumulative_total_adjust += total_adjust; - snd.total_adjust_count++; + ResampleResult result = + AudioResampler_resample(&snd.resampler, &ring, frames, frame_count, total_adjust); - // Note: frame_filled is managed by the audio callback (SND_audioCallback) - // to track what has been consumed. We don't update it here. 
+ snd.frame_in = ring.write_pos; + snd.samples_in += result.frames_consumed; + snd.samples_written += result.frames_written; - SDL_UnlockAudio(); + SDL_UnlockAudio(); - return result.frames_consumed; -#endif + return result.frames_consumed; + } } /** @@ -2084,9 +2072,17 @@ void SND_init(double sample_rate, double frame_rate) { // plat_sound_init LOG_debug("Current audio driver: %s\n", SDL_GetCurrentAudioDriver()); #endif + // Preserve sync mode callbacks across reinit + SND_SyncCallback saved_rate_control = snd.should_use_rate_control; + SND_SyncCallback saved_block_audio = snd.should_block_audio; + memset(&snd, 0, sizeof(struct SND_Context)); snd.frame_rate = frame_rate; + // Restore callbacks + snd.should_use_rate_control = saved_rate_control; + snd.should_block_audio = saved_block_audio; + SDL_AudioSpec spec_in; SDL_AudioSpec spec_out; @@ -2166,15 +2162,17 @@ void SND_resetUnderrunCount(void) { * This prevents the integral from accumulating N times when cores use * per-sample audio callbacks (audio_sample_callback instead of batch). * Some cores (e.g., 64-bit snes9x) call audio ~535 times per frame. + * + * No-op in audio-clock mode (no rate control needed). 
*/ void SND_newFrame(void) { -#ifdef SYNC_MODE_AUDIOCLOCK - // No-op in audioclock mode - no rate control needed - return; -#else if (!snd.initialized) return; + // Check if rate control should run (vsync mode only) + if (!snd.should_use_rate_control || !snd.should_use_rate_control()) + return; + SDL_LockAudio(); float fill = SND_getBufferFillLevel(); @@ -2191,7 +2189,6 @@ void SND_newFrame(void) { snd.rate_integral = -SND_INTEGRAL_CLAMP; SDL_UnlockAudio(); -#endif } /** @@ -2227,7 +2224,6 @@ SND_Snapshot SND_getSnapshot(void) { snap.rate_adjust = snd.last_rate_adjust; snap.total_adjust = snd.last_rate_adjust; snap.rate_integral = snd.rate_integral; - snap.rate_boost = snd.last_rate_boost; snap.rate_control_d = SND_RATE_CONTROL_D; snap.rate_control_ki = SND_RATE_CONTROL_KI; snap.error_avg = snd.error_avg; @@ -2349,6 +2345,21 @@ void SND_setMinLatency(unsigned latency_ms) { SDL_UnlockAudio(); } +/** + * Configure sync mode callbacks for runtime adaptive behavior. + * + * The audio system uses these callbacks to adapt its behavior based on + * the current sync mode (audio-clock vs vsync). + * + * @param should_use_rate_control Callback returning true if audio rate control should run + * @param should_block_audio Callback returning true if audio writes should block + */ +void SND_setSyncCallbacks(SND_SyncCallback should_use_rate_control, + SND_SyncCallback should_block_audio) { + snd.should_use_rate_control = should_use_rate_control; + snd.should_block_audio = should_block_audio; +} + /////////////////////////////// // Input - Lid detection (clamshell devices) /////////////////////////////// diff --git a/workspace/all/common/api.h b/workspace/all/common/api.h index 4bfee1df..f41095a1 100644 --- a/workspace/all/common/api.h +++ b/workspace/all/common/api.h @@ -854,6 +854,27 @@ void SND_resetUnderrunCount(void); */ void SND_newFrame(void); +/** + * Callback type for sync mode queries. + * + * Used by audio system to query sync manager for runtime mode decisions. 
+ * + * @return true if the feature should be enabled, false otherwise + */ +typedef bool (*SND_SyncCallback)(void); + +/** + * Configure sync mode callbacks for runtime adaptive behavior. + * + * The audio system uses these callbacks to adapt its behavior based on + * the current sync mode (audio-clock vs vsync). + * + * @param should_use_rate_control Callback returning true if audio rate control should run + * @param should_block_audio Callback returning true if audio writes should block + */ +void SND_setSyncCallbacks(SND_SyncCallback should_use_rate_control, + SND_SyncCallback should_block_audio); + /** * Shuts down the audio subsystem. */ @@ -896,7 +917,6 @@ typedef struct { float rate_adjust; // Dynamic rate control adjustment (1.0 ± d) float total_adjust; // Same as rate_adjust (no separate corrections) float rate_integral; // PI controller integral term (drift correction) - float rate_boost; // Cubic safety boost multiplier (1.0 at center, up to 4.0 at limits) float rate_control_d; // Proportional gain float rate_control_ki; // Integral gain float error_avg; // Smoothed error (for debugging integral behavior) diff --git a/workspace/all/common/api_types.h b/workspace/all/common/api_types.h index 1fd7169e..dc008fde 100644 --- a/workspace/all/common/api_types.h +++ b/workspace/all/common/api_types.h @@ -8,6 +8,7 @@ #ifndef __API_TYPES_H__ #define __API_TYPES_H__ +#include #include /////////////////////////////// diff --git a/workspace/all/common/cpu.h b/workspace/all/common/cpu.h index 36f059d1..93c5b831 100644 --- a/workspace/all/common/cpu.h +++ b/workspace/all/common/cpu.h @@ -55,6 +55,9 @@ #define CPU_PANIC_GRACE_FRAMES 60 // Frames to ignore underruns after freq change (~1s at 60fps) #define CPU_PANIC_GRACE_MAX_UNDERRUNS 5 // Max underruns during grace before panic anyway #define CPU_STABILITY_DECAY_WINDOWS 8 // Stable windows before decaying panic counts (~4s) +#define CPU_AUDIO_CLOCK_REDUCE_WINDOWS \ + 8 // Windows before reduce in audio-clock mode 
(~4s) \ + // Higher than normal (4) since util metrics are unreliable #define CPU_DEFAULT_MIN_BUFFER_FOR_REDUCE 40 // Min audio buffer % to allow reduce /** diff --git a/workspace/all/player/Makefile b/workspace/all/player/Makefile index 45356f5f..9ba61f33 100644 --- a/workspace/all/player/Makefile +++ b/workspace/all/player/Makefile @@ -31,7 +31,7 @@ SOURCE = $(TARGET).c ../common/scaler.c ../common/utils.c ../common/nointro_pars player_video_convert.c player_rotation.c player_config.c player_context.c \ player_menu.c player_env.c player_game.c player_scaler.c player_core.c \ ../common/gl_video.c \ - frame_pacer.c \ + sync_manager.c \ ../../$(PLATFORM)/platform/platform.c # Add shared rendering modules diff --git a/workspace/all/player/frame_pacer.c b/workspace/all/player/frame_pacer.c deleted file mode 100644 index 726e6240..00000000 --- a/workspace/all/player/frame_pacer.c +++ /dev/null @@ -1,170 +0,0 @@ -/** - * frame_pacer.c - Display-agnostic frame pacing implementation - * - * Uses Q16.16 fixed-point arithmetic for precision without float drift. - * Q16.16 means: 16 bits integer, 16 bits fraction (multiply by 65536). 
- */ - -#include "frame_pacer.h" -#include "log.h" -#include "utils.h" // For getMicroseconds -#include - -// Platform function we need - declared in api.h but we avoid including it -// to keep frame_pacer testable without SDL dependencies -extern double PLAT_getDisplayHz(void); - -// Q16.16 conversion factor -#define Q16_SHIFT 16 -#define Q16_SCALE 65536.0 - -void FramePacer_init(FramePacer* pacer, double game_fps, double display_hz) { - // Fallback to 60Hz if display_hz detection failed - if (display_hz <= 0.0) { - display_hz = 60.0; - } - - // Store original game fps for potential reinit with measured Hz - pacer->game_fps = game_fps; - - // Convert to Q16.16 fixed-point for precise integer math - // 59.73fps becomes 3,913,359 (59.73 * 65536) - pacer->game_fps_q16 = (int32_t)(game_fps * Q16_SCALE); - pacer->display_hz_q16 = (int32_t)(display_hz * Q16_SCALE); - - // Initialize accumulator to display_hz so first vsync triggers a step - // This avoids showing a black/stale frame on startup - pacer->accumulator = pacer->display_hz_q16; - - // Initialize vsync measurement state - pacer->last_vsync_time = 0; - pacer->measured_hz = 0.0; - pacer->vsync_samples = 0; - - // Direct mode if rates are within tolerance - // This handles 59.94fps @ 60Hz, etc. 
- double diff = fabs(game_fps - display_hz) / display_hz; - pacer->direct_mode = (diff < FRAME_PACER_TOLERANCE); -} - -bool FramePacer_step(FramePacer* pacer) { - // Direct mode: always step - if (pacer->direct_mode) { - return true; - } - - // Bresenham accumulator: check threshold THEN add - // Since we initialized to display_hz, first call will step - if (pacer->accumulator >= pacer->display_hz_q16) { - pacer->accumulator -= pacer->display_hz_q16; - pacer->accumulator += pacer->game_fps_q16; - return true; - } - - // Not enough accumulated - repeat frame - pacer->accumulator += pacer->game_fps_q16; - return false; -} - -void FramePacer_reset(FramePacer* pacer) { - // Reset to display_hz so next vsync triggers a step - pacer->accumulator = pacer->display_hz_q16; -} - -bool FramePacer_isDirectMode(const FramePacer* pacer) { - return pacer->direct_mode; -} - -double FramePacer_getDisplayHz(void) { - // Use platform-provided display Hz directly. - // On SDL2 platforms, this queries SDL_GetCurrentDisplayMode(). - // On SDL1 or platforms where SDL doesn't know, this returns a hardcoded value. - return PLAT_getDisplayHz(); -} - -// Smoothing factor for EMA: 0.01 = very smooth (100 frame time constant) -// Lower values = more stable but slower to converge -#define VSYNC_EMA_ALPHA 0.01 - -// Minimum Hz to accept (reject outliers from frame drops) -#define VSYNC_MIN_HZ 50.0 -// Maximum Hz to accept (reject outliers from fast presents) -#define VSYNC_MAX_HZ 120.0 - -void FramePacer_recordVsync(FramePacer* pacer) { - uint64_t now = getMicroseconds(); - - if (pacer->last_vsync_time > 0) { - // Calculate interval in seconds (getMicroseconds returns µs) - double interval = (double)(now - pacer->last_vsync_time) / 1000000.0; - - // Convert to Hz - double hz = 1.0 / interval; - - // Reject outliers (frame drops, fast presents, etc.) 
- if (hz >= VSYNC_MIN_HZ && hz <= VSYNC_MAX_HZ) { - pacer->vsync_samples++; - - if (pacer->measured_hz == 0.0) { - // First sample: initialize directly - pacer->measured_hz = hz; - } else { - // Exponential moving average for stability - pacer->measured_hz = - pacer->measured_hz * (1.0 - VSYNC_EMA_ALPHA) + hz * VSYNC_EMA_ALPHA; - } - - // Log when measurement becomes stable - if (pacer->vsync_samples == FRAME_PACER_VSYNC_WARMUP) { - double reported_hz = PLAT_getDisplayHz(); - LOG_info("Vsync measurement stable: %.3fHz (reported: %.1fHz, diff: %.2f%%)\n", - pacer->measured_hz, reported_hz, - fabs(pacer->measured_hz - reported_hz) / reported_hz * 100.0); - } - - // Check for drift and reinit if needed (both at warmup and periodically after) - // Check every 300 samples after warmup to catch drift - if (pacer->vsync_samples >= FRAME_PACER_VSYNC_WARMUP && - (pacer->vsync_samples == FRAME_PACER_VSYNC_WARMUP || - pacer->vsync_samples % 300 == 0)) { - double current_hz = pacer->display_hz_q16 / Q16_SCALE; - double diff = fabs(pacer->measured_hz - current_hz) / current_hz; - if (diff > 0.001) { // >0.1% difference - LOG_info("Display Hz drift detected: %.3f -> %.3f (%.2f%% change)\n", - current_hz, pacer->measured_hz, diff * 100.0); - - // Update display Hz in Q16.16 - pacer->display_hz_q16 = (int32_t)(pacer->measured_hz * Q16_SCALE); - - // Reset accumulator to new display_hz to avoid frame skip glitches - // When Hz changes, the old accumulator state is invalid - pacer->accumulator = pacer->display_hz_q16; - - // Re-evaluate direct mode with new Hz - double fps_diff = - fabs(pacer->game_fps - pacer->measured_hz) / pacer->measured_hz; - bool should_be_direct = (fps_diff < FRAME_PACER_TOLERANCE); - if (pacer->direct_mode != should_be_direct) { - LOG_info("Frame pacer mode changed: %s -> %s\n", - pacer->direct_mode ? "direct" : "paced", - should_be_direct ? 
"direct" : "paced"); - pacer->direct_mode = should_be_direct; - } - } - } - } - } - - pacer->last_vsync_time = now; -} - -double FramePacer_getMeasuredHz(const FramePacer* pacer) { - if (pacer->vsync_samples >= FRAME_PACER_VSYNC_WARMUP) { - return pacer->measured_hz; - } - return 0.0; // Not enough samples yet -} - -bool FramePacer_isMeasurementStable(const FramePacer* pacer) { - return pacer->vsync_samples >= FRAME_PACER_VSYNC_WARMUP; -} diff --git a/workspace/all/player/frame_pacer.h b/workspace/all/player/frame_pacer.h deleted file mode 100644 index c4585c08..00000000 --- a/workspace/all/player/frame_pacer.h +++ /dev/null @@ -1,159 +0,0 @@ -/** - * frame_pacer.h - Display-agnostic frame pacing - * - * Decouples emulation timing from display refresh rate using a Bresenham-style - * fixed-point accumulator. Determines each vsync whether to step emulation or - * repeat the previous frame. - * - * Example: 60fps game on 72Hz display - * - Vsync 1: acc >= hz -> step, acc -= hz (first frame always steps) - * - Vsync 2: acc < hz -> repeat - * - Vsync 3: acc >= hz -> step, acc -= hz - * - Result: 5 steps per 6 vsyncs (83.3%) = 60fps - * - * Benefits: - * - Q16.16 fixed-point: no floating-point drift, stable forever - * - Direct mode bypass: zero overhead on 60Hz displays - * - Pure functions: fully testable, no SDL/globals - */ - -#ifndef __FRAME_PACER_H__ -#define __FRAME_PACER_H__ - -#include -#include - -/** - * Tolerance for "exact match" detection (direct mode bypass). - * - * Based on RetroArch's dynamic rate control research (Arntzen, 2012): - * - Audio pitch changes ≤0.5% are inaudible to most listeners - * - RetroArch recommends d = 0.2% to 0.5% for rate control - * - Beyond ~0.5% mismatch, "other methods should be employed" - * - * Using 1% as a compromise - allows direct mode for well-matched displays - * while triggering frame pacing for displays with noticeable drift. - * After vsync measurement, the pacer may switch modes based on actual Hz. 
- * - * Examples at 1% tolerance: - * - 59.94fps @ 60Hz → direct mode (0.1% diff) ✓ - * - 60.0fps @ 60.5Hz → direct mode (0.83% diff) ✓ - * - 60.0fps @ 61Hz → paced mode (1.6% diff) - * - 50.0fps @ 60Hz → paced mode (16.7% diff) - */ -#define FRAME_PACER_TOLERANCE 0.01 - -/** - * Number of vsync samples before measurement is considered stable. - * At 60Hz, 120 samples = 2 seconds of measurement. - */ -#define FRAME_PACER_VSYNC_WARMUP 120 - -/** - * Frame pacing state. - * - * Uses Q16.16 fixed-point (multiply by 65536) to preserve fractional precision - * while avoiding floating point drift. For example, 59.73fps becomes 3,913,359. - */ -typedef struct { - int32_t game_fps_q16; // Game FPS in Q16.16 fixed-point - int32_t display_hz_q16; // Display Hz in Q16.16 fixed-point - int32_t accumulator; // Bresenham accumulator (Q16.16) - bool direct_mode; // True if fps ~= hz (skip accumulator) - - // Vsync measurement state - uint64_t last_vsync_time; // Performance counter at last vsync - double measured_hz; // Exponential moving average of measured Hz - int vsync_samples; // Number of samples collected - double game_fps; // Original game fps (for reinit) -} FramePacer; - -/** - * Initialize pacer for given game and display rates. - * - * Automatically detects if rates are close enough to use direct mode - * (within FRAME_PACER_TOLERANCE). - * - * Accumulator is initialized to display_hz so the first vsync always - * triggers a step (avoids showing a black/stale frame). - * - * @param pacer Pacer state to initialize - * @param game_fps Game's target FPS (e.g., 60.0, 59.94, 50.0) - * @param display_hz Display refresh rate in Hz (e.g., 60.0, 72.0) - */ -void FramePacer_init(FramePacer* pacer, double game_fps, double display_hz); - -/** - * Call once per vsync. Returns true if emulation should step. - * - * In direct mode, always returns true. - * In paced mode, uses Bresenham accumulator to decide. 
- * - * @param pacer Pacer state (accumulator will be modified) - * @return true if core.run() should be called, false to repeat last frame - */ -bool FramePacer_step(FramePacer* pacer); - -/** - * Reset accumulator to initial state (display_hz). - * - * Call on game load, state load, or any timing discontinuity. - * Ensures first frame after reset will step. - * - * @param pacer Pacer state to reset - */ -void FramePacer_reset(FramePacer* pacer); - -/** - * Check if pacer is in direct mode. - * - * @param pacer Pacer state - * @return true if direct mode (no pacing needed) - */ -bool FramePacer_isDirectMode(const FramePacer* pacer); - -/** - * Gets display refresh rate for frame pacing. - * - * Calls PLAT_getDisplayHz() which either: - * - Queries SDL_GetCurrentDisplayMode() on SDL2 platforms - * - Returns a hardcoded value for the platform's panel - * - * @return Display Hz (e.g., 60.0, 72.0, 73.0) - */ -double FramePacer_getDisplayHz(void); - -/** - * Record vsync timing after present. - * - * Call this immediately after GFX_present() or SDL_RenderPresent() returns. - * Measures time between vsyncs to determine actual display refresh rate. - * - * After FRAME_PACER_VSYNC_WARMUP samples, the measured Hz becomes stable. - * If measured Hz differs significantly from reported Hz, the pacer - * automatically reinitializes with the measured value. - * - * @param pacer Pacer state to update - */ -void FramePacer_recordVsync(FramePacer* pacer); - -/** - * Get measured display Hz. - * - * Returns the measured refresh rate based on vsync timing. - * Before enough samples are collected, returns 0.0. - * - * @param pacer Pacer state - * @return Measured Hz, or 0.0 if not yet measured - */ -double FramePacer_getMeasuredHz(const FramePacer* pacer); - -/** - * Check if vsync measurement is stable. 
- * - * @param pacer Pacer state - * @return true if enough samples collected for stable measurement - */ -bool FramePacer_isMeasurementStable(const FramePacer* pacer); - -#endif // __FRAME_PACER_H__ diff --git a/workspace/all/player/player.c b/workspace/all/player/player.c index 302880f3..429bd0dd 100644 --- a/workspace/all/player/player.c +++ b/workspace/all/player/player.c @@ -57,7 +57,6 @@ #include "../common/cpu.h" #include "api.h" #include "defines.h" -#include "frame_pacer.h" #include "gl_video.h" #include "launcher_file_utils.h" #include "libretro.h" @@ -82,6 +81,7 @@ #include "player_video_convert.h" #include "render_common.h" #include "scaler.h" +#include "sync_manager.h" #include "utils.h" /////////////////////////////////////// @@ -157,10 +157,11 @@ static CPUState auto_cpu_state; static CPUConfig auto_cpu_config; static uint64_t auto_cpu_last_frame_start = 0; // For measuring core.run() time -// Frame Pacing State -// Decouples emulation from display refresh for non-60Hz displays (e.g., M17 @ 72Hz). -// See frame_pacer.h for algorithm details. -static FramePacer frame_pacer; +// Sync Manager State +// Manages audio/video synchronization mode (audio-clock vs vsync). +// Starts in audio-clock mode (safe), switches to vsync if compatible (<1% Hz mismatch). +// See sync_manager.h for details. +static SyncManager sync_manager; // Background thread for applying CPU changes without blocking main loop static pthread_t auto_cpu_thread; @@ -1269,9 +1270,6 @@ void setOverclock(int i) { } } -// Vsync rate for diagnostics (currently unused, would need measurement to populate) -static float current_vsync_hz = 0; - /** * Updates auto CPU scaling based on frame timing (core.run() execution time). * @@ -1279,15 +1277,23 @@ static float current_vsync_hz = 0; * Uses the 90th percentile of frame execution times to determine CPU utilization, * which directly measures emulation performance independent of audio/display timing. 
* - * Granular Mode Algorithm: - * - Performance scales linearly with frequency - * - Boost: Jump to predicted optimal frequency (no step limit) - * - Reduce: Limited to max_step_down indices to prevent underruns - * - Panic: Boost by panic_step_up on underrun, with cooldown + * Three scaling modes (selected at init based on hardware capabilities): + * - Topology: Multi-cluster CPUs with PerfStates (big.LITTLE) + * - Granular: Single-cluster with fine-grained frequency steps + * - Fallback: Simple 3-level low/medium/high scaling + * + * All modes use the same basic algorithm: + * - Measure utilization as frame_time / frame_budget + * - Boost after sustained high util (>85% for boost_windows) + * - Reduce after sustained low util (<55% for reduce_windows) + * - Panic boost on audio underrun (immediate, with cooldown) * - * Fallback Mode Algorithm (3 levels): - * - Count consecutive high/low util windows - * - Boost after 2 high-util windows (~1s), reduce after 4 low-util windows (~2s) + * Audio Clock mode special handling: + * In Audio Clock sync mode, blocking audio writes make utilization metrics + * unreliable (frame time includes blocking wait). Instead of util-based + * decisions, we use conservative time-based reduction: after 8 stable windows + * (~4s), step down one level. This prevents wasting power while avoiding + * aggressive changes that could cause underruns. 
*/ static void updateAutoCPU(void) { // Skip if not in auto mode or during special states @@ -1432,9 +1438,6 @@ static void updateAutoCPU(void) { util = 200; // Cap at 200% for sanity } - // Get buffer fill for reduce decisions - unsigned buffer_fill = SND_getBufferOccupancy(); - if (auto_cpu_state.use_topology) { // Topology mode: step through PerfStates one at a time // Unlike granular mode, we don't predict - just step conservatively @@ -1444,7 +1447,20 @@ static void updateAutoCPU(void) { auto_cpu_state.panic_cooldown--; } - if (util > auto_cpu_config.util_high) { + // Check if we're in Audio Clock mode (blocking audio makes util unreliable) + bool in_audio_clock = (SyncManager_getMode(&sync_manager) == SYNC_MODE_AUDIO_CLOCK); + + if (in_audio_clock) { + // Audio Clock: time-based reduction (util is unreliable due to blocking audio) + auto_cpu_state.low_util_windows++; + if (auto_cpu_state.low_util_windows >= CPU_AUDIO_CLOCK_REDUCE_WINDOWS && + auto_cpu_state.panic_cooldown == 0 && current_state > 0) { + int new_state = current_state - 1; + auto_cpu_setTargetState(new_state); + auto_cpu_state.low_util_windows = 0; + LOG_debug("Auto CPU: REDUCE state %d→%d (AC mode)\n", current_state, new_state); + } + } else if (util > auto_cpu_config.util_high) { // Need more performance - step up auto_cpu_state.high_util_windows++; auto_cpu_state.low_util_windows = 0; @@ -1467,8 +1483,7 @@ static void updateAutoCPU(void) { // Only reduce if: enough windows, cooldown expired, buffer healthy int reduce_ok = (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows) && - (auto_cpu_state.panic_cooldown == 0) && (current_state > 0) && - (buffer_fill >= auto_cpu_config.min_buffer_for_reduce); + (auto_cpu_state.panic_cooldown == 0) && (current_state > 0); if (reduce_ok) { // Step down by max_step_down (usually 1) @@ -1478,8 +1493,8 @@ static void updateAutoCPU(void) { auto_cpu_setTargetState(new_state); auto_cpu_state.low_util_windows = 0; // No grace period on reduce - if 
we underrun, frequency is too slow - LOG_debug("Auto CPU: REDUCE state %d→%d (util=%u%% buf=%u%%)\n", current_state, - new_state, util, buffer_fill); + LOG_debug("Auto CPU: REDUCE state %d→%d (util=%u%%)\n", current_state, + new_state, util); } } else { // In sweet spot - reset counters @@ -1492,15 +1507,13 @@ static void updateAutoCPU(void) { if (++debug_window_count_topo >= 4) { debug_window_count_topo = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug( - "Auto CPU: fill=%u%% int=%.4f boost=%.2f adj=%.4f util=%u%% state=%d/%d\n", - snap.fill_pct, snap.rate_integral, snap.rate_boost, snap.total_adjust, util, - current_state, max_state); + LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% state=%d/%d\n", + snap.fill_pct, snap.rate_integral, snap.total_adjust, util, current_state, + max_state); } } else if (auto_cpu_state.use_granular) { - // Granular mode: use linear performance scaling to find optimal frequency - // Performance scales linearly with frequency, so: - // new_util = current_util * (current_freq / new_freq) + // Granular mode: step through available frequencies one at a time + // Skips frequencies that have caused repeated underruns (panic-blocked) int current_freq = auto_cpu_state.frequencies[current_idx]; @@ -1509,7 +1522,30 @@ static void updateAutoCPU(void) { auto_cpu_state.panic_cooldown--; } - if (util > auto_cpu_config.util_high) { + // Check if we're in Audio Clock mode (blocking audio makes util unreliable) + bool in_audio_clock = (SyncManager_getMode(&sync_manager) == SYNC_MODE_AUDIO_CLOCK); + + if (in_audio_clock) { + // Audio Clock: time-based reduction (util is unreliable due to blocking audio) + auto_cpu_state.low_util_windows++; + if (auto_cpu_state.low_util_windows >= CPU_AUDIO_CLOCK_REDUCE_WINDOWS && + auto_cpu_state.panic_cooldown == 0 && current_idx > 0) { + int new_idx = current_idx - 1; + // Skip blocked frequencies + while (new_idx >= 0 && + auto_cpu_state.panic_count[new_idx] >= CPU_PANIC_THRESHOLD) { + new_idx--; + 
} + if (new_idx >= 0) { + int new_freq = auto_cpu_state.frequencies[new_idx]; + auto_cpu_setTargetIndex(new_idx); + auto_cpu_state.low_util_windows = 0; + LOG_debug("Auto CPU: REDUCE %d→%d kHz (AC mode)\n", current_freq, new_freq); + } else { + auto_cpu_state.low_util_windows = 0; + } + } + } else if (util > auto_cpu_config.util_high) { // Need more performance - step up auto_cpu_state.high_util_windows++; auto_cpu_state.low_util_windows = 0; @@ -1533,8 +1569,7 @@ static void updateAutoCPU(void) { // Only reduce if: enough windows, cooldown expired, buffer healthy int reduce_ok = (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows) && - (auto_cpu_state.panic_cooldown == 0) && (current_idx > 0) && - (buffer_fill >= auto_cpu_config.min_buffer_for_reduce); + (auto_cpu_state.panic_cooldown == 0) && (current_idx > 0); if (reduce_ok) { // Step down by 1 - simple and predictable @@ -1554,8 +1589,8 @@ static void updateAutoCPU(void) { auto_cpu_setTargetIndex(new_idx); auto_cpu_state.low_util_windows = 0; // No grace period on reduce - if we underrun, frequency is too slow - LOG_debug("Auto CPU: REDUCE %d→%d kHz (util=%u%% buf=%u%%)\n", current_freq, - new_freq, util, buffer_fill); + LOG_debug("Auto CPU: REDUCE %d→%d kHz (util=%u%%)\n", current_freq, + new_freq, util); } } } else { @@ -1569,10 +1604,10 @@ static void updateAutoCPU(void) { if (++debug_window_count >= 4) { debug_window_count = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug("Auto CPU: fill=%u%% int=%.4f boost=%.2f adj=%.4f util=%u%% freq=%dkHz " + LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% freq=%dkHz " "idx=%d/%d\n", - snap.fill_pct, snap.rate_integral, snap.rate_boost, snap.total_adjust, - util, current_freq, current_idx, max_idx); + snap.fill_pct, snap.rate_integral, snap.total_adjust, util, current_freq, + current_idx, max_idx); } } else { // Fallback mode: 3-level scaling (original algorithm) @@ -1582,7 +1617,20 @@ static void updateAutoCPU(void) { 
auto_cpu_state.panic_cooldown--; } - if (util > auto_cpu_config.util_high) { + // Check if we're in Audio Clock mode (blocking audio makes util unreliable) + bool in_audio_clock = (SyncManager_getMode(&sync_manager) == SYNC_MODE_AUDIO_CLOCK); + + if (in_audio_clock) { + // Audio Clock: time-based reduction (util is unreliable due to blocking audio) + auto_cpu_state.low_util_windows++; + if (auto_cpu_state.low_util_windows >= CPU_AUDIO_CLOCK_REDUCE_WINDOWS && + auto_cpu_state.panic_cooldown == 0 && current_level > 0) { + int new_level = current_level - 1; + auto_cpu_setTargetLevel(new_level); + auto_cpu_state.low_util_windows = 0; + LOG_debug("Auto CPU: REDUCE level %d (AC mode)\n", new_level); + } + } else if (util > auto_cpu_config.util_high) { auto_cpu_state.high_util_windows++; auto_cpu_state.low_util_windows = 0; } else if (util < auto_cpu_config.util_low) { @@ -1598,9 +1646,9 @@ static void updateAutoCPU(void) { if (++debug_window_count_fallback >= 4) { debug_window_count_fallback = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug("Auto CPU: fill=%u%% int=%.4f boost=%.2f adj=%.4f util=%u%% level=%d\n", - snap.fill_pct, snap.rate_integral, snap.rate_boost, snap.total_adjust, - util, current_level); + LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% level=%d\n", + snap.fill_pct, snap.rate_integral, snap.total_adjust, util, + current_level); } // Boost if sustained high utilization @@ -1616,14 +1664,12 @@ static void updateAutoCPU(void) { // Reduce if sustained low utilization, buffer healthy (respects panic cooldown) if (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows && - auto_cpu_state.panic_cooldown == 0 && current_level > 0 && - buffer_fill >= auto_cpu_config.min_buffer_for_reduce) { + auto_cpu_state.panic_cooldown == 0 && current_level > 0) { int new_level = current_level - 1; auto_cpu_setTargetLevel(new_level); auto_cpu_state.low_util_windows = 0; // No grace period on reduce - if we underrun, frequency is too slow - 
LOG_debug("Auto CPU: REDUCE level %d (util=%u%% buf=%u%%)\n", new_level, util, - buffer_fill); + LOG_debug("Auto CPU: REDUCE level %d (util=%u%%)\n", new_level, util); } } @@ -3968,16 +4014,16 @@ static void generateDebugHUDText(DebugHUDText* text, int src_w, int src_h, int s } // Top-left: FPS, sync mode, and rate control adjustment - // Modes: AC = audio clock, VS = vsync direct (fps≈hz), FP = frame paced (fps≠hz) + // Modes: AC = audio clock, VS = vsync with rate control // Rate adjustment shows audio stretch: >1.0 = running fast, <1.0 = running slow float rate_pct = (rate_adj_display - 1.0f) * 100.0f; -#ifdef SYNC_MODE_AUDIOCLOCK - (void)snprintf(text->top_left, sizeof(text->top_left), "%.1f AC", fps_double); -#else - const char* sync_mode = FramePacer_isDirectMode(&frame_pacer) ? "VS" : "FP"; - (void)snprintf(text->top_left, sizeof(text->top_left), "%.1f %s %+.1f%%", fps_double, sync_mode, - rate_pct); -#endif + SyncMode current_mode = SyncManager_getMode(&sync_manager); + if (current_mode == SYNC_MODE_AUDIO_CLOCK) { + (void)snprintf(text->top_left, sizeof(text->top_left), "%.1f AC", fps_double); + } else { + (void)snprintf(text->top_left, sizeof(text->top_left), "%.1f VS %+.1f%%", fps_double, + rate_pct); + } // Top-right: Source resolution (void)snprintf(text->top_right, sizeof(text->top_right), "%ix%i", src_w, src_h); @@ -5922,7 +5968,7 @@ static void Menu_saveState(void) { static void Menu_loadState(void) { PlayerMenu_loadState(PlayerContext_get()); - FramePacer_reset(&frame_pacer); // Reset accumulator after state load + // Note: Sync manager doesn't need reset after state load (no persistent accumulator) } static void Menu_scale(SDL_Surface* src, SDL_Surface* dst) { @@ -6195,12 +6241,118 @@ static void showFatalError(void) { } } -// Main loop implementation selected at compile-time based on sync mode -#ifdef SYNC_MODE_AUDIOCLOCK -#include "player_loop_audioclock.inc" -#else -#include "player_loop_vsync.inc" -#endif +// Sync mode callbacks for audio 
system +static bool sync_shouldUseRateControl(void) { + return SyncManager_shouldUseRateControl(&sync_manager); +} + +static bool sync_shouldBlockAudio(void) { + return SyncManager_shouldBlockAudio(&sync_manager); +} + +/** + * Unified main loop with runtime-adaptive sync mode. + */ +static void run_main_loop(void) { + double display_hz = PLAT_getDisplayHz(); + SyncManager_init(&sync_manager, core.fps, display_hz); + SND_setSyncCallbacks(sync_shouldUseRateControl, sync_shouldBlockAudio); + + LOG_info("Starting main loop: %.2ffps @ %.1fHz (mode: %s)\n", core.fps, display_hz, + SyncManager_getModeName(SyncManager_getMode(&sync_manager))); + + PWR_warn(1); + PWR_disableAutosleep(); + + GFX_clearAll(); + GFX_present(NULL); + + LOG_debug("Special_init"); + Special_init(); + + LOG_debug("Entering main loop"); + sec_start = SDL_GetTicks(); + + while (!quit) { + GFX_startFrame(); + input_polled_this_frame = 0; + + int runs_this_vsync = fast_forward ? (max_ff_speed + 2) : 1; + + for (int run = 0; run < runs_this_vsync; run++) { + bool should_run_core = + !show_menu && + ((run == 0) ? 
(fast_forward || SyncManager_shouldRunCore(&sync_manager)) + : fast_forward); + + if (should_run_core) { + if (video_state.frame_time_cb) { + retro_usec_t frame_now = getMicroseconds(); + retro_usec_t delta; + if (fast_forward) { + delta = video_state.frame_time_ref; + } else { + if (video_state.frame_time_last == 0) { + delta = video_state.frame_time_ref; + } else { + delta = frame_now - video_state.frame_time_last; + } + video_state.frame_time_last = frame_now; + } + video_state.frame_time_cb(delta); + } + + if (core.audio_buffer_status) { + if (fast_forward) { + core.audio_buffer_status(false, 0, false); + } else { + unsigned occupancy = SND_getBufferOccupancy(); + core.audio_buffer_status(true, occupancy, occupancy < 25); + } + } + + if (!fast_forward) { + SND_newFrame(); + } + + uint64_t frame_start = getMicroseconds(); + GLVideo_bindFBO(); + core.run(); + uint64_t frame_time = getMicroseconds() - frame_start; + + if (overclock == 3 && !fast_forward && !show_menu) { + auto_cpu_state + .frame_times[auto_cpu_state.frame_time_index % CPU_FRAME_BUFFER_SIZE] = + frame_time; + auto_cpu_state.frame_time_index++; + } + } + } + + if (!GLVideo_isEnabled()) { + GFX_present(&renderer); + frame_ready_for_flip = 0; + } + + SyncManager_recordVsync(&sync_manager); + + limitFF(); + trackFPS(); + updateAutoCPU(); + + input_poll_callback(); + + if (show_menu) { + Menu_loop(); + + if (GLVideo_isEnabled()) { + GLVideo_bindFBO(); + } + } + + hdmimon(); + } +} int main(int argc, char* argv[]) { // Initialize logging early (reads LOG_FILE and LOG_SYNC from environment) diff --git a/workspace/all/player/player_loop_audioclock.inc b/workspace/all/player/player_loop_audioclock.inc deleted file mode 100644 index c2eafb2b..00000000 --- a/workspace/all/player/player_loop_audioclock.inc +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Audio-driven main loop with blocking audio writes. - * - * This loop is included by player.c when SYNC_MODE_AUDIOCLOCK is defined. 
- * - * Timing approach: - * - Audio hardware clock is the timing source - * - Core runs every loop iteration (no frame pacing) - * - SND_batchSamples() blocks when buffer is full (up to 10ms) - * - Audio callback drains buffer at hardware audio rate - * - Natural backpressure from audio blocking rate-limits emulation - * - * For devices with unstable vsync (e.g., M17). - */ - -static void run_main_loop(void) { - LOG_info("Using audioclock sync mode (audio-driven timing)\n"); - - PWR_warn(1); - PWR_disableAutosleep(); - - LOG_debug("Special_init"); - Special_init(); // after config - - LOG_debug("Entering main loop (audioclock mode)"); - sec_start = SDL_GetTicks(); - uint32_t last_ff_flip = 0; - while (!quit) { - GFX_startFrame(); - input_polled_this_frame = 0; // Reset at start of frame - - // Always run core - audio blocking in SND_batchSamples() handles timing. - // When audio buffer is full, the core will block (up to 10ms) waiting for - // the audio callback to consume samples. This naturally rate-limits emulation - // to match the audio hardware clock. - // - // During fast-forward: audio is skipped (no blocking), limitFF() controls speed. 
- - // Call frame time callback if registered (per libretro spec) - if (video_state.frame_time_cb) { - retro_usec_t frame_now = getMicroseconds(); - retro_usec_t delta; - if (fast_forward) { - // Use reference time during FF, don't update frame_time_last - // to avoid timing discontinuity when FF ends - delta = video_state.frame_time_ref; - } else { - if (video_state.frame_time_last == 0) { - delta = video_state.frame_time_ref; - } else { - delta = frame_now - video_state.frame_time_last; - } - video_state.frame_time_last = frame_now; - } - video_state.frame_time_cb(delta); - } - - // Report audio buffer status to core for frameskip decisions - // During FF, report audio inactive (no output during FF) - if (core.audio_buffer_status) { - if (fast_forward) { - core.audio_buffer_status(false, 0, false); - } else { - unsigned occupancy = SND_getBufferOccupancy(); - core.audio_buffer_status(true, occupancy, occupancy < 25); - } - } - - // Note: SND_newFrame() is not called here because audioclock mode uses - // blocking audio writes for timing, not the PI rate controller. 
- - // Measure frame execution time for auto CPU scaling - // Don't run core while menu is active - uint64_t frame_start = getMicroseconds(); - if (!show_menu) { - GLVideo_bindFBO(); - core.run(); - } - uint64_t frame_time = getMicroseconds() - frame_start; - - // Store frame time for auto CPU scaling analysis - if (overclock == 3 && !fast_forward && !show_menu) { - auto_cpu_state - .frame_times[auto_cpu_state.frame_time_index % CPU_FRAME_BUFFER_SIZE] = - frame_time; - auto_cpu_state.frame_time_index++; - } - - // Present frame - // During FF: throttle vsync to avoid blocking (limitFF controls speed instead) - // Normal: present every frame (may or may not wait for vsync depending on platform) - // Skip for HW rendering - frame already presented via GLVideo_present() - if (!GLVideo_isEnabled()) { - if (fast_forward) { - uint32_t now = SDL_GetTicks(); - if (now - last_ff_flip >= 30) { // Visual update every 30ms - GFX_present(&renderer); - frame_ready_for_flip = 0; - last_ff_flip = now; - } - } else { - GFX_present(&renderer); - frame_ready_for_flip = 0; - } - } - - limitFF(); - trackFPS(); - updateAutoCPU(); - - // Fallback input poll - ensures MENU button and shortcuts work even when - // core doesn't call input_poll_callback (e.g., showing error screens). - // Guard inside callback prevents double execution. - input_poll_callback(); - - if (show_menu) { - LOG_debug("Main loop: show_menu=1, entering Menu_loop"); - Menu_loop(); - LOG_debug("Main loop: returned from Menu_loop"); - } - - hdmimon(); - } -} diff --git a/workspace/all/player/player_loop_vsync.inc b/workspace/all/player/player_loop_vsync.inc deleted file mode 100644 index 07bfd253..00000000 --- a/workspace/all/player/player_loop_vsync.inc +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Vsync-driven main loop with frame pacing and audio rate control. - * - * This loop is included by player.c when SYNC_MODE_AUDIOCLOCK is not defined. 
- * - * Timing approach: - * - Display vsync is the timing source (GFX_present blocks until vsync) - * - Frame pacer uses Bresenham accumulator to decide when to step emulation - * - Audio rate control maintains buffer at 50% using dual-timescale PI controller - * - Works with any display Hz / game fps mismatch - * - * For devices with stable vsync. - */ - -static void run_main_loop(void) { - // Initialize frame pacer with display Hz - double display_hz = FramePacer_getDisplayHz(); - FramePacer_init(&frame_pacer, core.fps, display_hz); - LOG_info("Frame pacer: %.2ffps @ %.2fHz (%s) [Q16: %d/%d]\n", core.fps, display_hz, - FramePacer_isDirectMode(&frame_pacer) ? "direct" : "vsync-driven Bresenham", - frame_pacer.game_fps_q16, frame_pacer.display_hz_q16); - - // Keep audio rate control ENABLED with frame pacing. - // The integral term is slow enough (300-frame average) that it won't fight - // the per-frame step/repeat pattern. It learns the average vsync rate and - // compensates for hardware drift (e.g., 58.7Hz vs 60Hz nominal). - // Without rate control, vsync variance causes continuous audio underruns. - - PWR_warn(1); - PWR_disableAutosleep(); - - // force a vsync immediately before loop - // for better frame pacing? - GFX_clearAll(); - GFX_present(NULL); - - LOG_debug("Special_init"); - Special_init(); // after config - - LOG_debug("Entering main loop (vsync mode)"); - sec_start = SDL_GetTicks(); - while (!quit) { - GFX_startFrame(); - input_polled_this_frame = 0; // Reset at start of frame - - // Frame pacing: Bresenham accumulator decides whether to run core this vsync. - // Vsync (from GFX_present) is the timing source - each loop iteration = one display refresh. - // Core runs at its natural rate (e.g., 60fps), display refreshes at panel Hz (e.g., 72Hz). - // When display Hz > game fps: some frames are repeated (re-presented). - // - // During fast-forward: run core multiple times per vsync to achieve speedup. 
- // max_ff_speed: 0=2x, 1=3x, 2=4x, 3=5x - int runs_this_vsync = fast_forward ? (max_ff_speed + 2) : 1; - - for (int run = 0; run < runs_this_vsync; run++) { - // First run uses frame pacer, subsequent FF runs always execute - // Don't run core while menu is active - bool should_run_core = !show_menu && ((run == 0) ? (fast_forward || FramePacer_step(&frame_pacer)) - : fast_forward); - - if (should_run_core) { - // Call frame time callback if registered (per libretro spec) - if (video_state.frame_time_cb) { - retro_usec_t frame_now = getMicroseconds(); - retro_usec_t delta; - if (fast_forward) { - // Use reference time during FF, don't update frame_time_last - // to avoid timing discontinuity when FF ends - delta = video_state.frame_time_ref; - } else { - if (video_state.frame_time_last == 0) { - delta = video_state.frame_time_ref; - } else { - delta = frame_now - video_state.frame_time_last; - } - video_state.frame_time_last = frame_now; - } - video_state.frame_time_cb(delta); - } - - // Report audio buffer status to core for frameskip decisions - // During FF, report audio inactive (no output during FF) - if (core.audio_buffer_status) { - if (fast_forward) { - core.audio_buffer_status(false, 0, false); - } else { - unsigned occupancy = SND_getBufferOccupancy(); - core.audio_buffer_status(true, occupancy, occupancy < 25); - } - } - - // Update audio rate control integral (once per frame) - // Skip during FF to prevent integral windup (audio is skipped) - if (!fast_forward) { - SND_newFrame(); - } - - // Measure frame execution time for auto CPU scaling - uint64_t frame_start = getMicroseconds(); - GLVideo_bindFBO(); - core.run(); - uint64_t frame_time = getMicroseconds() - frame_start; - - // Store frame time for auto CPU scaling analysis - if (overclock == 3 && !fast_forward && !show_menu) { - auto_cpu_state.frame_times[auto_cpu_state.frame_time_index % - CPU_FRAME_BUFFER_SIZE] = frame_time; - auto_cpu_state.frame_time_index++; - } - } - } - - // Always present 
for vsync timing - when !should_run_core, re-presents previous frame - // Skip for HW rendering - frame already presented via GLVideo_present() - if (!GLVideo_isEnabled()) { - GFX_present(&renderer); - frame_ready_for_flip = 0; - } - - // Record vsync timing for Hz measurement (must be right after present) - FramePacer_recordVsync(&frame_pacer); - - // Track performance (only once per vsync, not per FF run) - limitFF(); - trackFPS(); - updateAutoCPU(); - - // Fallback input poll - ensures MENU button and shortcuts work even when - // core doesn't call input_poll_callback (e.g., showing error screens). - // Guard inside callback prevents double execution. - input_poll_callback(); - - if (show_menu) { - Menu_loop(); - - // Rebind FBO for HW cores after menu (menu uses FBO 0) - if (GLVideo_isEnabled()) { - GLVideo_bindFBO(); - } - } - - hdmimon(); - } -} diff --git a/workspace/all/player/sync_manager.c b/workspace/all/player/sync_manager.c new file mode 100644 index 00000000..589cc873 --- /dev/null +++ b/workspace/all/player/sync_manager.c @@ -0,0 +1,171 @@ +/** + * sync_manager.c - Audio/video synchronization mode management implementation + */ + +#include "sync_manager.h" +#include "log.h" +#include "utils.h" // getMicroseconds +#include + +// Number of vsync samples before measurement is considered stable +// 120 samples (~2s at 60Hz): Long enough for EMA to converge and filter +// initial jitter, short enough that users don't notice startup delay +#define SYNC_WARMUP_SAMPLES 120 + +// Check for drift every 300 frames (~5 seconds at 60fps) +// Balance between responsiveness to actual drift and avoiding false positives +// from temporary frame drops or CPU frequency transitions +#define SYNC_DRIFT_CHECK_INTERVAL 300 + +// Tolerance for mode selection (1% mismatch) +// Based on RetroArch research (Arntzen, 2012): +// - Audio pitch changes ≤0.5% are inaudible to most listeners +// - Beyond ~1% mismatch, frame pacing or audio-clock should be used +// Using 1% as a 
conservative threshold for mode switching +#define SYNC_MODE_TOLERANCE 0.01 + +// Exponential moving average smoothing factor +// α=0.01 gives ~100-sample half-life: filters frame-drop spikes while +// still tracking genuine Hz drift within ~2 seconds. Lower would be more +// stable but slower to detect drift; higher would be noisier. +#define SYNC_EMA_ALPHA 0.01 + +// Outlier rejection bounds (50-120 Hz) +#define SYNC_MIN_HZ 50.0 +#define SYNC_MAX_HZ 120.0 + +void SyncManager_init(SyncManager* manager, double game_fps, double display_hz) { + // Start in AUDIO_CLOCK mode (safe default) + manager->mode = SYNC_MODE_AUDIO_CLOCK; + manager->game_fps = game_fps; + manager->display_hz = (display_hz > 0.0) ? display_hz : 60.0; + manager->measured_hz = 0.0; + manager->measurement_samples = 0; + manager->measurement_stable = false; + manager->last_drift_check = 0; + manager->last_vsync_time = 0; + + LOG_info("Sync: Starting in %s mode (%.2ffps @ %.1fHz reported)", + SyncManager_getModeName(manager->mode), manager->game_fps, manager->display_hz); +} + +void SyncManager_recordVsync(SyncManager* manager) { + uint64_t now = getMicroseconds(); + + // First call - just record timestamp + if (manager->last_vsync_time == 0) { + manager->last_vsync_time = now; + return; + } + + // Calculate interval and Hz + double interval = (double)(now - manager->last_vsync_time) / 1000000.0; + + // Protect against division by zero (identical timestamps) + if (interval <= 0.0) { + manager->last_vsync_time = now; + return; + } + + double hz = 1.0 / interval; + + // Reject outliers (frame drops, fast presents) + if (hz < SYNC_MIN_HZ || hz > SYNC_MAX_HZ) { + manager->last_vsync_time = now; + return; + } + + // Update measured Hz using exponential moving average + if (manager->measured_hz == 0.0) { + manager->measured_hz = hz; // First valid sample + } else { + manager->measured_hz = manager->measured_hz * (1.0 - SYNC_EMA_ALPHA) + hz * SYNC_EMA_ALPHA; + } + + manager->measurement_samples++; + 
manager->last_vsync_time = now; + + // Check if measurement just became stable + if (!manager->measurement_stable && manager->measurement_samples >= SYNC_WARMUP_SAMPLES) { + manager->measurement_stable = true; + + LOG_info( + "Sync: Measurement stable after %d samples: %.3fHz (reported: %.1fHz, diff: %.2f%%)", + manager->measurement_samples, manager->measured_hz, manager->display_hz, + fabs(manager->measured_hz - manager->display_hz) / manager->display_hz * 100.0); + + // Try switching to vsync mode if compatible + double mismatch = fabs(manager->measured_hz - manager->game_fps) / manager->game_fps; + if (mismatch < SYNC_MODE_TOLERANCE) { + manager->mode = SYNC_MODE_VSYNC; + LOG_info("Sync: Switching to %s mode (%.3fHz within 1%% of %.2ffps)", + SyncManager_getModeName(manager->mode), manager->measured_hz, + manager->game_fps); + } else { + LOG_info("Sync: Staying in %s mode (%.3fHz differs by %.2f%% from %.2ffps)", + SyncManager_getModeName(manager->mode), manager->measured_hz, mismatch * 100.0, + manager->game_fps); + } + } + + // Monitor for drift in vsync mode (check every 5 seconds) + if (manager->measurement_stable && manager->mode == SYNC_MODE_VSYNC) { + manager->last_drift_check++; + + if (manager->last_drift_check >= SYNC_DRIFT_CHECK_INTERVAL) { + manager->last_drift_check = 0; + + // Check if measured Hz has drifted beyond tolerance + double mismatch = fabs(manager->measured_hz - manager->game_fps) / manager->game_fps; + if (mismatch >= SYNC_MODE_TOLERANCE) { + LOG_info("Sync: Drift detected! 
%.3fHz now differs by %.2f%% from %.2ffps", + manager->measured_hz, mismatch * 100.0, manager->game_fps); + LOG_info("Sync: Switching to %s mode (fallback for unstable display)", + SyncManager_getModeName(SYNC_MODE_AUDIO_CLOCK)); + manager->mode = SYNC_MODE_AUDIO_CLOCK; + } + } + } +} + +bool SyncManager_shouldRunCore(const SyncManager* manager) { + // Always run core every frame in both modes + // AUDIO_CLOCK: blocking audio provides timing + // VSYNC: vsync provides timing + return true; +} + +SyncMode SyncManager_getMode(const SyncManager* manager) { + return manager->mode; +} + +const char* SyncManager_getModeName(SyncMode mode) { + switch (mode) { + case SYNC_MODE_AUDIO_CLOCK: + return "Audio Clock"; + case SYNC_MODE_VSYNC: + return "Vsync"; + default: + return "Unknown"; + } +} + +bool SyncManager_shouldUseRateControl(const SyncManager* manager) { + // Only use rate control in vsync mode + // Audio clock mode uses blocking writes for timing + return manager->mode == SYNC_MODE_VSYNC; +} + +bool SyncManager_shouldBlockAudio(const SyncManager* manager) { + // Only block audio in audio clock mode + // Vsync mode uses non-blocking writes with rate control + return manager->mode == SYNC_MODE_AUDIO_CLOCK; +} + +double SyncManager_getMeasuredHz(const SyncManager* manager) { + return manager->measurement_stable ? manager->measured_hz : 0.0; +} + +bool SyncManager_isMeasurementStable(const SyncManager* manager) { + return manager->measurement_stable; +} diff --git a/workspace/all/player/sync_manager.h b/workspace/all/player/sync_manager.h new file mode 100644 index 00000000..862bb2a8 --- /dev/null +++ b/workspace/all/player/sync_manager.h @@ -0,0 +1,176 @@ +/** + * sync_manager.h - Audio/video synchronization mode management + * + * Manages runtime switching between audio-clock and vsync timing modes. 
+ * + * Strategy: + * - Start in AUDIO_CLOCK (safe, works on all hardware) + * - Measure actual display refresh rate via vsync timing + * - Switch to VSYNC if compatible (< 1% mismatch from game fps) + * - Monitor for drift, fall back to AUDIO_CLOCK if needed + * + * This eliminates the need for: + * - Frame pacing (Bresenham accumulator) - was problematic at >5% mismatch + * - Compile-time SYNC_MODE selection - now runtime adaptive + * - Aggressive audio rate control - only light adjustment in vsync mode + */ + +#ifndef __SYNC_MANAGER_H__ +#define __SYNC_MANAGER_H__ + +#include +#include + +/** + * Synchronization mode determines timing source. + */ +typedef enum { + /** + * Audio-clock mode: Audio hardware drives timing. + * + * Core runs every frame, audio writes block when buffer full. + * Natural backpressure from blocking maintains timing. + * No audio rate control needed. + * + * Benefits: + * - Works with any display refresh rate (no fps/Hz matching needed) + * - Frame duplication instead of frame skipping (less visible) + * - Audio buffer naturally stable (no rate control oscillation) + * + * Used when: + * - Initial startup (safe default) + * - Display Hz mismatch > 1% from game fps + * - Display Hz unstable (drift detected) + */ + SYNC_MODE_AUDIO_CLOCK, + + /** + * Vsync mode: Display vsync drives timing. + * + * GFX_present() blocks until vsync, providing frame timing. + * Core runs every frame (no pacing), light audio rate control + * adjusts pitch ±0.5% to maintain buffer at 50%. + * + * Benefits: + * - Minimal input latency (1 frame) + * - Perfect frame pacing when fps ≈ Hz + * - No frame duplication artifacts + * + * Used when: + * - Display Hz within 1% of game fps + * - Display Hz is stable (no drift) + */ + SYNC_MODE_VSYNC +} SyncMode; + +/** + * Sync manager state. 
+ */ +typedef struct { + SyncMode mode; // Current sync mode + double game_fps; // Game target fps (e.g., 60.0, 59.94) + double display_hz; // Reported display Hz from SDL + double measured_hz; // Actual measured Hz from vsync timing + int measurement_samples; // Number of vsync measurements collected + bool measurement_stable; // True after enough samples collected + uint32_t last_drift_check; // Frames since last drift check (resets at interval) + uint64_t last_vsync_time; // Microsecond timestamp of last vsync +} SyncManager; + +/** + * Initialize sync manager. + * + * Starts in AUDIO_CLOCK mode (safe default). + * Begins vsync measurement in background. + * + * @param manager Manager state to initialize + * @param game_fps Game target fps (e.g., 60.0) + * @param display_hz Display refresh rate from SDL (e.g., 60.0) + */ +void SyncManager_init(SyncManager* manager, double game_fps, double display_hz); + +/** + * Record vsync timing and update sync mode if needed. + * + * Call this immediately after GFX_present() returns. + * Measures actual display refresh rate and switches modes when appropriate. + * + * Mode transitions: + * - AUDIO_CLOCK → VSYNC: After 120 samples if mismatch < 1% + * - VSYNC → AUDIO_CLOCK: If drift > 1% detected + * + * @param manager Manager state to update + */ +void SyncManager_recordVsync(SyncManager* manager); + +/** + * Check if core should run this frame. + * + * AUDIO_CLOCK: Always returns true (core runs every frame) + * VSYNC: Always returns true (core runs every frame, no pacing) + * + * This exists for API consistency and future extensibility. + * + * @param manager Manager state + * @return true if core.run() should be called + */ +bool SyncManager_shouldRunCore(const SyncManager* manager); + +/** + * Get current sync mode. + * + * @param manager Manager state + * @return Current sync mode + */ +SyncMode SyncManager_getMode(const SyncManager* manager); + +/** + * Get mode name for logging/display. 
+ * + * @param mode Sync mode + * @return Human-readable mode name ("Audio Clock" or "Vsync") + */ +const char* SyncManager_getModeName(SyncMode mode); + +/** + * Check if audio rate control should be active. + * + * AUDIO_CLOCK: No rate control (blocking writes handle timing) + * VSYNC: Yes (light rate control for ±0.5% adjustment) + * + * @param manager Manager state + * @return true if audio rate control should run + */ +bool SyncManager_shouldUseRateControl(const SyncManager* manager); + +/** + * Check if audio writes should block. + * + * AUDIO_CLOCK: Yes (blocking provides timing backpressure) + * VSYNC: No (vsync provides timing, audio is just output) + * + * @param manager Manager state + * @return true if SND_batchSamples should block when buffer full + */ +bool SyncManager_shouldBlockAudio(const SyncManager* manager); + +/** + * Get measured display Hz. + * + * Returns actual measured Hz after enough samples collected. + * Before measurement stable, returns 0.0. + * + * @param manager Manager state + * @return Measured Hz, or 0.0 if not yet measured + */ +double SyncManager_getMeasuredHz(const SyncManager* manager); + +/** + * Check if measurement is stable. + * + * @param manager Manager state + * @return true if enough samples collected for reliable measurement + */ +bool SyncManager_isMeasurementStable(const SyncManager* manager); + +#endif // __SYNC_MANAGER_H__ From 8e9011981c286b9546c92ab70c255ba033e50a63 Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Wed, 7 Jan 2026 20:53:51 -0800 Subject: [PATCH 10/11] Simplify audio sync to pure proportional control with true blocking. 
Audio rate control: - Replace dual-timescale PI controller with pure P control (Arntzen algorithm) - Remove integral term, error smoothing, and per-frame SND_newFrame() updates - Reduce d parameter from 1.2% to 0.8% (larger buffer provides more headroom) - Enable rate control in both modes as buffer health mechanism Audio blocking: - Replace SDL_Delay polling with true SDL_CondWait blocking in audio-clock mode - Add SDL mutex/cond for thread-safe audio writes - Block at 90% buffer full, wake on callback drain signal - Pre-fill buffer to 50% at startup to prevent initial underruns Audio buffer: - Increase from 4096 to 6400 samples (~133ms, matches RetroArch 128ms default) - Provides headroom for CPU frequency transitions and timing variance sync_manager: - Replace EMA smoothing with circular buffer and stddev-based convergence - Measurement stable when stddev/mean < 1% (typically ~60 samples) - Timeout after 1800 samples (~30s) if never converges - Add 512-sample circular buffer for rolling statistics Vsync control: - Add GLVideo_setVsync() to enable/disable vsync at runtime - Disable vsync in audio-clock mode (blocking is sole timing source) - Enable vsync in vsync mode (display-driven timing) - Update vsync setting on mode transitions CPU scaling (all 3 modes): - Add panic counting for topology and fallback modes (was granular-only) - Add skip-blocked-states logic to all reduction paths - Use time-based probing in AC mode with buffer-guided timing - Fix stability decay to use mode-appropriate indices --- docs/audio-rate-control.md | 105 ++------ docs/auto-cpu-scaling.md | 97 ++++--- docs/changes.md | 18 +- docs/libretro-compliance.md | 16 +- tests/unit/all/player/test_sync_manager.c | 60 +++-- workspace/all/common/api.c | 301 ++++++++++++---------- workspace/all/common/api.h | 7 +- workspace/all/common/cpu.h | 6 +- workspace/all/common/defines.h | 12 +- workspace/all/common/gl_video.c | 14 + workspace/all/common/gl_video.h | 17 ++ workspace/all/player/player.c | 
217 ++++++++++++---- workspace/all/player/sync_manager.c | 184 ++++++++----- workspace/all/player/sync_manager.h | 32 ++- 14 files changed, 663 insertions(+), 423 deletions(-) diff --git a/docs/audio-rate-control.md b/docs/audio-rate-control.md index 36dbfdc2..73f31669 100644 --- a/docs/audio-rate-control.md +++ b/docs/audio-rate-control.md @@ -19,10 +19,10 @@ LessUI uses a runtime-adaptive approach that measures the actual display refresh ### Two Sync Modes -| Mode | Timing Source | Audio Handling | When Used | -| --------------- | --------------------- | ---------------------------------- | -------------------------------- | -| **Audio Clock** | Blocking audio writes | Fixed ratio (no rate control) | Startup default, Hz mismatch >1% | -| **Vsync** | Display vsync | PI rate control (±0.5% adjustment) | Hz mismatch <1% from game fps | +| Mode | Timing Source | Audio Handling | When Used | +| --------------- | --------------------- | --------------------------------- | -------------------------------- | +| **Audio Clock** | Blocking audio writes | Fixed ratio (no rate control) | Startup default, Hz mismatch >1% | +| **Vsync** | Display vsync | P rate control (±1.2% max adjust) | Hz mismatch <1% from game fps | ### Mode Selection Algorithm @@ -48,20 +48,20 @@ When display Hz differs significantly from game fps (>1%), rate control cannot c - Works with any display refresh rate - Audio buffer stays naturally stable -- No PI controller oscillation or windup +- No controller oscillation or windup ## Vsync Mode (Rate Control Active) When display Hz closely matches game fps (<1%), vsync provides timing and rate control keeps the audio buffer stable. 
-### Arntzen's Core Formula +### Arntzen's Proportional Control -The paper's pure proportional control adjusts resampling ratio based on buffer fill: +The paper's proportional control adjusts resampling ratio based on buffer fill: -``` -error = 1 - 2×fill -adjustment = error × d -ratio = 1 - adjustment +```c +error = 1 - 2 * fill; // +1 when empty, 0 at half, -1 when full +adjustment = error * d; // Bounded by ±d +ratio = 1 - adjustment; // Resampling ratio ``` **Behavior:** @@ -72,37 +72,21 @@ ratio = 1 - adjustment The paper proves this converges exponentially to a stable equilibrium. -### Our Extension: Dual-Timescale PI Controller - -Pure proportional control works when the host display/audio clocks match the emulated system. On cheap handheld hardware, persistent clock mismatches cause the buffer to settle away from 50%. - -We extend Arntzen with an integral term on a **separate, slower timescale**: - -```c -// Fast timescale (proportional): immediate response to buffer jitter -float error = 1.0f - 2.0f * fill; -float p_term = error * d; +### Why Pure P Works -// Slow timescale (integral): learns persistent clock offset over ~5 seconds -error_avg = α * error + (1-α) * error_avg; // Smooth error first -integral += error_avg * ki; // Then integrate -integral = clamp(integral, -0.02, +0.02); // Limit to ±2% +Our 1% Hz tolerance for vsync mode ensures we're within the paper's "reasonably close" bounds: -// Combined adjustment -float adjustment = p_term + integral; -``` +- **Arntzen tested with:** 0.36% Hz mismatch, d=0.5% → 1.4x headroom +- **Our parameters:** up to 1% Hz mismatch, d=0.8% → 1.25x headroom better than Arntzen's ratio -**Key insight**: Original PI failed because both terms operated on the same timescale, causing them to fight. By smoothing error before integrating (~5 seconds), the integral only sees persistent trends, not per-frame noise. +The 1% gate ensures devices in vsync mode have mismatch bounded within what proportional control can handle. 
Devices outside this range fall back to audio-clock mode where rate control isn't needed. ### Parameters -| Parameter | Value | Purpose | -| ---------- | -------- | ------------------------------------------------------ | -| **d** | 1.0% | Proportional gain. Handles frame-to-frame jitter. | -| **ki** | 0.00005 | Integral gain. Learns persistent clock offset. | -| **α** | 0.003 | Error smoothing (~333 frames / 5.5 seconds at 60fps). | -| **clamp** | ±2% | Max integral correction. Handles hardware clock drift. | -| **buffer** | 5 frames | ~83ms latency. Headroom for timing variance. | +| Parameter | Value | Purpose | +| ---------- | -------- | ----------------------------------------------------------- | +| **d** | 0.8% | Proportional gain. Handles frame-to-frame jitter. | +| **buffer** | 8 frames | ~133ms latency. Matches RetroArch handheld default (128ms). | ## Implementation Details @@ -124,35 +108,6 @@ bool should_use_rate_control = !should_block && snd.should_use_rate_control(); This decouples the audio system from sync mode decisions. -### Per-Frame Integral Update - -The integral must update **once per frame**, not once per audio batch. Some cores (e.g., 64-bit snes9x) use per-sample audio callbacks, calling `SND_batchSamples()` ~535 times per frame. Without this fix, effective ki = 535× intended, causing wild oscillation. 
- -```c -// Called once per frame from main loop, before core.run() -void SND_newFrame(void) { - // Skip in audio-clock mode (no rate control) - if (!snd.should_use_rate_control || !snd.should_use_rate_control()) - return; - - SDL_LockAudio(); - - float fill = SND_getBufferFillLevel(); - float error = 1.0f - 2.0f * fill; - - // Update smoothed error and integral (once per frame) - error_avg = α * error + (1-α) * error_avg; - integral += error_avg * ki; - integral = clamp(integral, -0.02, +0.02); - - SDL_UnlockAudio(); -} -``` - -### Thread Safety - -Rate control state is shared between the main thread (integral updates) and audio thread (buffer reads). All shared state access requires `SDL_LockAudio()` to prevent torn reads on 64-bit ARM where float operations aren't atomic. - ### Sample Rate Policy Platforms must respect the core's native sample rate: @@ -165,28 +120,12 @@ int PLAT_pickSampleRate(int requested, int max) { Forcing a different rate (e.g., always 48kHz when core wants 32kHz) causes unnecessary resampling and wider buffer swings. 
-## Tuning Results - -Tested across three platforms with different timing characteristics: - -| Device | Fill | Variance | Integral | Underruns | Notes | -| ---------- | ---- | -------- | -------- | --------- | ---------------------------- | -| rg35xxplus | 59% | ±8% | +0.15% | 0 | Rock solid | -| tg5040 | 61% | ±16% | -0.71% | 0 | Integral learns clock offset | -| miyoomini | 64% | ±14% | +0.42% | 0 | Fixed by sample rate policy | - -**Key findings:** - -- d=0.010 (1.0%) is optimal for handheld timing variance (paper's 0.2-0.5% is for desktop) -- Integral converges in ~15-20 seconds to steady-state offset -- Each device has different clock characteristics that the integral learns - ## Code References - Sync manager: `workspace/all/player/sync_manager.c` (mode selection, Hz measurement) -- PI controller: `workspace/all/common/api.c` (`SND_calculateRateAdjust`, `SND_newFrame`) +- Rate control: `workspace/all/common/api.c` (`SND_calculateRateAdjust`) - Sync callbacks: `workspace/all/common/api.c` (`SND_setSyncCallbacks`) -- Parameters: `workspace/all/common/api.c` (SND_RATE_CONTROL_D, SND_RATE_CONTROL_KI, etc.) 
+- Parameters: `workspace/all/common/defines.h` (`SND_RATE_CONTROL_D`) - Resampler: `workspace/all/common/audio_resampler.c` - Sample rate policy: `workspace//platform/platform.c` (`PLAT_pickSampleRate`) diff --git a/docs/auto-cpu-scaling.md b/docs/auto-cpu-scaling.md index 715b1332..0dedbacf 100644 --- a/docs/auto-cpu-scaling.md +++ b/docs/auto-cpu-scaling.md @@ -169,10 +169,10 @@ while (!quit) { ### Two-Layer Architecture -| Layer | Handles | Magnitude | Speed | -| --------------------- | -------------------------- | ----------------------------- | ---------- | -| **Rate control (PI)** | Jitter + persistent drift | ±1% (proportional) + integral | Per-frame | -| **CPU scaling** | Sustained performance gaps | 10-50%+ | Per-second | +| Layer | Handles | Magnitude | Speed | +| -------------------- | -------------------------- | -------------------- | ---------- | +| **Rate control (P)** | Frame-to-frame jitter | ±0.8% (proportional) | Per-frame | +| **CPU scaling** | Sustained performance gaps | 10-50%+ | Per-second | Rate control handles small timing variations. CPU scaling handles sustained performance problems that rate control can't fix. @@ -445,23 +445,20 @@ if (SND_getUnderrunCount() > last_underrun_count) { The **d parameter** determines how much pitch adjustment the rate control algorithm can apply for jitter compensation. See [docs/audio-rate-control.md](audio-rate-control.md) for the full algorithm derivation. 
-**Current implementation (PI Controller):** +**Current implementation (Proportional Controller):** ```c -// Rate control gains (api.c) -#define SND_RATE_CONTROL_D_DEFAULT 0.010f // 1.0% - proportional gain -#define SND_RATE_CONTROL_KI 0.00005f // integral gain (drift correction) -#define SND_ERROR_AVG_ALPHA 0.003f // error smoothing (~333 frame average) -#define SND_INTEGRAL_CLAMP 0.02f // ±2% max drift correction +// Rate control gain (defines.h) +#define SND_RATE_CONTROL_D 0.012f // 1.2% max pitch adjustment ``` -**Why dual-timescale PI controller works:** +**Why pure proportional control works:** -- Error smoothing (α=0.003) filters jitter before it reaches the integral term -- Proportional term (d=1.0%) provides immediate response to buffer level changes -- Integral term operates on slower timescale, learning persistent clock offset -- Integral clamped to ±2% handles hardware clock mismatch up to ±2% -- P and I can't fight because they operate on different timescales +- Vsync mode only activates when display Hz is within 1% of game fps +- With d=1.2% and max 1% mismatch, we have 1.2x headroom (similar to Arntzen's 1.4x) +- Proportional term provides immediate response to buffer level changes +- Buffer settles at stable equilibrium (may not be exactly 50%, but stable) +- Devices outside 1% tolerance fall back to audio-clock mode (no rate control needed) ### Audio Buffer Size @@ -472,7 +469,7 @@ snd.buffer_video_frames = 5; snd.frame_count = snd.buffer_video_frames * snd.sample_rate_in / snd.frame_rate; ``` -With the PI controller, the buffer settles near 50-65% fill depending on device clock characteristics, providing headroom for jitter and ~42ms effective latency. +With proportional control, the buffer settles at a stable equilibrium. The 8-frame buffer (~133ms) provides substantial headroom for CPU frequency transitions and timing variance. 
## Benchmark Methodology @@ -525,24 +522,21 @@ The discovered frequency steps and performance data come from a custom CPU bench ## Tuning Status -| Parameter | Current | Notes | -| ----------------- | ------------------- | ------------------------------------------------- | -| Rate control d | 1.0% | Proportional gain - handles frame-to-frame jitter | -| Rate control ki | 0.00005 | Integral gain - learns persistent clock offset | -| Error smoothing α | 0.003 (~333 frames) | Separates P and I timescales | -| Integral clamp | ±2% | Max drift correction (handles hardware variance) | -| Audio buffer | 5 frames (~83ms) | Effective latency ~42ms at 50% fill | -| Window size | 30 frames (~500ms) | Filters noise, responsive to changes | -| Utilization high | 85% | Frame time >85% of budget = boost | -| Utilization low | 55% | Frame time <55% of budget = reduce | -| Target util | 70% | Target utilization after frequency change | -| Max step down | 1 | Max frequency steps when reducing | -| Panic step up | 2 | Frequency steps on underrun emergency | -| Min frequency | 400 MHz | Floor for frequency scaling | -| Boost windows | 2 (~1s) | Fast response to performance issues | -| Reduce windows | 4 (~2s) | Conservative to prevent oscillation | -| Startup grace | 300 frames (~5s) | Starts at max freq, then scales | -| Percentile | 90th | Ignores outliers (loading screens) | +| Parameter | Current | Notes | +| ---------------- | ------------------ | -------------------------------------------------------- | +| Rate control d | 0.8% | Proportional gain - gentler than 1.2% with larger buffer | +| Audio buffer | 8 frames (~133ms) | Matches RetroArch handheld default, CPU scaling headroom | +| Window size | 30 frames (~500ms) | Filters noise, responsive to changes | +| Utilization high | 85% | Frame time >85% of budget = boost | +| Utilization low | 55% | Frame time <55% of budget = reduce | +| Target util | 70% | Target utilization after frequency change | +| Max step down | 1 | Max 
frequency steps when reducing | +| Panic step up | 2 | Frequency steps on underrun emergency | +| Min frequency | 400 MHz | Floor for frequency scaling | +| Boost windows | 2 (~1s) | Fast response to performance issues | +| Reduce windows | 4 (~2s) | Conservative to prevent oscillation | +| Startup grace | 300 frames (~5s) | Starts at max freq, then scales | +| Percentile | 90th | Ignores outliers (loading screens) | ### Display Rate Handling @@ -554,9 +548,9 @@ Display refresh rate is queried from SDL at init via `SDL_GetCurrentDisplayMode( | tg5040 | 60 Hz | 60.10 Hz (NES) | 60/60.10 = 0.9983 | | miyoomini | 60 Hz | 60.10 Hz (NES) | 60/60.10 = 0.9983 | -**Note:** SDL typically reports rounded integer refresh rates (60 Hz). The actual display rate may vary slightly (59.71-60.5 Hz measured via vsync timing). The PI controller's integral term learns and corrects for any mismatch over time. +**Note:** SDL typically reports rounded integer refresh rates (60 Hz). The actual display rate may vary slightly (59.71-60.5 Hz measured via vsync timing). The sync manager measures actual Hz and gates vsync mode to within 1% of game fps. -**How it works:** The PI controller adjusts the resampling ratio based on buffer fill. The proportional term (d) handles jitter, while the integral term slowly learns the persistent timing offset to maintain exactly 50% buffer fill. +**How it works:** The proportional controller adjusts the resampling ratio based on buffer fill. Buffer below 50% → produce more samples; above 50% → produce fewer. This converges to stable equilibrium. ### Debug HUD @@ -615,7 +609,7 @@ After implementing the unified RateMeter system with dual clock correction (disp - Low quality: frame timing drops → auto scaler correctly reduces CPU - The system responds to actual emulation workload, not arbitrary core labels -3. **No feedback loops** - Buffer fill is influenced by the PI controller rate adjustment and dynamic buffer sizing. 
Using it for CPU scaling would create two control systems fighting over the same signal. +3. **No feedback loops** - Buffer fill is influenced by the rate control adjustment and dynamic buffer sizing. Using it for CPU scaling would create two control systems fighting over the same signal. **The two-layer separation is optimal:** @@ -757,6 +751,33 @@ Comprehensive analysis of benchmark data from all platforms revealed optimizatio **Analysis output:** See `scripts/analyze-cpu-bands.py` and `scripts/analyze-frequency-strategies.py` for detailed frequency analysis and strategy comparison. +### Audio Clock Mode Buffer Range + +In Audio Clock mode, the CPU scaler can't rely on utilization metrics (blocking audio makes frame timing unreliable). Instead, it uses **time-based probing** with buffer-guided timing. + +**Problem discovered (TG5050):** When display Hz differs significantly from game fps (e.g., 62.9Hz vs 60.1fps), and `SDL_GL_SetSwapInterval(0)` doesn't actually disable vsync: + +1. Buffer fills due to timing mismatch (display Hz > game fps) +2. Utilization appears artificially high (~90%) because blocking time inflates frame time +3. CPU never reduces because util never drops below threshold + +**Solution:** In Audio Clock mode, use time-based probing with buffer-guided timing: + +| Buffer Level | Wait Time | Rationale | +| ------------ | ---------- | ---------------------------------------------------- | +| < 40% | N/A | Don't reduce (need headroom for transition) | +| 40-75% | 8 windows | Normal timing - reduce after ~4 seconds of stability | +| > 75% | 16 windows | Pathological timing - wait ~8 seconds before probing | + +**Key insight:** We can't trust utilization metrics in AC mode, so we: + +1. Probe by reducing after a stability period (time-based, not util-based) +2. Rely on the panic path to boost back if reduction causes underruns +3. 
Use buffer level to guide timing - high buffer gets longer wait because it indicates + problematic timing where reductions are more likely to cause issues + +**Files changed:** `workspace/all/player/player.c` (all three CPU scaling modes) + ### Threshold Validation The 55% LOW threshold and 85% HIGH threshold were chosen empirically but are now validated: diff --git a/docs/changes.md b/docs/changes.md index 4438f827..45c6a6e3 100644 --- a/docs/changes.md +++ b/docs/changes.md @@ -110,19 +110,15 @@ Extracted duplicated rendering code from platform files into shared modules, wit **Replaced basic audio handling with adaptive resampling and rate control.** - **Linear interpolation resampling** for smooth audio at any sample rate -- **Dual-timescale PI controller** for stable audio synchronization - - Smooths error signal (0.9) to filter jitter - - Quadratic integral weighting for faster convergence far from 50% - - Integral clamped to ±1% for persistent hardware drift correction -- **Dynamic rate control** that adjusts playback speed to prevent buffer underruns - - Parameters tuned for handheld timing variance (d=0.010, 5-frame buffer) - - `SND_newFrame()` updates integral once per frame to prevent over-accumulation +- **Proportional rate control** (Arntzen algorithm) for stable audio synchronization + - Adjusts resampling ratio based on buffer fill level + - Parameters tuned for handheld timing variance (d=1.2%, 5-frame buffer) +- **Runtime-adaptive sync system** that measures display Hz and selects appropriate mode + - Vsync mode when display Hz within 1% of game fps (rate control active) + - Audio-clock mode otherwise (blocking writes, no rate control needed) - **Audio buffer status callback** enabling cores to implement frameskip -- **Dual sync modes** with compile-time selection: - - **Vsync mode** (default): Frame pacing via Bresenham accumulator, non-blocking audio writes - - **Audioclock mode** (M17): Audio hardware clock drives timing, blocking writes when 
buffer full -The audioclock mode fixes audio stuttering on devices with unstable vsync (like M17). +The runtime-adaptive system automatically selects audio-clock mode for devices with unstable vsync, ensuring smooth audio on all hardware. ### Removed Legacy Audio Code diff --git a/docs/libretro-compliance.md b/docs/libretro-compliance.md index c5e6ed90..1949bf9f 100644 --- a/docs/libretro-compliance.md +++ b/docs/libretro-compliance.md @@ -403,14 +403,14 @@ This section compares LessUI's libretro implementation against [RetroArch](https ### Audio Callbacks (`retro_audio_sample_t`, `retro_audio_sample_batch_t`) -| Aspect | RetroArch | LessUI | Match | -| ------------------- | ------------------------------------------------------------- | --------------------------------------------------- | ----- | -| **Sample callback** | Accumulates to conversion buffer, flushes at chunk size | Passes directly to `SND_batchSamples` | ✅ | -| **Batch callback** | Processes in chunks up to `AUDIO_CHUNK_SIZE_NONBLOCKING >> 1` | Processes via ring buffer in `SND_batchSamples` | ✅ | -| **Return value** | Returns frames processed | Returns frames processed (or `frames` during FF) | ✅ | -| **Resampling** | Converts to float, applies DSP, resamples with rate control | Linear interpolation resampler with PI rate control | ✅ | -| **Rate control** | Monitors buffer space, adjusts ratio dynamically | Dual-timescale PI controller (Arntzen-based) | ✅ | -| **Fast-forward** | Adjusts ratio with EMA smoothing, clamps to 0.0625-16x range | Skips audio entirely during fast-forward | ⚠️ | +| Aspect | RetroArch | LessUI | Match | +| ------------------- | ------------------------------------------------------------- | -------------------------------------------------- | ----- | +| **Sample callback** | Accumulates to conversion buffer, flushes at chunk size | Passes directly to `SND_batchSamples` | ✅ | +| **Batch callback** | Processes in chunks up to `AUDIO_CHUNK_SIZE_NONBLOCKING >> 1` | Processes via 
ring buffer in `SND_batchSamples` | ✅ | +| **Return value** | Returns frames processed | Returns frames processed (or `frames` during FF) | ✅ | +| **Resampling** | Converts to float, applies DSP, resamples with rate control | Linear interpolation resampler with P rate control | ✅ | +| **Rate control** | Monitors buffer space, adjusts ratio dynamically | Proportional controller (Arntzen algorithm) | ✅ | +| **Fast-forward** | Adjusts ratio with EMA smoothing, clamps to 0.0625-16x range | Skips audio entirely during fast-forward | ⚠️ | **Note:** LessUI skips audio during fast-forward rather than pitch-adjusting like RetroArch. This is simpler and appropriate for handheld use where audio fidelity during FF is less important. diff --git a/tests/unit/all/player/test_sync_manager.c b/tests/unit/all/player/test_sync_manager.c index c66ea9a6..dbe8e3bf 100644 --- a/tests/unit/all/player/test_sync_manager.c +++ b/tests/unit/all/player/test_sync_manager.c @@ -3,11 +3,12 @@ * * Tests the runtime-adaptive sync mode switching including: * - Initialization (starts in AUDIO_CLOCK mode) - * - Vsync measurement with EMA smoothing + * - Vsync measurement with circular buffer and stddev-based convergence * - Mode switching based on measured Hz * - Drift detection and fallback to AUDIO_CLOCK * - shouldRunCore (always returns true) - * - shouldUseRateControl/shouldBlockAudio based on mode + * - shouldUseRateControl (always true, both modes use rate control) + * - shouldBlockAudio based on mode */ #include "unity.h" @@ -80,17 +81,19 @@ void test_first_vsync_just_records_timestamp(void) { TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); } -void test_second_vsync_calculates_hz(void) { - // First call +void test_second_vsync_records_interval(void) { + // First call - just records timestamp mock_time_us = 1000000; SyncManager_recordVsync(&manager); + TEST_ASSERT_EQUAL(0, manager.sample_count); - // Second call - 16.667ms later (60Hz) + // Second call - 16.667ms later (60Hz) - records first 
interval mock_time_us = 1016667; SyncManager_recordVsync(&manager); - // Should have initial Hz measurement (not averaged yet, first sample) - TEST_ASSERT_FLOAT_WITHIN(0.1, 60.0, manager.measured_hz); + // Should have recorded the interval (measured_hz only set when stable) + TEST_ASSERT_EQUAL(1, manager.sample_count); + TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); // Not stable yet } void test_rejects_outlier_too_low(void) { @@ -103,7 +106,7 @@ void test_rejects_outlier_too_low(void) { // Should be rejected, no measurement TEST_ASSERT_EQUAL_FLOAT(0.0, manager.measured_hz); - TEST_ASSERT_EQUAL(0, manager.measurement_samples); + TEST_ASSERT_EQUAL(0, manager.sample_count); } void test_rejects_outlier_too_high(void) { @@ -164,20 +167,21 @@ void test_stays_in_audio_clock_when_incompatible(void) { TEST_ASSERT_TRUE(SyncManager_isMeasurementStable(&manager)); } -void test_measurement_stable_after_120_samples(void) { +void test_measurement_stable_after_60_samples(void) { mock_time_us = 1000000; SyncManager_recordVsync(&manager); - // Need 120 samples after initial baseline - for (int i = 0; i < 120; i++) { + // With consistent samples, should converge after SYNC_MIN_SAMPLES (60) + // Not stable until we have 60+ samples with low stddev + for (int i = 0; i < 59; i++) { mock_time_us += 16667; SyncManager_recordVsync(&manager); - if (i < 119) { - TEST_ASSERT_FALSE(SyncManager_isMeasurementStable(&manager)); - } + TEST_ASSERT_FALSE(SyncManager_isMeasurementStable(&manager)); } - // After 120 samples - now stable + // 60th sample - should now be stable (consistent samples = low stddev) + mock_time_us += 16667; + SyncManager_recordVsync(&manager); TEST_ASSERT_TRUE(SyncManager_isMeasurementStable(&manager)); } @@ -219,9 +223,10 @@ void test_should_use_rate_control_in_vsync_mode(void) { TEST_ASSERT_TRUE(SyncManager_shouldUseRateControl(&manager)); } -void test_should_not_use_rate_control_in_audio_clock(void) { +void test_should_use_rate_control_in_audio_clock_too(void) { + 
// Both modes now use rate control as buffer health mechanism manager.mode = SYNC_MODE_AUDIO_CLOCK; - TEST_ASSERT_FALSE(SyncManager_shouldUseRateControl(&manager)); + TEST_ASSERT_TRUE(SyncManager_shouldUseRateControl(&manager)); } void test_should_block_audio_in_audio_clock_mode(void) { @@ -264,21 +269,22 @@ void test_mode_name_vsync(void) { // Edge Cases /////////////////////////////// -void test_ema_smooths_noisy_measurements(void) { +void test_mean_smooths_noisy_measurements(void) { mock_time_us = 1000000; SyncManager_recordVsync(&manager); - // Alternate between 59Hz and 61Hz (simulating jitter) - for (int i = 0; i < 120; i++) { + // Alternate between 59.5Hz and 60.5Hz (simulating light jitter) + // stddev/mean < 1% so it should still converge + for (int i = 0; i < 60; i++) { if (i % 2 == 0) { - mock_time_us += 16949; // 59Hz + mock_time_us += 16807; // 59.5Hz } else { - mock_time_us += 16393; // 61Hz + mock_time_us += 16529; // 60.5Hz } SyncManager_recordVsync(&manager); } - // EMA should smooth to ~60Hz + // Mean should be ~60Hz double measured = SyncManager_getMeasuredHz(&manager); TEST_ASSERT_FLOAT_WITHIN(1.0, 60.0, measured); } @@ -314,7 +320,7 @@ int main(void) { // Vsync measurement tests RUN_TEST(test_first_vsync_just_records_timestamp); - RUN_TEST(test_second_vsync_calculates_hz); + RUN_TEST(test_second_vsync_records_interval); RUN_TEST(test_rejects_outlier_too_low); RUN_TEST(test_rejects_outlier_too_high); RUN_TEST(test_rejects_zero_interval); @@ -322,13 +328,13 @@ int main(void) { // Mode switching tests RUN_TEST(test_switches_to_vsync_when_compatible); RUN_TEST(test_stays_in_audio_clock_when_incompatible); - RUN_TEST(test_measurement_stable_after_120_samples); + RUN_TEST(test_measurement_stable_after_60_samples); RUN_TEST(test_drift_detection_switches_back_to_audio_clock); // API tests RUN_TEST(test_should_run_core_always_returns_true); RUN_TEST(test_should_use_rate_control_in_vsync_mode); - RUN_TEST(test_should_not_use_rate_control_in_audio_clock); 
+ RUN_TEST(test_should_use_rate_control_in_audio_clock_too); RUN_TEST(test_should_block_audio_in_audio_clock_mode); RUN_TEST(test_should_not_block_audio_in_vsync_mode); RUN_TEST(test_get_measured_hz_returns_zero_when_not_stable); @@ -337,7 +343,7 @@ int main(void) { RUN_TEST(test_mode_name_vsync); // Edge cases - RUN_TEST(test_ema_smooths_noisy_measurements); + RUN_TEST(test_mean_smooths_noisy_measurements); RUN_TEST(test_drift_check_only_after_stable); return UNITY_END(); diff --git a/workspace/all/common/api.c b/workspace/all/common/api.c index 4e816c2d..06fddce4 100644 --- a/workspace/all/common/api.c +++ b/workspace/all/common/api.c @@ -1678,14 +1678,6 @@ void GFX_blitText(TTF_Font* ttf_font, char* str, int leading, SDL_Color color, S // SND_RATE_CONTROL_D is defined in defines.h (platforms can override) // See docs/audio-rate-control.md for tuning guidance. -// Dual-timescale PI controller: integral operates on smoothed error to avoid fighting proportional -// ki: Integral gain - very slow accumulation for persistent drift only -// alpha: Error smoothing factor (~300 frame average, ~5 seconds at 60fps) -// clamp: Max integral magnitude (handles up to ±2% persistent clock mismatch) -#define SND_RATE_CONTROL_KI 0.00005f -#define SND_ERROR_AVG_ALPHA 0.003f -#define SND_INTEGRAL_CLAMP 0.02f - // SND_BUFFER_SAMPLES is defined in defines.h (platforms can override) // Sound context manages the ring buffer and resampling @@ -1703,6 +1695,12 @@ static struct SND_Context { int frame_out; // Read position int frame_filled; // Last consumed position + // Thread synchronization for blocking audio writes + // In audio-clock mode, writers block via SDL_CondWait when buffer is full. + // The callback signals when space becomes available. 
+ SDL_mutex* mutex; // Protects buffer access + SDL_cond* cond; // Signals space available (for blocking writes) + // Linear interpolation resampler with dynamic rate control AudioResampler resampler; @@ -1719,9 +1717,7 @@ static struct SND_Context { double cumulative_total_adjust; // Sum of total_adjust values applied uint64_t total_adjust_count; // Number of total_adjust applications - // Rate control state (persistent across frames) - float rate_integral; // PI integral term (accumulates from smoothed error) - float error_avg; // Smoothed error for slow integral timescale + // Rate control state float last_rate_adjust; // Last computed adjustment (for snapshot without side effects) // SDL callback timing diagnostics @@ -1743,18 +1739,25 @@ static struct SND_Context { * Reads samples from the ring buffer and writes them to the output stream. * If buffer runs dry, repeats last sample or outputs silence. * + * Thread synchronization: + * - Uses snd.mutex to protect buffer access + * - Signals snd.cond after draining to wake blocked writers (audio-clock mode) + * * @param userdata Unused user data pointer * @param stream Output audio buffer to fill * @param len Length of output buffer in bytes * * @note Runs on SDL's audio thread, not the main thread */ -static void SND_audioCallback(void* userdata, uint8_t* stream, int len) { // plat_sound_callback - - // return (void)memset(stream,0,len); // TODO: tmp, silent +static void SND_audioCallback(void* userdata, uint8_t* stream, int len) { + (void)userdata; - if (snd.frame_count == 0) + if (snd.frame_count == 0 || !snd.mutex) { + memset(stream, 0, len); return; + } + + SDL_LockMutex(snd.mutex); int16_t* out = (int16_t*)stream; len /= (sizeof(int16_t) * 2); @@ -1797,7 +1800,7 @@ static void SND_audioCallback(void* userdata, uint8_t* stream, int len) { // pla // Log underrun with context (every occurrence - these are critical events) float fill_before = (float)(requested - len) / (float)snd.frame_count * 100.0f; - 
LOG_warn("Audio underrun #%u: needed %d more samples (had %d/%d, fill was %.0f%%)\n", + LOG_warn("Audio underrun #%u: needed %d more samples (had %d/%d, fill was %.0f%%)", snd.underrun_count, len, requested - len, requested, fill_before); if (snd.frame_filled >= 0 && snd.frame_filled < (int)snd.frame_count) { @@ -1813,39 +1816,56 @@ static void SND_audioCallback(void* userdata, uint8_t* stream, int len) { // pla memset(out, 0, len * sizeof(int16_t) * 2); } } + + // Signal writers that space is available (wakes blocked audio-clock writers) + SDL_CondSignal(snd.cond); + + SDL_UnlockMutex(snd.mutex); } /** * Allocates the audio ring buffer. * - * Buffer size is SND_BUFFER_SAMPLES (~83ms at 48kHz with 4000 samples). - * Locks audio thread during resize to prevent corruption. + * Buffer size is SND_BUFFER_SAMPLES (~133ms at 48kHz, ~8 video frames at 60fps). + * Locks mutex during resize to prevent corruption. * - * @note Called during init + * @note Called during init (before audio thread starts) */ static void SND_resizeBuffer(void) { snd.frame_count = SND_BUFFER_SAMPLES; if (snd.frame_count == 0) return; - SDL_LockAudio(); + // Lock mutex if available (may be called during init before mutex exists) + if (snd.mutex) + SDL_LockMutex(snd.mutex); int buffer_bytes = snd.frame_count * sizeof(SND_Frame); void* new_buffer = realloc(snd.buffer, buffer_bytes); if (!new_buffer) { LOG_error("Failed to allocate audio buffer (%d bytes)\n", buffer_bytes); - SDL_UnlockAudio(); + if (snd.mutex) + SDL_UnlockMutex(snd.mutex); return; } snd.buffer = new_buffer; memset(snd.buffer, 0, buffer_bytes); - snd.frame_in = 0; + // Pre-fill buffer to 50% with silence to give headroom at startup. + // This prevents immediate underruns before the core has a chance to + // produce real audio samples. The silence will be gradually replaced + // as the core submits audio. 
+ snd.frame_in = snd.frame_count / 2; snd.frame_out = 0; snd.frame_filled = snd.frame_count - 1; - SDL_UnlockAudio(); + if (snd.mutex) + SDL_UnlockMutex(snd.mutex); + + LOG_info("Audio buffer allocated: %d samples (%d bytes, ~%.1fms at %dHz, pre-filled to 50%%)", + snd.frame_count, buffer_bytes, (float)snd.frame_count / snd.sample_rate_out * 1000.0f, + snd.sample_rate_out); } /** @@ -1868,61 +1888,56 @@ static float SND_getBufferFillLevel(void) { } /** - * Calculate dynamic rate adjustment using simple dual-timescale PI controller. - * - * Based on RetroArch's dynamic rate control algorithm (Arntzen, 2012). - * Uses dual-timescale PI control: fast proportional term for immediate - * response, slow integral term for persistent drift compensation. + * Calculate dynamic rate adjustment using proportional control. * - * Algorithm: - * error = (1 - 2*fill) // Positive when buffer low - * p_term = error * d // Fast: frame-to-frame response - * error_avg = α*error + (1-α)*error_avg // Smooth over ~300 frames - * integral += error_avg * ki // Slow: learns steady offset - * adjustment = p_term + integral + * Based on Arntzen's "Dynamic Rate Control for Retro Game Emulators" (2012). + * Pure proportional control adjusts resampling ratio based on buffer fill: * - * Works well for < 1% mismatch between game fps and display Hz. - * Beyond 1%, audio-clock mode should be used instead. 
+ * error = (1 - 2*fill) // +1 when empty, 0 at half, -1 when full + * adjustment = error * d // Bounded by ±d * - * Tuning guide: - * d: Proportional gain (0.005-0.025) - higher = faster response, more pitch variation - * ki: Integral gain (0.00005) - very slow, learns persistent drift - * α: Error smoothing (0.003) - ~300 frame average + * Buffer behavior: + * - Empty (fill=0): error=+1 → produce MORE samples → fill buffer + * - Half (fill=0.5): error=0 → maintain equilibrium + * - Full (fill=1): error=-1 → produce FEWER samples → drain buffer * - * Our resampler divides by ratio_adjust (larger = fewer outputs), so: - * ratio_adjust = 1 - adjustment + * The paper proves this converges to stable equilibrium. Used in vsync mode + * (display Hz within 1% of game fps) and in audio-clock mode for buffer health. * - * @return Rate adjustment factor for resampler step size + * @return Rate adjustment factor for resampler (1.0 - adjustment) */ static float SND_calculateRateAdjust(void) { float fill = SND_getBufferFillLevel(); - - // Arntzen error formula: positive when buffer low, negative when high - // Buffer low (fill<0.5) → produce more samples (adjustment > 0) - // Buffer high (fill>0.5) → produce fewer samples (adjustment < 0) float error = 1.0f - 2.0f * fill; - - // Fast timescale (proportional): immediate response to buffer level changes - float p_term = error * SND_RATE_CONTROL_D; - - // Slow timescale (integral): persistent offset learned in SND_newFrame() - float adjustment = p_term + snd.rate_integral; + float adjustment = error * SND_RATE_CONTROL_D; // Invert for our resampler convention (larger ratio = fewer outputs) snd.last_rate_adjust = 1.0f - adjustment; return snd.last_rate_adjust; } +/** + * Helper to calculate available write space in FIFO. + * Caller must hold snd.mutex. 
+ */ +static int SND_getWriteAvailable(void) { + if (snd.frame_in >= snd.frame_out) { + return snd.frame_count - (snd.frame_in - snd.frame_out) - 1; + } else { + return snd.frame_out - snd.frame_in - 1; + } +} + /** * Writes a batch of audio samples to the ring buffer. * * Runtime adaptive behavior based on sync mode: * * Audio-clock mode (should_block_audio = true): - * - Blocks when buffer is full (up to 10ms) - * - Audio hardware clock drives emulation timing + * - TRUE blocking via SDL_CondWait when buffer is full + * - Audio hardware clock drives emulation timing naturally * - Fixed 1.0 resampling ratio (no dynamic rate control) - * - For devices with unstable vsync (>1% mismatch) + * - Blocking provides frame pacing without SDL_Delay * * Vsync mode (should_block_audio = false): * - Non-blocking with dynamic rate control @@ -1933,10 +1948,8 @@ static float SND_calculateRateAdjust(void) { * @param frame_count Number of frames in array * @return Number of frames consumed */ -size_t SND_batchSamples(const SND_Frame* frames, - size_t frame_count) { // plat_sound_write / plat_sound_write_resample - - if (snd.frame_count == 0) +size_t SND_batchSamples(const SND_Frame* frames, size_t frame_count) { + if (snd.frame_count == 0 || !snd.mutex) return 0; // Check sync mode via callback (defaults to vsync mode if not set) @@ -1946,24 +1959,45 @@ size_t SND_batchSamples(const SND_Frame* frames, if (should_block) { // ======================================================================== - // AUDIO-CLOCK MODE: Blocking writes with audio hardware timing + // AUDIO-CLOCK MODE: TRUE blocking when buffer is full // ======================================================================== + // + // Strategy: Write to FIFO until full, then block via SDL_CondWait. + // The audio callback signals the cond when it drains samples. + // This provides natural backpressure from the audio hardware clock - + // no SDL_Delay needed; blocking IS the timing mechanism. 
- SDL_LockAudio(); + SDL_LockMutex(snd.mutex); size_t consumed = 0; while (frame_count > 0) { - int tries = 0; - - // Wait for audio callback to drain buffer (up to 10ms) - while (tries < 10 && snd.frame_in == snd.frame_filled) { - tries++; - SDL_UnlockAudio(); - SDL_Delay(1); - SDL_LockAudio(); + // Calculate available space + int available = SND_getWriteAvailable(); + + // If buffer is nearly full (>90%), block until callback drains + int overflow_threshold = snd.frame_count / 10; // 10% available = 90% full + while (available < overflow_threshold) { + // Block with timeout to allow checking for shutdown + // 100ms timeout: long enough to not spin, short enough for responsive shutdown + int wait_result = SDL_CondWaitTimeout(snd.cond, snd.mutex, 100); + (void)wait_result; + + // Check if we're shutting down (SND_quit sets initialized=0) + if (!snd.initialized) { + SDL_UnlockMutex(snd.mutex); + return consumed; + } + + available = SND_getWriteAvailable(); } - // Write samples with fixed 1.0 ratio (no rate control) + // Audio-clock mode: gentle rate control as buffer health mechanism. + // Like RetroArch, we use proportional rate control (±SND_RATE_CONTROL_D) in both modes + // to handle timing variations when true blocking can't provide pacing + // (e.g., platform can't disable vsync). This is gentle enough that + // underruns will still occur if CPU is truly too slow. 
+ float ratio = SND_calculateRateAdjust(); + AudioRingBuffer ring = { .frames = snd.buffer, .capacity = snd.frame_count, @@ -1972,7 +2006,7 @@ size_t SND_batchSamples(const SND_Frame* frames, }; ResampleResult result = - AudioResampler_resample(&snd.resampler, &ring, frames, frame_count, 1.0f); + AudioResampler_resample(&snd.resampler, &ring, frames, frame_count, ratio); snd.frame_in = ring.write_pos; snd.samples_in += result.frames_consumed; @@ -1983,7 +2017,8 @@ size_t SND_batchSamples(const SND_Frame* frames, consumed += result.frames_consumed; } - SDL_UnlockAudio(); + SDL_UnlockMutex(snd.mutex); + return consumed; } else { @@ -1991,7 +2026,7 @@ size_t SND_batchSamples(const SND_Frame* frames, // VSYNC MODE: Non-blocking with dynamic rate control // ======================================================================== - SDL_LockAudio(); + SDL_LockMutex(snd.mutex); // Determine resampling ratio float total_adjust; @@ -2012,16 +2047,11 @@ size_t SND_batchSamples(const SND_Frame* frames, AudioResampler_estimateOutput(&snd.resampler, frame_count, total_adjust); // Calculate available space - int available; - if (snd.frame_in >= snd.frame_out) { - available = snd.frame_count - (snd.frame_in - snd.frame_out) - 1; - } else { - available = snd.frame_out - snd.frame_in - 1; - } + int available = SND_getWriteAvailable(); // Warn if buffer nearly full if (available < estimated_output) { - LOG_warn("Audio buffer nearly full: %d available, %d needed (fill=%.0f%%)\n", available, + LOG_warn("Audio buffer nearly full: %d available, %d needed (fill=%.0f%%)", available, estimated_output, SND_getBufferFillLevel() * 100.0f); } @@ -2040,7 +2070,7 @@ size_t SND_batchSamples(const SND_Frame* frames, snd.samples_in += result.frames_consumed; snd.samples_written += result.frames_written; - SDL_UnlockAudio(); + SDL_UnlockMutex(snd.mutex); return result.frames_consumed; } @@ -2083,6 +2113,13 @@ void SND_init(double sample_rate, double frame_rate) { // plat_sound_init 
snd.should_use_rate_control = saved_rate_control; snd.should_block_audio = saved_block_audio; + // Create synchronization primitives for blocking audio writes + snd.mutex = SDL_CreateMutex(); + snd.cond = SDL_CreateCond(); + if (!snd.mutex || !snd.cond) { + LOG_error("Failed to create audio sync primitives"); + } + SDL_AudioSpec spec_in; SDL_AudioSpec spec_out; @@ -2117,14 +2154,16 @@ void SND_init(double sample_rate, double frame_rate) { // plat_sound_init * Gets current audio buffer fill level as a percentage. * * Used by libretro cores for audio-based frameskip decisions. - * Thread-safe: locks audio to read consistent buffer state. + * Thread-safe: locks mutex to read consistent buffer state. * * @return Fill level 0-100 (0 = empty, 100 = full) */ unsigned SND_getBufferOccupancy(void) { - SDL_LockAudio(); + if (!snd.mutex) + return 0; + SDL_LockMutex(snd.mutex); float fill = SND_getBufferFillLevel(); - SDL_UnlockAudio(); + SDL_UnlockMutex(snd.mutex); return (unsigned)(fill * 100.0f); } @@ -2137,10 +2176,17 @@ unsigned SND_getBufferOccupancy(void) { * For auto CPU scaling, underruns are an emergency signal - if rate control * stress is high AND underruns are occurring, immediate CPU boost is needed. * + * Thread-safe: locks mutex to read consistent value. + * * @return Number of underruns since SND_init() or last SND_resetUnderrunCount() */ unsigned SND_getUnderrunCount(void) { - return snd.underrun_count; + if (!snd.mutex) + return 0; + SDL_LockMutex(snd.mutex); + unsigned count = snd.underrun_count; + SDL_UnlockMutex(snd.mutex); + return count; } /** @@ -2148,53 +2194,32 @@ unsigned SND_getUnderrunCount(void) { * * Call after handling an underrun event (e.g., after boosting CPU) * to track new underruns going forward. + * + * Thread-safe: locks mutex to write consistent value. 
*/ void SND_resetUnderrunCount(void) { + if (!snd.mutex) + return; + SDL_LockMutex(snd.mutex); snd.underrun_count = 0; + SDL_UnlockMutex(snd.mutex); } /** * Signals start of a new video frame for audio rate control. * - * Updates the PI integral term once per frame based on current buffer fill. - * Call once per frame before core.run() produces audio. - * - * This prevents the integral from accumulating N times when cores use - * per-sample audio callbacks (audio_sample_callback instead of batch). - * Some cores (e.g., 64-bit snes9x) call audio ~535 times per frame. - * - * No-op in audio-clock mode (no rate control needed). + * Previously used for PI integral updates, now a no-op since we use + * pure proportional control (Arntzen algorithm). Kept for API compatibility. */ void SND_newFrame(void) { - if (!snd.initialized) - return; - - // Check if rate control should run (vsync mode only) - if (!snd.should_use_rate_control || !snd.should_use_rate_control()) - return; - - SDL_LockAudio(); - - float fill = SND_getBufferFillLevel(); - float error = 1.0f - 2.0f * fill; - - // Update smoothed error and integral (once per frame) - snd.error_avg = SND_ERROR_AVG_ALPHA * error + (1.0f - SND_ERROR_AVG_ALPHA) * snd.error_avg; - snd.rate_integral += snd.error_avg * SND_RATE_CONTROL_KI; - - // Clamp integral to prevent windup (handles up to ±2% clock mismatch) - if (snd.rate_integral > SND_INTEGRAL_CLAMP) - snd.rate_integral = SND_INTEGRAL_CLAMP; - if (snd.rate_integral < -SND_INTEGRAL_CLAMP) - snd.rate_integral = -SND_INTEGRAL_CLAMP; - - SDL_UnlockAudio(); + // No-op: pure proportional control doesn't need per-frame state updates + (void)0; } /** * Captures an atomic snapshot of all audio state for diagnostics. * - * All values are read while holding the audio lock to ensure consistency. + * All values are read while holding the mutex to ensure consistency. * Includes buffer state, sample flow counters, and rate control parameters. 
* * @return Snapshot of current audio state @@ -2202,7 +2227,10 @@ void SND_newFrame(void) { SND_Snapshot SND_getSnapshot(void) { SND_Snapshot snap = {0}; - SDL_LockAudio(); + if (!snd.mutex) + return snap; + + SDL_LockMutex(snd.mutex); // Timestamp for delta calculations snap.timestamp_us = getMicroseconds(); @@ -2219,14 +2247,11 @@ SND_Snapshot SND_getSnapshot(void) { snap.samples_consumed = snd.samples_consumed; snap.samples_requested = snd.samples_requested; - // Rate control parameters (PI controller - read last computed values to avoid side effects) + // Rate control parameters (proportional control - read last computed value) snap.frame_rate = snd.frame_rate; snap.rate_adjust = snd.last_rate_adjust; snap.total_adjust = snd.last_rate_adjust; - snap.rate_integral = snd.rate_integral; snap.rate_control_d = SND_RATE_CONTROL_D; - snap.rate_control_ki = SND_RATE_CONTROL_KI; - snap.error_avg = snd.error_avg; // Resampler state snap.sample_rate_in = snd.sample_rate_in; @@ -2256,7 +2281,7 @@ SND_Snapshot SND_getSnapshot(void) { snap.callback_avg_interval_ms = 0; } - SDL_UnlockAudio(); + SDL_UnlockMutex(snd.mutex); return snap; } @@ -2264,16 +2289,31 @@ SND_Snapshot SND_getSnapshot(void) { /** * Shuts down the audio subsystem and frees resources. * - * Pauses audio, closes SDL audio device, frees ring buffer. + * Sets initialized=0 first to signal blocked writers to exit, + * then pauses audio, closes device, and frees resources. * Safe to call even if audio was never initialized. 
*/ -void SND_quit(void) { // plat_sound_finish +void SND_quit(void) { if (!snd.initialized) return; + // Signal shutdown first - blocked writers see it when SDL_CondWaitTimeout returns + snd.initialized = 0; + + // Pause and close audio - SDL_CloseAudio waits for callback to complete SDL_PauseAudio(1); SDL_CloseAudio(); + // Destroy synchronization primitives (safe now that callback has stopped) + if (snd.cond) { + SDL_DestroyCond(snd.cond); + snd.cond = NULL; + } + if (snd.mutex) { + SDL_DestroyMutex(snd.mutex); + snd.mutex = NULL; + } + if (snd.buffer) { free(snd.buffer); snd.buffer = NULL; @@ -2324,13 +2364,16 @@ void SND_setMinLatency(unsigned latency_ms) { LOG_info("SET_MINIMUM_AUDIO_LATENCY: %ums - resizing buffer from %zu to %zu samples", latency_ms, snd.frame_count, required_samples); - SDL_LockAudio(); + if (!snd.mutex) + return; + + SDL_LockMutex(snd.mutex); size_t buffer_bytes = required_samples * sizeof(SND_Frame); void* new_buffer = realloc(snd.buffer, buffer_bytes); if (!new_buffer) { LOG_error("Failed to allocate audio buffer (%zu bytes)", buffer_bytes); - SDL_UnlockAudio(); + SDL_UnlockMutex(snd.mutex); return; } snd.buffer = new_buffer; @@ -2342,7 +2385,7 @@ void SND_setMinLatency(unsigned latency_ms) { snd.frame_out = 0; snd.frame_filled = snd.frame_count - 1; - SDL_UnlockAudio(); + SDL_UnlockMutex(snd.mutex); } /** diff --git a/workspace/all/common/api.h b/workspace/all/common/api.h index f41095a1..11ecfbcf 100644 --- a/workspace/all/common/api.h +++ b/workspace/all/common/api.h @@ -850,7 +850,7 @@ void SND_resetUnderrunCount(void); /** * Signals start of a new video frame for audio rate control. - * Call once per frame before core.run() to limit integral updates. + * Currently a no-op (pure proportional control). Kept for API compatibility. 
*/ void SND_newFrame(void); @@ -912,14 +912,11 @@ typedef struct { uint64_t samples_consumed; // Total samples consumed by audio callback uint64_t samples_requested; // Total samples requested by SDL callback - // Rate control parameters (PI controller based on Arntzen algorithm) + // Rate control parameters (proportional control based on Arntzen algorithm) float frame_rate; // Core frame rate (e.g., 60.0988) float rate_adjust; // Dynamic rate control adjustment (1.0 ± d) float total_adjust; // Same as rate_adjust (no separate corrections) - float rate_integral; // PI controller integral term (drift correction) float rate_control_d; // Proportional gain - float rate_control_ki; // Integral gain - float error_avg; // Smoothed error (for debugging integral behavior) // Resampler state int sample_rate_in; // Input sample rate (from core) diff --git a/workspace/all/common/cpu.h b/workspace/all/common/cpu.h index 93c5b831..ae47b719 100644 --- a/workspace/all/common/cpu.h +++ b/workspace/all/common/cpu.h @@ -55,9 +55,9 @@ #define CPU_PANIC_GRACE_FRAMES 60 // Frames to ignore underruns after freq change (~1s at 60fps) #define CPU_PANIC_GRACE_MAX_UNDERRUNS 5 // Max underruns during grace before panic anyway #define CPU_STABILITY_DECAY_WINDOWS 8 // Stable windows before decaying panic counts (~4s) -#define CPU_AUDIO_CLOCK_REDUCE_WINDOWS \ - 8 // Windows before reduce in audio-clock mode (~4s) \ - // Higher than normal (4) since util metrics are unreliable +// Windows before reduce in audio-clock mode (~4s) +// Higher than normal (4) since util metrics are unreliable +#define CPU_AUDIO_CLOCK_REDUCE_WINDOWS 8 #define CPU_DEFAULT_MIN_BUFFER_FOR_REDUCE 40 // Min audio buffer % to allow reduce /** diff --git a/workspace/all/common/defines.h b/workspace/all/common/defines.h index 897a1444..e6960b58 100644 --- a/workspace/all/common/defines.h +++ b/workspace/all/common/defines.h @@ -371,23 +371,25 @@ /** * Audio ring buffer size in samples (stereo frames). 
- * Controls how much audio is buffered ahead (~85ms at 48kHz with 4096 samples). - * Lower values reduce latency, higher values prevent underruns on slow devices. + * Controls how much audio is buffered ahead (~133ms at 48kHz, ~8 video frames at 60fps). + * Matches RetroArch's 128ms default for handheld devices. + * Provides headroom for CPU frequency scaling and timing variance. * Platforms can override this in platform.h if needed. */ #ifndef SND_BUFFER_SAMPLES -#define SND_BUFFER_SAMPLES 4096 +#define SND_BUFFER_SAMPLES 6400 #endif /** * Rate control proportional gain (d parameter from Arntzen paper). * Controls maximum pitch deviation for buffer level compensation. * Higher values = more aggressive correction, faster response to jitter. - * Paper recommends 0.2-0.5%, but handhelds need 1.0-1.5% due to timing variance. + * Paper recommends 0.2-0.5%, handhelds typically use 0.5-1.0%. + * Set to 0.8% - gentler than previous 1.2% since larger buffer provides more headroom. * Platforms can override this in platform.h if needed. */ #ifndef SND_RATE_CONTROL_D -#define SND_RATE_CONTROL_D 0.012f +#define SND_RATE_CONTROL_D 0.008f #endif /////////////////////////////// diff --git a/workspace/all/common/gl_video.c b/workspace/all/common/gl_video.c index e26e4936..55a5cff4 100644 --- a/workspace/all/common/gl_video.c +++ b/workspace/all/common/gl_video.c @@ -1974,6 +1974,20 @@ void GLVideo_swapBuffers(void) { SDL_GL_SwapWindow(window); } +int GLVideo_setVsync(int enabled) { + if (!gl_state.context_ready) { + return -1; + } + + int result = SDL_GL_SetSwapInterval(enabled ? 1 : 0); + if (result == 0) { + LOG_info("GL video: vsync %s", enabled ? 
"enabled" : "disabled"); + } else { + LOG_warn("GL video: failed to set vsync: %s", SDL_GetError()); + } + return result; +} + void GLVideo_clear(void) { if (!gl_state.context_ready) { return; diff --git a/workspace/all/common/gl_video.h b/workspace/all/common/gl_video.h index 7be3e112..991835ae 100644 --- a/workspace/all/common/gl_video.h +++ b/workspace/all/common/gl_video.h @@ -284,6 +284,18 @@ void GLVideo_presentSurface(SDL_Surface* surface); */ void GLVideo_swapBuffers(void); +/** + * Set vsync mode for the GL context. + * + * In audio-clock mode, vsync should be disabled so audio blocking + * is the sole timing source. In vsync mode, vsync should be enabled + * for tear-free rendering. + * + * @param enabled 1 to enable vsync, 0 to disable + * @return 0 on success, -1 on failure + */ +int GLVideo_setVsync(int enabled); + /** * Clear the screen to black. * @@ -462,6 +474,11 @@ static inline void GLVideo_presentSurface(SDL_Surface* surface) { static inline void GLVideo_swapBuffers(void) {} +static inline int GLVideo_setVsync(int enabled) { + (void)enabled; + return -1; +} + static inline void GLVideo_clear(void) {} static inline void GLVideo_renderHUD(const uint32_t* pixels, int width, int height, int screen_w, diff --git a/workspace/all/player/player.c b/workspace/all/player/player.c index 429bd0dd..7706c9c8 100644 --- a/workspace/all/player/player.c +++ b/workspace/all/player/player.c @@ -1355,10 +1355,18 @@ static void updateAutoCPU(void) { // Underrun detected - track panic and boost unsigned audio_fill = SND_getBufferOccupancy(); - // Track panic at current frequency (for failsafe blocking). - // If a frequency can't keep up, all lower frequencies are also blocked - // because lower freq = less CPU throughput = guaranteed worse performance. - if (auto_cpu_state.use_granular && current_idx >= 0 && current_idx < CPU_MAX_FREQUENCIES) { + // Track panic at current state/frequency (for failsafe blocking). 
+ // If a state can't keep up, it gets blocked after CPU_PANIC_THRESHOLD panics. + if (auto_cpu_state.use_topology && current_state >= 0 && + current_state < CPU_MAX_FREQUENCIES) { + auto_cpu_state.panic_count[current_state]++; + + if (auto_cpu_state.panic_count[current_state] >= CPU_PANIC_THRESHOLD) { + LOG_warn("Auto CPU: BLOCKING state %d after %d panics (audio=%u%%)\n", + current_state, auto_cpu_state.panic_count[current_state], audio_fill); + } + } else if (auto_cpu_state.use_granular && current_idx >= 0 && + current_idx < CPU_MAX_FREQUENCIES) { auto_cpu_state.panic_count[current_idx]++; if (auto_cpu_state.panic_count[current_idx] >= CPU_PANIC_THRESHOLD) { @@ -1389,6 +1397,16 @@ static void updateAutoCPU(void) { auto_cpu_state.frequencies[current_idx], auto_cpu_state.frequencies[new_idx], audio_fill); } else { + // Fallback mode - track panic at current level + if (current_level >= 0 && current_level < 3) { + auto_cpu_state.panic_count[current_level]++; + + if (auto_cpu_state.panic_count[current_level] >= CPU_PANIC_THRESHOLD) { + LOG_warn("Auto CPU: BLOCKING level %d after %d panics (audio=%u%%)\n", + current_level, auto_cpu_state.panic_count[current_level], audio_fill); + } + } + int new_level = current_level + auto_cpu_config.panic_step_up; if (new_level > 2) new_level = 2; @@ -1452,13 +1470,42 @@ static void updateAutoCPU(void) { if (in_audio_clock) { // Audio Clock: time-based reduction (util is unreliable due to blocking audio) + // We use time-based probing: after stability period, try reducing. + // If it causes underruns, panic path will boost back. 
+ // + // Buffer level guides timing: + // - 40-75%: Normal range, reduce after 8 windows (~4s) + // - >75%: High buffer (Hz/fps mismatch), reduce after 16 windows (~8s) + // Longer delay because high buffer means timing is pathological + // - <40%: Don't reduce (need headroom for transition) auto_cpu_state.low_util_windows++; - if (auto_cpu_state.low_util_windows >= CPU_AUDIO_CLOCK_REDUCE_WINDOWS && - auto_cpu_state.panic_cooldown == 0 && current_state > 0) { + unsigned audio_fill = SND_getBufferOccupancy(); + + // Determine required stability windows based on buffer level + int required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS; // 8 windows (~4s) + if (audio_fill > 75) { + // High buffer = pathological timing, be more conservative + required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS * 2; // 16 windows (~8s) + } + + bool buffer_ok = (audio_fill >= 40); // Lower bound only - need headroom + if (auto_cpu_state.low_util_windows >= required_windows && + auto_cpu_state.panic_cooldown == 0 && buffer_ok && current_state > 0) { int new_state = current_state - 1; - auto_cpu_setTargetState(new_state); - auto_cpu_state.low_util_windows = 0; - LOG_debug("Auto CPU: REDUCE state %d→%d (AC mode)\n", current_state, new_state); + // Skip blocked states (too many panics at that state) + while (new_state >= 0 && + auto_cpu_state.panic_count[new_state] >= CPU_PANIC_THRESHOLD) { + new_state--; + } + if (new_state >= 0) { + auto_cpu_setTargetState(new_state); + auto_cpu_state.low_util_windows = 0; + LOG_debug("Auto CPU: REDUCE state %d→%d (AC mode, buf=%u%%, wait=%d)\n", + current_state, new_state, audio_fill, required_windows); + } else { + // All lower states blocked, just reset counter + auto_cpu_state.low_util_windows = 0; + } } } else if (util > auto_cpu_config.util_high) { // Need more performance - step up @@ -1490,11 +1537,21 @@ static void updateAutoCPU(void) { int new_state = current_state - auto_cpu_config.max_step_down; if (new_state < 0) new_state = 0; - 
auto_cpu_setTargetState(new_state); - auto_cpu_state.low_util_windows = 0; - // No grace period on reduce - if we underrun, frequency is too slow - LOG_debug("Auto CPU: REDUCE state %d→%d (util=%u%%)\n", current_state, - new_state, util); + // Skip blocked states (too many panics at that state) + while (new_state >= 0 && + auto_cpu_state.panic_count[new_state] >= CPU_PANIC_THRESHOLD) { + new_state--; + } + if (new_state >= 0) { + auto_cpu_setTargetState(new_state); + auto_cpu_state.low_util_windows = 0; + // No grace period on reduce - if we underrun, frequency is too slow + LOG_debug("Auto CPU: REDUCE state %d→%d (util=%u%%)\n", current_state, + new_state, util); + } else { + // All lower states blocked + auto_cpu_state.low_util_windows = 0; + } } } else { // In sweet spot - reset counters @@ -1507,9 +1564,8 @@ static void updateAutoCPU(void) { if (++debug_window_count_topo >= 4) { debug_window_count_topo = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% state=%d/%d\n", - snap.fill_pct, snap.rate_integral, snap.total_adjust, util, current_state, - max_state); + LOG_debug("Auto CPU: fill=%u%% adj=%.4f util=%u%% state=%d/%d\n", snap.fill_pct, + snap.total_adjust, util, current_state, max_state); } } else if (auto_cpu_state.use_granular) { // Granular mode: step through available frequencies one at a time @@ -1527,9 +1583,19 @@ static void updateAutoCPU(void) { if (in_audio_clock) { // Audio Clock: time-based reduction (util is unreliable due to blocking audio) + // Use time-based probing with buffer-guided timing. 
auto_cpu_state.low_util_windows++; - if (auto_cpu_state.low_util_windows >= CPU_AUDIO_CLOCK_REDUCE_WINDOWS && - auto_cpu_state.panic_cooldown == 0 && current_idx > 0) { + unsigned audio_fill = SND_getBufferOccupancy(); + + // High buffer = pathological timing, wait longer before probing + int required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS; + if (audio_fill > 75) { + required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS * 2; + } + + bool buffer_ok = (audio_fill >= 40); + if (auto_cpu_state.low_util_windows >= required_windows && + auto_cpu_state.panic_cooldown == 0 && buffer_ok && current_idx > 0) { int new_idx = current_idx - 1; // Skip blocked frequencies while (new_idx >= 0 && @@ -1540,7 +1606,8 @@ static void updateAutoCPU(void) { int new_freq = auto_cpu_state.frequencies[new_idx]; auto_cpu_setTargetIndex(new_idx); auto_cpu_state.low_util_windows = 0; - LOG_debug("Auto CPU: REDUCE %d→%d kHz (AC mode)\n", current_freq, new_freq); + LOG_debug("Auto CPU: REDUCE %d→%d kHz (AC mode, buf=%u%%, wait=%d)\n", + current_freq, new_freq, audio_fill, required_windows); } else { auto_cpu_state.low_util_windows = 0; } @@ -1604,10 +1671,9 @@ static void updateAutoCPU(void) { if (++debug_window_count >= 4) { debug_window_count = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% freq=%dkHz " - "idx=%d/%d\n", - snap.fill_pct, snap.rate_integral, snap.total_adjust, util, current_freq, - current_idx, max_idx); + LOG_debug("Auto CPU: fill=%u%% adj=%.4f util=%u%% freq=%dkHz idx=%d/%d\n", + snap.fill_pct, snap.total_adjust, util, current_freq, current_idx, + max_idx); } } else { // Fallback mode: 3-level scaling (original algorithm) @@ -1622,13 +1688,33 @@ static void updateAutoCPU(void) { if (in_audio_clock) { // Audio Clock: time-based reduction (util is unreliable due to blocking audio) + // Use time-based probing with buffer-guided timing. 
auto_cpu_state.low_util_windows++; - if (auto_cpu_state.low_util_windows >= CPU_AUDIO_CLOCK_REDUCE_WINDOWS && - auto_cpu_state.panic_cooldown == 0 && current_level > 0) { + unsigned audio_fill = SND_getBufferOccupancy(); + + // High buffer = pathological timing, wait longer before probing + int required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS; + if (audio_fill > 75) { + required_windows = CPU_AUDIO_CLOCK_REDUCE_WINDOWS * 2; + } + + bool buffer_ok = (audio_fill >= 40); + if (auto_cpu_state.low_util_windows >= required_windows && + auto_cpu_state.panic_cooldown == 0 && buffer_ok && current_level > 0) { int new_level = current_level - 1; - auto_cpu_setTargetLevel(new_level); - auto_cpu_state.low_util_windows = 0; - LOG_debug("Auto CPU: REDUCE level %d (AC mode)\n", new_level); + // Skip blocked levels + while (new_level >= 0 && + auto_cpu_state.panic_count[new_level] >= CPU_PANIC_THRESHOLD) { + new_level--; + } + if (new_level >= 0) { + auto_cpu_setTargetLevel(new_level); + auto_cpu_state.low_util_windows = 0; + LOG_debug("Auto CPU: REDUCE level %d (AC mode, buf=%u%%, wait=%d)\n", + new_level, audio_fill, required_windows); + } else { + auto_cpu_state.low_util_windows = 0; + } } } else if (util > auto_cpu_config.util_high) { auto_cpu_state.high_util_windows++; @@ -1646,9 +1732,8 @@ static void updateAutoCPU(void) { if (++debug_window_count_fallback >= 4) { debug_window_count_fallback = 0; SND_Snapshot snap = SND_getSnapshot(); - LOG_debug("Auto CPU: fill=%u%% int=%.4f adj=%.4f util=%u%% level=%d\n", - snap.fill_pct, snap.rate_integral, snap.total_adjust, util, - current_level); + LOG_debug("Auto CPU: fill=%u%% adj=%.4f util=%u%% level=%d\n", snap.fill_pct, + snap.total_adjust, util, current_level); } // Boost if sustained high utilization @@ -1666,10 +1751,20 @@ static void updateAutoCPU(void) { if (auto_cpu_state.low_util_windows >= auto_cpu_config.reduce_windows && auto_cpu_state.panic_cooldown == 0 && current_level > 0) { int new_level = current_level - 1; - 
auto_cpu_setTargetLevel(new_level); - auto_cpu_state.low_util_windows = 0; - // No grace period on reduce - if we underrun, frequency is too slow - LOG_debug("Auto CPU: REDUCE level %d (util=%u%%)\n", new_level, util); + // Skip blocked levels + while (new_level >= 0 && + auto_cpu_state.panic_count[new_level] >= CPU_PANIC_THRESHOLD) { + new_level--; + } + if (new_level >= 0) { + auto_cpu_setTargetLevel(new_level); + auto_cpu_state.low_util_windows = 0; + // No grace period on reduce - if we underrun, frequency is too slow + LOG_debug("Auto CPU: REDUCE level %d (util=%u%%)\n", new_level, util); + } else { + // All lower levels blocked + auto_cpu_state.low_util_windows = 0; + } } } @@ -1677,13 +1772,30 @@ static void updateAutoCPU(void) { // If we reached here, no panic happened during this window auto_cpu_state.stability_streak++; if (auto_cpu_state.stability_streak >= CPU_STABILITY_DECAY_WINDOWS) { - // Earned stability - decay panic counts for current freq and above only - // Being stable at 600MHz proves 800/1000/1200 are fine too, but not 400MHz + // Earned stability - decay panic counts for current state/freq and above only + // Being stable at a state proves higher states are fine too, but not lower ones int decayed = 0; - for (int i = current_idx; i < auto_cpu_state.freq_count; i++) { - if (auto_cpu_state.panic_count[i] > 0) { - auto_cpu_state.panic_count[i]--; - decayed++; + if (auto_cpu_state.use_topology) { + for (int i = current_state; i < auto_cpu_state.topology.state_count; i++) { + if (auto_cpu_state.panic_count[i] > 0) { + auto_cpu_state.panic_count[i]--; + decayed++; + } + } + } else if (auto_cpu_state.use_granular) { + for (int i = current_idx; i < auto_cpu_state.freq_count; i++) { + if (auto_cpu_state.panic_count[i] > 0) { + auto_cpu_state.panic_count[i]--; + decayed++; + } + } + } else { + // Fallback mode: decay for current level and above + for (int i = current_level; i < 3; i++) { + if (auto_cpu_state.panic_count[i] > 0) { + 
auto_cpu_state.panic_count[i]--; + decayed++; + } } } if (decayed > 0) { @@ -6258,8 +6370,14 @@ static void run_main_loop(void) { SyncManager_init(&sync_manager, core.fps, display_hz); SND_setSyncCallbacks(sync_shouldUseRateControl, sync_shouldBlockAudio); - LOG_info("Starting main loop: %.2ffps @ %.1fHz (mode: %s)\n", core.fps, display_hz, - SyncManager_getModeName(SyncManager_getMode(&sync_manager))); + // Set vsync based on sync mode: + // - Audio-clock mode: disable vsync so audio blocking is the sole timing source + // - Vsync mode: enable vsync for tear-free rendering with display-driven timing + bool use_vsync = (SyncManager_getMode(&sync_manager) != SYNC_MODE_AUDIO_CLOCK); + GLVideo_setVsync(use_vsync ? 1 : 0); + + LOG_info("Starting main loop: %.2ffps @ %.1fHz (mode: %s, vsync=%s)\n", core.fps, display_hz, + SyncManager_getModeName(SyncManager_getMode(&sync_manager)), use_vsync ? "on" : "off"); PWR_warn(1); PWR_disableAutosleep(); @@ -6336,6 +6454,17 @@ static void run_main_loop(void) { SyncManager_recordVsync(&sync_manager); + // Update vsync if sync mode changed (e.g., audio-clock → vsync transition) + { + static SyncMode prev_mode = SYNC_MODE_AUDIO_CLOCK; + SyncMode curr_mode = SyncManager_getMode(&sync_manager); + if (curr_mode != prev_mode) { + int vsync_enabled = (curr_mode != SYNC_MODE_AUDIO_CLOCK) ? 
1 : 0; + GLVideo_setVsync(vsync_enabled); + prev_mode = curr_mode; + } + } + limitFF(); trackFPS(); updateAutoCPU(); diff --git a/workspace/all/player/sync_manager.c b/workspace/all/player/sync_manager.c index 589cc873..1a7ba381 100644 --- a/workspace/all/player/sync_manager.c +++ b/workspace/all/player/sync_manager.c @@ -6,47 +6,45 @@ #include "log.h" #include "utils.h" // getMicroseconds #include +#include -// Number of vsync samples before measurement is considered stable -// 120 samples (~2s at 60Hz): Long enough for EMA to converge and filter -// initial jitter, short enough that users don't notice startup delay -#define SYNC_WARMUP_SAMPLES 120 +// Minimum samples before checking stability +// 60 samples (~1s at 60Hz): Need enough for meaningful stddev +#define SYNC_MIN_SAMPLES 60 + +// Maximum samples before giving up on convergence +// 1800 samples (~30s at 60Hz): If not stable by then, display is unstable +#define SYNC_MAX_SAMPLES 1800 + +// Stability threshold (stddev/mean ratio) +// 1% relative deviation indicates stable measurement +#define SYNC_STABILITY_THRESHOLD 0.01 + +// Progress logging interval (DEBUG only) +// Log every 60 samples to show convergence progress +#define SYNC_LOG_INTERVAL 60 // Check for drift every 300 frames (~5 seconds at 60fps) -// Balance between responsiveness to actual drift and avoiding false positives -// from temporary frame drops or CPU frequency transitions #define SYNC_DRIFT_CHECK_INTERVAL 300 // Tolerance for mode selection (1% mismatch) -// Based on RetroArch research (Arntzen, 2012): -// - Audio pitch changes ≤0.5% are inaudible to most listeners -// - Beyond ~1% mismatch, frame pacing or audio-clock should be used -// Using 1% as a conservative threshold for mode switching #define SYNC_MODE_TOLERANCE 0.01 -// Exponential moving average smoothing factor -// α=0.01 gives ~100-sample half-life: filters frame-drop spikes while -// still tracking genuine Hz drift within ~2 seconds. 
Lower would be more -// stable but slower to detect drift; higher would be noisier. -#define SYNC_EMA_ALPHA 0.01 - // Outlier rejection bounds (50-120 Hz) #define SYNC_MIN_HZ 50.0 #define SYNC_MAX_HZ 120.0 void SyncManager_init(SyncManager* manager, double game_fps, double display_hz) { + memset(manager, 0, sizeof(SyncManager)); + // Start in AUDIO_CLOCK mode (safe default) manager->mode = SYNC_MODE_AUDIO_CLOCK; manager->game_fps = game_fps; manager->display_hz = (display_hz > 0.0) ? display_hz : 60.0; - manager->measured_hz = 0.0; - manager->measurement_samples = 0; - manager->measurement_stable = false; - manager->last_drift_check = 0; - manager->last_vsync_time = 0; LOG_info("Sync: Starting in %s mode (%.2ffps @ %.1fHz reported)", SyncManager_getModeName(manager->mode), manager->game_fps, manager->display_hz); + LOG_info("Sync: Measuring vsync timing..."); } void SyncManager_recordVsync(SyncManager* manager) { @@ -58,41 +56,105 @@ void SyncManager_recordVsync(SyncManager* manager) { return; } - // Calculate interval and Hz - double interval = (double)(now - manager->last_vsync_time) / 1000000.0; + // Calculate frame interval + uint64_t interval = now - manager->last_vsync_time; + manager->last_vsync_time = now; - // Protect against division by zero (identical timestamps) - if (interval <= 0.0) { - manager->last_vsync_time = now; + // Reject zero intervals (duplicate timestamps) + if (interval == 0) { return; } - double hz = 1.0 / interval; - - // Reject outliers (frame drops, fast presents) + // Reject outliers based on Hz (frame drops, fast presents) + double hz = 1000000.0 / (double)interval; if (hz < SYNC_MIN_HZ || hz > SYNC_MAX_HZ) { - manager->last_vsync_time = now; return; } - // Update measured Hz using exponential moving average - if (manager->measured_hz == 0.0) { - manager->measured_hz = hz; // First valid sample - } else { - manager->measured_hz = manager->measured_hz * (1.0 - SYNC_EMA_ALPHA) + hz * SYNC_EMA_ALPHA; + // Store interval in circular 
buffer + manager->frame_intervals[manager->write_index] = interval; + manager->write_index = (manager->write_index + 1) % SYNC_SAMPLE_BUFFER_SIZE; + manager->sample_count++; + + // Skip measurement logic if already stable + if (manager->measurement_stable) { + // Monitor for drift in vsync mode + if (manager->mode == SYNC_MODE_VSYNC) { + manager->last_drift_check++; + + if (manager->last_drift_check >= SYNC_DRIFT_CHECK_INTERVAL) { + manager->last_drift_check = 0; + + // Recalculate current Hz from buffer + int samples = (manager->sample_count < SYNC_SAMPLE_BUFFER_SIZE) + ? manager->sample_count + : SYNC_SAMPLE_BUFFER_SIZE; + uint64_t sum = 0; + for (int i = 0; i < samples; i++) { + sum += manager->frame_intervals[i]; + } + double mean = (double)sum / samples; + double current_hz = 1000000.0 / mean; + + // Check if drifted beyond tolerance + double mismatch = fabs(current_hz - manager->game_fps) / manager->game_fps; + if (mismatch >= SYNC_MODE_TOLERANCE) { + LOG_info("Sync: Drift detected! %.3fHz now differs by %.2f%% from %.2ffps", + current_hz, mismatch * 100.0, manager->game_fps); + LOG_info("Sync: Switching to %s mode (fallback for unstable display)", + SyncManager_getModeName(SYNC_MODE_AUDIO_CLOCK)); + manager->mode = SYNC_MODE_AUDIO_CLOCK; + } + } + } + return; + } + + // Check for convergence (need minimum samples first) + if (manager->sample_count < SYNC_MIN_SAMPLES) { + return; } - manager->measurement_samples++; - manager->last_vsync_time = now; + // Calculate statistics from circular buffer + int samples = (manager->sample_count < SYNC_SAMPLE_BUFFER_SIZE) ? 
manager->sample_count + : SYNC_SAMPLE_BUFFER_SIZE; - // Check if measurement just became stable - if (!manager->measurement_stable && manager->measurement_samples >= SYNC_WARMUP_SAMPLES) { + // Calculate mean + uint64_t sum = 0; + for (int i = 0; i < samples; i++) { + sum += manager->frame_intervals[i]; + } + double mean = (double)sum / samples; + + // Calculate standard deviation + double variance_sum = 0.0; + for (int i = 0; i < samples; i++) { + double diff = (double)manager->frame_intervals[i] - mean; + variance_sum += diff * diff; + } + double stddev = sqrt(variance_sum / (samples - 1)); + + // Calculate confidence (relative stddev) + double confidence = stddev / mean; + double measured_hz = 1000000.0 / mean; + + // Progress logging (DEBUG only) + if (manager->sample_count % SYNC_LOG_INTERVAL == 0) { + LOG_debug("Sync: %d samples, mean=%.3fHz, confidence=%.3f%% (%s)", manager->sample_count, + measured_hz, confidence * 100.0, + confidence < SYNC_STABILITY_THRESHOLD ? "STABLE" : "measuring..."); + } + + // Check for stability + if (confidence < SYNC_STABILITY_THRESHOLD) { + // Measurement converged! 
manager->measurement_stable = true; + manager->measured_hz = measured_hz; + manager->measurement_confidence = confidence; - LOG_info( - "Sync: Measurement stable after %d samples: %.3fHz (reported: %.1fHz, diff: %.2f%%)", - manager->measurement_samples, manager->measured_hz, manager->display_hz, - fabs(manager->measured_hz - manager->display_hz) / manager->display_hz * 100.0); + LOG_info("Sync: Measurement stable after %d samples: %.3fHz ± %.2f%%", + manager->sample_count, manager->measured_hz, + manager->measurement_confidence * 100.0); // Try switching to vsync mode if compatible double mismatch = fabs(manager->measured_hz - manager->game_fps) / manager->game_fps; @@ -106,25 +168,20 @@ void SyncManager_recordVsync(SyncManager* manager) { SyncManager_getModeName(manager->mode), manager->measured_hz, mismatch * 100.0, manager->game_fps); } + + return; } - // Monitor for drift in vsync mode (check every 5 seconds) - if (manager->measurement_stable && manager->mode == SYNC_MODE_VSYNC) { - manager->last_drift_check++; - - if (manager->last_drift_check >= SYNC_DRIFT_CHECK_INTERVAL) { - manager->last_drift_check = 0; - - // Check if measured Hz has drifted beyond tolerance - double mismatch = fabs(manager->measured_hz - manager->game_fps) / manager->game_fps; - if (mismatch >= SYNC_MODE_TOLERANCE) { - LOG_info("Sync: Drift detected! 
%.3fHz now differs by %.2f%% from %.2ffps", - manager->measured_hz, mismatch * 100.0, manager->game_fps); - LOG_info("Sync: Switching to %s mode (fallback for unstable display)", - SyncManager_getModeName(SYNC_MODE_AUDIO_CLOCK)); - manager->mode = SYNC_MODE_AUDIO_CLOCK; - } - } + // Timeout: give up if not stable after max samples + if (manager->sample_count >= SYNC_MAX_SAMPLES) { + manager->measurement_stable = true; // Stop trying + manager->measured_hz = measured_hz; + manager->measurement_confidence = confidence; + + LOG_info( + "Sync: Measurement unstable after %d samples (confidence %.2f%% > 1%%), staying in %s " + "mode", + manager->sample_count, confidence * 100.0, SyncManager_getModeName(manager->mode)); } } @@ -151,9 +208,10 @@ const char* SyncManager_getModeName(SyncMode mode) { } bool SyncManager_shouldUseRateControl(const SyncManager* manager) { - // Only use rate control in vsync mode - // Audio clock mode uses blocking writes for timing - return manager->mode == SYNC_MODE_VSYNC; + // Both modes use rate control (±0.8%) as buffer health mechanism + // This handles timing variations when true blocking can't provide pacing + (void)manager; + return true; } bool SyncManager_shouldBlockAudio(const SyncManager* manager) { diff --git a/workspace/all/player/sync_manager.h b/workspace/all/player/sync_manager.h index 862bb2a8..f826639c 100644 --- a/workspace/all/player/sync_manager.h +++ b/workspace/all/player/sync_manager.h @@ -63,6 +63,10 @@ typedef enum { SYNC_MODE_VSYNC } SyncMode; +// Vsync measurement circular buffer size +// 512 samples (~8 seconds at 60fps) provides good rolling window +#define SYNC_SAMPLE_BUFFER_SIZE 512 + /** * Sync manager state. 
*/ @@ -70,9 +74,16 @@ typedef struct { SyncMode mode; // Current sync mode double game_fps; // Game target fps (e.g., 60.0, 59.94) double display_hz; // Reported display Hz from SDL - double measured_hz; // Actual measured Hz from vsync timing - int measurement_samples; // Number of vsync measurements collected - bool measurement_stable; // True after enough samples collected + + // Vsync timing measurement (circular buffer) + uint64_t frame_intervals[SYNC_SAMPLE_BUFFER_SIZE]; // Frame time deltas in microseconds + int sample_count; // Total samples collected (may exceed buffer size) + int write_index; // Next write position in circular buffer + + bool measurement_stable; // True when stddev converged + double measured_hz; // Calculated from buffer mean (valid when stable) + double measurement_confidence; // Stddev/mean ratio (lower = better) + uint32_t last_drift_check; // Frames since last drift check (resets at interval) uint64_t last_vsync_time; // Microsecond timestamp of last vsync } SyncManager; @@ -95,8 +106,14 @@ void SyncManager_init(SyncManager* manager, double game_fps, double display_hz); * Call this immediately after GFX_present() returns. * Measures actual display refresh rate and switches modes when appropriate. * + * Measurement approach: + * - Continuously collects frame intervals in circular buffer + * - Calculates mean and stddev from buffer samples + * - Measurement stable when: samples >= 60 AND stddev/mean < 1% + * - Gives up after 1800 samples (~30s) if never converges + * * Mode transitions: - * - AUDIO_CLOCK → VSYNC: After 120 samples if mismatch < 1% + * - AUDIO_CLOCK → VSYNC: When measurement stable and mismatch < 1% * - VSYNC → AUDIO_CLOCK: If drift > 1% detected * * @param manager Manager state to update @@ -135,11 +152,12 @@ const char* SyncManager_getModeName(SyncMode mode); /** * Check if audio rate control should be active. 
* - * AUDIO_CLOCK: No rate control (blocking writes handle timing) - * VSYNC: Yes (light rate control for ±0.5% adjustment) + * Both modes use rate control (±0.8%) as a buffer health mechanism. + * This handles timing variations when true blocking can't provide pacing + * (e.g., platform can't disable vsync, loop runs at display Hz). * * @param manager Manager state - * @return true if audio rate control should run + * @return true if audio rate control should run (always true) */ bool SyncManager_shouldUseRateControl(const SyncManager* manager); From 20212876857f3e7a6716abdf34b946590e9fa203 Mon Sep 17 00:00:00 2001 From: Nick Chapman Date: Wed, 7 Jan 2026 21:12:41 -0800 Subject: [PATCH 11/11] Minor clean up. --- workspace/all/common/api.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/workspace/all/common/api.c b/workspace/all/common/api.c index 06fddce4..5c568bac 100644 --- a/workspace/all/common/api.c +++ b/workspace/all/common/api.c @@ -3438,7 +3438,11 @@ int PWR_setCPUFrequency_sysfs(int freq_khz) { static int compare_cluster_by_max_khz(const void* a, const void* b) { const CPUCluster* ca = (const CPUCluster*)a; const CPUCluster* cb = (const CPUCluster*)b; - return ca->max_khz - cb->max_khz; + if (ca->max_khz < cb->max_khz) + return -1; + if (ca->max_khz > cb->max_khz) + return 1; + return 0; } /** @@ -3538,10 +3542,13 @@ static int parse_related_cpus(const char* path, int* cpu_mask, int* cpu_count) { ptr++; } - // Add CPUs to mask + // Add CPUs to mask (check for duplicates) for (int cpu = start; cpu <= end && cpu < 32; cpu++) { - *cpu_mask |= (1 << cpu); - (*cpu_count)++; + int bit = 1 << cpu; + if (!(*cpu_mask & bit)) { + *cpu_mask |= bit; + (*cpu_count)++; + } } // Skip comma if present