From c0622aff9fca362e10a7d604e3490184d0cca0e6 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 12 Jan 2026 17:17:37 +0100 Subject: [PATCH 1/2] Generate a new gprof with CPP sample --- src/samples/gprof/basic/Makefile.sample | 5 +- src/samples/gprof/basic_cpp/Makefile.sample | 20 ++++ src/samples/gprof/basic_cpp/README.md | 72 +++++++++++++ src/samples/gprof/basic_cpp/main.cpp | 107 ++++++++++++++++++++ 4 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 src/samples/gprof/basic_cpp/Makefile.sample create mode 100644 src/samples/gprof/basic_cpp/README.md create mode 100644 src/samples/gprof/basic_cpp/main.cpp diff --git a/src/samples/gprof/basic/Makefile.sample b/src/samples/gprof/basic/Makefile.sample index 54e8ff026b..c597da1eb6 100644 --- a/src/samples/gprof/basic/Makefile.sample +++ b/src/samples/gprof/basic/Makefile.sample @@ -1,7 +1,7 @@ TARGET = gprofbasic OBJS = main.o -INCDIR = +INCDIR = CFLAGS = -O2 -Wall -pg -g CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti ASFLAGS = $(CFLAGS) @@ -9,6 +9,9 @@ ASFLAGS = $(CFLAGS) LIBDIR = LDFLAGS = -pg -g +BUILD_PRX = 1 +PSP_FW_VERSION = 600 + EXTRA_TARGETS = EBOOT.PBP PSP_EBOOT_TITLE = GProf Basic Example diff --git a/src/samples/gprof/basic_cpp/Makefile.sample b/src/samples/gprof/basic_cpp/Makefile.sample new file mode 100644 index 0000000000..5628bebd7f --- /dev/null +++ b/src/samples/gprof/basic_cpp/Makefile.sample @@ -0,0 +1,20 @@ +TARGET = gprofbasic_cpp +OBJS = main.o + +INCDIR = +CFLAGS = -O2 -Wall -pg -g +CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti +ASFLAGS = $(CFLAGS) + +LIBDIR = +LDFLAGS = -pg -g +LIBS = -lstdc++ + +BUILD_PRX = 1 +PSP_FW_VERSION = 600 + +EXTRA_TARGETS = EBOOT.PBP +PSP_EBOOT_TITLE = GProf Basic C++ Example + +PSPSDK=$(shell psp-config --pspsdk-path) +include $(PSPSDK)/lib/build.mak diff --git a/src/samples/gprof/basic_cpp/README.md b/src/samples/gprof/basic_cpp/README.md new file mode 100644 index 0000000000..5d97af9b8b --- /dev/null +++ 
b/src/samples/gprof/basic_cpp/README.md @@ -0,0 +1,72 @@ +Sample program to show how to use the `gprof` feature with C++. + +The requirements are simple: adding the `-g -pg` flags to `CFLAGS` and `LDFLAGS` is enough to make things work out of the box. For C++ programs, you also need to link with `-lstdc++`. + +This example demonstrates: +- Object-oriented programming with a `PrimeCalculator` class +- C++ standard library features (`std::vector`, `std::cout`) +- Profiling C++ code with gprof + +First, run your program; once it exits, a `gmon.out` file is automatically generated in the current working directory (CWD). + +To inspect the contents of the generated file, use the `psp-gprof` binary. + +Use the following syntax: +``` +psp-gprof -b {binary.elf} gmon.out +``` + +For example: +``` +psp-gprof -b gprofbasic_cpp.elf gmon.out +``` + + +It will show something like: +``` +Flat profile: + +Each sample counts as 0.001 seconds. + % cumulative self self total + time seconds seconds calls ms/call ms/call name + 95.98 0.17 0.17 104728 0.00 0.00 PrimeCalculator::isPrime(int) const + 4.02 0.17 0.01 1 7.00 7.00 dummyFunction() + 0.00 0.17 0.00 1 0.00 174.00 main + 0.00 0.17 0.00 1 0.00 167.00 PrimeCalculator::sumOfSquareRoots(int) const + + + Call graph + + +granularity: each sample hit covers 2 byte(s) for 0.57% of 0.17 seconds + +index % time self children called name + 0.00 0.17 1/1 _main [2] +[1] 100.0 0.00 0.17 1 main [1] + 0.00 0.17 1/1 PrimeCalculator::sumOfSquareRoots(int) const [4] + 0.01 0.00 1/1 dummyFunction() [5] +----------------------------------------------- + <spontaneous> +[2] 100.0 0.00 0.17 _main [2] + 0.00 0.17 1/1 main [1] +----------------------------------------------- + 0.17 0.00 104728/104728 PrimeCalculator::sumOfSquareRoots(int) const [4] +[3] 96.0 0.17 0.00 104728 PrimeCalculator::isPrime(int) const [3] +----------------------------------------------- + 0.00 0.17 1/1 main [1] +[4] 96.0 0.00 0.17 1 
PrimeCalculator::sumOfSquareRoots(int) const [4] + 0.17 0.00 104728/104728 PrimeCalculator::isPrime(int) const [3] +----------------------------------------------- + 0.01 0.00 1/1 main [1] +[5] 4.0 0.01 0.00 1 dummyFunction() [5] +----------------------------------------------- + + +Index by function name + + [5] dummyFunction() [1] main + [3] PrimeCalculator::isPrime(int) const + [4] PrimeCalculator::sumOfSquareRoots(int) const +``` + +Cheers diff --git a/src/samples/gprof/basic_cpp/main.cpp b/src/samples/gprof/basic_cpp/main.cpp new file mode 100644 index 0000000000..23fb79dcf1 --- /dev/null +++ b/src/samples/gprof/basic_cpp/main.cpp @@ -0,0 +1,107 @@ +/* + * PSP Software Development Kit - https://github.com/pspdev + * ----------------------------------------------------------------------- + * Licensed under the BSD license, see LICENSE in PSPSDK root for details. + * + * main.cpp - A basic C++ example for checking the GProf profiler. + * + * Copyright (c) 2024 Francisco Javier Trujillo Mata - fjtrujy@gmail.com + * + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +PSP_MODULE_INFO("GProf Basic C++ Example", 0, 1, 1); +PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER | THREAD_ATTR_VFPU); + +// Class to handle prime number operations +class PrimeCalculator +{ +public: + // Method to check if a number is prime + bool isPrime(int num) const + { + if (num <= 1) + return false; + if (num <= 3) + return true; + if (num % 2 == 0 || num % 3 == 0) + return false; + for (int i = 5; i * i <= num; i += 6) + { + if (num % i == 0 || num % (i + 2) == 0) + return false; + } + return true; + } + + // Method to compute the sum of square roots of the first N prime numbers + double sumOfSquareRoots(int N) const + { + int count = 0; + int num = 2; + double sum = 0.0; + + while (count < N) + { + if (isPrime(num)) + { + sum += std::sqrt(num); + count++; + } + num++; + } + return sum; + } + + // Method to get the first N prime numbers + std::vector 
getFirstNPrimes(int N) const + { + std::vector primes; + int num = 2; + + while (primes.size() < static_cast(N)) + { + if (isPrime(num)) + { + primes.push_back(num); + } + num++; + } + return primes; + } +}; + +void dummyFunction() +{ + for (int i = 0; i < 10000; i++) + { + std::cout << "."; + } + std::cout << std::endl; +} + +int main(int argc, char *argv[]) +{ + std::cout << "Hello from C++!" << std::endl; + + dummyFunction(); + + PrimeCalculator calculator; + int N = 10000; // Large number of primes to compute + + double sum = calculator.sumOfSquareRoots(N); + std::cout << "Sum of square roots of the first " << N + << " prime numbers is " << sum << std::endl; + + std::cout << "Goodbye from C++!" << std::endl; + + return 0; +} From 324c58030d86d6c1627a4328b45fe3e3ceeb8a28 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Fri, 9 Jan 2026 23:35:17 +0100 Subject: [PATCH 2/2] Fix gprof profiling to work with both PBP and PRX executables The profiling library was storing addresses that didn't match the ELF symbol table, causing psp-gprof to produce empty output. Changes: - Store ELF-compatible addresses in gmon.out instead of offset-based - Add PRX vs PBP detection using page alignment heuristic: - PRX: loaded at page-aligned addresses, linked at 0 - PBP: loaded at specific link address (not page-aligned) - Calculate relocation offset for PRX to convert runtime addresses back to link-time addresses that match ELF symbols - Update __mcount() and timer_handler() to use link-time addresses The fix ensures psp-gprof can correctly match profiling data with ELF symbols for both standard executables (PBP) and relocatable modules (PRX). 
--- src/prof/prof.c | 81 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/src/prof/prof.c b/src/prof/prof.c index 8a42d76428..8e7574b91a 100644 --- a/src/prof/prof.c +++ b/src/prof/prof.c @@ -81,6 +81,17 @@ static int initialized = 0; extern int _ftext; extern int _etext; +/// _start is the entry point defined in both crt0.c and crt0_prx.c +extern int _start; + +/// module_start is only defined in PRX startup code (crt0_prx.c) as an alias for _start +/// Using weak reference allows us to detect PRX vs PBP at runtime +/// We also verify module_start == _start to handle the case where a PBP defines its own module_start +extern int module_start __attribute__((weak)); + +/// relocation offset: runtime_address - link_address (for PRX) +static unsigned int reloc_offset; + /* forward declarations */ __attribute__((__no_instrument_function__, __no_profile_instrument_function__)) void __gprof_cleanup(void); @@ -104,20 +115,44 @@ static void initialize() memset(&gp, '\0', sizeof(gp)); gp.state = GMON_PROF_ON; - /* For PRX modules, the code is relocated at load time. - &_ftext and &_etext give us runtime (relocated) addresses. - Link-time addresses start at 0 for PSP executables. - - Runtime: &_ftext = relocated_base, &_etext = relocated_base + text_size - Link-time: _ftext = 0, _etext = text_size - - So: lowpc_link = 0 - highpc_link = &_etext - &_ftext (which equals text_size) + /* Detect PRX vs PBP using the module_start symbol. + In crt0_prx.c, module_start is defined as an alias for _start, + so they share the same address. We check both conditions: + 1. module_start exists (not NULL) + 2. module_start and _start are at the same address + This handles edge cases where a PBP defines its own module_start function. 
+ + For PRX (relocatable modules): + - Linked at address 0 (or small offset like 0x20 for C++) + - Relocated at load time to a PAGE-ALIGNED address (e.g., 0x08804000) + - reloc_offset = page-aligned load base + - link_addr = runtime_addr - reloc_offset + + For PBP (standard executables): + - Linked at address 0x08804000 + offset (typically _ftext = 0x0880403c) + - Loaded at same address (no relocation) + - reloc_offset = 0 + - link_addr = runtime_addr */ gp.lowpc = (unsigned int)&_ftext; gp.highpc = (unsigned int)&_etext; - gp.lowpc_link = 0; - gp.highpc_link = (unsigned int)&_etext - (unsigned int)&_ftext; + + unsigned int runtime_base = (unsigned int)&_ftext & 0x0FFFFFFF; + + if (&module_start != NULL && (void *)&module_start == (void *)&_start) { + /* PRX: code was relocated from near-zero to runtime address. + The load base is page-aligned, so we can compute it by masking + off the page offset. The link-time _ftext is the page offset. */ + unsigned int page_offset = runtime_base & 0xFFF; + reloc_offset = runtime_base - page_offset; /* Page-aligned load base */ + gp.lowpc_link = page_offset; /* Link-time _ftext */ + gp.highpc_link = (gp.highpc - gp.lowpc) + page_offset; + } else { + /* PBP: no relocation, addresses match ELF */ + reloc_offset = 0; + gp.lowpc_link = runtime_base; + gp.highpc_link = (unsigned int)&_etext & 0x0FFFFFFF; + } gp.textsize = gp.highpc - gp.lowpc; gp.hashfraction = HISTFRACTION; @@ -278,16 +313,17 @@ void __mcount(unsigned int frompc, unsigned int selfpc) return; } - /* Mask upper bits and convert to link-time addresses. - Link-time addresses = runtime addresses - gp.lowpc (since lowpc_link = 0) */ - frompc = (frompc & 0x0FFFFFFF) - gp.lowpc; - selfpc = (selfpc & 0x0FFFFFFF) - gp.lowpc; + /* Mask upper bits to normalize cached/uncached addresses, + then subtract relocation offset to get link-time addresses. 
*/ + frompc = (frompc & 0x0FFFFFFF) - reloc_offset; + selfpc = (selfpc & 0x0FFFFFFF) - reloc_offset; - /* Check if within text section (using link-time range) */ - if (frompc <= gp.highpc_link) + /* Check if within text section (using link-time addresses) */ + if (frompc >= gp.lowpc_link && frompc < gp.highpc_link) { gp.pc = selfpc; - e = frompc / gp.hashfraction; + /* Arc index is based on offset within text section */ + e = (frompc - gp.lowpc_link) / gp.hashfraction; arc = gp.arcs + e; arc->frompc = frompc; arc->selfpc = selfpc; @@ -300,14 +336,15 @@ void __mcount(unsigned int frompc, unsigned int selfpc) __attribute__((__no_instrument_function__, __no_profile_instrument_function__)) static SceUInt timer_handler(SceUID uid, SceKernelSysClock *requested, SceKernelSysClock *actual, void *common) { - unsigned int frompc = gp.pc; /* Already in link-time address */ + unsigned int pc = gp.pc; /* Already converted to link-time address by __mcount */ if (gp.state == GMON_PROF_ON) { - /* Check if within text section (using link-time range) */ - if (frompc <= gp.highpc_link) + /* Check if within text section (using link-time addresses) */ + if (pc >= gp.lowpc_link && pc < gp.highpc_link) { - int e = frompc / gp.hashfraction; + /* Sample index is based on offset within text section */ + int e = (pc - gp.lowpc_link) / gp.hashfraction; gp.samples[e]++; } }