Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 59 additions & 22 deletions src/prof/prof.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ static int initialized = 0;
extern int _ftext;
extern int _etext;

/// _start is the entry point defined in both crt0.c and crt0_prx.c
extern int _start;

/// module_start is only defined in PRX startup code (crt0_prx.c) as an alias for _start
/// Using weak reference allows us to detect PRX vs PBP at runtime
/// We also verify module_start == _start to handle the case where a PBP defines its own module_start
extern int module_start __attribute__((weak));

/// relocation offset: runtime_address - link_address (for PRX)
static unsigned int reloc_offset;

/* forward declarations */
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __gprof_cleanup(void);
Expand All @@ -104,20 +115,44 @@ static void initialize()
memset(&gp, '\0', sizeof(gp));
gp.state = GMON_PROF_ON;

/* For PRX modules, the code is relocated at load time.
&_ftext and &_etext give us runtime (relocated) addresses.
Link-time addresses start at 0 for PSP executables.

Runtime: &_ftext = relocated_base, &_etext = relocated_base + text_size
Link-time: _ftext = 0, _etext = text_size

So: lowpc_link = 0
highpc_link = &_etext - &_ftext (which equals text_size)
/* Detect PRX vs PBP using the module_start symbol.
In crt0_prx.c, module_start is defined as an alias for _start,
so they share the same address. We check both conditions:
1. module_start exists (not NULL)
2. module_start and _start are at the same address
This handles edge cases where a PBP defines its own module_start function.

For PRX (relocatable modules):
- Linked at address 0 (or small offset like 0x20 for C++)
- Relocated at load time to a PAGE-ALIGNED address (e.g., 0x08804000)
- reloc_offset = page-aligned load base
- link_addr = runtime_addr - reloc_offset

For PBP (standard executables):
- Linked at address 0x08804000 + offset (typically _ftext = 0x0880403c)
- Loaded at same address (no relocation)
- reloc_offset = 0
- link_addr = runtime_addr
*/
gp.lowpc = (unsigned int)&_ftext;
gp.highpc = (unsigned int)&_etext;
gp.lowpc_link = 0;
gp.highpc_link = (unsigned int)&_etext - (unsigned int)&_ftext;

unsigned int runtime_base = (unsigned int)&_ftext & 0x0FFFFFFF;

if (&module_start != NULL && (void *)&module_start == (void *)&_start) {
/* PRX: code was relocated from near-zero to runtime address.
The load base is page-aligned, so we can compute it by masking
off the page offset. The link-time _ftext is the page offset. */
unsigned int page_offset = runtime_base & 0xFFF;
reloc_offset = runtime_base - page_offset; /* Page-aligned load base */
gp.lowpc_link = page_offset; /* Link-time _ftext */
gp.highpc_link = (gp.highpc - gp.lowpc) + page_offset;
} else {
/* PBP: no relocation, addresses match ELF */
reloc_offset = 0;
gp.lowpc_link = runtime_base;
gp.highpc_link = (unsigned int)&_etext & 0x0FFFFFFF;
}
gp.textsize = gp.highpc - gp.lowpc;
gp.hashfraction = HISTFRACTION;

Expand Down Expand Up @@ -278,16 +313,17 @@ void __mcount(unsigned int frompc, unsigned int selfpc)
return;
}

/* Mask upper bits and convert to link-time addresses.
Link-time addresses = runtime addresses - gp.lowpc (since lowpc_link = 0) */
frompc = (frompc & 0x0FFFFFFF) - gp.lowpc;
selfpc = (selfpc & 0x0FFFFFFF) - gp.lowpc;
/* Mask upper bits to normalize cached/uncached addresses,
then subtract relocation offset to get link-time addresses. */
frompc = (frompc & 0x0FFFFFFF) - reloc_offset;
selfpc = (selfpc & 0x0FFFFFFF) - reloc_offset;

/* Check if within text section (using link-time range) */
if (frompc <= gp.highpc_link)
/* Check if within text section (using link-time addresses) */
if (frompc >= gp.lowpc_link && frompc < gp.highpc_link)
{
gp.pc = selfpc;
e = frompc / gp.hashfraction;
/* Arc index is based on offset within text section */
e = (frompc - gp.lowpc_link) / gp.hashfraction;
arc = gp.arcs + e;
arc->frompc = frompc;
arc->selfpc = selfpc;
Expand All @@ -300,14 +336,15 @@ void __mcount(unsigned int frompc, unsigned int selfpc)
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
static SceUInt timer_handler(SceUID uid, SceKernelSysClock *requested, SceKernelSysClock *actual, void *common)
{
unsigned int frompc = gp.pc; /* Already in link-time address */
unsigned int pc = gp.pc; /* Already converted to link-time address by __mcount */

if (gp.state == GMON_PROF_ON)
{
/* Check if within text section (using link-time range) */
if (frompc <= gp.highpc_link)
/* Check if within text section (using link-time addresses) */
if (pc >= gp.lowpc_link && pc < gp.highpc_link)
{
int e = frompc / gp.hashfraction;
/* Sample index is based on offset within text section */
int e = (pc - gp.lowpc_link) / gp.hashfraction;
gp.samples[e]++;
}
}
Expand Down
5 changes: 4 additions & 1 deletion src/samples/gprof/basic/Makefile.sample
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
TARGET = gprofbasic
OBJS = main.o

INCDIR =
INCDIR =
CFLAGS = -O2 -Wall -pg -g
CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti
ASFLAGS = $(CFLAGS)

LIBDIR =
LDFLAGS = -pg -g

BUILD_PRX = 1
PSP_FW_VERSION = 600

EXTRA_TARGETS = EBOOT.PBP
PSP_EBOOT_TITLE = GProf Basic Example

Expand Down
20 changes: 20 additions & 0 deletions src/samples/gprof/basic_cpp/Makefile.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Build description for the gprof C++ sample.
# The variables below are consumed by the PSPSDK build.mak included at the end.

# Base name of the build output and the object files it is made from.
TARGET = gprofbasic_cpp
OBJS = main.o

INCDIR =
# -pg turns on gprof instrumentation and -g adds debug information.
CFLAGS = -O2 -Wall -pg -g
CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti
ASFLAGS = $(CFLAGS)

LIBDIR =
# The profiling flags have to be passed at link time as well.
LDFLAGS = -pg -g
# C++ programs additionally need the C++ standard library.
LIBS = -lstdc++

# NOTE(review): presumably selects a relocatable PRX build and the target
# firmware version -- confirm against build.mak.
BUILD_PRX = 1
PSP_FW_VERSION = 600

EXTRA_TARGETS = EBOOT.PBP
PSP_EBOOT_TITLE = GProf Basic C++ Example

# Ask psp-config where the PSPSDK lives and pull in its shared build rules.
PSPSDK=$(shell psp-config --pspsdk-path)
include $(PSPSDK)/lib/build.mak
72 changes: 72 additions & 0 deletions src/samples/gprof/basic_cpp/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
Sample program to show how to use the `gprof` feature with C++.

The requirements are simple: adding the `-g -pg` flags to `CFLAGS` and `LDFLAGS` is enough to make things work out of the box. For C++ programs, you also need to link with `-lstdc++`.

This example demonstrates:
- Object-oriented programming with a `PrimeCalculator` class
- C++ standard library features (`std::vector`, `std::cout`)
- Profiling C++ code with gprof

First, run your program; once it exits, it automatically generates a `gmon.out` file in the current working directory (CWD).

In order to inspect the content of the generated file you need to use the `psp-gprof` binary.

For instance, use the following syntax:
```
psp-gprof -b {binary.elf} gmon.out
```

For example:
```
psp-gprof -b gprofbasic_cpp.elf gmon.out
```


It will show something like:
```
Flat profile:

Each sample counts as 0.001 seconds.
% cumulative self self total
time seconds seconds calls ms/call ms/call name
95.98 0.17 0.17 104728 0.00 0.00 PrimeCalculator::isPrime(int) const
4.02 0.17 0.01 1 7.00 7.00 dummyFunction()
0.00 0.17 0.00 1 0.00 174.00 main
0.00 0.17 0.00 1 0.00 167.00 PrimeCalculator::sumOfSquareRoots(int) const


Call graph


granularity: each sample hit covers 2 byte(s) for 0.57% of 0.17 seconds

index % time self children called name
0.00 0.17 1/1 _main [2]
[1] 100.0 0.00 0.17 1 main [1]
0.00 0.17 1/1 PrimeCalculator::sumOfSquareRoots(int) const [4]
0.01 0.00 1/1 dummyFunction() [5]
-----------------------------------------------
<spontaneous>
[2] 100.0 0.00 0.17 _main [2]
0.00 0.17 1/1 main [1]
-----------------------------------------------
0.17 0.00 104728/104728 PrimeCalculator::sumOfSquareRoots(int) const [4]
[3] 96.0 0.17 0.00 104728 PrimeCalculator::isPrime(int) const [3]
-----------------------------------------------
0.00 0.17 1/1 main [1]
[4] 96.0 0.00 0.17 1 PrimeCalculator::sumOfSquareRoots(int) const [4]
0.17 0.00 104728/104728 PrimeCalculator::isPrime(int) const [3]
-----------------------------------------------
0.01 0.00 1/1 main [1]
[5] 4.0 0.01 0.00 1 dummyFunction() [5]
-----------------------------------------------


Index by function name

[5] dummyFunction() [1] main
[3] PrimeCalculator::isPrime(int) const
[4] PrimeCalculator::sumOfSquareRoots(int) const
```

Cheers
107 changes: 107 additions & 0 deletions src/samples/gprof/basic_cpp/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* PSP Software Development Kit - https://github.com/pspdev
* -----------------------------------------------------------------------
* Licensed under the BSD license, see LICENSE in PSPSDK root for details.
*
* main.cpp - A basic C++ example for checking the GProf profiler.
*
* Copyright (c) 2024 Francisco Javier Trujillo Mata - fjtrujy@gmail.com
*
*/
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <iostream>
#include <vector>

#include <pspmoduleinfo.h>
#include <pspthreadman.h>

PSP_MODULE_INFO("GProf Basic C++ Example", 0, 1, 1);
PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER | THREAD_ATTR_VFPU);

/**
 * Stateless helper that performs prime-number computations.
 *
 * It serves as the CPU-bound workload of this profiling sample. All methods
 * are const and keep no data between calls.
 */
class PrimeCalculator
{
public:
    /**
     * Check whether a number is prime.
     *
     * Uses trial division: after handling 2 and 3 explicitly, only candidates
     * of the form 6k - 1 and 6k + 1 are tested.
     *
     * @param num Value to test; anything <= 1 is reported as not prime.
     * @return true if num is prime, false otherwise.
     */
    bool isPrime(int num) const
    {
        if (num <= 1)
            return false;
        if (num <= 3)
            return true;
        if (num % 2 == 0 || num % 3 == 0)
            return false;

        // `i <= num / i` is the same bound as `i * i <= num` for positive
        // ints, but it cannot overflow when num is close to INT_MAX.
        for (int i = 5; i <= num / i; i += 6)
        {
            if (num % i == 0 || num % (i + 2) == 0)
                return false;
        }
        return true;
    }

    /**
     * Compute the sum of the square roots of the first N prime numbers.
     *
     * @param N Number of primes to accumulate; N <= 0 yields 0.0.
     * @return The accumulated sum as a double.
     */
    double sumOfSquareRoots(int N) const
    {
        int count = 0;
        double sum = 0.0;

        for (int num = 2; count < N; num++)
        {
            if (isPrime(num))
            {
                sum += std::sqrt(static_cast<double>(num));
                count++;
            }
        }
        return sum;
    }

    /**
     * Collect the first N prime numbers in ascending order.
     *
     * @param N Number of primes wanted; N <= 0 yields an empty vector.
     * @return Vector holding exactly N primes (or none).
     */
    std::vector<int> getFirstNPrimes(int N) const
    {
        std::vector<int> primes;

        // A negative N would turn into a huge unsigned target below and make
        // the loop effectively endless, so reject it up front.
        if (N <= 0)
            return primes;

        const std::size_t wanted = static_cast<std::size_t>(N);
        primes.reserve(wanted);

        for (int num = 2; primes.size() < wanted; num++)
        {
            if (isPrime(num))
            {
                primes.push_back(num);
            }
        }
        return primes;
    }
};

/**
 * Write 10000 dots to standard output, one at a time, then end the line.
 *
 * Gives the profiler a second, output-bound function to measure.
 */
void dummyFunction()
{
    int remaining = 10000;

    while (remaining-- > 0)
    {
        std::cout << ".";
    }
    std::cout << std::endl;
}

/**
 * Entry point of the sample.
 *
 * Prints a greeting, runs the output-bound dummyFunction(), then sums the
 * square roots of the first 10000 primes and prints the result.
 *
 * @return Always 0.
 */
int main(int argc, char *argv[])
{
    std::cout << "Hello from C++!" << std::endl;

    dummyFunction();

    // Large enough that the prime search dominates the profile.
    const int primeCount = 10000;
    const double total = PrimeCalculator().sumOfSquareRoots(primeCount);

    std::cout << "Sum of square roots of the first " << primeCount
              << " prime numbers is " << total << std::endl;

    std::cout << "Goodbye from C++!" << std::endl;

    return 0;
}