Skip to content

Commit 324c580

Browse files
committed
Fix gprof profiling to work with both PBP and PRX executables
The profiling library was storing addresses that didn't match the ELF symbol table, causing psp-gprof to produce empty output.

Changes:
- Store ELF-compatible addresses in gmon.out instead of offset-based ones
- Add PRX vs PBP detection using page alignment heuristic:
  - PRX: loaded at page-aligned addresses, linked at 0
  - PBP: loaded at specific link address (not page-aligned)
- Calculate relocation offset for PRX to convert runtime addresses back to link-time addresses that match ELF symbols
- Update __mcount() and timer_handler() to use link-time addresses

The fix ensures psp-gprof can correctly match profiling data with ELF symbols for both standard executables (PBP) and relocatable modules (PRX).
1 parent c0622af commit 324c580

1 file changed

Lines changed: 59 additions & 22 deletions

File tree

src/prof/prof.c

Lines changed: 59 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,17 @@ static int initialized = 0;
8181
extern int _ftext;
8282
extern int _etext;
8383

84+
/// _start is the entry point defined in both crt0.c and crt0_prx.c
85+
extern int _start;
86+
87+
/// module_start is only defined in PRX startup code (crt0_prx.c) as an alias for _start
88+
/// Using weak reference allows us to detect PRX vs PBP at runtime
89+
/// We also verify module_start == _start to handle the case where a PBP defines its own module_start
90+
extern int module_start __attribute__((weak));
91+
92+
/// relocation offset: runtime_address - link_address (for PRX)
93+
static unsigned int reloc_offset;
94+
8495
/* forward declarations */
8596
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
8697
void __gprof_cleanup(void);
@@ -104,20 +115,44 @@ static void initialize()
104115
memset(&gp, '\0', sizeof(gp));
105116
gp.state = GMON_PROF_ON;
106117

107-
/* For PRX modules, the code is relocated at load time.
108-
&_ftext and &_etext give us runtime (relocated) addresses.
109-
Link-time addresses start at 0 for PSP executables.
110-
111-
Runtime: &_ftext = relocated_base, &_etext = relocated_base + text_size
112-
Link-time: _ftext = 0, _etext = text_size
113-
114-
So: lowpc_link = 0
115-
highpc_link = &_etext - &_ftext (which equals text_size)
118+
/* Detect PRX vs PBP using the module_start symbol.
119+
In crt0_prx.c, module_start is defined as an alias for _start,
120+
so they share the same address. We check both conditions:
121+
1. module_start exists (not NULL)
122+
2. module_start and _start are at the same address
123+
This handles edge cases where a PBP defines its own module_start function.
124+
125+
For PRX (relocatable modules):
126+
- Linked at address 0 (or small offset like 0x20 for C++)
127+
- Relocated at load time to a PAGE-ALIGNED address (e.g., 0x08804000)
128+
- reloc_offset = page-aligned load base
129+
- link_addr = runtime_addr - reloc_offset
130+
131+
For PBP (standard executables):
132+
- Linked at address 0x08804000 + offset (typically _ftext = 0x0880403c)
133+
- Loaded at same address (no relocation)
134+
- reloc_offset = 0
135+
- link_addr = runtime_addr
116136
*/
117137
gp.lowpc = (unsigned int)&_ftext;
118138
gp.highpc = (unsigned int)&_etext;
119-
gp.lowpc_link = 0;
120-
gp.highpc_link = (unsigned int)&_etext - (unsigned int)&_ftext;
139+
140+
unsigned int runtime_base = (unsigned int)&_ftext & 0x0FFFFFFF;
141+
142+
if (&module_start != NULL && (void *)&module_start == (void *)&_start) {
143+
/* PRX: code was relocated from near-zero to runtime address.
144+
The load base is page-aligned, so we can compute it by masking
145+
off the page offset. The link-time _ftext is the page offset. */
146+
unsigned int page_offset = runtime_base & 0xFFF;
147+
reloc_offset = runtime_base - page_offset; /* Page-aligned load base */
148+
gp.lowpc_link = page_offset; /* Link-time _ftext */
149+
gp.highpc_link = (gp.highpc - gp.lowpc) + page_offset;
150+
} else {
151+
/* PBP: no relocation, addresses match ELF */
152+
reloc_offset = 0;
153+
gp.lowpc_link = runtime_base;
154+
gp.highpc_link = (unsigned int)&_etext & 0x0FFFFFFF;
155+
}
121156
gp.textsize = gp.highpc - gp.lowpc;
122157
gp.hashfraction = HISTFRACTION;
123158

@@ -278,16 +313,17 @@ void __mcount(unsigned int frompc, unsigned int selfpc)
278313
return;
279314
}
280315

281-
/* Mask upper bits and convert to link-time addresses.
282-
Link-time addresses = runtime addresses - gp.lowpc (since lowpc_link = 0) */
283-
frompc = (frompc & 0x0FFFFFFF) - gp.lowpc;
284-
selfpc = (selfpc & 0x0FFFFFFF) - gp.lowpc;
316+
/* Mask upper bits to normalize cached/uncached addresses,
317+
then subtract relocation offset to get link-time addresses. */
318+
frompc = (frompc & 0x0FFFFFFF) - reloc_offset;
319+
selfpc = (selfpc & 0x0FFFFFFF) - reloc_offset;
285320

286-
/* Check if within text section (using link-time range) */
287-
if (frompc <= gp.highpc_link)
321+
/* Check if within text section (using link-time addresses) */
322+
if (frompc >= gp.lowpc_link && frompc < gp.highpc_link)
288323
{
289324
gp.pc = selfpc;
290-
e = frompc / gp.hashfraction;
325+
/* Arc index is based on offset within text section */
326+
e = (frompc - gp.lowpc_link) / gp.hashfraction;
291327
arc = gp.arcs + e;
292328
arc->frompc = frompc;
293329
arc->selfpc = selfpc;
@@ -300,14 +336,15 @@ void __mcount(unsigned int frompc, unsigned int selfpc)
300336
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
301337
static SceUInt timer_handler(SceUID uid, SceKernelSysClock *requested, SceKernelSysClock *actual, void *common)
302338
{
303-
unsigned int frompc = gp.pc; /* Already in link-time address */
339+
unsigned int pc = gp.pc; /* Already converted to link-time address by __mcount */
304340

305341
if (gp.state == GMON_PROF_ON)
306342
{
307-
/* Check if within text section (using link-time range) */
308-
if (frompc <= gp.highpc_link)
343+
/* Check if within text section (using link-time addresses) */
344+
if (pc >= gp.lowpc_link && pc < gp.highpc_link)
309345
{
310-
int e = frompc / gp.hashfraction;
346+
/* Sample index is based on offset within text section */
347+
int e = (pc - gp.lowpc_link) / gp.hashfraction;
311348
gp.samples[e]++;
312349
}
313350
}

0 commit comments

Comments
 (0)