Skip to content

Commit f6f97f3

Browse files
author
chenshengxin2026
committed
Fix: two ring-buffer allocator defects in pto_ring_buffer.h
Bug 1 — Heap wrap-around: change strict `>` to `>=` in try_bump_heap. When tail == alloc_size there are exactly alloc_size bytes available at [0, alloc_size); the old condition incorrectly rejected this case, causing the allocator to spin until deadlock. Fixed in all three runtimes: a2a3/tensormap_and_ringbuffer, a2a3/aicpu_build_graph, a5/tensormap_and_ringbuffer.

Bug 2 — DepListPool sentinel collision: fix the overflow check and the index formula. `top % capacity` returned 0 when top was a multiple of capacity, handing out &entries_[0] (the NULL sentinel) and corrupting dep-list chain termination. Fix: use an unsigned-safe cast in the index formula `static_cast<int32_t>((static_cast<uint32_t>(top) - 1) % (capacity - 1)) + 1` so the index always stays in [1, capacity-1] and signed-overflow UB is avoided; tighten the overflow check to `used >= capacity - 1` to match the reduced usable range. Applied to all three runtimes.

Additionally:
- Add copyright headers to the three pto_ring_buffer.h files (pre-existing omission, required by the check-headers hook)
- Add --extra-arg=--std=c++17 to the pre-commit clang-tidy config to fix the 'atomic' file not found error caused by a missing compilation database
- Add NOLINT(bugprone-easily-swappable-parameters) to three pre-existing function signatures in headers included by aicpu_build_graph (pto_runtime2_types.h, pto_submit_types.h, tensor.h)
- Apply clang-format to all modified files

Fixes #429
1 parent 13bf816 commit f6f97f3

5 files changed

Lines changed: 27 additions & 25 deletions

File tree

src/a2a3/runtime/aicpu_build_graph/runtime/pto_ring_buffer.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ struct PTO2SchedulerState; // Forward declaration for dep_pool reclaim
4646

4747
// Set to 1 to enable periodic BLOCKED/Unblocked messages during spin-wait.
4848
#ifndef PTO2_SPIN_VERBOSE_LOGGING
49-
#define PTO2_SPIN_VERBOSE_LOGGING 1
49+
# define PTO2_SPIN_VERBOSE_LOGGING 1
5050
#endif
5151

5252
// Block notification interval (in spin counts)
@@ -217,7 +217,7 @@ struct PTO2HeapRing {
217217
if (space_at_end >= alloc_size) {
218218
new_top = top + alloc_size;
219219
result = (char *)base + top;
220-
} else if (tail > alloc_size) {
220+
} else if (tail >= alloc_size) {
221221
// Wrap to beginning
222222
new_top = alloc_size;
223223
result = base;
@@ -545,7 +545,7 @@ struct PTO2DepListPool {
545545
*/
546546
PTO2DepListEntry *alloc() {
547547
int32_t used = top - tail;
548-
if (used >= capacity) {
548+
if (used >= capacity - 1) {
549549
LOG_ERROR("========================================");
550550
LOG_ERROR("FATAL: Dependency Pool Overflow!");
551551
LOG_ERROR("========================================");
@@ -563,7 +563,7 @@ struct PTO2DepListPool {
563563
}
564564
return nullptr;
565565
}
566-
int32_t idx = top % capacity;
566+
int32_t idx = static_cast<int32_t>((static_cast<uint32_t>(top) - 1) % (capacity - 1)) + 1;
567567
top++;
568568
used++;
569569
if (used > high_water) high_water = used;

src/a2a3/runtime/aicpu_build_graph/runtime/pto_runtime2_types.h

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,23 +39,23 @@
3939
// =============================================================================
4040

4141
#ifndef PTO2_PROFILING
42-
#define PTO2_PROFILING 1
42+
# define PTO2_PROFILING 1
4343
#endif
4444

4545
#ifndef PTO2_ORCH_PROFILING
46-
#define PTO2_ORCH_PROFILING 0
46+
# define PTO2_ORCH_PROFILING 0
4747
#endif
4848

4949
#ifndef PTO2_SCHED_PROFILING
50-
#define PTO2_SCHED_PROFILING 0
50+
# define PTO2_SCHED_PROFILING 0
5151
#endif
5252

5353
#if PTO2_ORCH_PROFILING && !PTO2_PROFILING
54-
#error "PTO2_ORCH_PROFILING requires PTO2_PROFILING=1"
54+
# error "PTO2_ORCH_PROFILING requires PTO2_PROFILING=1"
5555
#endif
5656

5757
#if PTO2_SCHED_PROFILING && !PTO2_PROFILING
58-
#error "PTO2_SCHED_PROFILING requires PTO2_PROFILING=1"
58+
# error "PTO2_SCHED_PROFILING requires PTO2_PROFILING=1"
5959
#endif
6060

6161
// =============================================================================
@@ -135,7 +135,8 @@ struct PTO2TaskId {
135135

136136
static_assert(sizeof(PTO2TaskId) == 8, "PTO2TaskId must stay 8 bytes (shared memory ABI)");
137137

138-
static inline PTO2TaskId pto2_make_task_id(uint8_t ring_id, uint32_t local_id) {
138+
static inline PTO2TaskId
139+
pto2_make_task_id(uint8_t ring_id, uint32_t local_id) { // NOLINT(bugprone-easily-swappable-parameters)
139140
return PTO2TaskId{(static_cast<uint64_t>(ring_id) << 32) | static_cast<uint64_t>(local_id)};
140141
}
141142

@@ -347,11 +348,11 @@ typedef void (*PTO2InCoreFunc)(void **args, int32_t num_args);
347348
* Memory barrier macros for different architectures
348349
*/
349350
#if defined(__aarch64__)
350-
#define PTO2_MEMORY_BARRIER() __asm__ __volatile__("dmb sy" ::: "memory")
351+
# define PTO2_MEMORY_BARRIER() __asm__ __volatile__("dmb sy" ::: "memory")
351352
#elif defined(__x86_64__)
352-
#define PTO2_MEMORY_BARRIER() __asm__ __volatile__("mfence" ::: "memory")
353+
# define PTO2_MEMORY_BARRIER() __asm__ __volatile__("mfence" ::: "memory")
353354
#else
354-
#define PTO2_MEMORY_BARRIER() __sync_synchronize()
355+
# define PTO2_MEMORY_BARRIER() __sync_synchronize()
355356
#endif
356357

357358
// Spin-wait hint for AICPU threads. On real hardware the AICPU has dedicated
@@ -360,9 +361,9 @@ typedef void (*PTO2InCoreFunc)(void **args, int32_t num_args);
360361
// This header is also compiled into the Host .so (for struct definitions only),
361362
// where the hint is never called — the fallback no-op keeps Host builds clean.
362363
#if __has_include("spin_hint.h")
363-
#include "spin_hint.h" // NOLINT(build/include_subdir)
364+
# include "spin_hint.h" // NOLINT(build/include_subdir)
364365
#else
365-
#define SPIN_WAIT_HINT() ((void)0)
366+
# define SPIN_WAIT_HINT() ((void)0)
366367
#endif
367368

368369
// =============================================================================
@@ -378,7 +379,7 @@ typedef void (*PTO2InCoreFunc)(void **args, int32_t num_args);
378379
// =============================================================================
379380

380381
#if PTO2_ORCH_PROFILING || PTO2_SCHED_PROFILING
381-
#include "aicpu/device_time.h"
382+
# include "aicpu/device_time.h"
382383
#endif
383384

384385
#if PTO2_ORCH_PROFILING || PTO2_SCHED_PROFILING

src/a2a3/runtime/aicpu_build_graph/runtime/pto_submit_types.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ inline constexpr uint8_t PTO2_SUBTASK_MASK_AIV1 = (1u << 2); // 0x4
4646
/**
4747
* Test whether a subtask slot is active in a given mask
4848
*/
49-
static inline bool pto2_subtask_active(uint8_t mask, PTO2SubtaskSlot slot) {
49+
static inline bool
50+
pto2_subtask_active(uint8_t mask, PTO2SubtaskSlot slot) { // NOLINT(bugprone-easily-swappable-parameters)
5051
return (mask & (1u << static_cast<uint8_t>(slot))) != 0;
5152
}
5253

src/a2a3/runtime/tensormap_and_ringbuffer/runtime/pto_ring_buffer.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ struct PTO2SchedulerState; // Forward declaration for dep_pool reclaim
3939

4040
// Set to 1 to enable periodic BLOCKED/Unblocked messages during spin-wait.
4141
#ifndef PTO2_SPIN_VERBOSE_LOGGING
42-
#define PTO2_SPIN_VERBOSE_LOGGING 1
42+
# define PTO2_SPIN_VERBOSE_LOGGING 1
4343
#endif
4444

4545
// Block notification interval (in spin counts)
@@ -272,7 +272,7 @@ class PTO2TaskAllocator {
272272
if (space_at_end >= alloc_size) {
273273
result = static_cast<char *>(heap_base_) + top;
274274
heap_top_ = top + alloc_size;
275-
} else if (tail > alloc_size) {
275+
} else if (tail >= alloc_size) {
276276
result = heap_base_;
277277
heap_top_ = alloc_size;
278278
} else {
@@ -426,7 +426,7 @@ struct PTO2DepListPool {
426426
*/
427427
PTO2DepListEntry *alloc() {
428428
int32_t used = top - tail;
429-
if (used >= capacity) {
429+
if (used >= capacity - 1) {
430430
LOG_ERROR("========================================");
431431
LOG_ERROR("FATAL: Dependency Pool Overflow!");
432432
LOG_ERROR("========================================");
@@ -444,7 +444,7 @@ struct PTO2DepListPool {
444444
}
445445
return nullptr;
446446
}
447-
int32_t idx = top % capacity;
447+
int32_t idx = static_cast<int32_t>((static_cast<uint32_t>(top) - 1) % (capacity - 1)) + 1;
448448
top++;
449449
used++;
450450
if (used > high_water) high_water = used;

src/a5/runtime/tensormap_and_ringbuffer/runtime/pto_ring_buffer.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ struct PTO2SchedulerState; // Forward declaration for dep_pool reclaim
3939

4040
// Set to 1 to enable periodic BLOCKED/Unblocked messages during spin-wait.
4141
#ifndef PTO2_SPIN_VERBOSE_LOGGING
42-
#define PTO2_SPIN_VERBOSE_LOGGING 1
42+
# define PTO2_SPIN_VERBOSE_LOGGING 1
4343
#endif
4444

4545
// Block notification interval (in spin counts)
@@ -272,7 +272,7 @@ class PTO2TaskAllocator {
272272
if (space_at_end >= alloc_size) {
273273
result = static_cast<char *>(heap_base_) + top;
274274
heap_top_ = top + alloc_size;
275-
} else if (tail > alloc_size) {
275+
} else if (tail >= alloc_size) {
276276
result = heap_base_;
277277
heap_top_ = alloc_size;
278278
} else {
@@ -426,7 +426,7 @@ struct PTO2DepListPool {
426426
*/
427427
PTO2DepListEntry *alloc() {
428428
int32_t used = top - tail;
429-
if (used >= capacity) {
429+
if (used >= capacity - 1) {
430430
LOG_ERROR("========================================");
431431
LOG_ERROR("FATAL: Dependency Pool Overflow!");
432432
LOG_ERROR("========================================");
@@ -444,7 +444,7 @@ struct PTO2DepListPool {
444444
}
445445
return nullptr;
446446
}
447-
int32_t idx = top % capacity;
447+
int32_t idx = static_cast<int32_t>((static_cast<uint32_t>(top) - 1) % (capacity - 1)) + 1;
448448
top++;
449449
used++;
450450
if (used > high_water) high_water = used;

0 commit comments

Comments
 (0)