-
Notifications
You must be signed in to change notification settings - Fork 45
Expand file tree
/
Copy pathmemory_barrier.h
More file actions
92 lines (83 loc) · 3.75 KB
/
memory_barrier.h
File metadata and controls
92 lines (83 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
/*
* Copyright (c) PyPTO Contributors.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
* -----------------------------------------------------------------------------------------------------------
*/
/**
* @file memory_barrier.h
* @brief Memory barrier definitions for shared memory synchronization
*
* This header provides platform-specific memory barrier macros for
* synchronizing shared memory accesses between Host, AICPU, and AICore.
*
* Memory barriers ensure that:
* - Read barriers (rmb): all reads issued before the barrier complete before any read issued after it
* - Write barriers (wmb): all writes issued before the barrier complete before any write issued after it
*
* These are critical for correct operation of lock-free data structures
* and shared memory protocols across different processing units.
*/
#ifndef SRC_A5_PLATFORM_INCLUDE_COMMON_MEMORY_BARRIER_H_
#define SRC_A5_PLATFORM_INCLUDE_COMMON_MEMORY_BARRIER_H_
// =============================================================================
// Memory Barrier Macros
// =============================================================================
#if defined(__aarch64__)
/**
* Read memory barrier (ARM64)
* Ensures all loads before this point complete before any loads after.
*/
#define rmb() __asm__ __volatile__("dsb ld" ::: "memory")
/**
* Write memory barrier (ARM64)
* Ensures all stores before this point complete before any stores after.
*/
#define wmb() __asm__ __volatile__("dsb st" ::: "memory")
/**
* Store-store barrier (ARM64, inner shareable domain)
* Ensures all stores before this barrier are globally visible before any
* stores after.
*/
#define OUT_OF_ORDER_STORE_BARRIER() __asm__ __volatile__("dmb ishst" ::: "memory")
#elif defined(__x86_64__)
/**
* Compiler barriers (x86_64)
* Prevent compiler reordering but do not emit hardware barriers: the x86_64
* TSO memory model already keeps loads ordered with earlier loads and stores
* ordered with earlier stores, so only the compiler has to be restrained.
*/
#define rmb() __asm__ __volatile__("" ::: "memory")
#define wmb() __asm__ __volatile__("" ::: "memory")
#define OUT_OF_ORDER_STORE_BARRIER() __asm__ __volatile__("" ::: "memory")
#else
/**
* Full barrier fallback (any other architecture)
* The memory model of the target is unknown here and may be weakly ordered,
* in which case a compiler-only barrier would let the hardware reorder the
* accesses. __sync_synchronize() is both a compiler barrier and a full
* hardware barrier, so it is a safe (if conservative) superset of the
* load-load and store-store ordering these macros promise. This matches the
* fallback used by poll_acquire_barrier().
*/
#define rmb() __sync_synchronize()
#define wmb() __sync_synchronize()
#define OUT_OF_ORDER_STORE_BARRIER() __sync_synchronize()
#endif
// =============================================================================
// Polling Acquire Barrier
// =============================================================================
/**
* poll_acquire_barrier() - fence issued once a polling loop has succeeded
*
* Place this right after poll_reg() reports the awaited condition (for
* example, task completion) and before touching any Normal memory whose
* contents are only meaningful once that condition holds.
*
* Expansion per architecture:
*
* - x86_64: compiler barrier only. TSO already gives every load implicit
*   acquire semantics, so no instruction is emitted.
*
* - ARM64: dmb ish (full data memory barrier, inner shareable domain).
*   Orders the Device-memory register read before every later Normal-memory
*   load and store on the completion path. The full form is preferred over
*   the load-only dmb ishld as a safety margin: it runs once per completion
*   rather than once per poll iteration, so the extra cost is negligible,
*   and it keeps the path correct if stores are added to it later.
*
* - Anything else: __sync_synchronize() as a full-barrier fallback.
*/
#ifdef __x86_64__
#define poll_acquire_barrier() __asm__ __volatile__("" ::: "memory")
#else
#ifdef __aarch64__
#define poll_acquire_barrier() __asm__ __volatile__("dmb ish" ::: "memory")
#else
#define poll_acquire_barrier() __sync_synchronize()
#endif
#endif
#endif // SRC_A5_PLATFORM_INCLUDE_COMMON_MEMORY_BARRIER_H_