@@ -129,15 +129,18 @@ inline Link CLASS::top(const Link& index) const NOEXCEPT
     if (is_null(raw))
         return {};

-    ////if constexpr (Align)
-    ////{
-    ////    // Reads full padded word.
-    ////    // xcode clang++16 does not support C++20 std::atomic_ref.
-    ////    ////const std::atomic_ref<integer> head(unsafe_byte_cast<integer>(raw));
-    ////    const auto& head = *pointer_cast<std::atomic<integer>>(raw);
-    ////    return head.load(std::memory_order_acquire);
-    ////}
-    ////else
+    if constexpr (Align)
+    {
+        // Reads full padded word.
+        // xcode clang++16 does not support C++20 std::atomic_ref.
+        ////const std::atomic_ref<integer> head(unsafe_byte_cast<integer>(raw));
+        const auto& head = *pointer_cast<std::atomic<integer>>(raw);
+
+        // Acquire is necessary to synchronize with the push release.
+        // Relaxed could miss the pushed next links, so acquire is optimal.
+        return head.load(std::memory_order_acquire);
+    }
+    else
     {
         const auto& head = to_array<size_>(raw);
         mutex_.lock_shared();
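For reference, a minimal self-contained sketch of the acquire side of this pairing. A plain std::atomic<uint32_t> stands in for the padded head word (the real code reinterprets mapped bytes via pointer_cast, and Link/integer are the table's own types); the 0xffffffff sentinel is purely illustrative:

```cpp
#include <atomic>
#include <cstdint>

// Illustrative stand-in for the mapped head word.
std::atomic<uint32_t> head{ 0xffffffff };

// Acquire pairs with the pusher's release: any write sequenced before
// the release publication (such as a record's next link) is visible
// once this load observes the pushed head value.
uint32_t top() noexcept
{
    return head.load(std::memory_order_acquire);
}
```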
@@ -163,15 +166,27 @@ inline bool CLASS::push(const Link& current, bytes& next,
     if (is_null(raw))
         return false;

-    ////if constexpr (Align)
-    ////{
-    ////    // Writes full padded word (0x00 fill).
-    ////    // xcode clang++16 does not support C++20 std::atomic_ref.
-    ////    ////const std::atomic_ref<integer> head(unsafe_byte_cast<integer>(raw));
-    ////    auto& head = *pointer_cast<std::atomic<integer>>(raw);
-    ////    next = Link(head.exchange(current, std::memory_order_acq_rel));
-    ////}
-    ////else
+    if constexpr (Align)
+    {
+        // Writes full padded word (0x00 fill).
+        // xcode clang++16 does not support C++20 std::atomic_ref.
+        ////const std::atomic_ref<integer> head(unsafe_byte_cast<integer>(raw));
+        auto& head = *pointer_cast<std::atomic<integer>>(raw);
+
+        integer top = head.load(std::memory_order_acquire);
+        do
+        {
+            // The compiler could order this write after the CAS store,
+            // which would expose the key to search before its next link
+            // is in place. The release fence ensures that all prior
+            // writes (such as next) complete before the atomic store.
+            next = Link{ top };
+            std::atomic_thread_fence(std::memory_order_release);
+        }
+        while (!head.compare_exchange_weak(top, current,
+            std::memory_order_release, std::memory_order_acquire));
+    }
+    else
     {
         auto& head = to_array<size_>(raw);
         mutex_.lock();
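And the release side, sketched in the same illustrative terms: a Treiber-style publication loop in which a next_of array (an assumption, standing in for the record's mapped next field) is linked before the head CAS makes the record reachable:

```cpp
#include <atomic>
#include <cstdint>

std::atomic<uint32_t> head{ 0xffffffff };  // illustrative null sentinel.
uint32_t next_of[100]{};                   // stand-in for record next fields.

// Assumes current indexes a valid slot in next_of.
void push(uint32_t current) noexcept
{
    uint32_t top = head.load(std::memory_order_acquire);
    do
    {
        // Link before publishing: the release fence (and the release
        // CAS) keep this store ordered ahead of the head update, so a
        // concurrent acquire load never observes an unlinked record.
        next_of[current] = top;
        std::atomic_thread_fence(std::memory_order_release);
    }
    while (!head.compare_exchange_weak(top, current,
        std::memory_order_release, std::memory_order_acquire));
}
```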