diff --git a/engine/main.cpp b/engine/main.cpp index 2846c78..374e77d 100644 --- a/engine/main.cpp +++ b/engine/main.cpp @@ -310,7 +310,7 @@ int main(int argc, char *argv[]) { pool.clear_search_vars(); pool.search(pos, rp, 1e9, 12, 1e18, 0); pool.wait_finished(); - tot_nodes += nodes[0]; + tot_nodes += nodes[0].get(); } uint64_t end = clock(); std::cout << tot_nodes << " nodes " << int(tot_nodes / ((double)(end - start) / CLOCKS_PER_SEC)) << " nps" << std::endl; diff --git a/engine/search.cpp b/engine/search.cpp index 6fd7899..85a9683 100644 --- a/engine/search.cpp +++ b/engine/search.cpp @@ -15,7 +15,7 @@ std::stringstream last_line; uint16_t num_threads = 1; std::atomic nodecnt[64][64] = {{}}; -alignas(64) std::atomic nodes[MAX_THREADS] = {}; +NodeCounter nodes[MAX_THREADS]; std::atomic tbhits = 0; uint64_t perft(Position &pos, int depth) { @@ -192,14 +192,14 @@ bool is_valid_score(Value score) { * - Late move reduction (instead of reducing depth, we reduce the search window) (not a known technique, maybe worth trying?) */ Value quiesce(Position &pos, ThreadInfo &ti, Value alpha, Value beta, int side, int ply, bool pv=false) { - nodes[ti.id].fetch_add(1, std::memory_order_relaxed); + nodes[ti.id]++; if (pv) ti.pvlen[ply] = 0; if (stop_search) return 0; if (ti.is_main) { - auto cur_nodes = nodes[ti.id].load(std::memory_order_relaxed); + auto cur_nodes = nodes[ti.id].get(); if (!(cur_nodes & 4095)) { // The time check is relatively expensive and thus only performed every 4096 nodes auto time = std::chrono::duration_cast(std::chrono::steady_clock::now() - start).count(); @@ -370,12 +370,12 @@ Value negamax(Position &pos, ThreadInfo &ti, int depth, Value alpha = -VALUE_INF ti.seldepth = std::max(ti.seldepth, ply); } - nodes[ti.id].fetch_add(1, std::memory_order_relaxed); + nodes[ti.id]++; if (stop_search) return 0; if (ti.is_main) { - auto cur_nodes = nodes[ti.id].load(std::memory_order_relaxed); + auto cur_nodes = nodes[ti.id].get(); if (!(cur_nodes & 4095)) { // The time check is relatively expensive and thus only performed every 4096 nodes auto time = std::chrono::duration_cast(std::chrono::steady_clock::now() - start).count(); @@ -657,7 +657,7 @@ Value negamax(Position &pos, ThreadInfo &ti, int depth, Value alpha = -VALUE_INF int i = 0; uint64_t prev_nodes = 0; - if (root) prev_nodes = nodes[ti.id].load(std::memory_order_relaxed); + if (root) prev_nodes = nodes[ti.id].get(); ti.line[ply+1].cutoffcnt = 0; @@ -881,7 +881,7 @@ Value negamax(Position &pos, ThreadInfo &ti, int depth, Value alpha = -VALUE_INF ti.line[ply].corr_hist = nullptr; if (root) { - auto cur_nodes = nodes[ti.id].load(std::memory_order_relaxed); + auto cur_nodes = nodes[ti.id].get(); nodecnt[move.src()][move.dst()].fetch_add(cur_nodes - prev_nodes, std::memory_order_relaxed); prev_nodes = cur_nodes; } @@ -1048,7 +1048,7 @@ void iterativedeepening(Position &pos, ThreadInfo &ti, int depth) { uint64_t bm_nodes = nodecnt[best_move.src()][best_move.dst()]; uint64_t tot_nodes = 0; for (int t = 0; t < num_threads; t++) { - tot_nodes += nodes[t].load(std::memory_order_relaxed); // ig this is dangerous but whatever + tot_nodes += nodes[t].get(); } // UCI output from main thread only diff --git a/engine/search.hpp b/engine/search.hpp index 6afc5d6..e1b6010 100644 --- a/engine/search.hpp +++ b/engine/search.hpp @@ -50,7 +50,23 @@ extern bool show_wdl; extern bool do_softnodes; extern bool do_datagen; -extern std::atomic nodes[MAX_THREADS]; +struct alignas(64) NodeCounter { + std::atomic val = 0; + + void operator++(int) { + val.fetch_add(1, std::memory_order_relaxed); + } + + void operator=(uint64_t new_val) { + val.store(new_val, std::memory_order_relaxed); + } + + uint64_t get() const { + return val.load(std::memory_order_relaxed); + } +}; + +extern NodeCounter nodes[MAX_THREADS]; struct ThreadInfo { Position pos;