-
Notifications
You must be signed in to change notification settings - Fork 32
[Algs] add buf to checksum utility (for debug) #1773
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
5d07c43
de797a6
b161128
58b4695
ff5a902
e0cd91d
6bc2845
ee3632a
f815d5f
cdca66a
d811e5f
775d26a
3944ef1
1d53302
9011b2a
9dc904d
bd98637
9534a22
dcd76b0
2ebb27a
8f7087b
36b2516
5e2d837
8d69537
0f0d14c
c4217cd
e698051
9079339
05bf4f7
b6228ea
e0a117d
6ec6ec7
2b28ecd
2ab4df4
1bed2de
35941e4
5687d48
06eee3e
98b1e99
d2ddf42
9774603
f376a07
91848f5
afe16b0
5ce8eae
2634fc2
098ef5e
40347bb
bc83f81
e78096e
e8977bf
38b4785
90486c3
46632aa
7f0079e
46ed9b7
ece0151
0ba522b
f1b4ecb
28a757e
cbd3e14
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| // -------------------------------------------------------// | ||
| // | ||
| // SHAMROCK code for hydrodynamics | ||
| // Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me> | ||
| // SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1 | ||
| // Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information | ||
| // | ||
| // -------------------------------------------------------// | ||
|
|
||
| #pragma once | ||
|
|
||
| /** | ||
| * @file buf_checksum.hpp | ||
| * @author Timothée David--Cléris (tim.shamrock@proton.me) | ||
| * @brief Debug utility computing a checksum (shambase::fnv1a_hash) of the content of a sham::DeviceBuffer | ||
| * | ||
| */ | ||
|
|
||
| #include "shambase/checksum.hpp" | ||
| #include "shamalgs/primitives/flatten.hpp" | ||
| #include "shambackends/DeviceBuffer.hpp" | ||
|
|
||
| namespace shamalgs { | ||
|
|
||
| template<class T> /* T: element type stored in the device buffer */ | ||
| inline u64 buf_checksum(const sham::DeviceBuffer<T> &buf) { /* debug helper: returns a u64 hash of the content of buf */ | ||
| auto flattened_buf = primitives::flatten_buffer(buf); /* flattened version of buf obtained from primitives::flatten_buffer */ | ||
|
|
||
| using Tscal = typename shambase::VectorProperties<T>::component_type; /* component type of T as reported by VectorProperties */ | ||
| std::vector<Tscal> data = flattened_buf.copy_to_stdvec(); /* copy the flattened content into a std::vector so its bytes can be hashed */ | ||
| return shambase::fnv1a_hash( /* hash is computed over the raw bytes of the copied data */ | ||
| reinterpret_cast<const char *>(data.data()), data.size() * sizeof(Tscal)); /* arguments: byte pointer and length in bytes */ | ||
| } | ||
|
|
||
| } // namespace shamalgs |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,13 +18,21 @@ | |
| #include "shambase/aliases_int.hpp" | ||
| #include "shambase/assert.hpp" | ||
| #include "shambase/memory.hpp" | ||
| #include "shambase/string.hpp" | ||
| #include "shamalgs/buf_checksum.hpp" | ||
| #include "shambackends/DeviceBuffer.hpp" | ||
| #include "shamcmdopt/env.hpp" | ||
| #include "shamcomm/worldInfo.hpp" | ||
| #include "shammath/sphkernels.hpp" | ||
| #include "shammodels/sph/modules/NeighbourCache.hpp" | ||
| #include "shamsys/legacy/log.hpp" | ||
| #include "shamtree/TreeTraversal.hpp" | ||
| #include "shamtree/kernels/geometry_utils.hpp" | ||
| #include "shamunits/Constants.hpp" | ||
| #include <fmt/base.h> | ||
| #include <fstream> | ||
|
|
||
| std::string checksum_prefix = shambase::get_check_ref(shamcmdopt::getenv_str("CHECKSUM_PREFIX")); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Initializing a global variable using |
||
|
|
||
| template<class Tvec, class Tmorton, template<class> class SPHKernel> | ||
| void shammodels::sph::modules::NeighbourCache<Tvec, Tmorton, SPHKernel>::start_neighbors_cache() { | ||
|
|
@@ -262,18 +270,101 @@ void shammodels::sph::modules::NeighbourCache<Tvec, Tmorton, SPHKernel>:: | |
|
|
||
| Tscal h_tolerance = solver_config.htol_up_coarse_cycle; | ||
|
|
||
| NamedStackEntry stack_loc1ddddd{"wait queue"}; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The stack entry names like References
|
||
|
|
||
| shamsys::instance::get_compute_queue().wait_and_throw(); | ||
|
|
||
| NamedStackEntry stack_loc1{"init cache"}; | ||
|
|
||
| // start by counting number of leaf neighbours | ||
|
|
||
| sham::DeviceBuffer<u32> neigh_count_leaf( | ||
| leaf_cnt, shamsys::instance::get_compute_scheduler_ptr()); | ||
|
|
||
| shamsys::instance::get_compute_queue().wait_and_throw(); | ||
|
|
||
| shamlog_debug_sycl_ln("Cache", "generate cache for Nleaf=", leaf_cnt); | ||
|
|
||
| std::string checksum_file_path | ||
| = checksum_prefix + "/" + fmt::format("patch_{}_debug.txt", patch_id); | ||
|
|
||
| { | ||
| std::ofstream patch_file(checksum_file_path, std::ios::app); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| patch_file << fmt::format( | ||
| "patch {} buf_xyz hash={}\n", patch_id, shamalgs::buf_checksum(buf_xyz)); | ||
| patch_file << fmt::format( | ||
| "patch {} buf_hpart hash={}\n", patch_id, shamalgs::buf_checksum(buf_hpart)); | ||
| patch_file << fmt::format( | ||
| "patch {} tree_field_rint hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(tree_field_rint)); | ||
| // patch_file << fmt::format( | ||
| // "patch {} neigh_count_leaf hash={}\n", | ||
| // patch_id, | ||
| // shamalgs::buf_checksum(neigh_count_leaf)); | ||
| patch_file << fmt::format( | ||
| "patch {} leaf_it.aabb_min hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(leaf_it.aabb_min)); | ||
| patch_file << fmt::format( | ||
| "patch {} leaf_it.aabb_max hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(leaf_it.aabb_max)); | ||
| patch_file << fmt::format( | ||
| "patch {} leaf_it.tree_traverser.buf_lchild_id hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(leaf_it.tree_traverser.buf_lchild_id)); | ||
| patch_file << fmt::format( | ||
| "patch {} leaf_it.tree_traverser.buf_rchild_id hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(leaf_it.tree_traverser.buf_rchild_id)); | ||
| patch_file << fmt::format( | ||
| "patch {} leaf_it.tree_traverser.buf_lchild_flag hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(leaf_it.tree_traverser.buf_lchild_flag)); | ||
| patch_file << fmt::format( | ||
| "patch {} leaf_it.tree_traverser.buf_rchild_flag hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(leaf_it.tree_traverser.buf_rchild_flag)); | ||
| patch_file << fmt::format( | ||
| "patch {} leaf_it.tree_traverser.offset_leaf = {}\n", | ||
| patch_id, | ||
| leaf_it.tree_traverser.offset_leaf); | ||
|
|
||
| // other tree fields | ||
| auto &tmp1 = tree.structure.buf_endrange; | ||
| auto &tmp2 = tree.reduced_morton_set.buf_reduc_index_map; | ||
| auto &tmp3 = tree.reduced_morton_set.reduced_morton_codes; | ||
| auto &tmp4 = tree.reduced_morton_set.morton_codes_set.sorted_morton_codes; | ||
| auto &tmp5 = tree.reduced_morton_set.morton_codes_set.map_morton_id_to_obj_id; | ||
| patch_file << fmt::format( | ||
| "patch {} tree.structure.buf_endrange hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(tmp1)); | ||
| patch_file << fmt::format( | ||
| "patch {} tree.reduced_morton_set.buf_reduc_index_map hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(tmp2)); | ||
| patch_file << fmt::format( | ||
| "patch {} tree.reduced_morton_set.reduced_morton_codes hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(tmp3)); | ||
| patch_file << fmt::format( | ||
| "patch {} tree.reduced_morton_set.morton_codes_set.sorted_morton_codes hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(tmp4)); | ||
| patch_file << fmt::format( | ||
| "patch {} tree.reduced_morton_set.morton_codes_set.map_morton_id_to_obj_id " | ||
| "hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(tmp5)); | ||
| } | ||
|
|
||
| // replay the kernel like a madman | ||
| for (u32 i = 0; i < 1000; i++) { | ||
|
|
||
| if (shamcomm::world_rank() == 0 && i % 100 == 0) { | ||
| logger::raw_ln(shambase::format("replay the kernel {}/1000", i)); | ||
| } | ||
|
|
||
| sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue(); | ||
| sham::EventList depends_list; | ||
|
|
||
|
|
@@ -324,6 +415,22 @@ void shammodels::sph::modules::NeighbourCache<Tvec, Tmorton, SPHKernel>:: | |
| tree_field_rint.complete_event_state(e); | ||
| neigh_count_leaf.complete_event_state(e); | ||
| leaf_it.complete_event_state(e); | ||
|
|
||
| NamedStackEntry stack_loc1ccccc{"wait queue"}; | ||
|
|
||
| shamsys::instance::get_compute_queue().wait_and_throw(); | ||
| } | ||
|
|
||
| NamedStackEntry stack_loc1ccccc{"wait queue"}; | ||
|
|
||
| shamsys::instance::get_compute_queue().wait_and_throw(); | ||
|
|
||
| { | ||
| std::ofstream patch_file(checksum_file_path, std::ios::app); | ||
| patch_file << fmt::format( | ||
| "patch {} neigh_count_leaf hash={}\n", | ||
| patch_id, | ||
| shamalgs::buf_checksum(neigh_count_leaf)); | ||
| } | ||
|
|
||
| //{ | ||
|
|
@@ -342,9 +449,15 @@ void shammodels::sph::modules::NeighbourCache<Tvec, Tmorton, SPHKernel>:: | |
| // } | ||
| //} | ||
|
|
||
| NamedStackEntry stack_loc1bbbb{"prepare cache"}; | ||
|
|
||
| tree::ObjectCache pleaf_cache | ||
| = tree::prepare_object_cache(std::move(neigh_count_leaf), leaf_cnt); | ||
|
|
||
| NamedStackEntry stack_loc1aaaa{"wait queue"}; | ||
|
|
||
| shamsys::instance::get_compute_queue().wait_and_throw(); | ||
|
|
||
| // fill ids of leaf neighbours | ||
|
|
||
| NamedStackEntry stack_loc2{"fill cache"}; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -145,14 +145,17 @@ struct TestExclScanUSM { | |
| void check() { | ||
| if constexpr (std::is_same<u32, T>::value) { | ||
|
|
||
| u32 len_test = 1e5; | ||
| u32 len_test = 32e6; | ||
|
|
||
| std::vector<u32> data = shamalgs::primitives::mock_vector<u32>(0x111, len_test, 0, 10); | ||
| std::vector<u32> data = shamalgs::primitives::mock_vector<u32>( | ||
| 0x111 + shambase::details::get_wtime() * 100000000, len_test, 0, 60); | ||
|
|
||
| std::vector<u32> data_buf(data); | ||
|
|
||
| std::exclusive_scan(data.begin(), data.end(), data.begin(), 0); | ||
|
|
||
| std::cout << "total = " << data[len_test - 1] + data_buf[len_test - 1] << std::endl; | ||
|
|
||
| sham::DeviceBuffer<u32> buf{ | ||
| data_buf.size(), shamsys::instance::get_compute_scheduler_ptr()}; | ||
| buf.copy_from_stdvec(data_buf); | ||
|
|
@@ -482,7 +485,10 @@ TestStart( | |
| TestExclScanUSM<u32> test( | ||
| (TestExclScanUSM<u32>::vFunctionCall) | ||
| shamalgs::numeric::details::exclusive_sum_atomic_decoupled_v5_usm<u32, 512>); | ||
| test.check(); | ||
|
|
||
| for (u32 i = 0; i < 1000; i++) { | ||
| test.check(); | ||
| } | ||
|
Comment on lines
+489
to
+491
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Running the exclusive scan test 1000 times with 32M elements each will significantly increase the duration of the unit test suite. This might be excessive for standard CI runs. Additionally, ensure the scan implementation adheres to the optimization rule regarding accumulator types for non-negative sequences. References
|
||
| } | ||
| #endif | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `continue` statement here will cause the benchmark loop to be skipped entirely. This appears to be leftover debug code that should be removed to allow the benchmark to run.