diff --git a/doc/modules/ROOT/pages/cuda.adoc b/doc/modules/ROOT/pages/cuda.adoc index 2da5f89..9c99b51 100644 --- a/doc/modules/ROOT/pages/cuda.adoc +++ b/doc/modules/ROOT/pages/cuda.adoc @@ -10,7 +10,7 @@ https://www.boost.org/LICENSE_1_0.txt == Description -The types of this library support compilation with NVCC. +All integer types of this library (unsigned `u8`-`u128`, signed `i8`-`i128`, and bounded types) support compilation with NVCC. To get the safety guarantees, there are some small modifications to the way that CUDA code is written. Normally you would have something like this: diff --git a/include/boost/safe_numbers/detail/signed_integer_basis.hpp b/include/boost/safe_numbers/detail/signed_integer_basis.hpp index 736dc4a..9b3b3e2 100644 --- a/include/boost/safe_numbers/detail/signed_integer_basis.hpp +++ b/include/boost/safe_numbers/detail/signed_integer_basis.hpp @@ -43,54 +43,54 @@ class signed_integer_basis constexpr signed_integer_basis() noexcept = default; - explicit constexpr signed_integer_basis(const BasisType val) noexcept : basis_{val} {} + BOOST_SAFE_NUMBERS_HOST_DEVICE explicit constexpr signed_integer_basis(const BasisType val) noexcept : basis_{val} {} template requires std::is_same_v - explicit constexpr signed_integer_basis(T) noexcept + BOOST_SAFE_NUMBERS_HOST_DEVICE explicit constexpr signed_integer_basis(T) noexcept { static_assert(dependent_false, "Construction from bool is not allowed"); } template - [[nodiscard]] explicit constexpr operator OtherBasis() const; + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] explicit constexpr operator OtherBasis() const; - [[nodiscard]] explicit constexpr operator BasisType() const noexcept { return basis_; } + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] explicit constexpr operator BasisType() const noexcept { return basis_; } - [[nodiscard]] friend constexpr auto operator<=>(signed_integer_basis lhs, signed_integer_basis rhs) noexcept + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] friend constexpr auto operator<=>(signed_integer_basis lhs, signed_integer_basis rhs) noexcept -> std::strong_ordering = default; - [[nodiscard]] constexpr auto operator+() const noexcept -> signed_integer_basis; + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto operator+() const noexcept -> signed_integer_basis; - [[nodiscard]] constexpr auto operator-() const -> signed_integer_basis; + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto operator-() const -> signed_integer_basis; template - constexpr auto operator+=(signed_integer_basis rhs) -> signed_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator+=(signed_integer_basis rhs) -> signed_integer_basis&; template - constexpr auto operator-=(signed_integer_basis rhs) -> signed_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator-=(signed_integer_basis rhs) -> signed_integer_basis&; template - constexpr auto operator*=(signed_integer_basis rhs) -> signed_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator*=(signed_integer_basis rhs) -> signed_integer_basis&; template - constexpr auto operator/=(signed_integer_basis rhs) -> signed_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator/=(signed_integer_basis rhs) -> signed_integer_basis&; template - constexpr auto operator%=(signed_integer_basis rhs) -> signed_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator%=(signed_integer_basis rhs) -> signed_integer_basis&; - constexpr auto operator++() -> signed_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator++() -> signed_integer_basis&; - constexpr auto operator++(int) -> signed_integer_basis; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator++(int) -> signed_integer_basis; - constexpr auto operator--() -> signed_integer_basis&; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator--() -> signed_integer_basis&; - constexpr auto operator--(int) -> signed_integer_basis; + BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto operator--(int) -> signed_integer_basis; }; // Helper for diagnostic messages template -constexpr auto signed_type_name() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_type_name() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -117,7 +117,7 @@ constexpr auto signed_type_name() noexcept -> const char* // Device-friendly error message helpers returning const char* string literals template -constexpr auto signed_overflow_add_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_overflow_add_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -142,7 +142,7 @@ constexpr auto signed_overflow_add_msg() noexcept -> const char* } template -constexpr auto signed_underflow_add_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_underflow_add_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -167,7 +167,7 @@ constexpr auto signed_underflow_add_msg() noexcept -> const char* } template -constexpr auto signed_unary_minus_overflow_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_unary_minus_overflow_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -192,7 +192,7 @@ constexpr auto signed_unary_minus_overflow_msg() noexcept -> const char* } template -constexpr auto signed_overflow_conversion_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_overflow_conversion_msg() noexcept -> const char* { if constexpr (std::is_same_v && std::is_same_v) { @@ -241,7 +241,7 @@ constexpr auto signed_overflow_conversion_msg() noexcept -> const char* } template -constexpr auto signed_underflow_conversion_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_underflow_conversion_msg() noexcept -> const char* { if constexpr (std::is_same_v && std::is_same_v) { @@ -291,7 +291,7 @@ constexpr auto signed_underflow_conversion_msg() noexcept -> const char* template template -constexpr signed_integer_basis::operator OtherBasis() const +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr signed_integer_basis::operator OtherBasis() const { if constexpr (sizeof(OtherBasis) < sizeof(BasisType)) { @@ -309,13 +309,13 @@ constexpr signed_integer_basis::operator OtherBasis() const } template -constexpr auto signed_integer_basis::operator+() const noexcept -> signed_integer_basis +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator+() const noexcept -> signed_integer_basis { return signed_integer_basis{basis_}; } template -constexpr auto signed_integer_basis::operator-() const -> signed_integer_basis +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator-() const -> signed_integer_basis { if (basis_ == std::numeric_limits::min()) [[unlikely]] { @@ -357,7 +357,7 @@ using make_unsigned_helper_t = typename make_unsigned_helper::type; // Signed addition overflow only occurs when both operands share the same sign, // so the sign of lhs is sufficient to determine direction. template -constexpr auto classify_signed_overflow(const T lhs) noexcept -> signed_overflow_status +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto classify_signed_overflow(const T lhs) noexcept -> signed_overflow_status { return lhs >= 0 ? signed_overflow_status::overflow : signed_overflow_status::underflow; } @@ -428,7 +428,7 @@ auto signed_intrin_add(const T lhs, const T rhs, T& result) -> signed_overflow_s #endif template -constexpr auto signed_no_intrin_add(const T lhs, const T rhs, T& result) noexcept -> signed_overflow_status +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_no_intrin_add(const T lhs, const T rhs, T& result) noexcept -> signed_overflow_status { using unsigned_t = make_unsigned_helper_t; unsigned_t temp {}; @@ -460,6 +460,7 @@ constexpr auto signed_no_intrin_add(const T lhs, const T rhs, T& result) noexcep template struct signed_add_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept(Policy != overflow_policy::throw_exception) @@ -473,6 +474,7 @@ struct signed_add_helper auto handle_error = [&result](signed_overflow_status status) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) if (std::is_constant_evaluated()) { if (status == signed_overflow_status::overflow) @@ -523,6 +525,7 @@ struct signed_add_helper } } else + #endif { if constexpr (Policy == overflow_policy::throw_exception) { @@ -565,6 +568,8 @@ struct signed_add_helper if constexpr (!std::is_same_v) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (!std::is_constant_evaluated()) { const auto status {impl::signed_intrin_add(lhs_basis, rhs_basis, result)}; @@ -575,6 +580,8 @@ struct signed_add_helper return result_type{result}; } + + #endif } #endif // BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_add_overflow) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X64_INTRIN) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X86_INTRIN) @@ -593,6 +600,7 @@ struct signed_add_helper template struct signed_add_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept -> std::pair, bool> @@ -607,11 +615,15 @@ struct signed_add_helper if constexpr (!std::is_same_v) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (!std::is_constant_evaluated()) { const auto status {impl::signed_intrin_add(lhs_basis, rhs_basis, result)}; return std::make_pair(result_type{result}, status != signed_overflow_status::no_error); } + + #endif } #endif @@ -625,6 +637,7 @@ struct signed_add_helper template struct signed_add_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept -> std::optional> @@ -639,6 +652,8 @@ struct signed_add_helper if constexpr (!std::is_same_v) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (!std::is_constant_evaluated()) { const auto status {impl::signed_intrin_add(lhs_basis, rhs_basis, result)}; @@ -646,6 +661,8 @@ struct signed_add_helper ? std::nullopt : std::make_optional(result_type{result}); } + + #endif } #endif @@ -661,6 +678,7 @@ struct signed_add_helper template struct signed_add_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept { @@ -673,6 +691,7 @@ struct signed_add_helper }; template +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto add_impl(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept(Policy == overflow_policy::saturate || Policy == overflow_policy::overflow_tuple || @@ -685,9 +704,12 @@ template } // namespace impl template +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto operator+(const signed_integer_basis lhs, const signed_integer_basis rhs) -> signed_integer_basis { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (std::is_constant_evaluated()) { BasisType res {}; @@ -742,6 +764,8 @@ template return signed_integer_basis{res}; } + #endif + return impl::signed_add_helper::apply(lhs, rhs); } @@ -751,6 +775,7 @@ template template \ requires (!std::is_same_v) \ +BOOST_SAFE_NUMBERS_HOST_DEVICE \ constexpr auto OP_SYMBOL(const boost::safe_numbers::detail::signed_integer_basis, \ const boost::safe_numbers::detail::signed_integer_basis) \ { \ @@ -887,6 +912,7 @@ BOOST_SAFE_NUMBERS_DEFINE_MIXED_SIGNED_INTEGER_OP("addition", operator+) template template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator+=(const signed_integer_basis rhs) -> signed_integer_basis& { @@ -901,7 +927,7 @@ constexpr auto signed_integer_basis::operator+=(const signed_integer_ namespace impl { template -constexpr auto signed_overflow_sub_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_overflow_sub_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -926,7 +952,7 @@ constexpr auto signed_overflow_sub_msg() noexcept -> const char* } template -constexpr auto signed_underflow_sub_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_underflow_sub_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -1015,7 +1041,7 @@ auto signed_intrin_sub(const T lhs, const T rhs, T& result) -> signed_overflow_s #endif template -constexpr auto signed_no_intrin_sub(const T lhs, const T rhs, T& result) noexcept -> signed_overflow_status +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_no_intrin_sub(const T lhs, const T rhs, T& result) noexcept -> signed_overflow_status { using unsigned_t = make_unsigned_helper_t; unsigned_t temp {}; @@ -1047,6 +1073,7 @@ constexpr auto signed_no_intrin_sub(const T lhs, const T rhs, T& result) noexcep template struct signed_sub_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept(Policy != overflow_policy::throw_exception) @@ -1060,6 +1087,7 @@ struct signed_sub_helper auto handle_error = [&result](signed_overflow_status status) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) if (std::is_constant_evaluated()) { if (status == signed_overflow_status::overflow) @@ -1110,6 +1138,7 @@ struct signed_sub_helper } } else + #endif { if constexpr (Policy == overflow_policy::throw_exception) { @@ -1152,6 +1181,8 @@ struct signed_sub_helper if constexpr (!std::is_same_v) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (!std::is_constant_evaluated()) { const auto status {impl::signed_intrin_sub(lhs_basis, rhs_basis, result)}; @@ -1162,6 +1193,8 @@ struct signed_sub_helper return result_type{result}; } + + #endif } #endif // BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_sub_overflow) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X64_INTRIN) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X86_INTRIN) @@ -1180,6 +1213,7 @@ struct signed_sub_helper template struct signed_sub_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept -> std::pair, bool> @@ -1194,11 +1228,15 @@ struct signed_sub_helper if constexpr (!std::is_same_v) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (!std::is_constant_evaluated()) { const auto status {impl::signed_intrin_sub(lhs_basis, rhs_basis, result)}; return std::make_pair(result_type{result}, status != signed_overflow_status::no_error); } + + #endif } #endif @@ -1212,6 +1250,7 @@ struct signed_sub_helper template struct signed_sub_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept -> std::optional> @@ -1226,6 +1265,8 @@ struct signed_sub_helper if constexpr (!std::is_same_v) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (!std::is_constant_evaluated()) { const auto status {impl::signed_intrin_sub(lhs_basis, rhs_basis, result)}; @@ -1233,6 +1274,8 @@ struct signed_sub_helper ? std::nullopt : std::make_optional(result_type{result}); } + + #endif } #endif @@ -1245,6 +1288,7 @@ struct signed_sub_helper }; template +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto sub_impl(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept(Policy == overflow_policy::saturate || Policy == overflow_policy::overflow_tuple || @@ -1256,9 +1300,12 @@ template } // namespace impl template +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto operator-(const signed_integer_basis lhs, const signed_integer_basis rhs) -> signed_integer_basis { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (std::is_constant_evaluated()) { BasisType res {}; @@ -1313,6 +1360,8 @@ template return signed_integer_basis{res}; } + #endif + return impl::signed_sub_helper::apply(lhs, rhs); } @@ -1320,6 +1369,7 @@ BOOST_SAFE_NUMBERS_DEFINE_MIXED_SIGNED_INTEGER_OP("subtraction", operator-) template template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator-=(const signed_integer_basis rhs) -> signed_integer_basis& { @@ -1334,7 +1384,7 @@ constexpr auto signed_integer_basis::operator-=(const signed_integer_ namespace impl { template -constexpr auto signed_overflow_mul_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_overflow_mul_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -1359,7 +1409,7 @@ constexpr auto signed_overflow_mul_msg() noexcept -> const char* } template -constexpr auto signed_underflow_mul_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_underflow_mul_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -1488,7 +1538,7 @@ auto signed_intrin_mul(const T lhs, const T rhs, T& result) -> signed_overflow_s #endif template -constexpr auto signed_no_intrin_mul(const T lhs, const T rhs, T& result) noexcept -> signed_overflow_status +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_no_intrin_mul(const T lhs, const T rhs, T& result) noexcept -> signed_overflow_status { if constexpr (std::is_same_v) { @@ -1579,6 +1629,7 @@ constexpr auto signed_no_intrin_mul(const T lhs, const T rhs, T& result) noexcep template struct signed_mul_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept(Policy != overflow_policy::throw_exception) @@ -1592,6 +1643,7 @@ struct signed_mul_helper auto handle_error = [&result](signed_overflow_status status) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) if (std::is_constant_evaluated()) { if (status == signed_overflow_status::overflow) @@ -1642,6 +1694,7 @@ struct signed_mul_helper } } else + #endif { if constexpr (Policy == overflow_policy::throw_exception) { @@ -1684,6 +1737,8 @@ struct signed_mul_helper if constexpr (!std::is_same_v) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (!std::is_constant_evaluated()) { const auto status {impl::signed_intrin_mul(lhs_basis, rhs_basis, result)}; @@ -1694,6 +1749,8 @@ struct signed_mul_helper return result_type{result}; } + + #endif } #endif // BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_mul_overflow) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X64_INTRIN) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X86_INTRIN) @@ -1712,6 +1769,7 @@ struct signed_mul_helper template struct signed_mul_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept -> std::pair, bool> @@ -1726,11 +1784,15 @@ struct signed_mul_helper if constexpr (!std::is_same_v) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (!std::is_constant_evaluated()) { const auto status {impl::signed_intrin_mul(lhs_basis, rhs_basis, result)}; return std::make_pair(result_type{result}, status != signed_overflow_status::no_error); } + + #endif } #endif @@ -1744,6 +1806,7 @@ struct signed_mul_helper template struct signed_mul_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept -> std::optional> @@ -1758,6 +1821,8 @@ struct signed_mul_helper if constexpr (!std::is_same_v) { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (!std::is_constant_evaluated()) { const auto status {impl::signed_intrin_mul(lhs_basis, rhs_basis, result)}; @@ -1765,6 +1830,8 @@ struct signed_mul_helper ? std::nullopt : std::make_optional(result_type{result}); } + + #endif } #endif @@ -1780,6 +1847,7 @@ struct signed_mul_helper template struct signed_mul_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept { @@ -1792,6 +1860,7 @@ struct signed_mul_helper }; template +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto mul_impl(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept(Policy == overflow_policy::saturate || Policy == overflow_policy::overflow_tuple || @@ -1804,9 +1873,12 @@ template } // namespace impl template +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto operator*(const signed_integer_basis lhs, const signed_integer_basis rhs) -> signed_integer_basis { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) + if (std::is_constant_evaluated()) { BasisType res {}; @@ -1861,6 +1933,8 @@ template return signed_integer_basis{res}; } + #endif + return impl::signed_mul_helper::apply(lhs, rhs); } @@ -1868,6 +1942,7 @@ BOOST_SAFE_NUMBERS_DEFINE_MIXED_SIGNED_INTEGER_OP("multiplication", operator*) template template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator*=(const signed_integer_basis rhs) -> signed_integer_basis& { @@ -1882,7 +1957,7 @@ constexpr auto signed_integer_basis::operator*=(const signed_integer_ namespace impl { template -constexpr auto signed_div_by_zero_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_div_by_zero_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -1907,7 +1982,7 @@ constexpr auto signed_div_by_zero_msg() noexcept -> const char* } template -constexpr auto signed_overflow_div_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_overflow_div_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -1934,6 +2009,7 @@ constexpr auto signed_overflow_div_msg() noexcept -> const char* template struct signed_div_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept(Policy == overflow_policy::strict) @@ -1952,6 +2028,7 @@ struct signed_div_helper } else { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) if (std::is_constant_evaluated()) { if constexpr (std::is_same_v) @@ -1976,6 +2053,7 @@ struct signed_div_helper } } else + #endif { BOOST_SAFE_NUMBERS_THROW_EXCEPTION(std::domain_error, signed_div_by_zero_msg()); } @@ -2002,6 +2080,7 @@ struct signed_div_helper } else { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) if (std::is_constant_evaluated()) { if constexpr (std::is_same_v) @@ -2026,6 +2105,7 @@ struct signed_div_helper } } else + #endif { BOOST_SAFE_NUMBERS_THROW_EXCEPTION(std::overflow_error, signed_overflow_div_msg()); } @@ -2040,6 +2120,7 @@ struct signed_div_helper template struct signed_div_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) -> std::pair, bool> @@ -2074,6 +2155,7 @@ struct signed_div_helper template struct signed_div_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept -> std::optional> @@ -2104,6 +2186,7 @@ struct signed_div_helper }; template +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto div_impl(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept(Policy == overflow_policy::checked || Policy == overflow_policy::strict) @@ -2114,6 +2197,7 @@ template } // namespace impl template +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto operator/(const signed_integer_basis lhs, const signed_integer_basis rhs) -> signed_integer_basis { @@ -2124,6 +2208,7 @@ BOOST_SAFE_NUMBERS_DEFINE_MIXED_SIGNED_INTEGER_OP("division", operator/) template template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator/=(const signed_integer_basis rhs) -> signed_integer_basis& { @@ -2138,7 +2223,7 @@ constexpr auto signed_integer_basis::operator/=(const signed_integer_ namespace impl { template -constexpr auto signed_mod_by_zero_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_mod_by_zero_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -2163,7 +2248,7 @@ constexpr auto signed_mod_by_zero_msg() noexcept -> const char* } template -constexpr auto signed_overflow_mod_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_overflow_mod_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -2190,6 +2275,7 @@ constexpr auto signed_overflow_mod_msg() noexcept -> const char* template struct signed_mod_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept(Policy == overflow_policy::strict) @@ -2208,6 +2294,7 @@ struct signed_mod_helper } else { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) if (std::is_constant_evaluated()) { if constexpr (std::is_same_v) @@ -2232,6 +2319,7 @@ struct signed_mod_helper } } else + #endif { BOOST_SAFE_NUMBERS_THROW_EXCEPTION(std::domain_error, signed_mod_by_zero_msg()); } @@ -2261,6 +2349,7 @@ struct signed_mod_helper } else { + #if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA)) if (std::is_constant_evaluated()) { if constexpr (std::is_same_v) @@ -2285,6 +2374,7 @@ struct signed_mod_helper } } else + #endif { BOOST_SAFE_NUMBERS_THROW_EXCEPTION(std::overflow_error, signed_overflow_mod_msg()); } @@ -2299,6 +2389,7 @@ struct signed_mod_helper template struct signed_mod_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) -> std::pair, bool> @@ -2333,6 +2424,7 @@ struct signed_mod_helper template struct signed_mod_helper { + BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] static constexpr auto apply(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept -> std::optional> @@ -2363,6 +2455,7 @@ struct signed_mod_helper }; template +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto mod_impl(const signed_integer_basis lhs, const signed_integer_basis rhs) noexcept(Policy == overflow_policy::checked || Policy == overflow_policy::strict) @@ -2373,6 +2466,7 @@ template } // namespace impl template +BOOST_SAFE_NUMBERS_HOST_DEVICE [[nodiscard]] constexpr auto operator%(const signed_integer_basis lhs, const signed_integer_basis rhs) -> signed_integer_basis { @@ -2383,6 +2477,7 @@ BOOST_SAFE_NUMBERS_DEFINE_MIXED_SIGNED_INTEGER_OP("modulo", operator%) template template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator%=(const signed_integer_basis rhs) -> signed_integer_basis& { @@ -2395,7 +2490,7 @@ constexpr auto signed_integer_basis::operator%=(const signed_integer_ // ------------------------------ template -constexpr auto signed_overflow_inc_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_overflow_inc_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -2420,7 +2515,7 @@ constexpr auto signed_overflow_inc_msg() noexcept -> const char* } template -constexpr auto signed_underflow_dec_msg() noexcept -> const char* +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_underflow_dec_msg() noexcept -> const char* { if constexpr (std::is_same_v) { @@ -2449,6 +2544,7 @@ constexpr auto signed_underflow_dec_msg() noexcept -> const char* // ------------------------------ template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator++() -> signed_integer_basis& { @@ -2462,6 +2558,7 @@ constexpr auto signed_integer_basis::operator++() } template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator++(int) -> signed_integer_basis { @@ -2480,6 +2577,7 @@ constexpr auto signed_integer_basis::operator++(int) // ------------------------------ template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator--() -> signed_integer_basis& { @@ -2493,6 +2591,7 @@ constexpr auto signed_integer_basis::operator--() } template +BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_integer_basis::operator--(int) -> signed_integer_basis { diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 24b7ddd..3678fe8 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -309,6 +309,41 @@ run test_cuda_u128_gcd.cu ; run test_cuda_u128_lcm.cu ; run test_cuda_u128_midpoint.cu ; +# i8 tests +run test_cuda_i8_add.cu ; +run test_cuda_i8_sub.cu ; +run test_cuda_i8_mul.cu ; +run test_cuda_i8_div.cu ; +run test_cuda_i8_mod.cu ; + +# i16 tests +run test_cuda_i16_add.cu ; +run test_cuda_i16_sub.cu ; +run test_cuda_i16_mul.cu ; +run test_cuda_i16_div.cu ; +run test_cuda_i16_mod.cu ; + +# i32 tests +run test_cuda_i32_add.cu ; +run test_cuda_i32_sub.cu ; +run test_cuda_i32_mul.cu ; +run test_cuda_i32_div.cu ; +run test_cuda_i32_mod.cu ; + +# i64 tests +run test_cuda_i64_add.cu ; +run test_cuda_i64_sub.cu ; +run test_cuda_i64_mul.cu ; +run test_cuda_i64_div.cu ; +run test_cuda_i64_mod.cu ; + +# i128 tests +run test_cuda_i128_add.cu ; +run test_cuda_i128_sub.cu ; +run test_cuda_i128_mul.cu ; +run test_cuda_i128_div.cu ; +run test_cuda_i128_mod.cu ; + # Examples run ../examples/cuda.cu ; run ../examples/cuda_error_handling.cu ; diff --git a/test/test_cuda_i128_add.cu b/test/test_cuda_i128_add.cu new file mode 100644 index 0000000..55d3cd6 --- /dev/null +++ b/test/test_cuda_i128_add.cu @@ -0,0 +1,86 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] + in[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{(std::numeric_limits::min)() / 2, (std::numeric_limits::max)() / 2}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] + input_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i128_div.cu b/test/test_cuda_i128_div.cu new file mode 100644 index 0000000..279b63a --- /dev/null +++ b/test/test_cuda_i128_div.cu @@ -0,0 +1,89 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] / in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + boost::random::uniform_int_distribution dist2{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] / input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i128_mod.cu b/test/test_cuda_i128_mod.cu new file mode 100644 index 0000000..4245d64 --- /dev/null +++ b/test/test_cuda_i128_mod.cu @@ -0,0 +1,89 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] % in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + boost::random::uniform_int_distribution dist2{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] % input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i128_mul.cu b/test/test_cuda_i128_mul.cu new file mode 100644 index 0000000..555ae6f --- /dev/null +++ b/test/test_cuda_i128_mul.cu @@ -0,0 +1,89 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] * in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + // isqrt(INT128_MAX) fits in an int64_t range: use INT64_MAX as safe bound + boost::random::uniform_int_distribution dist{static_cast(-static_cast(INT64_MAX)), static_cast(INT64_MAX)}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] * input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i128_sub.cu b/test/test_cuda_i128_sub.cu new file mode 100644 index 0000000..0ffea4d --- /dev/null +++ b/test/test_cuda_i128_sub.cu @@ -0,0 +1,88 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION +#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i128; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] - in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist{(std::numeric_limits::min)() / 4, (std::numeric_limits::max)() / 4}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] - input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i16_add.cu b/test/test_cuda_i16_add.cu new file mode 100644 index 0000000..d2869e8 --- /dev/null +++ b/test/test_cuda_i16_add.cu @@ -0,0 +1,81 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] + in[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{static_cast((std::numeric_limits::min)() / 2), static_cast((std::numeric_limits::max)() / 2)}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] + input_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i16_div.cu b/test/test_cuda_i16_div.cu new file mode 100644 index 0000000..1521140 --- /dev/null +++ b/test/test_cuda_i16_div.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] / in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{static_cast((std::numeric_limits::min)()), static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{1, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] / input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i16_mod.cu b/test/test_cuda_i16_mod.cu new file mode 100644 index 0000000..2493d8a --- /dev/null +++ b/test/test_cuda_i16_mod.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] % in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{static_cast((std::numeric_limits::min)()), static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{1, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] % input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i16_mul.cu b/test/test_cuda_i16_mul.cu new file mode 100644 index 0000000..0265be1 --- /dev/null +++ b/test/test_cuda_i16_mul.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] * in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + // isqrt(INT16_MAX) = 181 + std::uniform_int_distribution dist{-181, 181}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] * input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i16_sub.cu b/test/test_cuda_i16_sub.cu new file mode 100644 index 0000000..d867aed --- /dev/null +++ b/test/test_cuda_i16_sub.cu @@ -0,0 +1,83 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i16; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] - in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{static_cast((std::numeric_limits::min)() / 4), static_cast((std::numeric_limits::max)() / 4)}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] - input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i32_add.cu b/test/test_cuda_i32_add.cu new file mode 100644 index 0000000..ed0745d --- /dev/null +++ b/test/test_cuda_i32_add.cu @@ -0,0 +1,81 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] + in[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{(std::numeric_limits::min)() / 2, (std::numeric_limits::max)() / 2}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] + input_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i32_div.cu b/test/test_cuda_i32_div.cu new file mode 100644 index 0000000..f18a61f --- /dev/null +++ b/test/test_cuda_i32_div.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] / in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] / input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i32_mod.cu b/test/test_cuda_i32_mod.cu new file mode 100644 index 0000000..7269b36 --- /dev/null +++ b/test/test_cuda_i32_mod.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] % in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] % input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i32_mul.cu b/test/test_cuda_i32_mul.cu new file mode 100644 index 0000000..63d1465 --- /dev/null +++ b/test/test_cuda_i32_mul.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] * in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + // isqrt(INT32_MAX) = 46340 + std::uniform_int_distribution dist{-46340, 46340}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] * input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i32_sub.cu b/test/test_cuda_i32_sub.cu new file mode 100644 index 0000000..f67c522 --- /dev/null +++ b/test/test_cuda_i32_sub.cu @@ -0,0 +1,83 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i32; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] - in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{(std::numeric_limits::min)() / 4, (std::numeric_limits::max)() / 4}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] - input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i64_add.cu b/test/test_cuda_i64_add.cu new file mode 100644 index 0000000..98ef840 --- /dev/null +++ b/test/test_cuda_i64_add.cu @@ -0,0 +1,81 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] + in[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{(std::numeric_limits::min)() / 2, (std::numeric_limits::max)() / 2}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] + input_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i64_div.cu b/test/test_cuda_i64_div.cu new file mode 100644 index 0000000..2a93597 --- /dev/null +++ b/test/test_cuda_i64_div.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] / in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] / input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i64_mod.cu b/test/test_cuda_i64_mod.cu new file mode 100644 index 0000000..a17f228 --- /dev/null +++ b/test/test_cuda_i64_mod.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] % in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::uniform_int_distribution dist2{basis_type{1}, (std::numeric_limits::max)()}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist2(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] % input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i64_mul.cu b/test/test_cuda_i64_mul.cu new file mode 100644 index 0000000..361f159 --- /dev/null +++ b/test/test_cuda_i64_mul.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] * in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + // isqrt(INT64_MAX) = 3037000499 + std::uniform_int_distribution dist{-3037000499LL, 3037000499LL}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] * input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i64_sub.cu b/test/test_cuda_i64_sub.cu new file mode 100644 index 0000000..210624a --- /dev/null +++ b/test/test_cuda_i64_sub.cu @@ -0,0 +1,83 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i64; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] - in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{(std::numeric_limits::min)() / 4, (std::numeric_limits::max)() / 4}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{dist(rng)}; + input_vector2[i] = test_type{dist(rng)}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] - input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i8_add.cu b/test/test_cuda_i8_add.cu new file mode 100644 index 0000000..71fe64b --- /dev/null +++ b/test/test_cuda_i8_add.cu @@ -0,0 +1,81 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] + in[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{static_cast((std::numeric_limits::min)() / 2), static_cast((std::numeric_limits::max)() / 2)}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] + input_vector[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i8_div.cu b/test/test_cuda_i8_div.cu new file mode 100644 index 0000000..0351aff --- /dev/null +++ b/test/test_cuda_i8_div.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] / in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{static_cast((std::numeric_limits::min)()), static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{1, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] / input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i8_mod.cu b/test/test_cuda_i8_mod.cu new file mode 100644 index 0000000..cb3b118 --- /dev/null +++ b/test/test_cuda_i8_mod.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] % in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{static_cast((std::numeric_limits::min)()), static_cast((std::numeric_limits::max)())}; + std::uniform_int_distribution dist2{1, static_cast((std::numeric_limits::max)())}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist2(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] % input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i8_mul.cu b/test/test_cuda_i8_mul.cu new file mode 100644 index 0000000..f274dc6 --- /dev/null +++ b/test/test_cuda_i8_mul.cu @@ -0,0 +1,84 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] * in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + // isqrt(INT8_MAX) = 11 + std::uniform_int_distribution dist{-11, 11}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] * input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cuda_i8_sub.cu b/test/test_cuda_i8_sub.cu new file mode 100644 index 0000000..02bf40c --- /dev/null +++ b/test/test_cuda_i8_sub.cu @@ -0,0 +1,83 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::safe_numbers::i8; +using basis_type = test_type::basis_type; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = in[i] - in2[i]; + } +} + +int main(void) +{ + std::mt19937_64 rng{42}; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + std::uniform_int_distribution dist{static_cast((std::numeric_limits::min)() / 4), static_cast((std::numeric_limits::max)() / 4)}; + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = test_type{static_cast(dist(rng))}; + input_vector2[i] = test_type{static_cast(dist(rng))}; + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + boost::safe_numbers::device_error_context ctx; + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + ctx.synchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(input_vector[i] - input_vector2[i]); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +}