diff --git a/include/boost/math/policies/error_handling.hpp b/include/boost/math/policies/error_handling.hpp index 07e112721..ce3f1e7cc 100644 --- a/include/boost/math/policies/error_handling.hpp +++ b/include/boost/math/policies/error_handling.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #ifndef BOOST_MATH_HAS_NVRTC @@ -1017,7 +1018,7 @@ BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, co } template -BOOST_MATH_GPU_ENABLED inline void check_series_iterations(const char* function, BOOST_MATH_UINTMAX_T max_iter, const Policy& pol) noexcept(boost::math::is_floating_point_v) +BOOST_MATH_GPU_ENABLED inline void check_series_iterations(const char* function, boost::math::uintmax_t max_iter, const Policy& pol) noexcept(boost::math::is_floating_point_v) { if(max_iter >= policies::get_max_series_iterations()) raise_evaluation_error( @@ -1026,7 +1027,7 @@ BOOST_MATH_GPU_ENABLED inline void check_series_iterations(const char* function, } template -BOOST_MATH_GPU_ENABLED inline void check_root_iterations(const char* function, BOOST_MATH_UINTMAX_T max_iter, const Policy& pol) noexcept(boost::math::is_floating_point_v) +BOOST_MATH_GPU_ENABLED inline void check_root_iterations(const char* function, boost::math::uintmax_t max_iter, const Policy& pol) noexcept(boost::math::is_floating_point_v) { if(max_iter >= policies::get_max_root_iterations()) raise_evaluation_error( diff --git a/include/boost/math/policies/policy.hpp b/include/boost/math/policies/policy.hpp index 432191362..ec7b36f2d 100644 --- a/include/boost/math/policies/policy.hpp +++ b/include/boost/math/policies/policy.hpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace boost{ namespace math{ @@ -313,7 +314,7 @@ class is_default_policy }; }; -template +template struct append_N { using type = typename append_N, T, N-1>::type; @@ -402,7 +403,7 @@ class policy // Typelist of the arguments: // using arg_list = mp::mp_list; - static constexpr BOOST_MATH_SIZE_T 
arg_list_size = mp::mp_size::value; + static constexpr boost::math::size_t arg_list_size = mp::mp_size::value; template struct pick_arg @@ -533,7 +534,7 @@ class normalise { private: using arg_list = mp::mp_list; - static constexpr BOOST_MATH_SIZE_T arg_list_size = mp::mp_size::value; + static constexpr boost::math::size_t arg_list_size = mp::mp_size::value; template struct pick_arg @@ -882,7 +883,7 @@ struct series_factor_calc::value) { - return 1 / static_cast(static_cast(1u) << (Digits::value - 1)); + return 1 / static_cast(static_cast(1u) << (Digits::value - 1)); } }; template @@ -901,7 +902,7 @@ BOOST_MATH_GPU_ENABLED constexpr T get_epsilon_imp(boost::math::true_type const& static_assert(boost::math::numeric_limits::radix == 2, "boost::math::numeric_limits::radix == 2"); typedef typename boost::math::policies::precision::type p_t; - typedef boost::math::integral_constant::digits> is_small_int; + typedef boost::math::integral_constant::digits> is_small_int; typedef boost::math::integral_constant= boost::math::numeric_limits::digits> is_default_value; return series_factor_calc::get(); } diff --git a/include/boost/math/special_functions/beta.hpp b/include/boost/math/special_functions/beta.hpp index 75968f3eb..f0ac6318b 100644 --- a/include/boost/math/special_functions/beta.hpp +++ b/include/boost/math/special_functions/beta.hpp @@ -11,19 +11,31 @@ #pragma once #endif -#include #include #include +#include +#include +#include +#include +#include +#include #include -#include -#include #include #include #include #include +#include +#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include +#include +#include #include -#include #include +#endif namespace boost{ namespace math{ @@ -124,6 +136,7 @@ BOOST_MATH_GPU_ENABLED T beta_imp(T a, T b, const Lanczos&, const Policy& pol) // Generic implementation of Beta(a,b) without Lanczos approximation support // (Caution this is slow!!!): // +#ifndef BOOST_MATH_HAS_NVRTC template BOOST_MATH_GPU_ENABLED T 
beta_imp(T a, T b, const lanczos::undefined_lanczos& l, const Policy& pol) { @@ -194,7 +207,7 @@ BOOST_MATH_GPU_ENABLED T beta_imp(T a, T b, const lanczos::undefined_lanczos& l, } } // template T beta_imp(T a, T b, const lanczos::undefined_lanczos& l) - +#endif // // Compute the leading power terms in the incomplete Beta: @@ -246,11 +259,11 @@ BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, // l1 and l2 are the base of the exponents minus one: T l1 = (x * b - y * agh) / agh; T l2 = (y * a - x * bgh) / bgh; - if(((std::min)(fabs(l1), fabs(l2)) < 0.2)) + if((BOOST_MATH_GPU_SAFE_MIN(fabs(l1), fabs(l2)) < 0.2)) { // when the base of the exponent is very near 1 we get really // gross errors unless extra care is taken: - if((l1 * l2 > 0) || ((std::min)(a, b) < 1)) + if((l1 * l2 > 0) || (BOOST_MATH_GPU_SAFE_MIN(a, b) < 1)) { // // This first branch handles the simple cases where either: @@ -286,7 +299,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, BOOST_MATH_INSTRUMENT_VARIABLE(result); } } - else if((std::max)(fabs(l1), fabs(l2)) < 0.5) + else if(BOOST_MATH_GPU_SAFE_MAX(fabs(l1), fabs(l2)) < 0.5) { // // Both exponents are near one and both the exponents are @@ -448,6 +461,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, // // This version is generic, slow, and does not use the Lanczos approximation. 
// +#ifndef BOOST_MATH_HAS_NVRTC template BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, T b, @@ -484,7 +498,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, bool need_logs = false; if (a < b) { - BOOST_MATH_IF_CONSTEXPR(std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR(boost::math::numeric_limits::has_infinity) { power1 = pow((x * y * c * c) / (a * b), a); power2 = pow((y * c) / b, b - a); @@ -507,7 +521,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, } else { - BOOST_MATH_IF_CONSTEXPR(std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR(boost::math::numeric_limits::has_infinity) { power1 = pow((x * y * c * c) / (a * b), b); power2 = pow((x * c) / a, a - b); @@ -526,7 +540,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, need_logs = true; } } - BOOST_MATH_IF_CONSTEXPR(std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR(boost::math::numeric_limits::has_infinity) { if (!(boost::math::isnormal)(power1) || !(boost::math::isnormal)(power2)) { @@ -625,6 +639,8 @@ BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, } return prefix * power1 * (power2 / bet); } + +#endif // // Series approximation to the incomplete beta: // @@ -717,7 +733,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool if(result < tools::min_value()) return s0; // Safeguard: series can't cope with denorms. 
ibeta_series_t s(a, b, x, result); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter, s0); policies::check_series_iterations("boost::math::ibeta<%1%>(%1%, %1%, %1%) in ibeta_series (with lanczos)", max_iter, pol); return result; @@ -725,6 +741,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool // // Incomplete Beta series again, this time without Lanczos support: // +#ifndef BOOST_MATH_HAS_NVRTC template BOOST_MATH_GPU_ENABLED T ibeta_series(T a, T b, T x, T s0, const boost::math::lanczos::undefined_lanczos& l, bool normalised, T* p_derivative, T y, const Policy& pol) { @@ -778,7 +795,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_series(T a, T b, T x, T s0, const boost::math::la if(result < tools::min_value()) return s0; // Safeguard: series can't cope with denorms. ibeta_series_t s(a, b, x, result); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter, s0); policies::check_series_iterations("boost::math::ibeta<%1%>(%1%, %1%, %1%) in ibeta_series (without lanczos)", max_iter, pol); return result; @@ -790,7 +807,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_series(T a, T b, T x, T s0, const boost::math::la template struct ibeta_fraction2_t { - typedef std::pair result_type; + typedef boost::math::pair result_type; BOOST_MATH_GPU_ENABLED ibeta_fraction2_t(T a_, T b_, T x_, T y_) : a(a_), b(b_), x(x_), y(y_), m(0) {} @@ -806,7 +823,7 @@ struct ibeta_fraction2_t ++m; - return std::make_pair(aN, bN); + return boost::math::make_pair(aN, bN); } private: @@ -867,6 +884,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& p return prefix; } +#endif // // This function is only 
needed for the non-regular incomplete beta, // it computes the delta in: @@ -905,11 +923,15 @@ struct Pn_size { // This is likely to be enough for ~35-50 digit accuracy // but it's hard to quantify exactly: + #ifndef BOOST_MATH_HAS_NVRTC static constexpr unsigned value = ::boost::math::max_factorial::value >= 100 ? 50 : ::boost::math::max_factorial::value >= ::boost::math::max_factorial::value ? 30 : ::boost::math::max_factorial::value >= ::boost::math::max_factorial::value ? 15 : 1; static_assert(::boost::math::max_factorial::value >= ::boost::math::max_factorial::value, "Type does not provide for 35-50 digits of accuracy."); + #else + static constexpr unsigned value = 0; // Will never be called + #endif }; template <> struct Pn_size @@ -936,6 +958,7 @@ struct Pn_size #endif }; +#ifndef BOOST_MATH_HAS_NVRTC template BOOST_MATH_GPU_ENABLED T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Policy& pol, bool normalised) { @@ -1037,7 +1060,7 @@ BOOST_MATH_GPU_ENABLED T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T } return sum; } // template T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Lanczos& l, bool normalised) - +#endif // // For integer arguments we can relate the incomplete beta to the // complement of the binomial distribution cdf and use this finite sum. 
@@ -1224,7 +1247,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, b return p; } - if((std::min)(a, b) <= 1) + if(BOOST_MATH_GPU_SAFE_MIN(a, b) <= 1) { if(x > 0.5) { @@ -1233,10 +1256,10 @@ BOOST_MATH_GPU_ENABLED T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, b invert = !invert; BOOST_MATH_INSTRUMENT_VARIABLE(invert); } - if((std::max)(a, b) <= 1) + if(BOOST_MATH_GPU_SAFE_MAX(a, b) <= 1) { // Both a,b < 1: - if((a >= (std::min)(T(0.2), b)) || (pow(x, a) <= 0.9)) + if((a >= BOOST_MATH_GPU_SAFE_MIN(T(0.2), b)) || (pow(x, a) <= 0.9)) { if(!invert) { @@ -1405,7 +1428,7 @@ BOOST_MATH_GPU_ENABLED T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, b if(b < 40) { - if((floor(a) == a) && (floor(b) == b) && (a < static_cast((std::numeric_limits::max)() - 100)) && (y != 1)) + if((floor(a) == a) && (floor(b) == b) && (a < static_cast((boost::math::numeric_limits::max)() - 100)) && (y != 1)) { // relate to the binomial distribution and use a finite sum: T k = a - 1; @@ -1726,7 +1749,12 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type } // namespace math } // namespace boost +// TODO(mborland): Get the ibeta_inv working on NVRTC +#ifndef BOOST_MATH_HAS_NVRTC + #include #include +#endif + #endif // BOOST_MATH_SPECIAL_BETA_HPP diff --git a/include/boost/math/special_functions/gamma.hpp b/include/boost/math/special_functions/gamma.hpp index 0d5e5f0b2..afb8e9728 100644 --- a/include/boost/math/special_functions/gamma.hpp +++ b/include/boost/math/special_functions/gamma.hpp @@ -14,9 +14,10 @@ #pragma once #endif -#ifndef __CUDACC_RTC__ - #include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include #include diff --git a/include/boost/math/special_functions/lanczos.hpp b/include/boost/math/special_functions/lanczos.hpp index b804bf829..0ec24bddb 100644 --- a/include/boost/math/special_functions/lanczos.hpp +++ b/include/boost/math/special_functions/lanczos.hpp @@ -11,12 +11,16 @@ #endif #include -#include #include 
+#include +#include +#include #include -#include -#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include #include +#endif #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -59,7 +63,7 @@ BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const L&) // Max experimental error (with arbitrary precision arithmetic) 9.516e-12 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos6 : public std::integral_constant +struct lanczos6 : public boost::math::integral_constant { // // Produces slightly better than float precision when evaluated at @@ -77,13 +81,13 @@ struct lanczos6 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 63.99951844938187085666201263218840287667)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 2.506628274631006311133031631822390264407)) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -101,13 +105,13 @@ struct lanczos6 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 0.2412010548258800231126240760264822486599)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 0.009446967704539249494420221613134244048319)) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + 
static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -163,7 +167,7 @@ struct lanczos6 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 2.16676e-19 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos11 : public std::integral_constant +struct lanczos11 : public boost::math::integral_constant { // // Produces slightly better than double precision when evaluated at @@ -186,18 +190,18 @@ struct lanczos11 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 261.6140441641668190791708576058805625502)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 2.506628274631000502415573855452633787834)) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[11] = { - static_cast(0u), - static_cast(362880u), - static_cast(1026576u), - static_cast(1172700u), - static_cast(723680u), - static_cast(269325u), - static_cast(63273u), - static_cast(9450u), - static_cast(870u), - static_cast(45u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[11] = { + static_cast(0u), + static_cast(362880u), + static_cast(1026576u), + static_cast(1172700u), + static_cast(723680u), + static_cast(269325u), + static_cast(63273u), + static_cast(9450u), + static_cast(870u), + static_cast(45u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -220,18 +224,18 @@ struct lanczos11 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 0.004826466289237661857584712046231435101741)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 0.4624429436045378766270459638520555557321e-4)) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[11] = { - static_cast(0u), - static_cast(362880u), - static_cast(1026576u), - static_cast(1172700u), - static_cast(723680u), - static_cast(269325u), - 
static_cast(63273u), - static_cast(9450u), - static_cast(870u), - static_cast(45u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[11] = { + static_cast(0u), + static_cast(362880u), + static_cast(1026576u), + static_cast(1172700u), + static_cast(723680u), + static_cast(269325u), + static_cast(63273u), + static_cast(9450u), + static_cast(870u), + static_cast(45u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -297,7 +301,7 @@ struct lanczos11 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 9.2213e-23 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos13 : public std::integral_constant +struct lanczos13 : public boost::math::integral_constant { // // Produces slightly better than extended-double precision when evaluated at @@ -322,20 +326,20 @@ struct lanczos13 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 381.8801248632926870394389468349331394196)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 2.506628274631000502415763426076722427007)) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // 
LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -360,20 +364,20 @@ struct lanczos13 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 0.0007469903808915448316510079585999893674101)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 0.4903180573459871862552197089738373164184e-5)) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -443,7 +447,7 @@ struct lanczos13 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 8.111667e-8 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos6m24 : public std::integral_constant +struct lanczos6m24 : public boost::math::integral_constant { // // Use for float precision, when evaluated as a float: @@ -460,13 +464,13 @@ struct lanczos6m24 : public std::integral_constant static_cast(27.5192015197455403062503721613097825345L), static_cast(2.50662858515256974113978724717473206342L) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - 
static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -484,13 +488,13 @@ struct lanczos6m24 : public std::integral_constant static_cast(6.595765571169314946316366571954421695196L), static_cast(0.6007854010515290065101128585795542383721L) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -546,7 +550,7 @@ struct lanczos6m24 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 1.196214e-17 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos13m53 : public std::integral_constant +struct lanczos13m53 : public boost::math::integral_constant { // // Use for double precision, when evaluated as a double: @@ -570,20 +574,20 @@ struct lanczos13m53 : public std::integral_constant static_cast(210.8242777515793458725097339207133627117L), static_cast(2.506628274631000270164908177133837338626L) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + 
BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -608,20 +612,20 @@ struct lanczos13m53 : public std::integral_constant static_cast(0.5098416655656676188125178644804694509993L), static_cast(0.006061842346248906525783753964555936883222L) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -691,7 +695,7 @@ struct lanczos13m53 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 2.7699e-26 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos17m64 : public std::integral_constant +struct lanczos17m64 : public boost::math::integral_constant { // // Use for extended-double precision, when evaluated as an extended-double: @@ -719,7 
+723,7 @@ struct lanczos17m64 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 488.0063567520005730476791712814838113252)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.50662827463100050241576877135758834683)) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint64_t) denom[17] = { + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint64_t) denom[17] = { BOOST_MATH_INT_VALUE_SUFFIX(0, uLL), BOOST_MATH_INT_VALUE_SUFFIX(1307674368000, uLL), BOOST_MATH_INT_VALUE_SUFFIX(4339163001600, uLL), @@ -765,7 +769,7 @@ struct lanczos17m64 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.002393749522058449186690627996063983095463)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1229541408909435212800785616808830746135e-4)) }; - BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, std::uint64_t) denom[17] = { + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint64_t) denom[17] = { BOOST_MATH_INT_VALUE_SUFFIX(0, uLL), BOOST_MATH_INT_VALUE_SUFFIX(1307674368000, uLL), BOOST_MATH_INT_VALUE_SUFFIX(4339163001600, uLL), @@ -860,13 +864,13 @@ struct lanczos17m64 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 1.0541e-38 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos24m113 : public std::integral_constant +struct lanczos24m113 : public boost::math::integral_constant { // // Use for long-double precision, when evaluated as an long-double: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[24] = { @@ -926,7 +930,7 @@ struct lanczos24m113 : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[24] = { @@ -987,7 +991,7 @@ struct 
lanczos24m113 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[23] = { @@ -1025,7 +1029,7 @@ struct lanczos24m113 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[23] = { @@ -1063,7 +1067,7 @@ struct lanczos24m113 : public std::integral_constant return result; } - static double g(){ return 20.3209821879863739013671875; } + BOOST_MATH_GPU_ENABLED static double g(){ return 20.3209821879863739013671875; } }; // @@ -1072,10 +1076,10 @@ struct lanczos24m113 : public std::integral_constant // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at May 23 2021 // Type precision was 134 bits or 42 max_digits10 // -struct lanczos27MP : public std::integral_constant +struct lanczos27MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[27] = { @@ -1141,7 +1145,7 @@ struct lanczos27MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[27] = { @@ -1208,7 +1212,7 @@ struct lanczos27MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[34] = { @@ -1257,7 +1261,7 @@ struct lanczos27MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[34] = { @@ -1306,10 +1310,10 @@ struct 
lanczos27MP : public std::integral_constant return result; } - static double g() { return 2.472513680905104038743047567550092935562134e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 2.472513680905104038743047567550092935562134e+01; } }; -inline double lanczos_g_near_1_and_2(const lanczos27MP&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos27MP&) { return 17.03623256087303; } @@ -1320,10 +1324,10 @@ inline double lanczos_g_near_1_and_2(const lanczos27MP&) // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at Oct 14 2019 // Type precision was 168 bits or 53 max_digits10 // -struct lanczos35MP : public std::integral_constant +struct lanczos35MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[35] = { @@ -1405,7 +1409,7 @@ struct lanczos35MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[35] = { @@ -1488,7 +1492,7 @@ struct lanczos35MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[42] = { @@ -1545,7 +1549,7 @@ struct lanczos35MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[42] = { @@ -1602,10 +1606,10 @@ struct lanczos35MP : public std::integral_constant return result; } - static double g() { return 2.96640371531248092651367187500000000000000000000000000e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 2.96640371531248092651367187500000000000000000000000000e+01; } }; -inline double 
lanczos_g_near_1_and_2(const lanczos35MP&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos35MP&) { return 22.36563469469547; } @@ -1615,10 +1619,10 @@ inline double lanczos_g_near_1_and_2(const lanczos35MP&) // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at Oct 14 2019 // Type precision was 201 bits or 63 max_digits10 // -struct lanczos48MP : public std::integral_constant +struct lanczos48MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[48] = { @@ -1726,7 +1730,7 @@ struct lanczos48MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[48] = { @@ -1835,7 +1839,7 @@ struct lanczos48MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[47] = { @@ -1897,7 +1901,7 @@ struct lanczos48MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[47] = { @@ -1959,7 +1963,7 @@ struct lanczos48MP : public std::integral_constant return result; } - static double g() { return 2.880805098265409469604492187500000000000000000000000000000000000e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 2.880805098265409469604492187500000000000000000000000000000000000e+01; } }; // // Lanczos Coefficients for N=49 G=3.531905273437499914734871708787977695465087890625000000000000000000000000e+01 @@ -1967,10 +1971,10 @@ struct lanczos48MP : public std::integral_constant // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at May 23 
2021 // Type precision was 234 bits or 72 max_digits10 // -struct lanczos49MP : public std::integral_constant +struct lanczos49MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[49] = { @@ -2080,7 +2084,7 @@ struct lanczos49MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[49] = { @@ -2191,7 +2195,7 @@ struct lanczos49MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[48] = { @@ -2254,7 +2258,7 @@ struct lanczos49MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[48] = { @@ -2317,10 +2321,10 @@ struct lanczos49MP : public std::integral_constant return result; } - static double g() { return 3.531905273437499914734871708787977695465087890625000000000000000000000000e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 3.531905273437499914734871708787977695465087890625000000000000000000000000e+01; } }; -inline double lanczos_g_near_1_and_2(const lanczos49MP&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos49MP&) { return 33.54638671875000; } @@ -2331,10 +2335,10 @@ inline double lanczos_g_near_1_and_2(const lanczos49MP&) // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at May 22 2021 // Type precision was 267 bits or 82 max_digits10 // -struct lanczos52MP : public std::integral_constant +struct lanczos52MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + 
BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[52] = { @@ -2450,7 +2454,7 @@ struct lanczos52MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START BOOST_MATH_STATIC const T num[52] = { @@ -2567,7 +2571,7 @@ struct lanczos52MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[56] = { @@ -2638,7 +2642,7 @@ struct lanczos52MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START BOOST_MATH_STATIC const T d[56] = { @@ -2709,10 +2713,10 @@ struct lanczos52MP : public std::integral_constant return result; } - static double g() { return 4.9921416015624998863131622783839702606201171875000000000000000000000000000000000000e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 4.9921416015624998863131622783839702606201171875000000000000000000000000000000000000e+01; } }; -inline double lanczos_g_near_1_and_2(const lanczos52MP&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos52MP&) { return 38.73733398437500; } @@ -2721,24 +2725,24 @@ inline double lanczos_g_near_1_and_2(const lanczos52MP&) // // placeholder for no lanczos info available: // -struct undefined_lanczos : public std::integral_constant::max)() - 1> { }; +struct undefined_lanczos : public boost::math::integral_constant::max)() - 1> { }; template struct lanczos { - BOOST_MATH_STATIC constexpr auto target_precision = policies::precision::type::value <= 0 ? (std::numeric_limits::max)()-2 : + BOOST_MATH_STATIC constexpr auto target_precision = policies::precision::type::value <= 0 ? 
(boost::math::numeric_limits::max)()-2 : policies::precision::type::value; - using type = typename std::conditional<(target_precision <= lanczos6m24::value), lanczos6m24, - typename std::conditional<(target_precision <= lanczos13m53::value), lanczos13m53, - typename std::conditional<(target_precision <= lanczos11::value), lanczos11, - typename std::conditional<(target_precision <= lanczos17m64::value), lanczos17m64, - typename std::conditional<(target_precision <= lanczos24m113::value), lanczos24m113, - typename std::conditional<(target_precision <= lanczos27MP::value), lanczos27MP, - typename std::conditional<(target_precision <= lanczos35MP::value), lanczos35MP, - typename std::conditional<(target_precision <= lanczos48MP::value), lanczos48MP, - typename std::conditional<(target_precision <= lanczos49MP::value), lanczos49MP, - typename std::conditional<(target_precision <= lanczos52MP::value), lanczos52MP, undefined_lanczos>::type + using type = typename boost::math::conditional<(target_precision <= lanczos6m24::value), lanczos6m24, + typename boost::math::conditional<(target_precision <= lanczos13m53::value), lanczos13m53, + typename boost::math::conditional<(target_precision <= lanczos11::value), lanczos11, + typename boost::math::conditional<(target_precision <= lanczos17m64::value), lanczos17m64, + typename boost::math::conditional<(target_precision <= lanczos24m113::value), lanczos24m113, + typename boost::math::conditional<(target_precision <= lanczos27MP::value), lanczos27MP, + typename boost::math::conditional<(target_precision <= lanczos35MP::value), lanczos35MP, + typename boost::math::conditional<(target_precision <= lanczos48MP::value), lanczos48MP, + typename boost::math::conditional<(target_precision <= lanczos49MP::value), lanczos49MP, + typename boost::math::conditional<(target_precision <= lanczos52MP::value), lanczos52MP, undefined_lanczos>::type >::type>::type>::type>::type>::type>::type>::type>::type >::type; }; diff --git 
a/include/boost/math/special_functions/modf.hpp b/include/boost/math/special_functions/modf.hpp index e08945dca..6e372ec9a 100644 --- a/include/boost/math/special_functions/modf.hpp +++ b/include/boost/math/special_functions/modf.hpp @@ -11,9 +11,13 @@ #pragma once #endif -#include #include #include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include +#endif namespace boost{ namespace math{ diff --git a/include/boost/math/special_functions/pow.hpp b/include/boost/math/special_functions/pow.hpp index a056f6daf..7a1bb14eb 100644 --- a/include/boost/math/special_functions/pow.hpp +++ b/include/boost/math/special_functions/pow.hpp @@ -14,11 +14,13 @@ #define BOOST_MATH_POW_HPP #include -#include #include #include #include +#ifndef BOOST_MATH_HAS_NVRTC +#include +#endif namespace boost { namespace math { @@ -35,7 +37,7 @@ template struct positive_power { template - BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base) + BOOST_MATH_GPU_ENABLED static constexpr T result(T base) { T power = positive_power::result(base); return power * power; @@ -46,7 +48,7 @@ template struct positive_power { template - BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base) + BOOST_MATH_GPU_ENABLED static constexpr T result(T base) { T power = positive_power::result(base); return base * power * power; @@ -57,7 +59,7 @@ template <> struct positive_power<1, 1> { template - BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base){ return base; } + BOOST_MATH_GPU_ENABLED static constexpr T result(T base){ return base; } }; @@ -65,7 +67,7 @@ template struct power_if_positive { template - BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy&) + BOOST_MATH_GPU_ENABLED static constexpr T result(T base, const Policy&) { return positive_power::result(base); } }; @@ -73,7 +75,7 @@ template struct power_if_positive { template - BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& 
policy) + BOOST_MATH_GPU_ENABLED static constexpr T result(T base, const Policy& policy) { if (base == 0) { @@ -92,7 +94,7 @@ template <> struct power_if_positive<0, true> { template - BOOST_MATH_GPU_ENABLED static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy) + BOOST_MATH_GPU_ENABLED static constexpr T result(T base, const Policy& policy) { if (base == 0) { @@ -121,14 +123,14 @@ struct select_power_if_positive template -BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args::type pow(T base, const Policy& policy) +BOOST_MATH_GPU_ENABLED constexpr inline typename tools::promote_args::type pow(T base, const Policy& policy) { using result_type = typename tools::promote_args::type; return detail::select_power_if_positive::type::result(static_cast(base), policy); } template -BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args::type pow(T base) +BOOST_MATH_GPU_ENABLED constexpr inline typename tools::promote_args::type pow(T base) { return pow(base, policies::policy<>()); } #ifdef _MSC_VER diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp index 2ec6c5256..61a60b502 100644 --- a/include/boost/math/special_functions/trigamma.hpp +++ b/include/boost/math/special_functions/trigamma.hpp @@ -11,16 +11,22 @@ #pragma once #endif -#include #include -#include #include -#include #include +#include +#include +#include #include #include -#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include #include +#include +#endif #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -36,17 +42,20 @@ namespace boost{ namespace math{ namespace detail{ +// TODO(mborland): Temporary for NVRTC +#ifndef BOOST_MATH_HAS_NVRTC template T polygamma_imp(const int n, T x, const Policy &pol); template -T trigamma_prec(T x, const Policy& pol, const std::integral_constant&) +T trigamma_prec(T x, const Policy& pol, const 
boost::math::integral_constant&) { return polygamma_imp(1, x, pol); } +#endif template -BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const boost::math::integral_constant&) { // Max error in interpolated form: 3.736e-017 BOOST_MATH_STATIC const T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469); @@ -119,7 +128,7 @@ BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_c } template -BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const boost::math::integral_constant&) { // Max error in interpolated form: 1.178e-020 BOOST_MATH_STATIC const T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875); @@ -197,7 +206,7 @@ BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const std::integral_c } template -T trigamma_prec(T x, const Policy&, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const boost::math::integral_constant&) { // Max error in interpolated form: 1.916e-035 @@ -416,13 +425,13 @@ struct trigamma_initializer BOOST_MATH_GPU_ENABLED init() { typedef typename policies::precision::type precision_type; - do_init(std::integral_constant()); + do_init(boost::math::integral_constant()); } - BOOST_MATH_GPU_ENABLED void do_init(const std::true_type&) + BOOST_MATH_GPU_ENABLED void do_init(const boost::math::true_type&) { boost::math::trigamma(T(2.5), Policy()); } - BOOST_MATH_GPU_ENABLED void do_init(const std::false_type&){} + BOOST_MATH_GPU_ENABLED void do_init(const boost::math::false_type&){} BOOST_MATH_GPU_ENABLED void force_instantiate()const{} }; static const init initializer; @@ -446,7 +455,7 @@ BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type 
value_type; typedef typename policies::precision::type precision_type; - typedef std::integral_constant +#include #include +#ifdef BOOST_MATH_ENABLE_CUDA +#include +#endif + +#ifndef BOOST_MATH_HAS_NVRTC +#include +#endif + namespace boost { namespace math { namespace tools { @@ -24,12 +32,21 @@ namespace boost { static constexpr bool value = false; }; + #ifndef BOOST_MATH_ENABLE_CUDA template struct is_complex_type_impl().real()), decltype(std::declval().imag())>> { static constexpr bool value = true; }; + #else + template + struct is_complex_type_impl().real()), + decltype(cuda::std::declval().imag())>> + { + static constexpr bool value = true; + }; + #endif } // Namespace detail template diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp index 364aa8f54..2736d660f 100644 --- a/include/boost/math/tools/config.hpp +++ b/include/boost/math/tools/config.hpp @@ -675,6 +675,7 @@ namespace boost{ namespace math{ #include #include #include +#include # define BOOST_MATH_CUDA_ENABLED __host__ __device__ # define BOOST_MATH_HAS_GPU_SUPPORT @@ -775,11 +776,6 @@ BOOST_MATH_GPU_ENABLED constexpr T cuda_safe_max(const T& a, const T& b) { retur #define BOOST_MATH_FP_SUBNORMAL FP_SUBNORMAL #define BOOST_MATH_FP_NORMAL FP_NORMAL -// Missing type from NVRTC -#include -#define BOOST_MATH_SIZE_T std::size_t -#define BOOST_MATH_UINTMAX_T std::uintmax_t - #else // Special section for CUDA NVRTC to ensure we consume no STL headers #ifndef BOOST_MATH_STANDALONE @@ -824,8 +820,8 @@ BOOST_MATH_GPU_ENABLED constexpr void gpu_safe_swap(T& a, T& b) { T t(a); a = b; #define BOOST_MATH_FP_SUBNORMAL 3 #define BOOST_MATH_FP_NORMAL 4 -#define BOOST_MATH_SIZE_T unsigned long -#define BOOST_MATH_UINTMAX_T unsigned long +#define BOOST_MATH_INT_VALUE_SUFFIX(RV, SUF) RV##SUF +#define BOOST_MATH_INT_TABLE_TYPE(RT, IT) IT #if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L # define BOOST_MATH_INLINE_CONSTEXPR inline constexpr diff --git 
a/include/boost/math/tools/cstdint.hpp b/include/boost/math/tools/cstdint.hpp new file mode 100644 index 000000000..ce2c913b5 --- /dev/null +++ b/include/boost/math/tools/cstdint.hpp @@ -0,0 +1,107 @@ +// Copyright (c) 2024 Matt Borland +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_CSTDINT +#define BOOST_MATH_TOOLS_CSTDINT + +#include <boost/math/tools/config.hpp> + + +#ifdef BOOST_MATH_ENABLE_CUDA + +#include <cuda/std/cstdint> + +namespace boost { +namespace math { + +using cuda::std::int8_t; +using cuda::std::int16_t; +using cuda::std::int32_t; +using cuda::std::int64_t; + +using cuda::std::int_fast8_t; +using cuda::std::int_fast16_t; +using cuda::std::int_fast32_t; +using cuda::std::int_fast64_t; + +using cuda::std::int_least8_t; +using cuda::std::int_least16_t; +using cuda::std::int_least32_t; +using cuda::std::int_least64_t; + +using cuda::std::intmax_t; +using cuda::std::intptr_t; + +using cuda::std::uint8_t; +using cuda::std::uint16_t; +using cuda::std::uint32_t; +using cuda::std::uint64_t; + +using cuda::std::uint_fast8_t; +using cuda::std::uint_fast16_t; +using cuda::std::uint_fast32_t; +using cuda::std::uint_fast64_t; + +using cuda::std::uint_least8_t; +using cuda::std::uint_least16_t; +using cuda::std::uint_least32_t; +using cuda::std::uint_least64_t; + +using cuda::std::uintmax_t; +using cuda::std::uintptr_t; + +using size_t = unsigned long; + +#else + +#include <cstdint> + +namespace boost { +namespace math { + +using std::int8_t; +using std::int16_t; +using std::int32_t; +using std::int64_t; + +using std::int_fast8_t; +using std::int_fast16_t; +using std::int_fast32_t; +using std::int_fast64_t; + +using std::int_least8_t; +using std::int_least16_t; +using std::int_least32_t; +using std::int_least64_t; + +using std::intmax_t; +using std::intptr_t; + +using std::uint8_t; +using std::uint16_t; +using std::uint32_t; +using std::uint64_t; + 
+using std::uint_fast8_t; +using std::uint_fast16_t; +using std::uint_fast32_t; +using std::uint_fast64_t; + +using std::uint_least8_t; +using std::uint_least16_t; +using std::uint_least32_t; +using std::uint_least64_t; + +using std::uintmax_t; +using std::uintptr_t; + +using std::size_t; + +#endif + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_TOOLS_CSTDINT diff --git a/include/boost/math/tools/fraction.hpp b/include/boost/math/tools/fraction.hpp index 1970e5ca3..e5a31edf6 100644 --- a/include/boost/math/tools/fraction.hpp +++ b/include/boost/math/tools/fraction.hpp @@ -12,9 +12,12 @@ #endif #include +#include +#include #include #include #include +#include #include #include #include @@ -25,10 +28,10 @@ namespace detail { template - struct is_pair : public std::false_type{}; + struct is_pair : public boost::math::false_type{}; template - struct is_pair> : public std::true_type{}; + struct is_pair> : public boost::math::true_type{}; template struct fraction_traits_simple @@ -110,7 +113,7 @@ namespace detail { // template -BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type continued_fraction_b_impl(Gen& g, const U& factor, std::uintmax_t& max_terms) +BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type continued_fraction_b_impl(Gen& g, const U& factor, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) #ifndef BOOST_MATH_ENABLE_SYCL // SYCL can not handle this condition so we only check float on that platform @@ -140,7 +143,7 @@ BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type C = f; D = 0; - std::uintmax_t counter(max_terms); + boost::math::uintmax_t counter(max_terms); do{ v = g(); D = traits::b(v) + traits::a(v) * D; @@ -162,7 +165,7 @@ BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type } // namespace detail template -BOOST_MATH_GPU_ENABLED inline typename 
detail::fraction_traits::result_type continued_fraction_b(Gen& g, const U& factor, std::uintmax_t& max_terms) +BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, const U& factor, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) #ifndef BOOST_MATH_ENABLE_SYCL && noexcept(std::declval()()) @@ -180,7 +183,7 @@ BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type #endif ) { - std::uintmax_t max_terms = (std::numeric_limits::max)(); + boost::math::uintmax_t max_terms = (std::numeric_limits::max)(); return detail::continued_fraction_b_impl(g, factor, max_terms); } @@ -198,12 +201,12 @@ BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type using result_type = typename traits::result_type; result_type factor = ldexp(1.0f, 1 - bits); // 1 / pow(result_type(2), bits); - std::uintmax_t max_terms = (std::numeric_limits::max)(); + boost::math::uintmax_t max_terms = (std::numeric_limits::max)(); return detail::continued_fraction_b_impl(g, factor, max_terms); } template -BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, int bits, std::uintmax_t& max_terms) +BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type continued_fraction_b(Gen& g, int bits, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) #ifndef BOOST_MATH_ENABLE_SYCL && noexcept(std::declval()()) @@ -236,7 +239,7 @@ namespace detail { // Note that the first a1 and b1 returned by generator Gen are both used. 
// template -BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type continued_fraction_a_impl(Gen& g, const U& factor, std::uintmax_t& max_terms) +BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type continued_fraction_a_impl(Gen& g, const U& factor, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) #ifndef BOOST_MATH_ENABLE_SYCL && noexcept(std::declval()()) @@ -266,7 +269,7 @@ BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type C = f; D = 0; - std::uintmax_t counter(max_terms); + boost::math::uintmax_t counter(max_terms); do{ v = g(); @@ -289,7 +292,7 @@ BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type } // namespace detail template -BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, const U& factor, std::uintmax_t& max_terms) +BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, const U& factor, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) #ifndef BOOST_MATH_ENABLE_SYCL && noexcept(std::declval()()) @@ -307,7 +310,7 @@ BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type #endif ) { - std::uintmax_t max_iter = (std::numeric_limits::max)(); + boost::math::uintmax_t max_iter = (std::numeric_limits::max)(); return detail::continued_fraction_a_impl(g, factor, max_iter); } @@ -325,13 +328,13 @@ BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type typedef typename traits::result_type result_type; result_type factor = ldexp(1.0f, 1-bits); // 1 / pow(result_type(2), bits); - std::uintmax_t max_iter = (std::numeric_limits::max)(); + boost::math::uintmax_t max_iter = (std::numeric_limits::max)(); return detail::continued_fraction_a_impl(g, factor, max_iter); } template -BOOST_MATH_GPU_ENABLED inline 
typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, int bits, std::uintmax_t& max_terms) +BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits::result_type continued_fraction_a(Gen& g, int bits, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits::result_type) #ifndef BOOST_MATH_ENABLE_SYCL && noexcept(std::declval()()) diff --git a/include/boost/math/tools/is_detected.hpp b/include/boost/math/tools/is_detected.hpp index 8dfe86b74..93fa96f60 100644 --- a/include/boost/math/tools/is_detected.hpp +++ b/include/boost/math/tools/is_detected.hpp @@ -8,7 +8,7 @@ #ifndef BOOST_MATH_TOOLS_IS_DETECTED_HPP #define BOOST_MATH_TOOLS_IS_DETECTED_HPP -#include +#include namespace boost { namespace math { namespace tools { @@ -20,14 +20,14 @@ namespace detail { template class Op, typename... Args> struct detector { - using value_t = std::false_type; + using value_t = boost::math::false_type; using type = Default; }; template class Op, typename... Args> struct detector>, Op, Args...> { - using value_t = std::true_type; + using value_t = boost::math::true_type; using type = Op; }; diff --git a/include/boost/math/tools/mp.hpp b/include/boost/math/tools/mp.hpp index 66fcbdbaf..560ae8b50 100644 --- a/include/boost/math/tools/mp.hpp +++ b/include/boost/math/tools/mp.hpp @@ -13,6 +13,7 @@ #include #include +#include namespace boost { namespace math { namespace tools { namespace meta_programming { @@ -22,8 +23,8 @@ template struct mp_list {}; // Size_t -template -using mp_size_t = boost::math::integral_constant; +template +using mp_size_t = boost::math::integral_constant; // Boolean template @@ -52,7 +53,7 @@ struct mp_size_impl {}; template class L, typename... 
T> // Template template parameter must use class struct mp_size_impl> { - using type = boost::math::integral_constant; + using type = boost::math::integral_constant; }; } @@ -78,7 +79,7 @@ namespace detail { // At // TODO - Use tree based lookup for larger typelists // http://odinthenerd.blogspot.com/2017/04/tree-based-lookup-why-kvasirmpl-is.html -template +template struct mp_at_c {}; template class L, typename T0, typename... T> @@ -167,7 +168,7 @@ struct mp_at_c, 1 }; } -template +template using mp_at_c = typename detail::mp_at_c::type; template @@ -338,8 +339,8 @@ using mp_remove_if_q = mp_remove_if; template struct integer_sequence {}; -template -using index_sequence = integer_sequence; +template +using index_sequence = integer_sequence; namespace detail { @@ -411,11 +412,11 @@ struct make_integer_sequence_impl template using make_integer_sequence = typename detail::make_integer_sequence_impl::type; -template -using make_index_sequence = make_integer_sequence; +template +using make_index_sequence = make_integer_sequence; template -using index_sequence_for = make_integer_sequence; +using index_sequence_for = make_integer_sequence; }}}} // namespaces diff --git a/include/boost/math/tools/rational.hpp b/include/boost/math/tools/rational.hpp index e3c3e7e33..a535abcdc 100644 --- a/include/boost/math/tools/rational.hpp +++ b/include/boost/math/tools/rational.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #ifndef BOOST_MATH_HAS_NVRTC #include @@ -172,7 +173,7 @@ namespace boost{ namespace math{ namespace tools{ // Forward declaration to keep two phase lookup happy: // template -BOOST_MATH_GPU_ENABLED U evaluate_polynomial(const T* poly, U const& z, BOOST_MATH_SIZE_T count) BOOST_MATH_NOEXCEPT(U); +BOOST_MATH_GPU_ENABLED U evaluate_polynomial(const T* poly, U const& z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U); namespace detail{ @@ -190,7 +191,7 @@ BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp // the loop 
expanded versions above: // template -BOOST_MATH_GPU_ENABLED inline U evaluate_polynomial(const T* poly, U const& z, BOOST_MATH_SIZE_T count) BOOST_MATH_NOEXCEPT(U) +BOOST_MATH_GPU_ENABLED inline U evaluate_polynomial(const T* poly, U const& z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U) { BOOST_MATH_ASSERT(count > 0); U sum = static_cast(poly[count - 1]); @@ -205,7 +206,7 @@ BOOST_MATH_GPU_ENABLED inline U evaluate_polynomial(const T* poly, U const& z, B // Compile time sized polynomials, just inline forwarders to the // implementations above: // -template +template BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const T(&a)[N], const V& val) BOOST_MATH_NOEXCEPT(V) { typedef boost::math::integral_constant tag_type; @@ -213,7 +214,7 @@ BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const } #ifndef BOOST_MATH_HAS_NVRTC -template +template BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const std::array& a, const V& val) BOOST_MATH_NOEXCEPT(V) { typedef boost::math::integral_constant tag_type; @@ -224,19 +225,19 @@ BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const // Even polynomials are trivial: just square the argument! 
// template -BOOST_MATH_GPU_ENABLED inline U evaluate_even_polynomial(const T* poly, U z, BOOST_MATH_SIZE_T count) BOOST_MATH_NOEXCEPT(U) +BOOST_MATH_GPU_ENABLED inline U evaluate_even_polynomial(const T* poly, U z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U) { return evaluate_polynomial(poly, U(z*z), count); } -template +template BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V) { return evaluate_polynomial(a, V(z*z)); } #ifndef BOOST_MATH_HAS_NVRTC -template +template BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial(const std::array& a, const V& z) BOOST_MATH_NOEXCEPT(V) { return evaluate_polynomial(a, V(z*z)); @@ -246,12 +247,12 @@ BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial( // Odd polynomials come next: // template -BOOST_MATH_GPU_ENABLED inline U evaluate_odd_polynomial(const T* poly, U z, BOOST_MATH_SIZE_T count) BOOST_MATH_NOEXCEPT(U) +BOOST_MATH_GPU_ENABLED inline U evaluate_odd_polynomial(const T* poly, U z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U) { return poly[0] + z * evaluate_polynomial(poly+1, U(z*z), count-1); } -template +template BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V) { typedef boost::math::integral_constant tag_type; @@ -259,7 +260,7 @@ BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(c } #ifndef BOOST_MATH_HAS_NVRTC -template +template BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(const std::array& a, const V& z) BOOST_MATH_NOEXCEPT(V) { typedef boost::math::integral_constant tag_type; @@ -268,7 +269,7 @@ BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(c #endif template -BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V& z_, BOOST_MATH_SIZE_T count) BOOST_MATH_NOEXCEPT(V); 
+BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V& z_, boost::math::size_t count) BOOST_MATH_NOEXCEPT(V); namespace detail{ @@ -288,7 +289,7 @@ BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(c // in our Lanczos code for example. // template -BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V& z_, BOOST_MATH_SIZE_T count) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V& z_, boost::math::size_t count) BOOST_MATH_NOEXCEPT(V) { V z(z_); V s1, s2; @@ -320,14 +321,14 @@ BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V return s1 / s2; } -template +template BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_rational(const T(&a)[N], const U(&b)[N], const V& z) BOOST_MATH_NOEXCEPT(V) { return detail::evaluate_rational_c_imp(a, b, z, static_cast*>(nullptr)); } #ifndef BOOST_MATH_HAS_NVRTC -template +template BOOST_MATH_GPU_ENABLED BOOST_MATH_GPU_ENABLED inline V evaluate_rational(const std::array& a, const std::array& b, const V& z) BOOST_MATH_NOEXCEPT(V) { return detail::evaluate_rational_c_imp(a.data(), b.data(), z, static_cast*>(nullptr)); diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp index 97e67fae9..8f36aa22d 100644 --- a/include/boost/math/tools/roots.hpp +++ b/include/boost/math/tools/roots.hpp @@ -9,6 +9,11 @@ #ifdef _MSC_VER #pragma once #endif + +#include + +#ifndef BOOST_MATH_HAS_NVRTC // Disabled for now + #include // test for multiprecision types in complex Newton #include @@ -16,7 +21,6 @@ #include #include -#include #include #include @@ -1025,4 +1029,6 @@ inline std::pair::type, typename tools: } // namespace math } // namespace boost +#endif // BOOST_MATH_HAS_NVRTC + #endif // BOOST_MATH_TOOLS_NEWTON_SOLVER_HPP diff --git a/include/boost/math/tools/type_traits.hpp b/include/boost/math/tools/type_traits.hpp index 
a37e8e7eb..a13332797 100644 --- a/include/boost/math/tools/type_traits.hpp +++ b/include/boost/math/tools/type_traits.hpp @@ -12,13 +12,13 @@ #include -namespace boost { -namespace math { - #ifdef BOOST_MATH_ENABLE_CUDA #include +namespace boost { +namespace math { + // Helper classes using cuda::std::integral_constant; using cuda::std::true_type; @@ -163,6 +163,9 @@ using cuda::std::underlying_type_t; #include +namespace boost { +namespace math { + // Helper classes using std::integral_constant; using std::true_type; diff --git a/test/nvrtc_jamfile b/test/nvrtc_jamfile index 8d33eabb5..7e57f93ce 100644 --- a/test/nvrtc_jamfile +++ b/test/nvrtc_jamfile @@ -88,6 +88,8 @@ run test_saspoint5_quan_nvrtc_double.cpp ; run test_saspoint5_quan_nvrtc_float.cpp ; # Special Functions +run test_beta_nvrtc_double.cpp ; +run test_beta_nvrtc_float.cpp ; run test_cbrt_nvrtc_double.cpp ; run test_cbrt_nvrtc_float.cpp ; run test_cos_pi_nvrtc_double.cpp ; @@ -110,10 +112,14 @@ run test_gamma_nvrtc_double.cpp ; run test_gamma_nvrtc_float.cpp ; run test_log1p_nvrtc_double.cpp ; run test_log1p_nvrtc_float.cpp ; +run test_modf_nvrtc_double.cpp ; +run test_modf_nvrtc_float.cpp ; run test_round_nvrtc_double.cpp ; run test_round_nvrtc_float.cpp ; run test_sign_nvrtc_double.cpp ; run test_sign_nvrtc_float.cpp ; run test_sin_pi_nvrtc_double.cpp ; run test_sin_pi_nvrtc_float.cpp ; +run test_trigamma_nvrtc_double.cpp ; +run test_trigamma_nvrtc_float.cpp ; run test_trunc_nvrtc_double.cpp ; diff --git a/test/test_beta_nvrtc_double.cpp b/test/test_beta_nvrtc_double.cpp new file mode 100644 index 000000000..fdc502a19 --- /dev/null +++ b/test/test_beta_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_beta_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", 
"--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * 
sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::beta(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_beta_nvrtc_float.cpp b/test/test_beta_nvrtc_float.cpp new file mode 100644 index 000000000..d403d3315 --- /dev/null +++ b/test/test_beta_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_beta_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", 
"--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * 
sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::beta(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_modf_nvrtc_double.cpp b/test/test_modf_nvrtc_double.cpp new file mode 100644 index 000000000..f172dd52c --- /dev/null +++ b/test/test_modf_nvrtc_double.cpp @@ -0,0 +1,200 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_modf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + float_type fract; + int i_part; + long l_part; + long long ll_part; + + if (i < numElements) + { + out[i] = boost::math::modf(in1[i], &fract) + + boost::math::modf(in1[i], &i_part) + + boost::math::modf(in1[i], &l_part) + + boost::math::modf(in1[i], &ll_part); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_modf_kernel.cu", 0, nullptr, nullptr); + 
checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_modf_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_modf_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + 
checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + float_type fract; + const auto res = 4 * boost::math::modf(h_in1[i], &fract); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_modf_nvrtc_float.cpp b/test/test_modf_nvrtc_float.cpp new file mode 100644 index 000000000..1dcd3c081 --- /dev/null +++ b/test/test_modf_nvrtc_float.cpp @@ -0,0 +1,200 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. 
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_modf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + float_type fract; + int i_part; + long l_part; + long long ll_part; + + if (i < numElements) + { + out[i] = boost::math::modf(in1[i], &fract) + + boost::math::modf(in1[i], &i_part) + + boost::math::modf(in1[i], &l_part) + + boost::math::modf(in1[i], &ll_part); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + 
+ res = nvrtcCreateProgram(&prog, cuda_kernel, "test_modf_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_modf_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_modf_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + 
checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + float_type fract; + const auto res = 4 * boost::math::modf(h_in1[i], &fract); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_trigamma_nvrtc_double.cpp b/test/test_trigamma_nvrtc_double.cpp new file mode 100644 index 000000000..46877acce --- /dev/null +++ b/test/test_trigamma_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_trigamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::trigamma(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create 
CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_trigamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_trigamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_trigamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i 
= 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::trigamma(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_trigamma_nvrtc_float.cpp b/test/test_trigamma_nvrtc_float.cpp new file mode 100644 index 000000000..083c7d876 --- /dev/null +++ b/test/test_trigamma_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_trigamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::trigamma(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA 
context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_trigamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_trigamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_trigamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; 
i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::trigamma(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +}