From b76153346e436cf423d282764ace9ed0eda07b92 Mon Sep 17 00:00:00 2001 From: Cong Ma Date: Wed, 15 Nov 2023 22:31:58 +0000 Subject: [PATCH] Add type related files new file: include/native_types.hpp new file: include/native_types_impl.hpp new file: include/type_traits.hpp new file: include/types_ext.hpp new file: include/xfloat32.hpp --- .../hiptensor/internal/hiptensor_utility.hpp | 5 - library/src/CMakeLists.txt | 2 +- .../contraction/contraction_meta_traits.hpp | 2 +- .../contraction_solution_params.hpp | 2 +- .../contraction_solution_params_impl.hpp | 2 +- .../contraction_solution_registry.hpp | 2 +- library/src/{types.cpp => data_types.cpp} | 2 +- library/src/hiptensor.cpp | 2 +- library/src/include/config.hpp | 184 +++++ library/src/include/data_types.hpp | 78 ++ .../{types_impl.hpp => data_types_impl.hpp} | 8 +- library/src/include/native_types.hpp | 119 +++ library/src/include/native_types_impl.hpp | 36 + library/src/include/type_traits.hpp | 749 ++++++++++++++++++ library/src/include/types.hpp | 62 +- library/src/include/types_ext.hpp | 195 +++++ library/src/include/xfloat32.hpp | 349 ++++++++ .../src/permutation/permutation_ck_impl.hpp | 2 +- .../permutation_cpu_reference_impl.hpp | 2 +- test/01_contraction/contraction_test.cpp | 2 +- test/02_permutation/permutation_resource.cpp | 2 +- test/02_permutation/permutation_test.cpp | 2 +- test/utils.hpp | 16 +- 23 files changed, 1734 insertions(+), 91 deletions(-) rename library/src/{types.cpp => data_types.cpp} (99%) create mode 100644 library/src/include/config.hpp create mode 100644 library/src/include/data_types.hpp rename library/src/include/{types_impl.hpp => data_types_impl.hpp} (97%) create mode 100644 library/src/include/native_types.hpp create mode 100644 library/src/include/native_types_impl.hpp create mode 100644 library/src/include/type_traits.hpp create mode 100644 library/src/include/types_ext.hpp create mode 100644 library/src/include/xfloat32.hpp diff --git a/library/include/hiptensor/internal/hiptensor_utility.hpp b/library/include/hiptensor/internal/hiptensor_utility.hpp index 5fd9e331..f2df2dd2 100644 --- a/library/include/hiptensor/internal/hiptensor_utility.hpp +++ b/library/include/hiptensor/internal/hiptensor_utility.hpp @@ -131,11 +131,6 @@ namespace std return os; } - static ostream& operator<<(ostream& os, const _Float16 value) - { - os << static_cast(value); - return os; - } } #endif // HIPTENSOR_UTILITY_INTERNAL_HPP diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt index 801d0761..286ed2e3 100644 --- a/library/src/CMakeLists.txt +++ b/library/src/CMakeLists.txt @@ -62,7 +62,7 @@ set(HIPTENSOR_CORE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/hiptensor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/logger.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/types.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/data_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hip_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/handle.cpp ) diff --git a/library/src/contraction/contraction_meta_traits.hpp b/library/src/contraction/contraction_meta_traits.hpp index e018661b..4fa7acf7 100644 --- a/library/src/contraction/contraction_meta_traits.hpp +++ b/library/src/contraction/contraction_meta_traits.hpp @@ -34,8 +34,8 @@ #include // hiptensor includes +#include "data_types.hpp" #include "meta_traits.hpp" -#include "types.hpp" namespace hiptensor { diff --git a/library/src/contraction/contraction_solution_params.hpp b/library/src/contraction/contraction_solution_params.hpp index f55d3c85..ec9de45c 100644 --- a/library/src/contraction/contraction_solution_params.hpp +++ b/library/src/contraction/contraction_solution_params.hpp @@ -30,7 +30,7 @@ #include #include "contraction_types.hpp" -#include "types.hpp" +#include "data_types.hpp" namespace hiptensor { diff --git a/library/src/contraction/contraction_solution_params_impl.hpp b/library/src/contraction/contraction_solution_params_impl.hpp index 194fb0d5..bff33960 100644 --- a/library/src/contraction/contraction_solution_params_impl.hpp +++ b/library/src/contraction/contraction_solution_params_impl.hpp @@ -29,8 +29,8 @@ #include "contraction_meta_traits.hpp" #include "contraction_solution_params.hpp" +#include "data_types.hpp" #include "hash.hpp" -#include "types.hpp" namespace std { diff --git a/library/src/contraction/contraction_solution_registry.hpp b/library/src/contraction/contraction_solution_registry.hpp index 039e9e14..d1b80ec5 100644 --- a/library/src/contraction/contraction_solution_registry.hpp +++ b/library/src/contraction/contraction_solution_registry.hpp @@ -32,8 +32,8 @@ #include #include "contraction_types.hpp" +#include "data_types.hpp" #include "singleton.hpp" -#include "types.hpp" namespace hiptensor { diff --git a/library/src/types.cpp b/library/src/data_types.cpp similarity index 99% rename from library/src/types.cpp rename to library/src/data_types.cpp index 9cfb3290..b270973d 100644 --- a/library/src/types.cpp +++ b/library/src/data_types.cpp @@ -24,7 +24,7 @@ * *******************************************************************************/ -#include "types.hpp" +#include "data_types.hpp" namespace hiptensor { diff --git a/library/src/hiptensor.cpp b/library/src/hiptensor.cpp index 3016ef64..9740d2a8 100644 --- a/library/src/hiptensor.cpp +++ b/library/src/hiptensor.cpp @@ -27,9 +27,9 @@ #include +#include "data_types.hpp" #include "handle.hpp" #include "logger.hpp" -#include "types.hpp" #include "util.hpp" hiptensorStatus_t hiptensorCreate(hiptensorHandle_t** handle) diff --git a/library/src/include/config.hpp b/library/src/include/config.hpp new file mode 100644 index 00000000..91e0bf53 --- /dev/null +++ b/library/src/include/config.hpp @@ -0,0 +1,184 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_CONFIG_HPP +#define HIPTENSOR_CONFIG_HPP + +namespace hiptensor +{ + +/// +/// Architecture support +/// Guaranteed symbols: +/// HIPTENSOR_ARCH_GFX908 +/// HIPTENSOR_ARCH_GFX90a +/// HIPTENSOR_ARCH_GFX940 +/// HIPTENSOR_ARCH_GFX941 +/// HIPTENSOR_ARCH_GFX942 +/// HIPTENSOR_ARCH_GFX1100 +/// HIPTENSOR_ARCH_GFX1101 +/// HIPTENSOR_ARCH_GFX1102 +#if defined(__gfx908__) +#define HIPTENSOR_ARCH_GFX908 __gfx908__ +#elif defined(__gfx90a__) +#define HIPTENSOR_ARCH_GFX90A __gfx90a__ +#elif defined(__gfx940__) +#define HIPTENSOR_ARCH_GFX940 __gfx940__ +#elif defined(__gfx941__) +#define HIPTENSOR_ARCH_GFX941 __gfx941__ +#elif defined(__gfx942__) +#define HIPTENSOR_ARCH_GFX942 __gfx942__ +#elif defined(__gfx1100__) +#define HIPTENSOR_ARCH_GFX1100 __gfx1100__ +#elif defined(__gfx1101__) +#define HIPTENSOR_ARCH_GFX1101 __gfx1101__ +#elif defined(__gfx1102__) +#define HIPTENSOR_ARCH_GFX1102 __gfx1102__ +#else +#define HIPTENSOR_ARCH_HOST 1 +#endif + +#if !defined(HIPTENSOR_ARCH_GFX908) +#define HIPTENSOR_ARCH_GFX908 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX90A) +#define HIPTENSOR_ARCH_GFX90A 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX940) +#define HIPTENSOR_ARCH_GFX940 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX941) +#define HIPTENSOR_ARCH_GFX941 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX942) +#define HIPTENSOR_ARCH_GFX942 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX1100) +#define HIPTENSOR_ARCH_GFX1100 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX1101) +#define HIPTENSOR_ARCH_GFX1101 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX1102) +#define HIPTENSOR_ARCH_GFX1102 0 +#endif +#if !defined(HIPTENSOR_ARCH_HOST) +#define HIPTENSOR_ARCH_HOST 0 +#endif + +/// +/// Architecture configuration +/// Guaranteed symbols: +/// HIPTENSOR_ARCH_GFX9 +/// HIPTENSOR_ARCH_GFX11 +/// HIPTENSOR_WAVE64_MODE +/// HIPTENSOR_WAVE32_MODE +/// HIPTENSOR_BLOCK_DIM_16_SUPPORTED +/// HIPTENSOR_BLOCK_DIM_32_SUPPORTED +/// +#if HIPTENSOR_ARCH_GFX908 || HIPTENSOR_ARCH_GFX90A || HIPTENSOR_ARCH_GFX940 \ + || HIPTENSOR_ARCH_GFX941 || HIPTENSOR_ARCH_GFX942 +#define HIPTENSOR_ARCH_GFX9 1 +#define HIPTENSOR_WAVE64_MODE 1 +#define HIPTENSOR_BLOCK_DIM_16_SUPPORTED 1 +#define HIPTENSOR_BLOCK_DIM_32_SUPPORTED 1 +#endif + +#if HIPTENSOR_ARCH_GFX1100 || HIPTENSOR_ARCH_GFX1101 || HIPTENSOR_ARCH_GFX1102 +#define HIPTENSOR_ARCH_GFX11 1 +#define HIPTENSOR_WAVE32_MODE 1 +#define HIPTENSOR_BLOCK_DIM_16_SUPPORTED 1 +#endif + +#if !defined(HIPTENSOR_ARCH_GFX9) +#define HIPTENSOR_ARCH_GFX9 0 +#endif +#if !defined(HIPTENSOR_ARCH_GFX11) +#define HIPTENSOR_ARCH_GFX11 0 +#endif +#if !defined(HIPTENSOR_WAVE64_MODE) +#define HIPTENSOR_WAVE64_MODE 0 +#endif +#if !defined(HIPTENSOR_WAVE32_MODE) +#define HIPTENSOR_WAVE32_MODE 0 +#endif +#if !defined(HIPTENSOR_BLOCK_DIM_16_SUPPORTED) +#define HIPTENSOR_BLOCK_DIM_16_SUPPORTED 0 +#endif +#if !defined(HIPTENSOR_BLOCK_DIM_32_SUPPORTED) +#define HIPTENSOR_BLOCK_DIM_32_SUPPORTED 0 +#endif + +#if defined(NDEBUG) +#define HIPTENSOR_UNSUPPORTED_IMPL(MSG) +#else +#define HIPTENSOR_UNSUPPORTED_IMPL(MSG) __attribute__((deprecated(MSG))) +#endif + +#if defined(HIP_NO_HALF) +#define HIPTENSOR_NO_HALF 1 +#else +#define HIPTENSOR_NO_HALF 0 +#endif // HIP_NO_HALF + +#if HIPTENSOR_NO_HALF || (!HIPTENSOR_NO_HALF && defined(__HIP_NO_HALF_CONVERSIONS__)) +#define HIPTENSOR_TESTS_NO_HALF 1 +#else +#define HIPTENSOR_TESTS_NO_HALF 0 +#endif // !HIPTENSOR_NO_HALF && defined(__HIP_NO_HALF_CONVERSIONS__) + +/// +/// Sanity checks +/// +#if HIPTENSOR_ARCH_GFX11 + static_assert((bool)(HIPTENSOR_WAVE32_MODE) && !(bool)(HIPTENSOR_WAVE64_MODE), + "hipTensor supports only wave32 for gfx11 arch"); + static_assert((bool)(HIPTENSOR_BLOCK_DIM_16_SUPPORTED) + && !(bool)(HIPTENSOR_BLOCK_DIM_32_SUPPORTED), + "hipTensor supports only block size of 16 for gfx11 arch"); +#endif + +#if HIPTENSOR_ARCH_GFX9 + static_assert(!(bool)(HIPTENSOR_WAVE32_MODE) && (bool)(HIPTENSOR_WAVE64_MODE), + "hipTensor supports only wave64 for gfx9 arch"); + static_assert((bool)(HIPTENSOR_BLOCK_DIM_16_SUPPORTED) + && (bool)(HIPTENSOR_BLOCK_DIM_32_SUPPORTED), + "hipTensor requires block size of 16 and 32 for gfx9 arch"); +#endif + +/// +/// Host and Device symbols +/// +#define HIPTENSOR_DEVICE __device__ + +#define HIPTENSOR_HOST __host__ + +#define HIPTENSOR_HOST_DEVICE HIPTENSOR_HOST HIPTENSOR_DEVICE + +#define HIPTENSOR_KERNEL __global__ + +} // namespace hiptensor + +#endif // HIPTENSOR_CONFIG_HPP diff --git a/library/src/include/data_types.hpp b/library/src/include/data_types.hpp new file mode 100644 index 00000000..42197650 --- /dev/null +++ b/library/src/include/data_types.hpp @@ -0,0 +1,78 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *******************************************************************************/ + +#ifndef HIPTENSOR_LIBRARY_DATA_TYPES_HPP +#define HIPTENSOR_LIBRARY_DATA_TYPES_HPP + +// clang-format off +// Include order needs to be preserved +#include +#include +#include +#include + +#include + +// clang-format on + +namespace hiptensor +{ + // Used to map to empty tensors + struct NoneType; + + static constexpr hipDataType NONE_TYPE = (hipDataType)31; + + // Map type to runtime HipDataType + template + struct HipDataType; + + template + static constexpr auto HipDataType_v = HipDataType::value; + + // Get data size in bytes from id + uint32_t hipDataTypeSize(hipDataType id); + + // Convert hipDataType to hiptensorComputeType_t + hiptensorComputeType_t convertToComputeType(hipDataType hipType); + + // Read a single value from void pointer, casted to T + template + T readVal(void const* value, hipDataType id); + + template + T readVal(void const* value, hiptensorComputeType_t id); + +} // namespace hiptensor + +bool operator==(hipDataType hipType, hiptensorComputeType_t computeType); +bool operator==(hiptensorComputeType_t computeType, hipDataType hipType); + +bool operator!=(hipDataType hipType, hiptensorComputeType_t computeType); +bool operator!=(hiptensorComputeType_t computeType, hipDataType hipType); + +#include "data_types_impl.hpp" + +#endif // HIPTENSOR_LIBRARY_DATA_TYPES_HPP diff --git a/library/src/include/types_impl.hpp b/library/src/include/data_types_impl.hpp similarity index 97% rename from library/src/include/types_impl.hpp rename to library/src/include/data_types_impl.hpp index 7a07d0b0..7df6d7d9 100644 --- a/library/src/include/types_impl.hpp +++ b/library/src/include/data_types_impl.hpp @@ -24,10 +24,10 @@ * *******************************************************************************/ -#ifndef HIPTENSOR_LIBRARY_TYPES_IMPL_HPP -#define HIPTENSOR_LIBRARY_TYPES_IMPL_HPP +#ifndef HIPTENSOR_LIBRARY_DATA_TYPES_IMPL_HPP +#define HIPTENSOR_LIBRARY_DATA_TYPES_IMPL_HPP -#include "types.hpp" +#include "data_types.hpp" namespace hiptensor { @@ -217,4 +217,4 @@ namespace hiptensor } // namespace hiptensor -#endif // HIPTENSOR_LIBRARY_TYPES_IMPL_HPP +#endif // HIPTENSOR_LIBRARY_DATA_TYPES_IMPL_HPP diff --git a/library/src/include/native_types.hpp b/library/src/include/native_types.hpp new file mode 100644 index 00000000..c8df80e4 --- /dev/null +++ b/library/src/include/native_types.hpp @@ -0,0 +1,119 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_NATIVE_TYPES_HPP +#define HIPTENSOR_NATIVE_TYPES_HPP + +#if !defined(__HIPCC_RTC__) +#include +#include +#include +#include +#include +#include +#endif // !__HIPCC_RTC__ + +#include "xfloat32.hpp" + +namespace hiptensor +{ + + /** + * \defgroup DataTypes Data Type Metadata + * + * @brief Definition and metadata on supported data types of matrices. + * + * @{ + * + * Native Data Types: + * float64_t = f64 = double + * float = f32 + * _Float16 = f16 + * int8 + * uint8 + * int16 + * int32 + * uint32 + * + * + * Non-Native Data Types: + * h16 = __half + * bf16 = bfloat16 + * + */ + + // Native types + using float16_t = _Float16; + using float32_t = float; + using float64_t = double; + +#if !defined(__HIPCC_RTC__) + + using int8_t = ::int8_t; + using uint8_t = ::uint8_t; + using int16_t = ::int16_t; + using uint16_t = ::uint16_t; + using int32_t = ::int32_t; + using uint32_t = ::uint32_t; + using int64_t = ::int64_t; + using uint64_t = ::uint64_t; + using index_t = ::int32_t; + +#else + + using int8_t = __hip_internal::int8_t; + using uint8_t = __hip_internal::uint8_t; + using int16_t = __hip_internal::int16_t; + using uint16_t = __hip_internal::uint16_t; + using int32_t = __hip_internal::int32_t; + using uint32_t = __hip_internal::uint32_t; + using int64_t = __hip_internal::int64_t; + using uint64_t = __hip_internal::uint64_t; + using index_t = __hip_internal::int32_t; + +#endif // !defined(__HIPCC_RTC__) + + // Non-native types + using bfloat16_t = hip_bfloat16; + +#if !HIPTENSOR_NO_HALF + using hfloat16_t = __half; +#endif // !HIPTENSOR_NO_HALF + + using xfloat32_t = hiptensor_xfloat32; + + // clang-format off + + +} // namespace hiptensor + +// Add in some extensions to basic type support. +// Some of these are required for vector implementations. +// #include "type_traits.hpp" +// #include "types_ext.hpp" + +#include "native_types_impl.hpp" + +#endif // HIPTENSOR_NATIVE_TYPES_HPP diff --git a/library/src/include/native_types_impl.hpp b/library/src/include/native_types_impl.hpp new file mode 100644 index 00000000..1b29b459 --- /dev/null +++ b/library/src/include/native_types_impl.hpp @@ -0,0 +1,36 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_NATIVE_TYPES_IMPL_HPP +#define HIPTENSOR_NATIVE_TYPES_IMPL_HPP + +#include "native_types.hpp" + +namespace hiptensor +{ + +} // namespace hiptensor + +#endif // HIPTENSOR_NATIVE_TYPES_IMPL_HPP diff --git a/library/src/include/type_traits.hpp b/library/src/include/type_traits.hpp new file mode 100644 index 00000000..20d73a03 --- /dev/null +++ b/library/src/include/type_traits.hpp @@ -0,0 +1,749 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#ifndef HIPTENSOR_TYPE_TRAITS_HPP +#define HIPTENSOR_TYPE_TRAITS_HPP + +#if !defined(__HIPCC_RTC__) + +#include + +#else + +#define FLT_EPSILON __FLT_EPSILON__ +#define FLT_MAX __FLT_MAX__ +#define FLT_MIN __FLT_MIN__ +#define HUGE_VALF (__builtin_huge_valf()) + +#endif // !defined(__HIPCC_RTC__) + +#include "native_types.hpp" + +namespace hiptensor +{ + namespace detail + { + struct Fp16Bits + { + union + { + uint16_t i16; + float16_t f16; +#if !HIPTENSOR_NO_HALF + hfloat16_t h16; +#endif // !HIPTENSOR_NO_HALF + bfloat16_t b16; + }; + constexpr Fp16Bits(uint16_t initVal) + : i16(initVal) + { + } +#define TEST_TEST 1 + constexpr Fp16Bits(float16_t initVal) + : f16(initVal) + { + } +#if !HIPTENSOR_NO_HALF + constexpr Fp16Bits(hfloat16_t initVal) + : h16(initVal) + { + } +#endif + constexpr Fp16Bits(bfloat16_t initVal) + : b16(initVal) + { + } + }; + + struct Fp32Bits + { + union + { + uint32_t i32; + float32_t f32; + xfloat32_t xf32; + }; + constexpr Fp32Bits(uint32_t initVal) + : i32(initVal) + { + } + constexpr Fp32Bits(float32_t initVal) + : f32(initVal) + { + } + constexpr Fp32Bits(xfloat32_t initVal) + : xf32(initVal) + { + } + }; + + } // namespace detail +} // namespace hiptensor + +/////////////////////////////////////////////////////////// +///////////// std replacements for hipRTC /////////////// +/////////////////////////////////////////////////////////// +#if defined(__HIPCC_RTC__) +namespace std +{ + template + class numeric_limits + { + public: + HIPTENSOR_HOST_DEVICE static constexpr T min() noexcept; + HIPTENSOR_HOST_DEVICE static constexpr T lowest() noexcept; + HIPTENSOR_HOST_DEVICE static constexpr T max() noexcept; + HIPTENSOR_HOST_DEVICE static constexpr T epsilon() noexcept; + HIPTENSOR_HOST_DEVICE static constexpr T round_error() noexcept; + HIPTENSOR_HOST_DEVICE static constexpr T infinity() noexcept; + HIPTENSOR_HOST_DEVICE static constexpr T quiet_NaN() noexcept; + HIPTENSOR_HOST_DEVICE static constexpr T signaling_NaN() noexcept; + HIPTENSOR_HOST_DEVICE static constexpr T denorm_min() noexcept; + }; + + template + using enable_if_t = typename enable_if::type; + + template + struct conditional + { + }; + + template + struct conditional + { + using type = T; + }; + + template + struct conditional + { + using type = F; + }; + + template + using conditional_t = typename conditional::type; + + template + HIPTENSOR_HOST_DEVICE const T& max(const T& a, const T& b) + { + return (a < b) ? b : a; + } + + template + HIPTENSOR_HOST_DEVICE const T& min(const T& a, const T& b) + { + return (b < a) ? a : b; + } + + // Meta programming helper types. + + template + struct conditional; + + template + struct __or_; + + template <> + struct __or_<> : public false_type + { + }; + + template + struct __or_<_B1> : public _B1 + { + }; + + template + struct __or_<_B1, _B2> : public conditional<_B1::value, _B1, _B2>::type + { + }; + + template + struct __or_<_B1, _B2, _B3, _Bn...> + : public conditional<_B1::value, _B1, __or_<_B2, _B3, _Bn...>>::type + { + }; + + template + struct __and_; + + template <> + struct __and_<> : public true_type + { + }; + + template + struct __and_<_B1> : public _B1 + { + }; + + template + struct __and_<_B1, _B2> : public conditional<_B1::value, _B2, _B1>::type + { + }; + + template + struct __and_<_B1, _B2, _B3, _Bn...> + : public conditional<_B1::value, __and_<_B2, _B3, _Bn...>, _B1>::type + { + }; + + template + using __bool_constant = integral_constant; + + template + struct __not_ : public __bool_constant + { + }; + + // remove_reference + template + struct remove_reference + { + typedef T type; + }; + + template + struct remove_reference + { + typedef T type; + }; + + template + struct remove_reference + { + typedef T type; + }; + + // is_lvalue_reference + template + struct is_lvalue_reference : public false_type + { + }; + + template + struct is_lvalue_reference : public true_type + { + }; + + // is_rvalue_reference + template + struct is_rvalue_reference : public false_type + { + }; + + template + struct is_rvalue_reference : public true_type + { + }; + + // lvalue forwarding + template + constexpr T&& forward(typename remove_reference::type& __t) noexcept + { + return static_cast(__t); + } + + // rvalue forwarding + template + constexpr T&& forward(typename remove_reference::type&& __t) noexcept + { + static_assert(!is_lvalue_reference::value, + "template argument" + " substituting T is an lvalue reference type"); + return static_cast(__t); + } + + // remove_const + template + struct remove_const + { + typedef T type; + }; + + template + struct remove_const + { + typedef T type; + }; + + // remove_volatile + template + struct remove_volatile + { + typedef T type; + }; + + template + struct remove_volatile + { + typedef T type; + }; + + // remove_cv + template + struct remove_cv + { + typedef typename remove_const::type>::type type; + }; + + // remove_extent + template + struct remove_extent + { + typedef T type; + }; + + template + struct remove_extent + { + typedef T type; + }; + + template + struct remove_extent + { + typedef T type; + }; + + // is_void + template + struct __is_void_helper : public false_type + { + }; + + template <> + struct __is_void_helper : public true_type + { + }; + + template + struct is_void : public __is_void_helper::type>::type + { + }; + + // is_reference + template + struct is_reference : public __or_, is_rvalue_reference>::type + { + }; + + // is_function + template + struct is_function : public false_type + { + }; + + // is_object + template + struct is_object : public __not_<__or_, is_reference, is_void>>::type + { + }; + + // __is_referenceable + template + struct __is_referenceable : public __or_, is_reference>::type{}; + + // add_pointer + template , is_void>::value> + struct __add_pointer_helper + { + typedef T type; + }; + + template + struct __add_pointer_helper + { + typedef typename remove_reference::type* type; + }; + + template + struct add_pointer : public __add_pointer_helper + { + }; + + // is_array + template + struct is_array : public false_type + { + }; + + template + struct is_array : public true_type + { + }; + + template + struct is_array : public true_type + { + }; + + // decay selectors + template ::value, + bool _IsFunction = is_function<_Up>::value> + struct __decay_selector; + + template + struct __decay_selector<_Up, false, false> + { + typedef typename remove_cv<_Up>::type __type; + }; + + template + struct __decay_selector<_Up, true, false> + { + typedef typename remove_extent<_Up>::type* __type; + }; + + template + struct __decay_selector<_Up, false, true> + { + typedef typename add_pointer<_Up>::type __type; + }; + + // decay + template + class decay + { + typedef typename remove_reference::type __remove_type; + + public: + typedef typename __decay_selector<__remove_type>::__type type; + }; + + template + using decay_t = typename decay::type; + +} // namespace std +#endif + +namespace std +{ +#if defined(__HIPCC_RTC__) + using uint16_t = hiptensor::uint16_t; +#endif + + /////////////////////////////////////////////////////////// + /////////// std::numeric_limits ////////////// + /////////////////////////////////////////////////////////// + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::epsilon() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x1400)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::infinity() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7C00)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::lowest() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0xFBFF)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::max() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7BFF)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::min() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x0400)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::quiet_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7FFF)); + return eps.f16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::float16_t + numeric_limits::signaling_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7DFF)); + return eps.f16; + } + + /////////////////////////////////////////////////////////// + /////////// std::numeric_limits ///////////// + /////////////////////////////////////////////////////////// +#if !HIPTENSOR_NO_HALF + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::epsilon() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x1400)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::infinity() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7C00)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::lowest() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0xFBFF)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::max() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7BFF)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::min() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x0400)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::quiet_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7FFF)); + return eps.h16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::hfloat16_t + numeric_limits::signaling_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7DFF)); + return eps.h16; + } + +#endif // !HIPTENSOR_NO_HALF + + /////////////////////////////////////////////////////////// + /////////// std::numeric_limits ///////////// + /////////////////////////////////////////////////////////// + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::epsilon() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x3C00)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::infinity() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7F80)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::lowest() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0xFF7F)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::max() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7F7F)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::min() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x007F)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::quiet_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7FC0)); + return eps.b16; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::bfloat16_t + numeric_limits::signaling_NaN() noexcept + { + hiptensor::detail::Fp16Bits eps(static_cast(0x7FC0)); + return eps.b16; + } + + /////////////////////////////////////////////////////////// + /////////// std::numeric_limits ////////////// + /////////////////////////////////////////////////////////// + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::epsilon() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(FLT_EPSILON)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::infinity() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(HUGE_VALF)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::lowest() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(-FLT_MAX)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::max() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(FLT_MAX)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::min() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(FLT_MIN)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::quiet_NaN() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(0x7FF80000)); + return eps.xf32; + } + + template <> + HIPTENSOR_HOST_DEVICE constexpr hiptensor::xfloat32_t + numeric_limits::signaling_NaN() noexcept + { + hiptensor::detail::Fp32Bits eps(static_cast(0x7FF00000)); + return eps.xf32; + } + // @endcond + +} // namespace std + +namespace hiptensor +{ +#if !defined(__HIPCC_RTC__) + template ::value, int> = 0> + constexpr auto maxExactInteger() -> decltype(std::numeric_limits::max()) + { + return std::numeric_limits::max(); + } + + template ::value + && std::numeric_limits::digits, + int> + = 0> + constexpr auto maxExactInteger() -> + typename std::conditional_t::value, int64_t, int32_t> + { + using RetT = + typename std::conditional_t::value, int64_t, int32_t>; + return ((RetT)1 << std::numeric_limits::digits); + } + + template ::value || +#endif // !HIPTENSOR_NO_HALF + std::is_same::value, + int> + = 0> + constexpr auto maxExactInteger() -> int32_t + { + // f16 mantissa is 10 bits + return ((int32_t)1 << 11); + } + + template ::value, int> = 0> + constexpr auto maxExactInteger() -> int32_t + { + // b16 mantissa is 7 bits + return ((int32_t)1 << 8); + } + + template ::value, int> = 0> + constexpr auto maxExactInteger() -> int32_t + { + // xf32 mantissa is 7 bits + return ((int32_t)1 << 8); + } +#endif // !defined(__HIPCC_RTC__) + +} // namespace hiptensor + +#endif // HIPTENSOR_TYPE_TRAITS_HPP diff --git a/library/src/include/types.hpp b/library/src/include/types.hpp index 8cd677f3..2cc7805c 100644 --- a/library/src/include/types.hpp +++ b/library/src/include/types.hpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -19,60 +19,10 @@ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. * *******************************************************************************/ - -#ifndef HIPTENSOR_LIBRARY_TYPES_HPP -#define HIPTENSOR_LIBRARY_TYPES_HPP - -// clang-format off -// Include order needs to be preserved -#include -#include -#include -#include - -#include - -// clang-format on - -namespace hiptensor -{ - // Used to map to empty tensors - struct NoneType; - - static constexpr hipDataType NONE_TYPE = (hipDataType)31; - - // Map type to runtime HipDataType - template - struct HipDataType; - - template - static constexpr auto HipDataType_v = HipDataType::value; - - // Get data size in bytes from id - uint32_t hipDataTypeSize(hipDataType id); - - // Convert hipDataType to hiptensorComputeType_t - hiptensorComputeType_t convertToComputeType(hipDataType hipType); - - // Read a single value from void pointer, casted to T - template - T readVal(void const* value, hipDataType id); - - template - T readVal(void const* value, hiptensorComputeType_t id); - -} // namespace hiptensor - -bool operator==(hipDataType hipType, hiptensorComputeType_t computeType); -bool operator==(hiptensorComputeType_t computeType, hipDataType hipType); - -bool operator!=(hipDataType hipType, hiptensorComputeType_t computeType); -bool operator!=(hiptensorComputeType_t computeType, hipDataType hipType); - -#include "types_impl.hpp" - -#endif // HIPTENSOR_LIBRARY_TYPES_HPP +#include "native_types.hpp" +#include "type_traits.hpp" +#include "types_ext.hpp" diff --git a/library/src/include/types_ext.hpp b/library/src/include/types_ext.hpp new file mode 100644 index 00000000..e668c17d --- /dev/null +++ b/library/src/include/types_ext.hpp @@ -0,0 +1,195 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef HIPTENSOR_TYPES_EXT_HPP +#define HIPTENSOR_TYPES_EXT_HPP + +#if !defined(__HIPCC_RTC__) +#include +#include +#include +#include +#else +#include "utils.hpp" +#endif // !defined(__HIPCC_RTC__) + +#include "type_traits.hpp" + +namespace hiptensor +{ + +#if !defined(__HIPCC_RTC__) + + //////////////////////////////////////////////////////////////////////// + /////////// hiptensor::hfloat16_t host and device conversions ////////// + //////////////////////////////////////////////////////////////////////// + template , int> = 0> + __host__ __device__ inline Outgoing convert(const Incoming& value) + { +#if !HIPTENSOR_NO_HALF + if constexpr(std::is_same_v) + { + +#if defined(__HIP_NO_HALF_CONVERSIONS__) + detail::Fp16Bits fp16(static_cast(value)); + return fp16.h16; +#else + return static_cast(value); +#endif // defined(__HIP_NO_HALF_CONVERSIONS__) + } + else if constexpr(std::is_same_v) + { + +#if defined(__HIP_NO_HALF_CONVERSIONS__) + detail::Fp16Bits fp16(value); + return static_cast(fp16.f16); +#else + return static_cast(value); +#endif // defined(__HIP_NO_HALF_CONVERSIONS__) + } + else +#endif // !HIPTENSOR_NO_HALF + { + return static_cast(value); + } + } + + template , int> = 0> + __host__ __device__ inline Outgoing const& convert(const Incoming& value) + { + return value; + } + + //////////////////////////////////////////////////////////////////// + /////////// hiptensor::hfloat16_t host & device operators ////////// + /////////////////////////////////////////////////////////////////// + +#if defined(__HIP_NO_HALF_OPERATORS__) +// No operators defined for host or device +#define HIPTENSOR_HALF_OP_ATTR HIPTENSOR_HOST_DEVICE +#else +// No operators defined just for host +#define HIPTENSOR_HALF_OP_ATTR HIPTENSOR_HOST +#endif // defined(__HIP_NO_HALF_OPERATORS__) + +#if !HIPTENSOR_NO_HALF + + HIPTENSOR_HALF_OP_ATTR inline bool operator==(const hfloat16_t& x, const hfloat16_t& y) + { + auto absDiff = std::fabs(__half2float(x) - __half2float(y)); + auto absAdd = std::fabs(__half2float(x) + __half2float(y)); + return absDiff <= __half2float(std::numeric_limits::epsilon()) * absAdd * 2.0f + || absDiff < __half2float(std::numeric_limits::min()); + } + + HIPTENSOR_HALF_OP_ATTR inline bool operator!=(const hfloat16_t& x, const hfloat16_t& y) + { + return !(x == y); + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t operator-(const hfloat16_t& x) + { + detail::Fp16Bits fp16(x); + fp16.i16 ^= 0x8000; // Flip sign + return fp16.h16; + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t operator+(const hfloat16_t& x, const hfloat16_t& y) + { + return convert(convert(x) + convert(y)); + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t operator-(const hfloat16_t& x, const hfloat16_t& y) + { + return convert(convert(x) - convert(y)); + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t operator*(const hfloat16_t& x, const hfloat16_t& y) + { + return convert(convert(x) * convert(y)); + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t operator/(const hfloat16_t& x, const hfloat16_t& y) + { + return convert(convert(x) / convert(y)); + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t& operator+=(hfloat16_t& x, const hfloat16_t& y) + { + return x = x + y; + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t& operator-=(hfloat16_t& x, const hfloat16_t& y) + { + return x = x - y; + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t& operator*=(hfloat16_t& x, const hfloat16_t& y) + { + return x = x * y; + } + + HIPTENSOR_HALF_OP_ATTR inline hfloat16_t& operator/=(hfloat16_t& x, const hfloat16_t& y) + { + return x = x / y; + } + +#endif // !HIPTENSOR_NO_HALF + +#endif // !defined(__HIPCC_RTC__) + +} // namespace hiptensor + +namespace std +{ +#if !defined(__HIPCC_RTC__) + /////////////////////////////////////////////////////////// + ////////// std::ostream::operator<<(float16_t) ////////// + /////////////////////////////////////////////////////////// + + inline ostream& operator<<(ostream& stream, hiptensor::float16_t const& val) + { + return stream << static_cast(val); + } + + /////////////////////////////////////////////////////////// + ////////// std::ostream::operator<<(hfloat16_t) ///////// + /////////////////////////////////////////////////////////// +#if !HIPTENSOR_NO_HALF + inline ostream& operator<<(ostream& stream, hiptensor::hfloat16_t const& val) + { + return stream << __half2float(val); + } +#endif // !HIPTENSOR_NO_HALF + +#endif // !defined(__HIPCC_RTC__) && !HIPTENSOR_NO_HALF + +} // namespace std + +#endif // HIPTENSOR_TYPES_EXT_HPP diff --git a/library/src/include/xfloat32.hpp b/library/src/include/xfloat32.hpp new file mode 100644 index 00000000..880d1bee --- /dev/null +++ b/library/src/include/xfloat32.hpp @@ -0,0 +1,349 @@ +/* ************************************************************************ + * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- + * ies of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- + * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- + * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ + +/*!\file + * \brief xfloat32.h provides struct for hiptensor_xfloat32 typedef + */ + +#ifndef HIPTENSOR_XFLOAT32_HPP +#define HIPTENSOR_XFLOAT32_HPP + +#if __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only +// include a minimal definition of hiptensor_xfloat32 + +#include +typedef struct +{ + float data; +} hiptensor_xfloat32; + +#else // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +#if !defined(__HIPCC_RTC__) + +#include +#include +#include +#include +#include +#include + +#else + +namespace std +{ + using __hip_internal::is_standard_layout; + using __hip_internal::is_trivial; +} + +#endif // !defined(__HIPCC_RTC__) + +#include "config.hpp" + +struct hiptensor_xfloat32 +{ + float data; + + enum round_t + { + round_up + }; + + HIPTENSOR_HOST_DEVICE hiptensor_xfloat32() = default; + + // round upper 19 bits of IEEE float to convert to xfloat32 + explicit HIPTENSOR_HOST_DEVICE hiptensor_xfloat32(float f, round_t) + : data(float_to_xfloat32(f)) + { + } + + explicit HIPTENSOR_HOST_DEVICE hiptensor_xfloat32(float f) + : data(truncate_float_to_xfloat32(f)) + { + } + + // zero extend lower 13 bits of xfloat32 to convert to IEEE float + HIPTENSOR_HOST_DEVICE operator float() const + { + return data; + } + + explicit HIPTENSOR_HOST_DEVICE operator bool() const + { + union + { + float fp32; + uint32_t int32; + } u = {data}; + return u.int32 & 0x7fffe000; + } + + explicit HIPTENSOR_HOST_DEVICE operator uint32_t() const + { + return uint32_t(float(*this)); + } + + explicit HIPTENSOR_HOST_DEVICE operator long() const + { + return long(float(*this)); + } + + explicit HIPTENSOR_HOST_DEVICE operator double() const + { + return double(float(*this)); + } + +private: + static HIPTENSOR_HOST_DEVICE float float_to_xfloat32(float f) + { + union + { + float fp32; + uint32_t int32; + } u = {f}; + if(~u.int32 & 0x7f800000) + { + // When the exponent bits are not all 1s, then the value is zero, normal, + // or subnormal. We round the xfloat32 mantissa up by adding 0xFFF, plus + // 1 if the least significant bit of the xfloat32 mantissa is 1 (odd). + // This causes the xfloat32's mantissa to be incremented by 1 if the 13 + // least significant bits of the float mantissa are greater than 0x1000, + // or if they are equal to 0x1000 and the least significant bit of the + // xfloat32 mantissa is 1 (odd). This causes it to be rounded to even when + // the lower 13 bits are exactly 0x1000. If the xfloat32 mantissa already + // has the value 0x3ff, then incrementing it causes it to become 0x00 and + // the exponent is incremented by one, which is the next higher FP value + // to the unrounded xfloat32 value. When the xfloat32 value is subnormal + // with an exponent of 0x00 and a mantissa of 0x3FF, it may be rounded up + // to a normal value with an exponent of 0x01 and a mantissa of 0x00. + // When the xfloat32 value has an exponent of 0xFE and a mantissa of 0x3FF, + // incrementing it causes it to become an exponent of 0xFF and a mantissa + // of 0x00, which is Inf, the next higher value to the unrounded value. + + u.int32 += 0xfff + ((u.int32 >> 13) & 1); // Round to nearest, round to even + } + else if(u.int32 & 0x1fff) + { + // When all of the exponent bits are 1, the value is Inf or NaN. + // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero + // mantissa bit. Quiet NaN is indicated by the most significant mantissa + // bit being 1. Signaling NaN is indicated by the most significant + // mantissa bit being 0 but some other bit(s) being 1. If any of the + // lower 13 bits of the mantissa are 1, we set the least significant bit + // of the xfloat32 mantissa, in order to preserve signaling NaN in case + // the xfloat32's mantissa bits are all 0. + u.int32 |= 0x2000; // Preserve signaling NaN + } + + u.int32 &= 0xffffe000; + return u.fp32; + } + + // Truncate instead of rounding + static HIPTENSOR_HOST_DEVICE float truncate_float_to_xfloat32(float f) + { + union + { + float fp32; + uint32_t int32; + } u = {f}; + + u.int32 = u.int32 & 0xffffe000; + return u.fp32; + } +}; + +typedef struct +{ + float data; +} hiptensor_xfloat32_public; + +static_assert(std::is_standard_layout{}, + "hiptensor_xfloat32 is not a standard layout type, and thus is " + "incompatible with C."); + +static_assert(std::is_trivial{}, + "hiptensor_xfloat32 is not a trivial type, and thus is " + "incompatible with C."); + +#if !defined(__HIPCC_RTC__) +static_assert(sizeof(hiptensor_xfloat32) == sizeof(hiptensor_xfloat32_public) + && offsetof(hiptensor_xfloat32, data) + == offsetof(hiptensor_xfloat32_public, data), + "internal hiptensor_xfloat32 does not match public hiptensor_xfloat32"); +#endif // !defined(__HIPCC_RTC__) + +#if !defined(__HIPCC_RTC__) +inline std::ostream& operator<<(std::ostream& os, const hiptensor_xfloat32& xf32) +{ + return os << float(xf32); +} +#endif // !defined(__HIPCC_RTC__) +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator+(hiptensor_xfloat32 a) +{ + return a; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator-(hiptensor_xfloat32 a) +{ + union + { + float fp32; + uint32_t int32; + } u = {a.data}; + u.int32 ^= 0x80000000; + return hiptensor_xfloat32(u.fp32); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator+(hiptensor_xfloat32 a, + hiptensor_xfloat32 b) +{ + return hiptensor_xfloat32(float(a) + float(b)); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator-(hiptensor_xfloat32 a, + hiptensor_xfloat32 b) +{ + return hiptensor_xfloat32(float(a) - float(b)); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator*(hiptensor_xfloat32 a, + hiptensor_xfloat32 b) +{ + return hiptensor_xfloat32(float(a) * float(b)); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator/(hiptensor_xfloat32 a, + hiptensor_xfloat32 b) +{ + return hiptensor_xfloat32(float(a) / float(b)); +} +inline HIPTENSOR_HOST_DEVICE bool operator<(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return float(a) < float(b); +} +inline HIPTENSOR_HOST_DEVICE bool operator==(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return float(a) == float(b); +} +inline HIPTENSOR_HOST_DEVICE bool operator>(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return b < a; +} +inline HIPTENSOR_HOST_DEVICE bool operator<=(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return !(a > b); +} +inline HIPTENSOR_HOST_DEVICE bool operator!=(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return !(a == b); +} +inline HIPTENSOR_HOST_DEVICE bool operator>=(hiptensor_xfloat32 a, hiptensor_xfloat32 b) +{ + return !(a < b); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator+=(hiptensor_xfloat32& a, + hiptensor_xfloat32 b) +{ + return a = a + b; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator-=(hiptensor_xfloat32& a, + hiptensor_xfloat32 b) +{ + return a = a - b; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator*=(hiptensor_xfloat32& a, + hiptensor_xfloat32 b) +{ + return a = a * b; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator/=(hiptensor_xfloat32& a, + hiptensor_xfloat32 b) +{ + return a = a / b; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator++(hiptensor_xfloat32& a) +{ + return a += hiptensor_xfloat32(1.0f); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32& operator--(hiptensor_xfloat32& a) +{ + return a -= hiptensor_xfloat32(1.0f); +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator++(hiptensor_xfloat32& a, int) +{ + hiptensor_xfloat32 orig = a; + ++a; + return orig; +} +inline HIPTENSOR_HOST_DEVICE hiptensor_xfloat32 operator--(hiptensor_xfloat32& a, int) +{ + hiptensor_xfloat32 orig = a; + --a; + return orig; +} + +namespace std +{ + constexpr HIPTENSOR_HOST_DEVICE bool isinf(hiptensor_xfloat32 a) + { + union + { + float fp32; + uint32_t int32; + } u = {a.data}; + return !(~u.int32 & 0x7f800000) && !(u.int32 & 0x7fe000); + } + constexpr HIPTENSOR_HOST_DEVICE bool isnan(hiptensor_xfloat32 a) + { + union + { + float fp32; + uint32_t int32; + } u = {a.data}; + return !(~u.int32 & 0x7f800000) && +(u.int32 & 0x7fe000); + } + constexpr HIPTENSOR_HOST_DEVICE bool iszero(hiptensor_xfloat32 a) + { + union + { + float fp32; + uint32_t int32; + } u = {a.data}; + return (u.fp32 == 0.0f); + } + + HIPTENSOR_HOST_DEVICE inline hiptensor_xfloat32 sin(hiptensor_xfloat32 a) + { + return hiptensor_xfloat32(sinf(float(a))); + } + HIPTENSOR_HOST_DEVICE inline hiptensor_xfloat32 cos(hiptensor_xfloat32 a) + { + return hiptensor_xfloat32(cosf(float(a))); + } + + HIPTENSOR_HOST_DEVICE constexpr hiptensor_xfloat32 real(const hiptensor_xfloat32& a) + { + return a; + } +} + +#endif // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +#endif // HIPTENSOR_XFLOAT32_HPP diff --git a/library/src/permutation/permutation_ck_impl.hpp b/library/src/permutation/permutation_ck_impl.hpp index 7878e908..0c73ccff 100644 --- a/library/src/permutation/permutation_ck_impl.hpp +++ b/library/src/permutation/permutation_ck_impl.hpp @@ -32,8 +32,8 @@ #include #include +#include "data_types.hpp" #include "performance.hpp" -#include "types.hpp" namespace hiptensor { diff --git a/library/src/permutation/permutation_cpu_reference_impl.hpp b/library/src/permutation/permutation_cpu_reference_impl.hpp index d64147fe..c1d4a3af 100644 --- a/library/src/permutation/permutation_cpu_reference_impl.hpp +++ b/library/src/permutation/permutation_cpu_reference_impl.hpp @@ -29,8 +29,8 @@ #include #include +#include "data_types.hpp" #include "permutation_cpu_reference.hpp" -#include "types.hpp" #include "util.hpp" namespace hiptensor diff --git a/test/01_contraction/contraction_test.cpp b/test/01_contraction/contraction_test.cpp index 4ad97610..5d745d12 100644 --- a/test/01_contraction/contraction_test.cpp +++ b/test/01_contraction/contraction_test.cpp @@ -25,7 +25,7 @@ *******************************************************************************/ #include -#include "types.hpp" +#include "data_types.hpp" #include "llvm/hiptensor_options.hpp" #include "contraction/contraction_cpu_reference.hpp" diff --git a/test/02_permutation/permutation_resource.cpp b/test/02_permutation/permutation_resource.cpp index 4323e01d..1f448ff8 100644 --- a/test/02_permutation/permutation_resource.cpp +++ b/test/02_permutation/permutation_resource.cpp @@ -28,7 +28,7 @@ #define HIPTENSOR_PERMUTATION_RESOURCE_IMPL_HPP #include "permutation_resource.hpp" -#include "types.hpp" +#include "data_types.hpp" #include "utils.hpp" namespace hiptensor diff --git a/test/02_permutation/permutation_test.cpp b/test/02_permutation/permutation_test.cpp index dbb52d6e..cfadf5c0 100644 --- a/test/02_permutation/permutation_test.cpp +++ b/test/02_permutation/permutation_test.cpp @@ -25,10 +25,10 @@ *******************************************************************************/ #include +#include "data_types.hpp" #include "logger.hpp" #include "permutation/permutation_cpu_reference.hpp" #include "permutation_test.hpp" -#include "types.hpp" #include "utils.hpp" #include "llvm/hiptensor_options.hpp" diff --git a/test/utils.hpp b/test/utils.hpp index 67418568..1f7ece44 100644 --- a/test/utils.hpp +++ b/test/utils.hpp @@ -43,6 +43,7 @@ #include #include "device/common.hpp" +#include "types.hpp" #define HIPTENSOR_FREE_DEVICE(ptr) \ if(ptr != nullptr) \ @@ -209,19 +210,6 @@ std::pair compareEqual(DDataType const* deviceD, return std::make_pair(retval, max_relative_error); } -template -double getEpsilon() -{ - if(std::is_same_v) - { - return 0.0009765625; // numeric_limits<_Float16>::epsilon() => 0 - } - else - { - return std::numeric_limits::epsilon(); - } -}; - template std::pair compareEqualLaunchKernel(DDataType* deviceD, DDataType* hostD, @@ -288,7 +276,7 @@ std::pair compareEqualLaunchKernel(DDataType* deviceD, auto toDouble = [](DDataType const& val) { return static_cast(static_cast(val)); }; - auto eps = getEpsilon(); + auto eps = toDouble(std::numeric_limits::epsilon()); if(isNaN) { retval = false;