diff --git a/library/src/data_types.cpp b/library/src/data_types.cpp
index 4c6e3a50..b17de958 100644
--- a/library/src/data_types.cpp
+++ b/library/src/data_types.cpp
@@ -254,6 +254,102 @@ namespace hiptensor
             return;
         }
     }
+
+    std::string computeTypeToString(hiptensorComputeType_t computeType)
+    {
+        if(computeType == HIPTENSOR_COMPUTE_16BF)
+        {
+            return "HIPTENSOR_COMPUTE_16BF";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_16F)
+        {
+            return "HIPTENSOR_COMPUTE_16F";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_32F)
+        {
+            return "HIPTENSOR_COMPUTE_32F";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_64F)
+        {
+            return "HIPTENSOR_COMPUTE_64F";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_8I)
+        {
+            return "HIPTENSOR_COMPUTE_8I";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_8U)
+        {
+            return "HIPTENSOR_COMPUTE_8U";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_32I)
+        {
+            return "HIPTENSOR_COMPUTE_32I";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_32U)
+        {
+            return "HIPTENSOR_COMPUTE_32U";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_C32F)
+        {
+            return "HIPTENSOR_COMPUTE_C32F";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_C64F)
+        {
+            return "HIPTENSOR_COMPUTE_C64F";
+        }
+        else
+        {
+            return "HIPTENSOR_COMPUTE_NONE";
+        }
+    }
+
+    std::string hipTypeToString(hipDataType hipType)
+    {
+        if(hipType == HIP_R_16BF)
+        {
+            return "HIP_R_16BF";
+        }
+        else if(hipType == HIP_R_16F)
+        {
+            return "HIP_R_16F";
+        }
+        else if(hipType == HIP_R_32F)
+        {
+            return "HIP_R_32F";
+        }
+        else if(hipType == HIP_R_64F)
+        {
+            return "HIP_R_64F";
+        }
+        else if(hipType == HIP_R_8I)
+        {
+            return "HIP_R_8I";
+        }
+        else if(hipType == HIP_R_8U)
+        {
+            return "HIP_R_8U";
+        }
+        else if(hipType == HIP_R_32I)
+        {
+            return "HIP_R_32I";
+        }
+        else if(hipType == HIP_R_32U)
+        {
+            return "HIP_R_32U";
+        }
+        else if(hipType == HIP_C_32F)
+        {
+            return "HIP_C_32F";
+        }
+        else if(hipType == HIP_C_64F)
+        {
+            return "HIP_C_64F";
+        }
+        else
+        {
+            return "HIP_TYPE_NONE";
+        }
+    }
 } // namespace hiptensor
 
 bool operator==(hipDataType hipType, hiptensorComputeType_t computeType)
diff --git a/library/src/include/data_types.hpp b/library/src/include/data_types.hpp
index db9ff6c7..59e70f2a 100644
--- a/library/src/include/data_types.hpp
+++ b/library/src/include/data_types.hpp
@@ -107,6 +107,9 @@ namespace hiptensor
     T readVal(void const* value, hiptensorComputeType_t id);
 
     void writeVal(void const* addr, hiptensorComputeType_t id, ScalarData value);
+
+    std::string computeTypeToString(hiptensorComputeType_t computeType);
+    std::string hipTypeToString(hipDataType hipType);
 } // namespace hiptensor
 
 bool operator==(hipDataType hipType, hiptensorComputeType_t computeType);
diff --git a/test/01_contraction/contraction_test.cpp b/test/01_contraction/contraction_test.cpp
index d8cc5d41..078fc79c 100644
--- a/test/01_contraction/contraction_test.cpp
+++ b/test/01_contraction/contraction_test.cpp
@@ -656,13 +656,36 @@ namespace hiptensor
             auto reference = resource->allocDevice(sizeD);
             resource->copyData(reference, resource->hostD(), sizeD);
 
+            // Compute tolerance based on compute type
+            auto dimension = a_ms_ks.mLengths.size() / 2;
+            auto nelems_k  = std::accumulate(a_ms_ks.mLengths.begin() + dimension,
+                                             a_ms_ks.mLengths.end(),
+                                             size_t{1},
+                                             std::multiplies<size_t>());
+
+            auto   eps       = getEpsilon(computeType);
+            double tolerance = 2 * nelems_k * eps;
+
+            // use the same default tolerance value as CK
+            if (computeType == HIPTENSOR_COMPUTE_16BF || DDataType == HIP_R_16BF)
+            {
+                const double epsilon = std::pow(2, -7);
+                tolerance += epsilon * 2;
+            }
+            else if (computeType == HIPTENSOR_COMPUTE_16F || DDataType == HIP_R_16F)
+            {
+                const double epsilon = std::pow(2, -10);
+                tolerance += epsilon * 2;
+            }
+
             if(DDataType == HIP_R_16F)
             {
                 std::tie(mValidationResult, mMaxRelativeError)
                     = compareEqualLaunchKernel<_Float16>((_Float16*)resource->deviceD().get(),
                                                          (_Float16*)reference.get(),
                                                          elementsCD,
-                                                         computeType);
+                                                         computeType,
+                                                         tolerance);
             }
             else if(DDataType == HIP_R_16BF)
             {
@@ -671,7 +694,8 @@ namespace hiptensor
                         (hip_bfloat16*)resource->deviceD().get(),
                         (hip_bfloat16*)reference.get(),
                         elementsCD,
-                        computeType);
+                        computeType,
+                        tolerance);
             }
             else if(DDataType == HIP_R_32F || DDataType == HIP_C_32F)
             {
@@ -679,7 +703,8 @@ namespace hiptensor
                 std::tie(mValidationResult, mMaxRelativeError)
                     = compareEqualLaunchKernel((float*)resource->deviceD().get(),
                                                (float*)reference.get(),
                                                elementsCD,
-                                               computeType);
+                                               computeType,
+                                               tolerance);
             }
             else if(DDataType == HIP_R_64F || DDataType == HIP_C_64F)
             {
@@ -687,7 +712,8 @@ namespace hiptensor
                 std::tie(mValidationResult, mMaxRelativeError)
                     = compareEqualLaunchKernel((double*)resource->deviceD().get(),
                                                (double*)reference.get(),
                                                elementsCD,
-                                               computeType);
+                                               computeType,
+                                               tolerance);
             }
 
             EXPECT_TRUE(mValidationResult) << "Max relative error: " << mMaxRelativeError;
diff --git a/test/device/common.hpp b/test/device/common.hpp
index d22c2d2c..73410eea 100644
--- a/test/device/common.hpp
+++ b/test/device/common.hpp
@@ -65,28 +65,42 @@ __global__ static void
     }
 }
 
+__device__ inline unsigned pcg_hash(unsigned input)
+{
+    unsigned state = input * 747796405u + 2891336453u;
+    unsigned word  = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
+    return (word >> 22u) ^ word;
+}
+
+// gen random float in range [-range, range)
+template <int range = 1>
+__device__ inline float gen_random_float(unsigned input)
+{
+    return (static_cast<float>(pcg_hash(input)) / static_cast<float>(UINT_MAX) - 0.5f)
+           * static_cast<float>(range) * 2;
+}
+
 // fill kernel for 'elementSize' elements
 template <typename DataType>
-__global__ void fillKernel(DataType* data, uint32_t elementSize, uint32_t seed)
+__global__ void fillKernel(DataType* data, uint32_t elementSize)
 {
     uint32_t index = (blockIdx.x * blockDim.x + threadIdx.x);
 
     if(index < elementSize)
     {
-        // Input values scaled by 10, Doing UnarySquare Operation for tensors(16F) may cause overflow.
         if constexpr(std::is_same_v<DataType, hipFloatComplex>)
         {
-            auto value  = (float(index / float(RAND_MAX) - 0.5) * 10) / elementSize;
+            auto value  = gen_random_float(index);
             data[index] = make_hipFloatComplex(value, value);
         }
         else if constexpr(std::is_same_v<DataType, hipDoubleComplex>)
         {
-            auto value  = (double(index / double(RAND_MAX) - 0.5) * 10) / elementSize;
+            auto value  = static_cast<double>(gen_random_float(index));
             data[index] = make_hipDoubleComplex(value, value);
         }
         else
         {
-            auto value  = (DataType(index / double(RAND_MAX) - 0.5) * 10) / elementSize;
+            auto value  = gen_random_float(index);
             data[index] = static_cast<DataType>(value);
         }
     }
diff --git a/test/utils.hpp b/test/utils.hpp
index fc999738..8edb8551 100644
--- a/test/utils.hpp
+++ b/test/utils.hpp
@@ -174,8 +174,7 @@ __host__ static inline void fillLaunchKernel(DataType* data, uint32_t elementSiz
                        0,
                        0,
                        data,
-                       elementSize,
-                       static_cast<uint32_t>(std::time(nullptr)));
+                       elementSize);
 }
 
 // fill kernel wrapper for 'elementSize' elements with a specific value
@@ -194,7 +193,7 @@ std::pair<bool, double> compareEqual(DDataType const* deviceD,
                                      DDataType const* hostD,
                                      std::size_t      elementsD,
                                      hiptensorComputeType_t computeType,
-                                     double           tolerance = 100.0)
+                                     double           tolerance = 0.0)
 {
     bool   retval             = true;
     double max_relative_error = 0.0;
@@ -245,7 +244,25 @@ std::pair<bool, double> compareEqual(DDataType const* deviceD,
         }
     }
 
-    auto eps = getEpsilon(computeType);
+    if(tolerance == 0.0)
+    {
+        // use the same default tolerance value as CK
+        if (computeType == HIPTENSOR_COMPUTE_16BF || std::is_same_v<DDataType, hip_bfloat16>)
+        {
+            const double epsilon = std::pow(2, -7);
+            tolerance            = epsilon * 2;
+        }
+        else if (computeType == HIPTENSOR_COMPUTE_16F || std::is_same_v<DDataType, _Float16>)
+        {
+            const double epsilon = std::pow(2, -10);
+            tolerance            = epsilon * 2;
+        }
+        else
+        {
+            tolerance = 1e-5;
+        }
+    }
+
     if(isInf)
     {
         retval             = false;
@@ -256,7 +273,7 @@ std::pair<bool, double> compareEqual(DDataType const* deviceD,
         retval             = false;
         max_relative_error = std::numeric_limits<double>::signaling_NaN();
     }
-    else if(max_relative_error > (eps * tolerance))
+    else if(max_relative_error > tolerance)
     {
         retval = false;
     }
@@ -269,13 +286,14 @@ std::pair<bool, double> compareEqualLaunchKernel(DDataType* deviceD,
                                                  DDataType* hostD,
                                                  std::size_t elementsD,
                                                  hiptensorComputeType_t computeType,
-                                                 double      tolerance = 100.0)
+                                                 double      tolerance = 0.0)
 {
     auto blockDim = dim3(1024, 1, 1);
     auto gridDim  = dim3(ceilDiv(elementsD, blockDim.x), 1, 1);
 
     double* d_relativeError;
     double  maxRelativeError;
+
     CHECK_HIP_ERROR(hipMalloc(&d_relativeError, elementsD * sizeof(double)));
 
     hipEvent_t syncEvent;
@@ -332,12 +350,32 @@ std::pair<bool, double> compareEqualLaunchKernel(DDataType* deviceD,
         = [](DDataType const& val) { return static_cast<double>(static_cast<float>(val)); };
 
     auto eps = getEpsilon(computeType);
+
+    if(tolerance == 0.0)
+    {
+        // use the same default tolerance value as CK
+        if (computeType == HIPTENSOR_COMPUTE_16BF || std::is_same_v<DDataType, hip_bfloat16>)
+        {
+            const double epsilon = std::pow(2, -7);
+            tolerance            = epsilon * 2;
+        }
+        else if (computeType == HIPTENSOR_COMPUTE_16F || std::is_same_v<DDataType, _Float16>)
+        {
+            const double epsilon = std::pow(2, -10);
+            tolerance            = epsilon * 2;
+        }
+        else
+        {
+            tolerance = 1e-5;
+        }
+    }
+
     if(isNaN)
     {
         retval           = false;
         maxRelativeError = std::numeric_limits<double>::signaling_NaN();
     }
-    else if(maxRelativeError > (eps * tolerance))
+    else if(maxRelativeError > (tolerance))
    {
         retval = false;
     }
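
Note on the tolerance defaults introduced above: when tolerance is left at 0.0, compareEqual and compareEqualLaunchKernel now pick a per-type bound themselves (2 * 2^-7 for bf16, 2 * 2^-10 for fp16, 1e-5 otherwise), while the contraction test overrides it with 2 * nelems_k * eps plus the same half-precision term. The host-only C++ sketch below restates that selection logic for reference; it is an illustration only, and the helper names defaultTolerance and contractionTolerance are invented here rather than taken from the patch.

    #include <cmath>
    #include <cstddef>
    #include <cstdio>

    // Illustrative stand-in for the default chosen inside compareEqual /
    // compareEqualLaunchKernel when the caller passes tolerance == 0.0.
    double defaultTolerance(bool isBf16, bool isFp16)
    {
        if(isBf16)
        {
            return 2 * std::pow(2, -7); // bf16 keeps 7 mantissa bits
        }
        if(isFp16)
        {
            return 2 * std::pow(2, -10); // fp16 keeps 10 mantissa bits
        }
        return 1e-5; // all other compute/destination types
    }

    // Illustrative stand-in for the bound the contraction test builds before calling the
    // comparison helpers: 2 * nelems_k * eps, plus the 2 * epsilon half-precision term
    // when bf16/fp16 is involved (pass halfEpsilon = 0.0 otherwise).
    double contractionTolerance(std::size_t nelems_k, double eps, double halfEpsilon)
    {
        return 2 * nelems_k * eps + 2 * halfEpsilon;
    }

    int main()
    {
        // Example: contraction over k = 1024 elements accumulated in fp32 (eps = 2^-23),
        // written to an fp16 destination, which adds the 2 * 2^-10 term.
        std::printf("default fp16 tolerance: %g\n", defaultTolerance(false, true));
        std::printf("contraction tolerance : %g\n",
                    contractionTolerance(1024, std::pow(2, -23), std::pow(2, -10)));
        return 0;
    }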