diff --git a/library/src/data_types.cpp b/library/src/data_types.cpp
index 4c6e3a50..b17de958 100644
--- a/library/src/data_types.cpp
+++ b/library/src/data_types.cpp
@@ -254,6 +254,102 @@ namespace hiptensor
             return;
         }
     }
+
+    std::string computeTypeToString(hiptensorComputeType_t computeType)
+    {
+        if(computeType == HIPTENSOR_COMPUTE_16BF)
+        {
+            return "HIPTENSOR_COMPUTE_16BF";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_16F)
+        {
+            return "HIPTENSOR_COMPUTE_16F";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_32F)
+        {
+            return "HIPTENSOR_COMPUTE_32F";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_64F)
+        {
+            return "HIPTENSOR_COMPUTE_64F";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_8I)
+        {
+            return "HIPTENSOR_COMPUTE_8I";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_8U)
+        {
+            return "HIPTENSOR_COMPUTE_8U";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_32I)
+        {
+            return "HIPTENSOR_COMPUTE_32I";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_32U)
+        {
+            return "HIPTENSOR_COMPUTE_32U";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_C32F)
+        {
+            return "HIPTENSOR_COMPUTE_C32F";
+        }
+        else if(computeType == HIPTENSOR_COMPUTE_C64F)
+        {
+            return "HIPTENSOR_COMPUTE_C64F";
+        }
+        else
+        {
+            return "HIPTENSOR_COMPUTE_NONE";
+        }
+    }
+
+    std::string hipTypeToString(hipDataType hipType)
+    {
+        if(hipType == HIP_R_16BF)
+        {
+            return "HIP_R_16BF";
+        }
+        else if(hipType == HIP_R_16F)
+        {
+            return "HIP_R_16F";
+        }
+        else if(hipType == HIP_R_32F)
+        {
+            return "HIP_R_32F";
+        }
+        else if(hipType == HIP_R_64F)
+        {
+            return "HIP_R_64F";
+        }
+        else if(hipType == HIP_R_8I)
+        {
+            return "HIP_R_8I";
+        }
+        else if(hipType == HIP_R_8U)
+        {
+            return "HIP_R_8U";
+        }
+        else if(hipType == HIP_R_32I)
+        {
+            return "HIP_R_32I";
+        }
+        else if(hipType == HIP_R_32U)
+        {
+            return "HIP_R_32U";
+        }
+        else if(hipType == HIP_C_32F)
+        {
+            return "HIP_C_32F";
+        }
+        else if(hipType == HIP_C_64F)
+        {
+            return "HIP_C_64F";
+        }
+        else
+        {
+            return "HIP_TYPE_NONE";
+        }
+    }
 } // namespace hiptensor
 
 bool operator==(hipDataType hipType, hiptensorComputeType_t computeType)
diff --git a/library/src/include/data_types.hpp b/library/src/include/data_types.hpp
index db9ff6c7..59e70f2a 100644
--- a/library/src/include/data_types.hpp
+++ b/library/src/include/data_types.hpp
@@ -107,6 +107,9 @@ namespace hiptensor
     T readVal(void const* value, hiptensorComputeType_t id);
 
     void writeVal(void const* addr, hiptensorComputeType_t id, ScalarData value);
+
+    std::string computeTypeToString(hiptensorComputeType_t computeType);
+    std::string hipTypeToString(hipDataType hipType);
 } // namespace hiptensor
 
 bool operator==(hipDataType hipType, hiptensorComputeType_t computeType);
diff --git a/test/01_contraction/contraction_test.cpp b/test/01_contraction/contraction_test.cpp
index d8cc5d41..08eac199 100644
--- a/test/01_contraction/contraction_test.cpp
+++ b/test/01_contraction/contraction_test.cpp
@@ -257,14 +259,16 @@ namespace hiptensor
         auto resource = getResource();
         resource->resizeStorage(lengths, elementBytes);
 
+        uint32_t seed = static_cast<uint32_t>(256);
+
         if(ADataType == HIP_R_16F && BDataType == HIP_R_16F && DDataType == HIP_R_16F)
         {
             // Initialize matrix data on device
-            fillLaunchKernel<_Float16>((_Float16*)resource->deviceA().get(), elementsA);
-            fillLaunchKernel<_Float16>((_Float16*)resource->deviceB().get(), elementsB);
+            fillLaunchKernel<_Float16>((_Float16*)resource->deviceA().get(), elementsA, seed - 1);
+            fillLaunchKernel<_Float16>((_Float16*)resource->deviceB().get(), elementsB, seed);
             if(CDataType == HIP_R_16F)
             {
-                fillLaunchKernel<_Float16>((_Float16*)resource->deviceC().get(), elementsCD);
+                fillLaunchKernel<_Float16>((_Float16*)resource->deviceC().get(), elementsCD, seed + 1);
             }
             fillValLaunchKernel<_Float16>((_Float16*)resource->deviceD().get(),
                                           elementsCD,
@@ -273,12 +275,12 @@ namespace hiptensor
         else if(ADataType == HIP_R_16BF && BDataType == HIP_R_16BF && DDataType == HIP_R_16BF)
         {
             // Initialize matrix data on device
-            fillLaunchKernel<hip_bfloat16>((hip_bfloat16*)resource->deviceA().get(), elementsA);
-            fillLaunchKernel<hip_bfloat16>((hip_bfloat16*)resource->deviceB().get(), elementsB);
+            fillLaunchKernel<hip_bfloat16>((hip_bfloat16*)resource->deviceA().get(), elementsA, seed - 1);
+            fillLaunchKernel<hip_bfloat16>((hip_bfloat16*)resource->deviceB().get(), elementsB, seed);
             if(CDataType == HIP_R_16BF)
             {
                 fillLaunchKernel<hip_bfloat16>((hip_bfloat16*)resource->deviceC().get(),
-                                               elementsCD);
+                                               elementsCD, seed + 1);
             }
             fillValLaunchKernel<hip_bfloat16>(
                 (hip_bfloat16*)resource->deviceD().get(),
@@ -288,11 +290,11 @@ namespace hiptensor
         else if(ADataType == HIP_R_32F && BDataType == HIP_R_32F && DDataType == HIP_R_32F)
         {
             // Initialize matrix data on device
-            fillLaunchKernel<float>((float*)resource->deviceA().get(), elementsA);
-            fillLaunchKernel<float>((float*)resource->deviceB().get(), elementsB);
+            fillLaunchKernel<float>((float*)resource->deviceA().get(), elementsA, seed - 1);
+            fillLaunchKernel<float>((float*)resource->deviceB().get(), elementsB, seed);
             if(CDataType == HIP_R_32F)
             {
-                fillLaunchKernel<float>((float*)resource->deviceC().get(), elementsCD);
+                fillLaunchKernel<float>((float*)resource->deviceC().get(), elementsCD, seed + 1);
             }
             fillValLaunchKernel<float>((float*)resource->deviceD().get(),
                                        elementsCD,
@@ -301,11 +303,11 @@ namespace hiptensor
         else if(ADataType == HIP_R_64F && BDataType == HIP_R_64F && DDataType == HIP_R_64F)
        {
             // Initialize matrix data on device
-            fillLaunchKernel<double>((double*)resource->deviceA().get(), elementsA);
-            fillLaunchKernel<double>((double*)resource->deviceB().get(), elementsB);
+            fillLaunchKernel<double>((double*)resource->deviceA().get(), elementsA, seed - 1);
+            fillLaunchKernel<double>((double*)resource->deviceB().get(), elementsB, seed);
             if(CDataType == HIP_R_64F)
             {
-                fillLaunchKernel<double>((double*)resource->deviceC().get(), elementsCD);
+                fillLaunchKernel<double>((double*)resource->deviceC().get(), elementsCD, seed + 1);
             }
             fillValLaunchKernel<double>((double*)resource->deviceD().get(),
                                         elementsCD,
@@ -315,13 +317,13 @@
         {
             // Initialize matrix data on device
             fillLaunchKernel<hipFloatComplex>((hipFloatComplex*)resource->deviceA().get(),
-                                              elementsA);
+                                              elementsA, seed - 1);
             fillLaunchKernel<hipFloatComplex>((hipFloatComplex*)resource->deviceB().get(),
-                                              elementsB);
+                                              elementsB, seed);
             if(CDataType == HIP_C_32F)
             {
                 fillLaunchKernel<hipFloatComplex>((hipFloatComplex*)resource->deviceC().get(),
-                                                  elementsCD);
+                                                  elementsCD, seed + 1);
             }
             fillValLaunchKernel<hipFloatComplex>(
                 (hipFloatComplex*)resource->deviceD().get(),
@@ -332,13 +334,13 @@
         {
             // Initialize matrix data on device
             fillLaunchKernel<hipDoubleComplex>((hipDoubleComplex*)resource->deviceA().get(),
-                                               elementsA);
+                                               elementsA, seed - 1);
             fillLaunchKernel<hipDoubleComplex>((hipDoubleComplex*)resource->deviceB().get(),
-                                               elementsB);
+                                               elementsB, seed);
             if(CDataType == HIP_C_64F)
             {
                 fillLaunchKernel<hipDoubleComplex>((hipDoubleComplex*)resource->deviceC().get(),
-                                                   elementsCD);
+                                                   elementsCD, seed + 1);
             }
             fillValLaunchKernel<hipDoubleComplex>(
                 (hipDoubleComplex*)resource->deviceD().get(),
@@ -656,13 +658,37 @@ namespace hiptensor
         auto reference = resource->allocDevice(sizeD);
         resource->copyData(reference, resource->hostD(), sizeD);
 
+        // Compute tolerance based on compute type
+        auto dimension = a_ms_ks.mLengths.size() / 2;
+        auto nelems_k  = std::accumulate(a_ms_ks.mLengths.begin() + dimension,
+                                         a_ms_ks.mLengths.end(),
+                                         size_t{1},
+                                         std::multiplies<size_t>());
+
+        auto eps = getEpsilon(computeType == HIPTENSOR_COMPUTE_64F ? HIPTENSOR_COMPUTE_64F
+                                                                   : HIPTENSOR_COMPUTE_32F);
+        double tolerance = 2 * nelems_k * eps;
+
+        // use the same default tolerance value as CK
+        if (computeType == HIPTENSOR_COMPUTE_16BF || DDataType == HIP_R_16BF)
+        {
+            const double epsilon = std::pow(2, -7);
+            tolerance += epsilon * 2;
+        }
+        else if (computeType == HIPTENSOR_COMPUTE_16F || DDataType == HIP_R_16F)
+        {
+            const double epsilon = std::pow(2, -10);
+            tolerance += epsilon * 2;
+        }
+
         if(DDataType == HIP_R_16F)
         {
             std::tie(mValidationResult, mMaxRelativeError)
                 = compareEqualLaunchKernel<_Float16>((_Float16*)resource->deviceD().get(),
                                                      (_Float16*)reference.get(),
                                                      elementsCD,
-                                                     computeType);
+                                                     computeType,
+                                                     tolerance);
         }
         else if(DDataType == HIP_R_16BF)
         {
@@ -671,7 +697,8 @@ namespace hiptensor
                 (hip_bfloat16*)resource->deviceD().get(),
                 (hip_bfloat16*)reference.get(),
                 elementsCD,
-                computeType);
+                computeType,
+                tolerance);
         }
         else if(DDataType == HIP_R_32F || DDataType == HIP_C_32F)
         {
@@ -679,7 +706,8 @@ namespace hiptensor
                 = compareEqualLaunchKernel<float>((float*)resource->deviceD().get(),
                                                   (float*)reference.get(),
                                                   elementsCD,
-                                                  computeType);
+                                                  computeType,
+                                                  tolerance);
         }
         else if(DDataType == HIP_R_64F || DDataType == HIP_C_64F)
         {
@@ -687,7 +715,8 @@ namespace hiptensor
                 = compareEqualLaunchKernel<double>((double*)resource->deviceD().get(),
                                                    (double*)reference.get(),
                                                    elementsCD,
-                                                   computeType);
+                                                   computeType,
+                                                   tolerance);
         }
 
         EXPECT_TRUE(mValidationResult) << "Max relative error: " << mMaxRelativeError;
diff --git a/test/02_permutation/permutation_resource.cpp b/test/02_permutation/permutation_resource.cpp
index 6acd7577..62dfd9dd 100644
--- a/test/02_permutation/permutation_resource.cpp
+++ b/test/02_permutation/permutation_resource.cpp
@@ -96,13 +96,15 @@ namespace hiptensor
     void PermutationResource::fillRandToA()
     {
+        uint32_t seed = static_cast<uint32_t>(256);
+
         if(mCurrentDataType == HIP_R_32F)
         {
-            fillLaunchKernel<float>((float*)deviceA().get(), mCurrentMatrixElement);
+            fillLaunchKernel<float>((float*)deviceA().get(), mCurrentMatrixElement, seed);
         }
         else
         {
-            fillLaunchKernel<_Float16>((_Float16*)deviceA().get(), mCurrentMatrixElement);
+            fillLaunchKernel<_Float16>((_Float16*)deviceA().get(), mCurrentMatrixElement, seed);
         }
         Base::copyData(hostA(), deviceA(), getCurrentMatrixMemorySize());
     }
 
diff --git a/test/03_reduction/reduction_resource.cpp b/test/03_reduction/reduction_resource.cpp
index 22e1fb32..f4074896 100644
--- a/test/03_reduction/reduction_resource.cpp
+++ b/test/03_reduction/reduction_resource.cpp
@@ -127,21 +127,23 @@ namespace hiptensor
     void ReductionResource::fillRand(HostPtrT& hostBuf, DevicePtrT& deviceBuf, size_t elementCount)
     {
+        uint32_t seed = static_cast<uint32_t>(256);
+
         if(mCurrentDataType == HIP_R_16F)
         {
-            fillLaunchKernel<float16_t>((float16_t*)deviceBuf.get(), elementCount);
+            fillLaunchKernel<float16_t>((float16_t*)deviceBuf.get(), elementCount, seed);
         }
         else if(mCurrentDataType == HIP_R_16BF)
         {
-            fillLaunchKernel<bfloat16_t>((bfloat16_t*)deviceBuf.get(), elementCount);
+            fillLaunchKernel<bfloat16_t>((bfloat16_t*)deviceBuf.get(), elementCount, seed);
         }
         else if(mCurrentDataType == HIP_R_32F)
         {
-            fillLaunchKernel<float32_t>((float32_t*)deviceBuf.get(), elementCount);
+            fillLaunchKernel<float32_t>((float32_t*)deviceBuf.get(), elementCount, seed);
         }
         else if(mCurrentDataType == HIP_R_64F)
         {
-            fillLaunchKernel<float64_t>((float64_t*)deviceBuf.get(), elementCount);
+            fillLaunchKernel<float64_t>((float64_t*)deviceBuf.get(), elementCount, seed);
         }
         Base::copyData(hostBuf, deviceBuf, elementCount);
     }
 
diff --git a/test/device/common.hpp b/test/device/common.hpp
index d22c2d2c..66b57231 100644
--- a/test/device/common.hpp
+++ b/test/device/common.hpp
@@ -65,28 +65,43 @@ __global__ static void
     }
 }
 
+__device__ inline unsigned pcg_hash(unsigned input)
+{
+    unsigned state = input * 747796405u + 2891336453u;
+    unsigned word  = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
+    return (word >> 22u) ^ word;
+}
+
+// gen random float in range [-range, range)
+template <unsigned int range = 10>
+__device__ inline float gen_random_float(unsigned input)
+{
+    return (static_cast<float>(pcg_hash(input)) / static_cast<float>(UINT_MAX) - 0.5f)
+           * static_cast<float>(range) * 2;
+}
+
 // fill kernel for 'elementSize' elements
 template <typename DataType>
 __global__ void fillKernel(DataType* data, uint32_t elementSize, uint32_t seed)
 {
     uint32_t index = (blockIdx.x * blockDim.x + threadIdx.x);
+    uint32_t seededIndex = static_cast<uint32_t>(uint64_t(index + seed) % UINT_MAX);
 
     if(index < elementSize)
     {
-        // Input values scaled by 10, Doing UnarySquare Operation for tensors(16F) may cause overflow.
         if constexpr(std::is_same_v<DataType, hipFloatComplex>)
         {
-            auto value  = (float(index / float(RAND_MAX) - 0.5) * 10) / elementSize;
+            auto value  = gen_random_float(seededIndex);
             data[index] = make_hipFloatComplex(value, value);
         }
         else if constexpr(std::is_same_v<DataType, hipDoubleComplex>)
         {
-            auto value  = (double(index / double(RAND_MAX) - 0.5) * 10) / elementSize;
+            auto value  = static_cast<double>(gen_random_float(seededIndex));
             data[index] = make_hipDoubleComplex(value, value);
         }
         else
         {
-            auto value  = (DataType(index / double(RAND_MAX) - 0.5) * 10) / elementSize;
+            auto value  = gen_random_float(seededIndex);
             data[index] = static_cast<DataType>(value);
         }
     }
diff --git a/test/utils.hpp b/test/utils.hpp
index fc999738..98df2fe0 100644
--- a/test/utils.hpp
+++ b/test/utils.hpp
@@ -164,7 +164,7 @@ auto getProduct(const Container& container,
 
 // fill kernel for 'elementSize' elements
 template <typename DataType>
-__host__ static inline void fillLaunchKernel(DataType* data, uint32_t elementSize)
+__host__ static inline void fillLaunchKernel(DataType* data, uint32_t elementSize, uint32_t seed)
 {
     auto blockDim = dim3(1024, 1, 1);
     auto gridDim  = dim3(ceilDiv(elementSize, blockDim.x), 1, 1);
@@ -175,7 +175,7 @@ __host__ static inline void fillLaunchKernel(DataType* data, uint32_t elementSiz
                        0,
                        data,
                        elementSize,
-                       static_cast<uint32_t>(std::time(nullptr)));
+                       seed);
 }
 
 // fill kernel wrapper for 'elementSize' elements with a specific value
@@ -194,7 +194,7 @@ std::pair<bool, double> compareEqual(DDataType const* deviceD,
                                      DDataType const* hostD,
                                      std::size_t elementsD,
                                      hiptensorComputeType_t computeType,
-                                     double tolerance = 100.0)
+                                     double tolerance = 0.0)
 {
     bool   retval             = true;
     double max_relative_error = 0.0;
@@ -245,7 +245,25 @@ std::pair<bool, double> compareEqual(DDataType const* deviceD,
         }
     }
 
-    auto eps = getEpsilon(computeType);
+    if(tolerance == 0.0)
+    {
+        // use the same default tolerance value as CK
+        if (computeType == HIPTENSOR_COMPUTE_16BF || std::is_same_v<DDataType, hip_bfloat16>)
+        {
+            const double epsilon = std::pow(2, -7);
+            tolerance = epsilon * 2;
+        }
+        else if (computeType == HIPTENSOR_COMPUTE_16F || std::is_same_v<DDataType, _Float16>)
+        {
+            const double epsilon = std::pow(2, -10);
+            tolerance = epsilon * 2;
+        }
+        else
+        {
+            tolerance = 1e-5;
+        }
+    }
+
     if(isInf)
     {
         retval             = false;
@@ -256,7 +274,7 @@ std::pair<bool, double> compareEqual(DDataType const* deviceD,
         retval             = false;
         max_relative_error = std::numeric_limits<double>::signaling_NaN();
     }
-    else if(max_relative_error > (eps * tolerance))
+    else if(max_relative_error > tolerance)
     {
         retval = false;
     }
@@ -269,13 +287,14 @@ std::pair<bool, double> compareEqualLaunchKernel(DDataType* deviceD,
                                                  DDataType* hostD,
                                                  std::size_t elementsD,
                                                  hiptensorComputeType_t computeType,
-                                                 double tolerance = 100.0)
+                                                 double tolerance = 0.0)
 {
     auto blockDim = dim3(1024, 1, 1);
     auto gridDim  = dim3(ceilDiv(elementsD, blockDim.x), 1, 1);
 
     double* d_relativeError;
     double  maxRelativeError;
+
     CHECK_HIP_ERROR(hipMalloc(&d_relativeError, elementsD * sizeof(double)));
 
     hipEvent_t syncEvent;
@@ -331,13 +350,31 @@ std::pair<bool, double> compareEqualLaunchKernel(DDataType* deviceD,
     auto toDouble
         = [](DDataType const& val) { return static_cast<double>(static_cast<float>(val)); };
 
-    auto eps = getEpsilon(computeType);
+    if(tolerance == 0.0)
+    {
+        // use the same default tolerance value as CK
+        if (computeType == HIPTENSOR_COMPUTE_16BF || std::is_same_v<DDataType, hip_bfloat16>)
+        {
+            const double epsilon = std::pow(2, -7);
+            tolerance = epsilon * 2;
+        }
+        else if (computeType == HIPTENSOR_COMPUTE_16F || std::is_same_v<DDataType, _Float16>)
+        {
+            const double epsilon = std::pow(2, -10);
+            tolerance = epsilon * 2;
+        }
+        else
+        {
+            tolerance = 1e-5;
+        }
+    }
+
     if(isNaN)
     {
         retval           = false;
         maxRelativeError = std::numeric_limits<double>::signaling_NaN();
     }
-    else if(maxRelativeError > (eps * tolerance))
+    else if(maxRelativeError > (tolerance))
    {
         retval = false;
     }
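
Note on the new fill path: pcg_hash is a single-round PCG-style integer hash, so a given (seed, index) pair produces the same value on every run and every device. That is what makes the tests reproducible now that fillLaunchKernel takes an explicit seed instead of hashing std::time(nullptr). Below is a host-side sketch of the same arithmetic for inspecting fill values; the hash constants are copied from the patch, while the free-function form of gen_random_float, its range default of 10 and the main() driver are illustrative assumptions, not patch code.

    #include <climits>
    #include <cstdint>
    #include <cstdio>

    // Same mixing steps as the device-side pcg_hash in test/device/common.hpp.
    unsigned pcg_hash(unsigned input)
    {
        unsigned state = input * 747796405u + 2891336453u;
        unsigned word  = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
        return (word >> 22u) ^ word;
    }

    // Mirrors gen_random_float: hash -> [0, 1) -> [-0.5, 0.5) -> [-range, range).
    float gen_random_float(unsigned input, float range = 10.0f)
    {
        return (static_cast<float>(pcg_hash(input)) / static_cast<float>(UINT_MAX) - 0.5f)
               * range * 2;
    }

    int main()
    {
        // The contraction test seeds A, B and C with seed - 1, seed and seed + 1,
        // so the tensors are decorrelated but still fully deterministic.
        uint32_t seed = 256;
        for(uint32_t index = 0; index < 4; ++index)
        {
            std::printf("A[%u] = % f   B[%u] = % f\n",
                        index,
                        gen_random_float(index + seed - 1),
                        index,
                        gen_random_float(index + seed));
        }
        return 0;
    }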
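The tolerance added to contraction_test.cpp combines two error sources. The accumulation term 2 * nelems_k * eps grows with the contracted extent: a_ms_ks.mLengths appears to hold the m-modes followed by the k-modes, so the product over its second half is the number of values reduced into each output element, and each of those multiply-accumulates can contribute on the order of one machine epsilon of roundoff. For 16-bit outputs, CK's fixed default of two output-format epsilons is added on top: 2^-10 for fp16 (10 explicit mantissa bits) and 2^-7 for bf16 (7 explicit mantissa bits). A self-contained sketch of the same model, simplified to booleans; the name contractionTolerance is hypothetical, and DBL_EPSILON/FLT_EPSILON stand in for what getEpsilon presumably returns:

    #include <cfloat>
    #include <cmath>
    #include <cstddef>

    // Hypothetical mirror of the tolerance assembled in the contraction test above.
    double contractionTolerance(std::size_t nelems_k,
                                bool        compute64F, // compute type is HIPTENSOR_COMPUTE_64F
                                bool        out16F,     // output tensor is fp16
                                bool        out16BF)    // output tensor is bf16
    {
        // Stand-ins for getEpsilon(HIPTENSOR_COMPUTE_64F / HIPTENSOR_COMPUTE_32F).
        double eps       = compute64F ? DBL_EPSILON : FLT_EPSILON;
        double tolerance = 2.0 * static_cast<double>(nelems_k) * eps;

        if(out16BF)
        {
            tolerance += 2.0 * std::pow(2, -7);  // bf16: 7 explicit mantissa bits
        }
        else if(out16F)
        {
            tolerance += 2.0 * std::pow(2, -10); // fp16: 10 explicit mantissa bits
        }
        return tolerance;
    }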
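The new string helpers in data_types.cpp are if/else chains over an enum. An equivalent switch, sketched below and not what the patch does, would let -Wswitch warn whenever a future hiptensorComputeType_t enumerator is left unhandled; any value without a case falls through to the final return (if the enum defines a NONE enumerator, it would need its own case or the warning fires by design):

    std::string computeTypeToString(hiptensorComputeType_t computeType)
    {
        // Deliberately no default case, so -Wswitch can flag missing enumerators.
        switch(computeType)
        {
        case HIPTENSOR_COMPUTE_16BF: return "HIPTENSOR_COMPUTE_16BF";
        case HIPTENSOR_COMPUTE_16F:  return "HIPTENSOR_COMPUTE_16F";
        case HIPTENSOR_COMPUTE_32F:  return "HIPTENSOR_COMPUTE_32F";
        case HIPTENSOR_COMPUTE_64F:  return "HIPTENSOR_COMPUTE_64F";
        case HIPTENSOR_COMPUTE_8I:   return "HIPTENSOR_COMPUTE_8I";
        case HIPTENSOR_COMPUTE_8U:   return "HIPTENSOR_COMPUTE_8U";
        case HIPTENSOR_COMPUTE_32I:  return "HIPTENSOR_COMPUTE_32I";
        case HIPTENSOR_COMPUTE_32U:  return "HIPTENSOR_COMPUTE_32U";
        case HIPTENSOR_COMPUTE_C32F: return "HIPTENSOR_COMPUTE_C32F";
        case HIPTENSOR_COMPUTE_C64F: return "HIPTENSOR_COMPUTE_C64F";
        }
        return "HIPTENSOR_COMPUTE_NONE";
    }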
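One maintainability observation: the "same default tolerance value as CK" block is now duplicated verbatim in compareEqual and compareEqualLaunchKernel in test/utils.hpp. A shared helper would keep the two call sites from drifting apart. The sketch below assumes the hipTensor test headers (hiptensorComputeType_t, hip_bfloat16, _Float16) are in scope, and the name defaultCkTolerance is hypothetical:

    #include <cmath>
    #include <type_traits>

    // Hypothetical shared helper for the duplicated default-tolerance block.
    template <typename DDataType>
    double defaultCkTolerance(hiptensorComputeType_t computeType)
    {
        // use the same default tolerance value as CK
        if(computeType == HIPTENSOR_COMPUTE_16BF || std::is_same_v<DDataType, hip_bfloat16>)
        {
            return 2 * std::pow(2, -7);
        }
        else if(computeType == HIPTENSOR_COMPUTE_16F || std::is_same_v<DDataType, _Float16>)
        {
            return 2 * std::pow(2, -10);
        }
        return 1e-5;
    }

    // Both comparators would then reduce to:
    //     if(tolerance == 0.0) { tolerance = defaultCkTolerance<DDataType>(computeType); }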