From 2b8e52af72059160c4e36b76602560b33f53fb6e Mon Sep 17 00:00:00 2001 From: Tools Date: Thu, 7 Sep 2023 15:25:08 +0200 Subject: [PATCH] Upgrade to clang-format-16 --- .clang-format | 66 +++++- .github/workflows/ci.yml | 4 +- example/babelstream/src/Stream.h | 1 - example/babelstream/src/main.cpp | 5 +- example/bufferCopy/src/bufferCopy.cpp | 1 - example/complex/src/complex.cpp | 1 - example/heatEquation/src/heatEquation.cpp | 3 - .../src/monteCarloIntegration.cpp | 1 - example/randomCells2D/src/randomCells2D.cpp | 8 +- .../randomStrategies/src/randomStrategies.cpp | 12 +- example/reduce/src/iterator.hpp | 18 +- example/reduce/src/kernel.hpp | 2 +- include/alpaka/acc/AccCpuOmp2Blocks.hpp | 2 + include/alpaka/acc/AccCpuOmp2Threads.hpp | 2 + include/alpaka/acc/AccCpuSerial.hpp | 2 + include/alpaka/acc/AccCpuTbbBlocks.hpp | 2 + include/alpaka/acc/AccCpuThreads.hpp | 2 + include/alpaka/acc/AccDevProps.hpp | 1 + include/alpaka/atomic/AtomicAtomicRef.hpp | 1 + include/alpaka/atomic/AtomicOmpBuiltIn.hpp | 1 + include/alpaka/atomic/AtomicStdLibLock.hpp | 1 + .../alpaka/atomic/AtomicUniformCudaHip.hpp | 44 ++-- .../atomic/AtomicUniformCudaHipBuiltIn.hpp | 1 + include/alpaka/atomic/Op.hpp | 12 +- include/alpaka/atomic/Traits.hpp | 2 + .../shared/st/BlockSharedMemStMember.hpp | 1 + .../st/BlockSharedMemStMemberMasterSync.hpp | 1 - .../BlockSharedMemStUniformCudaHipBuiltIn.hpp | 1 + .../alpaka/block/sync/BlockSyncBarrierOmp.hpp | 4 + include/alpaka/block/sync/Traits.hpp | 2 + include/alpaka/core/Align.hpp | 2 + include/alpaka/core/ApiHipRt.hpp | 10 +- include/alpaka/core/BarrierThread.hpp | 5 +- include/alpaka/core/ClipCast.hpp | 4 +- include/alpaka/core/Cuda.hpp | 2 +- include/alpaka/core/CudaHipCommon.hpp | 5 +- include/alpaka/core/Debug.hpp | 4 +- include/alpaka/core/OmpSchedule.hpp | 1 - include/alpaka/core/Positioning.hpp | 2 + include/alpaka/core/Unroll.hpp | 4 +- include/alpaka/core/Vectorize.hpp | 9 + include/alpaka/dev/DevCpu.hpp | 8 +- include/alpaka/dev/DevUniformCudaHipRt.hpp | 6 +- include/alpaka/dev/cpu/SysInfo.hpp | 10 +- include/alpaka/event/EventCpu.hpp | 2 +- include/alpaka/event/EventGenericThreads.hpp | 15 ++ .../alpaka/event/EventUniformCudaHipRt.hpp | 11 + include/alpaka/extent/Traits.hpp | 216 +++++++++--------- include/alpaka/idx/Accessors.hpp | 6 + include/alpaka/idx/Traits.hpp | 1 + include/alpaka/idx/bt/IdxBtRefThreadIdMap.hpp | 1 + include/alpaka/kernel/TaskKernelGpuCudaRt.hpp | 2 +- include/alpaka/kernel/TaskKernelGpuHipRt.hpp | 2 +- include/alpaka/kernel/Traits.hpp | 5 +- include/alpaka/math/MathStdLib.hpp | 10 +- .../alpaka/math/MathUniformCudaHipBuiltIn.hpp | 11 +- include/alpaka/math/Traits.hpp | 3 +- include/alpaka/mem/buf/BufCpu.hpp | 7 + include/alpaka/mem/buf/BufCpuSycl.hpp | 2 +- include/alpaka/mem/buf/BufCudaRt.hpp | 2 +- include/alpaka/mem/buf/BufFpgaSyclIntel.hpp | 2 +- include/alpaka/mem/buf/BufGenericSycl.hpp | 1 + include/alpaka/mem/buf/BufGpuSyclIntel.hpp | 2 +- include/alpaka/mem/buf/BufHipRt.hpp | 2 +- .../alpaka/mem/buf/BufUniformCudaHipRt.hpp | 3 + include/alpaka/mem/buf/Traits.hpp | 4 +- include/alpaka/mem/buf/cpu/Copy.hpp | 2 +- include/alpaka/mem/buf/sycl/Set.hpp | 1 - include/alpaka/mem/view/Traits.hpp | 14 +- include/alpaka/mem/view/ViewAccessOps.hpp | 3 +- include/alpaka/mem/view/ViewPlainPtr.hpp | 7 +- include/alpaka/mem/view/ViewStdArray.hpp | 1 + include/alpaka/mem/view/ViewStdVector.hpp | 1 + include/alpaka/mem/view/ViewSubView.hpp | 1 + include/alpaka/meta/Apply.hpp | 1 + include/alpaka/meta/CartesianProduct.hpp | 7 + include/alpaka/meta/Concatenate.hpp | 3 + .../alpaka/meta/CudaVectorArrayWrapper.hpp | 20 +- include/alpaka/meta/Filter.hpp | 3 + include/alpaka/meta/Fold.hpp | 1 + include/alpaka/meta/ForEachType.hpp | 2 + include/alpaka/meta/Functional.hpp | 6 +- include/alpaka/meta/IntegerSequence.hpp | 8 + include/alpaka/meta/Integral.hpp | 8 +- include/alpaka/meta/Set.hpp | 5 + include/alpaka/meta/Transform.hpp | 1 + include/alpaka/offset/Traits.hpp | 176 +++++++------- include/alpaka/queue/Properties.hpp | 1 + include/alpaka/queue/QueueCpuBlocking.hpp | 2 +- include/alpaka/queue/QueueCpuNonBlocking.hpp | 2 +- include/alpaka/queue/QueueCpuSyclBlocking.hpp | 2 +- .../alpaka/queue/QueueCpuSyclNonBlocking.hpp | 2 +- .../queue/QueueFpgaSyclIntelBlocking.hpp | 2 +- .../queue/QueueFpgaSyclIntelNonBlocking.hpp | 2 +- .../alpaka/queue/QueueGenericSyclBlocking.hpp | 2 +- .../queue/QueueGenericSyclNonBlocking.hpp | 2 +- .../queue/QueueGenericThreadsBlocking.hpp | 5 + .../queue/QueueGenericThreadsNonBlocking.hpp | 6 + .../queue/QueueGpuSyclIntelBlocking.hpp | 2 +- .../queue/QueueGpuSyclIntelNonBlocking.hpp | 2 +- .../queue/cuda_hip/QueueUniformCudaHipRt.hpp | 5 + .../queue/sycl/QueueGenericSyclBase.hpp | 2 +- .../rand/Philox/MultiplyAndSplit64to32.hpp | 8 +- .../alpaka/rand/Philox/PhiloxBaseCommon.hpp | 1 - .../alpaka/rand/Philox/PhiloxBaseTraits.hpp | 6 +- .../alpaka/rand/Philox/PhiloxConstants.hpp | 12 +- .../alpaka/rand/Philox/PhiloxStateless.hpp | 3 +- .../rand/Philox/PhiloxStatelessVector.hpp | 1 - include/alpaka/rand/Philox/PhiloxVector.hpp | 1 - include/alpaka/rand/RandDefault.hpp | 3 + include/alpaka/rand/RandGenericSycl.hpp | 2 + include/alpaka/rand/RandPhilox.hpp | 6 + include/alpaka/rand/RandStdLib.hpp | 24 +- .../alpaka/rand/RandUniformCudaHipRand.hpp | 8 +- include/alpaka/rand/TinyMT/Engine.hpp | 6 +- include/alpaka/rand/Traits.hpp | 3 + .../test/event/EventHostManualTrigger.hpp | 22 +- include/alpaka/test/queue/Queue.hpp | 1 + include/alpaka/vec/Traits.hpp | 2 + .../alpaka/warp/WarpUniformCudaHipBuiltIn.hpp | 1 + include/alpaka/workdiv/Traits.hpp | 2 + include/alpaka/workdiv/WorkDivHelpers.hpp | 2 + include/alpaka/workdiv/WorkDivMembers.hpp | 4 + test/analysis/headerCheck/src/main.cpp | 1 - test/integ/mandelbrot/src/mandelbrot.cpp | 8 +- test/unit/acc/src/AccTagTest.cpp | 3 +- test/unit/core/src/ConceptsTest.cpp | 1 + test/unit/kernel/src/KernelLambda.cpp | 1 - .../unit/kernel/src/KernelWithOmpSchedule.cpp | 1 + .../src/KernelWithoutTemplatedAccParam.cpp | 3 +- test/unit/math/src/Buffer.hpp | 9 +- test/unit/math/src/TestTemplate.hpp | 5 +- test/unit/math/src/mathADL.cpp | 38 +++ test/unit/math/src/sincos.cpp | 1 - .../meta/src/CudaVectorArrayWrapperTest.cpp | 1 - test/unit/meta/src/IntegralTest.cpp | 1 + test/unit/meta/src/IsStrictBaseTest.cpp | 2 + test/unit/queue/src/CollectiveQueue.cpp | 4 +- test/unit/vec/src/VecTest.cpp | 2 + 139 files changed, 702 insertions(+), 393 deletions(-) diff --git a/.clang-format b/.clang-format index 4a5998a808c5..0056379b33b0 100644 --- a/.clang-format +++ b/.clang-format @@ -6,15 +6,16 @@ DisableFormat: false AccessModifierOffset: -4 AlignAfterOpenBracket: AlwaysBreak +AlignArrayOfStructures: None AlignConsecutiveAssignments: false AlignConsecutiveBitFields: false AlignConsecutiveDeclarations: false AlignConsecutiveMacros: false AlignEscapedNewlines: Right AlignOperands: Align -AlignTrailingComments: false +AlignTrailingComments: + Kind: Never AllowAllArgumentsOnNextLine: false -AllowAllConstructorInitializersOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: false AllowShortBlocksOnASingleLine: Never AllowShortCaseLabelsOnASingleLine: false @@ -29,9 +30,11 @@ AlwaysBreakTemplateDeclarations: Yes BinPackArguments: false BinPackParameters: false BitFieldColonSpacing: Both +BreakAfterAttributes: Never BreakBeforeBinaryOperators: All BreakBeforeBraces: Allman -BreakBeforeConceptDeclarations: true +BreakBeforeConceptDeclarations: Always +BreakBeforeInlineASMColon: OnlyMultiline BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeComma BreakInheritanceList: BeforeComma @@ -39,46 +42,69 @@ BreakStringLiterals: true ColumnLimit: 119 CommentPragmas: '^ COMMENT pragma:' CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true -DeriveLineEnding: true DerivePointerAlignment: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: Always ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true IncludeBlocks: Regroup IncludeIsMainRegex: '(Test)?$' IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false IndentCaseBlocks: true IndentCaseLabels: false IndentExternBlock: AfterExternBlock IndentGotoLabels: true IndentPPDirectives: AfterHash -IndentRequires: true +IndentRequiresClause: false IndentWidth: 4 IndentWrappedFunctionNames: false +InsertBraces: false +InsertNewlineAtEOF: true +IntegerLiteralSeparator: + Binary: 4 + Decimal: 3 + DecimalMinDigits: 7 + Hex: 4 KeepEmptyLinesAtTheStartOfBlocks: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 2 NamespaceIndentation: All +PackConstructorInitializers: CurrentLine PenaltyBreakAssignment: 2 PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 # default made explicit here PenaltyBreakString: 1000 PenaltyBreakTemplateDeclaration: 10 PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 # default made explicit here PenaltyReturnTypeOnItsOwnLine: 1000 PointerAlignment: Left -QualifierAlignment: Right +PPIndentWidth: -1 # follow IndentWidth +QualifierAlignment: Custom +QualifierOrder: ['friend', 'static', 'inline', 'constexpr', 'type', 'const', 'volatile', 'restrict'] +ReferenceAlignment: Pointer # follow PointerAlignment ReflowComments: true +RemoveBracesLLVM: false +RemoveSemicolon: false +RequiresClausePosition: WithPreceding +RequiresExpressionIndentation: OuterScope +ShortNamespaceLines: 0 SortIncludes: true -SortUsingDeclarations: true +SortUsingDeclarations: Lexicographic +SeparateDefinitionBlocks: Always SpaceAfterCStyleCast: true SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: false +SpaceAroundPointerQualifiers: Default # follow PointerAlignment SpaceBeforeAssignmentOperators: true SpaceBeforeCaseColon: false SpaceBeforeCpp11BracedList: false @@ -86,6 +112,7 @@ SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: Never SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 @@ -93,14 +120,19 @@ SpacesInAngles: false SpacesInConditionalStatement: false SpacesInContainerLiterals: false SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 SpacesInParentheses: false SpacesInSquareBrackets: false -SpaceBeforeSquareBrackets: false TabWidth: 4 UseCRLF: false UseTab: Never # Project specific options +#AttributeMacros: [] +#ForEachMacros: [] +#IfMacros: [] IncludeCategories: # Local headers (in "") above all else - Regex: '"([A-Za-z0-9.\/-_])+"' @@ -118,5 +150,21 @@ IncludeCategories: # below alpaka's includes - Regex: '<([A-Za-z0-9.\/-_])+>' Priority: 4 +# Macros: [] +# NamespaceMacros: [] +StatementAttributeLikeMacros: + - 'ALPAKA_DEVICE_VOLATILE' + - 'ALPAKA_FN_ACC' + - 'ALPAKA_FN_EXTERN' + - 'ALPAKA_FN_HOST' + - 'ALPAKA_FN_HOST_ACC' + - 'ALPAKA_FN_INLINE' + - 'ALPAKA_STATIC_ACC_MEM_CONSTANT' + - 'ALPAKA_STATIC_ACC_MEM_GLOBAL' + - 'ALPAKA_UNROLL' + - 'ALPAKA_VECTORIZE_HINT' +#StatementMacros: [] +#TypenameMacros: [] +#WhitespaceSensitiveMacros: [] ... diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 891e0f32c2e1..77fabb4fe568 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -81,9 +81,9 @@ jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - - uses: DoozyX/clang-format-lint-action@v0.14 + - uses: DoozyX/clang-format-lint-action@v0.16.2 with: - clangFormatVersion: 14 + clangFormatVersion: 16 exclude: './thirdParty' inplace: True - run: | diff --git a/example/babelstream/src/Stream.h b/example/babelstream/src/Stream.h index f049a373c9dc..2d050972c3c0 100644 --- a/example/babelstream/src/Stream.h +++ b/example/babelstream/src/Stream.h @@ -38,7 +38,6 @@ class Stream virtual void read_arrays(std::vector& a, std::vector& b, std::vector& c) = 0; }; - // Implementation specific device functions void listDevices(void); std::string getDeviceName(int const); diff --git a/example/babelstream/src/main.cpp b/example/babelstream/src/main.cpp index 840b54ca9e39..13bd865b6295 100644 --- a/example/babelstream/src/main.cpp +++ b/example/babelstream/src/main.cpp @@ -54,7 +54,7 @@ #endif // Default size of 2^25 -int ARRAY_SIZE = 33554432; +int ARRAY_SIZE = 33'554'432; unsigned int num_times = 100; unsigned int deviceIndex = 0; bool use_float = false; @@ -101,7 +101,6 @@ int main(int argc, char* argv[]) run(); } - // Run the 5 main kernels template std::vector> run_all(Stream* stream, T& sum) @@ -194,7 +193,6 @@ std::vector> run_nstream(Stream* stream) return timings; } - // Generic run routine // Runs the kernel(s) and prints output. template @@ -426,7 +424,6 @@ void run() delete stream; } - template void check_solution(unsigned int const ntimes, std::vector& a, std::vector& b, std::vector& c, T& sum) { diff --git a/example/bufferCopy/src/bufferCopy.cpp b/example/bufferCopy/src/bufferCopy.cpp index f56fd21aeeb1..b1e53ff20f7e 100644 --- a/example/bufferCopy/src/bufferCopy.cpp +++ b/example/bufferCopy/src/bufferCopy.cpp @@ -25,7 +25,6 @@ struct PrintBufferKernel } }; - //! Tests if the value of the buffer on index i is equal to i. struct TestBufferKernel { diff --git a/example/complex/src/complex.cpp b/example/complex/src/complex.cpp index 9b116cb519f5..beadbc7795f0 100644 --- a/example/complex/src/complex.cpp +++ b/example/complex/src/complex.cpp @@ -8,7 +8,6 @@ #include #include - //! Complex numbers demonstration kernel struct ComplexKernel { diff --git a/example/heatEquation/src/heatEquation.cpp b/example/heatEquation/src/heatEquation.cpp index 7bbf610fd380..82cd5a8608a9 100644 --- a/example/heatEquation/src/heatEquation.cpp +++ b/example/heatEquation/src/heatEquation.cpp @@ -10,7 +10,6 @@ #include #include - //! alpaka version of explicit finite-difference 1d heat equation solver //! //! Solving equation u_t(x, t) = u_xx(x, t) using a simple explicit scheme with @@ -45,7 +44,6 @@ struct HeatEquationKernel } }; - //! Exact solution to the test problem //! u_t(x, t) = u_xx(x, t), x in [0, 1], t in [0, T] //! u(0, t) = u(1, t) = 0 @@ -59,7 +57,6 @@ auto exactSolution(double const x, double const t) -> double return std::exp(-pi * pi * t) * std::sin(pi * x); } - //! Each kernel computes the next step for one point. //! Therefore the number of threads should be equal to numNodesX. //! Every time step the kernel will be executed numNodesX-times diff --git a/example/monteCarloIntegration/src/monteCarloIntegration.cpp b/example/monteCarloIntegration/src/monteCarloIntegration.cpp index bfe7bcc2097a..bbb9e3fcb5a8 100644 --- a/example/monteCarloIntegration/src/monteCarloIntegration.cpp +++ b/example/monteCarloIntegration/src/monteCarloIntegration.cpp @@ -72,7 +72,6 @@ struct Kernel } }; - auto main() -> int { // Defines and setup. diff --git a/example/randomCells2D/src/randomCells2D.cpp b/example/randomCells2D/src/randomCells2D.cpp index 4db4475adf1b..7b96f7954507 100644 --- a/example/randomCells2D/src/randomCells2D.cpp +++ b/example/randomCells2D/src/randomCells2D.cpp @@ -10,9 +10,9 @@ #include #include -unsigned constexpr NUM_CALCULATIONS = 256; -unsigned constexpr NUM_X = 127; -unsigned constexpr NUM_Y = 211; +constexpr unsigned NUM_CALCULATIONS = 256; +constexpr unsigned NUM_X = 127; +constexpr unsigned NUM_Y = 211; /// Selected PRNG engine for single-value operation template @@ -121,7 +121,7 @@ struct RunTimestepKernelVector using DistributionResult = typename RandomEngineVector::template ResultContainer; // Container type which will store // the distribution results - unsigned constexpr resultVectorSize = std::tuple_size_v; // Size of the result vector + constexpr unsigned resultVectorSize = std::tuple_size_v; // Size of the result vector alpaka::rand::UniformReal dist; // Vector-aware distribution function diff --git a/example/randomStrategies/src/randomStrategies.cpp b/example/randomStrategies/src/randomStrategies.cpp index de66418ead40..84d2b54303bd 100644 --- a/example/randomStrategies/src/randomStrategies.cpp +++ b/example/randomStrategies/src/randomStrategies.cpp @@ -12,13 +12,14 @@ #include // This example generates NUM_ROLLS of random events for each of NUM_POINTS points. -unsigned constexpr NUM_POINTS = 2000; ///< Number of "points". Each will be processed by a single thread. -unsigned constexpr NUM_ROLLS = 2000; ///< Amount of random number "dice rolls" performed for each "point". +constexpr unsigned NUM_POINTS = 2000; ///< Number of "points". Each will be processed by a single thread. +constexpr unsigned NUM_ROLLS = 2000; ///< Amount of random number "dice rolls" performed for each "point". /// Selected PRNG engine // Comment the current "using" line, and uncomment a different one to change the PRNG engine template using RandomEngine = alpaka::rand::Philox4x32x10; + // using RandomEngine = alpaka::rand::engine::cpu::MersenneTwister; // using RandomEngine = alpaka::rand::engine::cpu::TinyMersenneTwister; // using RandomEngine = alpaka::rand::engine::uniform_cuda_hip::Xor; @@ -159,7 +160,6 @@ struct InitRandomKernel } }; - /// Fill the result buffer with random "dice rolls" struct FillKernel { @@ -217,7 +217,7 @@ struct Writer; template<> struct Writer { - void static save(float const* buffer, Box const& box) + static void save(float const* buffer, Box const& box) { saveDataAndShowAverage("out_seed.csv", buffer, box); } @@ -226,7 +226,7 @@ struct Writer template<> struct Writer { - void static save(float const* buffer, Box const& box) + static void save(float const* buffer, Box const& box) { saveDataAndShowAverage("out_subsequence.csv", buffer, box); } @@ -235,7 +235,7 @@ struct Writer template<> struct Writer { - void static save(float const* buffer, Box const& box) + static void save(float const* buffer, Box const& box) { saveDataAndShowAverage("out_offset.csv", buffer, box); } diff --git a/example/reduce/src/iterator.hpp b/example/reduce/src/iterator.hpp index 5e0204fed4bb..611432fbe441 100644 --- a/example/reduce/src/iterator.hpp +++ b/example/reduce/src/iterator.hpp @@ -101,7 +101,7 @@ class Iterator //! Returns the current element. //! //! Returns a reference to the current index. - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE auto operator*() -> const T& + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE auto operator*() -> T const& { return mData[mIndex]; } @@ -123,8 +123,12 @@ class IteratorCpu : public Iterator //! \param linearizedIndex The linearized index. //! \param gridSize The grid size. //! \param n The problem size. - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE - IteratorCpu(TAcc const& acc, TBuf const* data, uint32_t linearizedIndex, uint32_t gridSize, uint64_t n) + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE IteratorCpu( + TAcc const& acc, + TBuf const* data, + uint32_t linearizedIndex, + uint32_t gridSize, + uint64_t n) : Iterator( data, static_cast((n * linearizedIndex) / alpaka::math::min(acc, static_cast(gridSize), n)), @@ -244,8 +248,12 @@ class IteratorGpu : public Iterator //! \param linearizedIndex The linearized index. //! \param gridSize The grid size. //! \param n The problem size. - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE - IteratorGpu(TAcc const&, TBuf const* data, uint32_t linearizedIndex, uint32_t gridSize, uint64_t n) + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE IteratorGpu( + TAcc const&, + TBuf const* data, + uint32_t linearizedIndex, + uint32_t gridSize, + uint64_t n) : Iterator(data, linearizedIndex, n) , mGridSize(gridSize) { diff --git a/example/reduce/src/kernel.hpp b/example/reduce/src/kernel.hpp index 2df220225f91..6f76899f9c47 100644 --- a/example/reduce/src/kernel.hpp +++ b/example/reduce/src/kernel.hpp @@ -27,7 +27,7 @@ struct cheapArray //! \param index The index of the element to be accessed. //! //! Returns the requested element per constant reference. - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE auto operator[](uint64_t index) const -> const T& + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE auto operator[](uint64_t index) const -> T const& { return data[index]; } diff --git a/include/alpaka/acc/AccCpuOmp2Blocks.hpp b/include/alpaka/acc/AccCpuOmp2Blocks.hpp index 2f580cc48fa1..d166e84548b3 100644 --- a/include/alpaka/acc/AccCpuOmp2Blocks.hpp +++ b/include/alpaka/acc/AccCpuOmp2Blocks.hpp @@ -129,6 +129,7 @@ namespace alpaka { using type = AccCpuOmp2Blocks; }; + //! The CPU OpenMP 2.0 block accelerator device properties get trait specialization. template struct GetAccDevProps> @@ -153,6 +154,7 @@ namespace alpaka static_cast(AccCpuOmp2Blocks::staticAllocBytes())}; } }; + //! The CPU OpenMP 2.0 block accelerator name trait specialization. template struct GetAccName> diff --git a/include/alpaka/acc/AccCpuOmp2Threads.hpp b/include/alpaka/acc/AccCpuOmp2Threads.hpp index b2f87525cc1b..558ab2f39d1c 100644 --- a/include/alpaka/acc/AccCpuOmp2Threads.hpp +++ b/include/alpaka/acc/AccCpuOmp2Threads.hpp @@ -134,6 +134,7 @@ namespace alpaka { using type = AccCpuOmp2Threads; }; + //! The CPU OpenMP 2.0 thread accelerator device properties get trait specialization. template struct GetAccDevProps> @@ -163,6 +164,7 @@ namespace alpaka getMemBytes(dev)}; } }; + //! The CPU OpenMP 2.0 thread accelerator name trait specialization. template struct GetAccName> diff --git a/include/alpaka/acc/AccCpuSerial.hpp b/include/alpaka/acc/AccCpuSerial.hpp index f6c481a5511b..6458ac420c30 100644 --- a/include/alpaka/acc/AccCpuSerial.hpp +++ b/include/alpaka/acc/AccCpuSerial.hpp @@ -123,6 +123,7 @@ namespace alpaka { using type = AccCpuSerial; }; + //! The CPU serial accelerator device properties get trait specialization. template struct GetAccDevProps> @@ -147,6 +148,7 @@ namespace alpaka static_cast(AccCpuSerial::staticAllocBytes())}; } }; + //! The CPU serial accelerator name trait specialization. template struct GetAccName> diff --git a/include/alpaka/acc/AccCpuTbbBlocks.hpp b/include/alpaka/acc/AccCpuTbbBlocks.hpp index 73361ba5628b..e20f9dfd378d 100644 --- a/include/alpaka/acc/AccCpuTbbBlocks.hpp +++ b/include/alpaka/acc/AccCpuTbbBlocks.hpp @@ -120,6 +120,7 @@ namespace alpaka { using type = AccCpuTbbBlocks; }; + //! The CPU TBB block accelerator device properties get trait specialization. template struct GetAccDevProps> @@ -144,6 +145,7 @@ namespace alpaka static_cast(AccCpuTbbBlocks::staticAllocBytes())}; } }; + //! The CPU TBB block accelerator name trait specialization. template struct GetAccName> diff --git a/include/alpaka/acc/AccCpuThreads.hpp b/include/alpaka/acc/AccCpuThreads.hpp index b33c5bf7e53a..ccbe79a3f222 100644 --- a/include/alpaka/acc/AccCpuThreads.hpp +++ b/include/alpaka/acc/AccCpuThreads.hpp @@ -135,6 +135,7 @@ namespace alpaka { using type = AccCpuThreads; }; + //! The CPU threads accelerator device properties get trait specialization. template struct GetAccDevProps> @@ -169,6 +170,7 @@ namespace alpaka getMemBytes(dev)}; } }; + //! The CPU threads accelerator name trait specialization. template struct GetAccName> diff --git a/include/alpaka/acc/AccDevProps.hpp b/include/alpaka/acc/AccDevProps.hpp index 60a4c5141484..cd87e20cfb8b 100644 --- a/include/alpaka/acc/AccDevProps.hpp +++ b/include/alpaka/acc/AccDevProps.hpp @@ -22,6 +22,7 @@ namespace alpaka static_assert( sizeof(TIdx) >= sizeof(int), "Index type is not supported, consider using int or a larger type."); + ALPAKA_FN_HOST AccDevProps( TIdx const& multiProcessorCount, Vec const& gridBlockExtentMax, diff --git a/include/alpaka/atomic/AtomicAtomicRef.hpp b/include/alpaka/atomic/AtomicAtomicRef.hpp index 58e0833da2f4..f38b3dee7d0b 100644 --- a/include/alpaka/atomic/AtomicAtomicRef.hpp +++ b/include/alpaka/atomic/AtomicAtomicRef.hpp @@ -28,6 +28,7 @@ namespace alpaka using atomic_ref = boost::atomic_ref; # endif } // namespace detail + //! The atomic ops based on atomic_ref for CPU accelerators. // // Atomics can be used in the grids, blocks and threads hierarchy levels. diff --git a/include/alpaka/atomic/AtomicOmpBuiltIn.hpp b/include/alpaka/atomic/AtomicOmpBuiltIn.hpp index f0db6314bf97..6d4dc96e72ec 100644 --- a/include/alpaka/atomic/AtomicOmpBuiltIn.hpp +++ b/include/alpaka/atomic/AtomicOmpBuiltIn.hpp @@ -301,6 +301,7 @@ namespace alpaka } return old; } + ALPAKA_FN_HOST static auto atomicOp( AtomicOmpBuiltIn const&, T* const addr, diff --git a/include/alpaka/atomic/AtomicStdLibLock.hpp b/include/alpaka/atomic/AtomicStdLibLock.hpp index 7375fa8f151e..16a659fb07b9 100644 --- a/include/alpaka/atomic/AtomicStdLibLock.hpp +++ b/include/alpaka/atomic/AtomicStdLibLock.hpp @@ -86,6 +86,7 @@ namespace alpaka std::lock_guard lock(atomic.getMutex(addr)); return TOp()(addr, value); } + ALPAKA_FN_HOST static auto atomicOp( AtomicStdLibLock const& atomic, T* const addr, diff --git a/include/alpaka/atomic/AtomicUniformCudaHip.hpp b/include/alpaka/atomic/AtomicUniformCudaHip.hpp index 6191640b0a58..330e3a4d51d2 100644 --- a/include/alpaka/atomic/AtomicUniformCudaHip.hpp +++ b/include/alpaka/atomic/AtomicUniformCudaHip.hpp @@ -71,7 +71,7 @@ inline namespace alpakaGlobal decltype(atomicCAS(alpaka::core::declval(), alpaka::core::declval(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T compare, T value) + static __device__ T atomic(T* add, T compare, T value) { return atomicCAS(add, compare, value); } @@ -88,7 +88,7 @@ inline namespace alpakaGlobal alpaka::core::declval(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T compare, T value) + static __device__ T atomic(T* add, T compare, T value) { return atomicCAS_block(add, compare, value); } @@ -105,7 +105,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicAdd(add, value); } @@ -121,7 +121,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicAdd_block(add, value); } @@ -156,7 +156,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicSub(add, value); } @@ -171,7 +171,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicSub_block(add, value); } @@ -187,7 +187,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicMin(add, value); } @@ -202,7 +202,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicMin_block(add, value); } @@ -254,7 +254,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicMax(add, value); } @@ -269,7 +269,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicMax_block(add, value); } @@ -322,7 +322,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicExch(add, value); } @@ -337,7 +337,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicExch_block(add, value); } @@ -354,7 +354,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicInc(add, value); } @@ -369,7 +369,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicInc_block(add, value); } @@ -386,7 +386,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicDec(add, value); } @@ -401,7 +401,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicDec_block(add, value); } @@ -418,7 +418,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicAnd(add, value); } @@ -433,7 +433,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicAnd_block(add, value); } @@ -450,7 +450,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicOr(add, value); } @@ -465,7 +465,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicOr_block(add, value); } @@ -482,7 +482,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicXor(add, value); } @@ -497,7 +497,7 @@ inline namespace alpakaGlobal typename std::void_t(), alpaka::core::declval()))>> : std::true_type { - __device__ static T atomic(T* add, T value) + static __device__ T atomic(T* add, T value) { return atomicXor_block(add, value); } diff --git a/include/alpaka/atomic/AtomicUniformCudaHipBuiltIn.hpp b/include/alpaka/atomic/AtomicUniformCudaHipBuiltIn.hpp index 138cf503bde6..917616a41fa0 100644 --- a/include/alpaka/atomic/AtomicUniformCudaHipBuiltIn.hpp +++ b/include/alpaka/atomic/AtomicUniformCudaHipBuiltIn.hpp @@ -57,6 +57,7 @@ namespace alpaka return *reinterpretAddress(&value); } }; + //! Emulate atomic // // The default implementation will emulate all atomic functions with atomicCAS. diff --git a/include/alpaka/atomic/Op.hpp b/include/alpaka/atomic/Op.hpp index 86b62f328d19..2912556d5083 100644 --- a/include/alpaka/atomic/Op.hpp +++ b/include/alpaka/atomic/Op.hpp @@ -10,7 +10,6 @@ #include #include - namespace alpaka { //! The addition function object. @@ -34,6 +33,7 @@ namespace alpaka #endif } }; + //! The subtraction function object. struct AtomicSub { @@ -55,6 +55,7 @@ namespace alpaka return old; } }; + //! The minimum function object. struct AtomicMin { @@ -69,6 +70,7 @@ namespace alpaka return old; } }; + //! The maximum function object. struct AtomicMax { @@ -83,6 +85,7 @@ namespace alpaka return old; } }; + //! The exchange function object. struct AtomicExch { @@ -97,6 +100,7 @@ namespace alpaka return old; } }; + //! The increment function object. struct AtomicInc { @@ -113,6 +117,7 @@ namespace alpaka return old; } }; + //! The decrement function object. struct AtomicDec { @@ -129,6 +134,7 @@ namespace alpaka return old; } }; + //! The and function object. struct AtomicAnd { @@ -143,6 +149,7 @@ namespace alpaka return old; } }; + //! The or function object. struct AtomicOr { @@ -157,6 +164,7 @@ namespace alpaka return old; } }; + //! The exclusive or function object. struct AtomicXor { @@ -171,6 +179,7 @@ namespace alpaka return old; } }; + //! The compare and swap function object. struct AtomicCas { @@ -196,6 +205,7 @@ namespace alpaka #endif return old; } + //! AtomicCas for floating point values // \return The old value of addr. ALPAKA_NO_HOST_ACC_WARNING diff --git a/include/alpaka/atomic/Traits.hpp b/include/alpaka/atomic/Traits.hpp index a5905b3d2b68..160da8c86a6a 100644 --- a/include/alpaka/atomic/Traits.hpp +++ b/include/alpaka/atomic/Traits.hpp @@ -16,9 +16,11 @@ namespace alpaka struct ConceptAtomicGrids { }; + struct ConceptAtomicBlocks { }; + struct ConceptAtomicThreads { }; diff --git a/include/alpaka/block/shared/st/BlockSharedMemStMember.hpp b/include/alpaka/block/shared/st/BlockSharedMemStMember.hpp index e4b39ee357ee..93c65e5a768f 100644 --- a/include/alpaka/block/shared/st/BlockSharedMemStMember.hpp +++ b/include/alpaka/block/shared/st/BlockSharedMemStMember.hpp @@ -46,6 +46,7 @@ namespace alpaka return *data; } }; + template struct FreeSharedVars> { diff --git a/include/alpaka/block/shared/st/BlockSharedMemStMemberMasterSync.hpp b/include/alpaka/block/shared/st/BlockSharedMemStMemberMasterSync.hpp index a5c7d76f92cf..65bd3043f2fd 100644 --- a/include/alpaka/block/shared/st/BlockSharedMemStMemberMasterSync.hpp +++ b/include/alpaka/block/shared/st/BlockSharedMemStMemberMasterSync.hpp @@ -15,7 +15,6 @@ #include #include - namespace alpaka { template diff --git a/include/alpaka/block/shared/st/BlockSharedMemStUniformCudaHipBuiltIn.hpp b/include/alpaka/block/shared/st/BlockSharedMemStUniformCudaHipBuiltIn.hpp index b611f453ad15..9f4ed0ca207d 100644 --- a/include/alpaka/block/shared/st/BlockSharedMemStUniformCudaHipBuiltIn.hpp +++ b/include/alpaka/block/shared/st/BlockSharedMemStUniformCudaHipBuiltIn.hpp @@ -42,6 +42,7 @@ namespace alpaka return *(reinterpret_cast(shMem)); } }; + template<> struct FreeSharedVars { diff --git a/include/alpaka/block/sync/BlockSyncBarrierOmp.hpp b/include/alpaka/block/sync/BlockSyncBarrierOmp.hpp index 6424c4cd5dd1..c8d9ace077d0 100644 --- a/include/alpaka/block/sync/BlockSyncBarrierOmp.hpp +++ b/include/alpaka/block/sync/BlockSyncBarrierOmp.hpp @@ -38,6 +38,7 @@ namespace alpaka { template struct AtomicOp; + template<> struct AtomicOp { @@ -47,6 +48,7 @@ namespace alpaka result += static_cast(value); } }; + template<> struct AtomicOp { @@ -56,6 +58,7 @@ namespace alpaka result &= static_cast(value); } }; + template<> struct AtomicOp { @@ -71,6 +74,7 @@ namespace alpaka struct SyncBlockThreadsPredicate { ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_ACC static auto syncBlockThreadsPredicate(BlockSyncBarrierOmp const& blockSync, int predicate) -> int { diff --git a/include/alpaka/block/sync/Traits.hpp b/include/alpaka/block/sync/Traits.hpp index 2411c6e478f4..f6c6563b73ee 100644 --- a/include/alpaka/block/sync/Traits.hpp +++ b/include/alpaka/block/sync/Traits.hpp @@ -54,6 +54,7 @@ namespace alpaka return currentResult + static_cast(value != static_cast(0)); } }; + //! The logical and function object. struct BlockAnd { @@ -69,6 +70,7 @@ namespace alpaka return static_cast(currentResult && (value != static_cast(0))); } }; + //! The logical or function object. struct BlockOr { diff --git a/include/alpaka/core/Align.hpp b/include/alpaka/core/Align.hpp index c87a1df9bcd6..d2be0149a50f 100644 --- a/include/alpaka/core/Align.hpp +++ b/include/alpaka/core/Align.hpp @@ -24,6 +24,7 @@ namespace alpaka::core struct RoundUpToPowerOfTwoHelper : std::integral_constant { }; + //! Case for N not being a power of two. // We could just use NextVal = N+1, but this converges faster. N|(N-1) sets // the right-most zero bits to one all at once, e.g. 0b0011000 -> 0b0011111. @@ -33,6 +34,7 @@ namespace alpaka::core { }; } // namespace detail + template struct RoundUpToPowerOfTwo : std::integral_constant::value> diff --git a/include/alpaka/core/ApiHipRt.hpp b/include/alpaka/core/ApiHipRt.hpp index 042bb7023247..e76294f068fb 100644 --- a/include/alpaka/core/ApiHipRt.hpp +++ b/include/alpaka/core/ApiHipRt.hpp @@ -79,7 +79,7 @@ namespace alpaka static constexpr DeviceAttr_t deviceAttributeMaxThreadsPerBlock = ::hipDeviceAttributeMaxThreadsPerBlock; static constexpr DeviceAttr_t deviceAttributeMultiprocessorCount = ::hipDeviceAttributeMultiprocessorCount; -# if HIP_VERSION >= 40500000 +# if HIP_VERSION >= 40'500'000 static constexpr Limit_t limitPrintfFifoSize = ::hipLimitPrintfFifoSize; # else static constexpr Limit_t limitPrintfFifoSize @@ -118,7 +118,7 @@ namespace alpaka static inline Error_t deviceGetLimit(size_t* pValue, Limit_t limit) { -# if HIP_VERSION < 40500000 +# if HIP_VERSION < 40'500'000 if(limit == limitPrintfFifoSize) { // Implemented only in ROCm 4.5.0 and later. @@ -182,7 +182,7 @@ namespace alpaka static inline Error_t freeAsync([[maybe_unused]] void* devPtr, [[maybe_unused]] Stream_t stream) { // hipFreeAsync is implemented only in ROCm 5.2.0 and later. -# if HIP_VERSION >= 50200000 +# if HIP_VERSION >= 50'200'000 return ::hipFreeAsync(devPtr, stream); # else // Not implemented. @@ -265,7 +265,7 @@ namespace alpaka static inline Error_t launchHostFunc(Stream_t stream, HostFn_t fn, void* userData) { // hipLaunchHostFunc is implemented only in ROCm 5.4.0 and later. -# if HIP_VERSION >= 50400000 +# if HIP_VERSION >= 50'400'000 // Wrap the host function using the proper calling convention. return ::hipLaunchHostFunc(stream, HostFnAdaptor::hostFunction, new HostFnAdaptor{fn, userData}); # else @@ -290,7 +290,7 @@ namespace alpaka [[maybe_unused]] Stream_t stream) { // hipMallocAsync is implemented only in ROCm 5.2.0 and later. -# if HIP_VERSION >= 50200000 +# if HIP_VERSION >= 50'200'000 return ::hipMallocAsync(devPtr, size, stream); # else // Not implemented. diff --git a/include/alpaka/core/BarrierThread.hpp b/include/alpaka/core/BarrierThread.hpp index cd9b200d234e..ff38eb3b050a 100644 --- a/include/alpaka/core/BarrierThread.hpp +++ b/include/alpaka/core/BarrierThread.hpp @@ -5,7 +5,7 @@ #pragma once // Uncomment this to disable the standard spinlock behaviour of the threads -//#define ALPAKA_THREAD_BARRIER_DISABLE_SPINLOCK +// #define ALPAKA_THREAD_BARRIER_DISABLE_SPINLOCK #include "alpaka/block/sync/Traits.hpp" #include "alpaka/core/Common.hpp" @@ -82,6 +82,7 @@ namespace alpaka::core { template struct AtomicOp; + template<> struct AtomicOp { @@ -90,6 +91,7 @@ namespace alpaka::core result += static_cast(value); } }; + template<> struct AtomicOp { @@ -98,6 +100,7 @@ namespace alpaka::core result &= static_cast(value); } }; + template<> struct AtomicOp { diff --git a/include/alpaka/core/ClipCast.hpp b/include/alpaka/core/ClipCast.hpp index 3ac061c058bd..aa8c71272cae 100644 --- a/include/alpaka/core/ClipCast.hpp +++ b/include/alpaka/core/ClipCast.hpp @@ -19,8 +19,8 @@ namespace alpaka::core std::is_integral_v && std::is_integral_v, "clipCast can not be called with non-integral types!"); - auto constexpr max = static_cast(std::numeric_limits>::max()); - auto constexpr min = static_cast(std::numeric_limits>::min()); + constexpr auto max = static_cast(std::numeric_limits>::max()); + constexpr auto min = static_cast(std::numeric_limits>::min()); return static_cast(std::max(min, std::min(max, val))); } diff --git a/include/alpaka/core/Cuda.hpp b/include/alpaka/core/Cuda.hpp index 2c5be048037d..8332ad3da27c 100644 --- a/include/alpaka/core/Cuda.hpp +++ b/include/alpaka/core/Cuda.hpp @@ -51,7 +51,7 @@ namespace alpaka::cuda::detail } // namespace alpaka::cuda::detail //! CUDA driver error checking with log and exception. -# define ALPAKA_CUDA_DRV_CHECK(cmd) ::alpaka::cuda::detail::cudaDrvCheck(cmd, # cmd, __FILE__, __LINE__) +# define ALPAKA_CUDA_DRV_CHECK(cmd) ::alpaka::cuda::detail::cudaDrvCheck(cmd, #cmd, __FILE__, __LINE__) # include "alpaka/core/UniformCudaHip.hpp" diff --git a/include/alpaka/core/CudaHipCommon.hpp b/include/alpaka/core/CudaHipCommon.hpp index 1f4455d145be..b3fdd7d10304 100644 --- a/include/alpaka/core/CudaHipCommon.hpp +++ b/include/alpaka/core/CudaHipCommon.hpp @@ -91,18 +91,21 @@ namespace alpaka { using type = DimInt<1u>; }; + //! The CUDA/HIP vectors 2D dimension get trait specialization. template struct DimType::value>> { using type = DimInt<2u>; }; + //! The CUDA/HIP vectors 3D dimension get trait specialization. template struct DimType::value>> { using type = DimInt<3u>; }; + //! The CUDA/HIP vectors 4D dimension get trait specialization. template struct DimType::value>> @@ -140,14 +143,12 @@ namespace alpaka } }; - template struct GetOffsets>> : GetExtents { }; - //! The CUDA/HIP vectors idx type trait specialization. template struct IdxType>> diff --git a/include/alpaka/core/Debug.hpp b/include/alpaka/core/Debug.hpp index a5258cf9351d..dc70ed5138b1 100644 --- a/include/alpaka/core/Debug.hpp +++ b/include/alpaka/core/Debug.hpp @@ -32,10 +32,12 @@ namespace alpaka::core::detail { std::cout << "[+] " << m_sScope << std::endl; } + ScopeLogStdOut(ScopeLogStdOut const&) = delete; ScopeLogStdOut(ScopeLogStdOut&&) = delete; auto operator=(ScopeLogStdOut const&) -> ScopeLogStdOut& = delete; auto operator=(ScopeLogStdOut&&) -> ScopeLogStdOut& = delete; + ~ScopeLogStdOut() { std::cout << "[-] " << m_sScope << std::endl; @@ -68,7 +70,7 @@ namespace alpaka::core::detail # define ALPAKA_DEBUG_BREAK ::__debugbreak() # else # define ALPAKA_DEBUG_BREAK - //#error debug-break for current compiler not implemented! + // #error debug-break for current compiler not implemented! # endif #else # define ALPAKA_DEBUG_BREAK diff --git a/include/alpaka/core/OmpSchedule.hpp b/include/alpaka/core/OmpSchedule.hpp index e99e0acb56fa..722b77b3bb02 100644 --- a/include/alpaka/core/OmpSchedule.hpp +++ b/include/alpaka/core/OmpSchedule.hpp @@ -12,7 +12,6 @@ #include - namespace alpaka::omp { //! Representation of OpenMP schedule information: kind and chunk size. This class can be used regardless of diff --git a/include/alpaka/core/Positioning.hpp b/include/alpaka/core/Positioning.hpp index d20e8dc95b76..8f3d9b8cb687 100644 --- a/include/alpaka/core/Positioning.hpp +++ b/include/alpaka/core/Positioning.hpp @@ -21,6 +21,7 @@ namespace alpaka { }; } // namespace hierarchy + //! Defines the origins available for getting extent and indices of kernel executions. namespace origin { @@ -31,6 +32,7 @@ namespace alpaka //! This type is used to get the extents relative to the thread. struct Thread; } // namespace origin + //! Defines the units available for getting extent and indices of kernel executions. namespace unit { diff --git a/include/alpaka/core/Unroll.hpp b/include/alpaka/core/Unroll.hpp index 3ff1b0489c62..10794e6e3bf8 100644 --- a/include/alpaka/core/Unroll.hpp +++ b/include/alpaka/core/Unroll.hpp @@ -13,10 +13,10 @@ //! for(...){...}` // \TODO: Implement for other compilers. #if BOOST_ARCH_PTX -# define ALPAKA_UNROLL_STRINGIFY(x) # x +# define ALPAKA_UNROLL_STRINGIFY(x) #x # define ALPAKA_UNROLL(...) _Pragma(ALPAKA_UNROLL_STRINGIFY(unroll __VA_ARGS__)) #elif BOOST_COMP_IBM || BOOST_COMP_SUNPRO || BOOST_COMP_HPACC -# define ALPAKA_UNROLL_STRINGIFY(x) # x +# define ALPAKA_UNROLL_STRINGIFY(x) #x # define ALPAKA_UNROLL(...) _Pragma(ALPAKA_UNROLL_STRINGIFY(unroll(__VA_ARGS__))) #elif BOOST_COMP_PGI # define ALPAKA_UNROLL(...) _Pragma("unroll") diff --git a/include/alpaka/core/Vectorize.hpp b/include/alpaka/core/Vectorize.hpp index 6359a2987da4..55f0e6f73c66 100644 --- a/include/alpaka/core/Vectorize.hpp +++ b/include/alpaka/core/Vectorize.hpp @@ -78,6 +78,7 @@ namespace alpaka::core::vectorization 1u; #endif }; + // Number of elements of the given type that can be processed in parallel in a vector register. template<> struct GetVectorizationSizeElems @@ -106,6 +107,7 @@ namespace alpaka::core::vectorization 1u; #endif }; + // Number of elements of the given type that can be processed in parallel in a vector register. template<> struct GetVectorizationSizeElems @@ -139,6 +141,7 @@ namespace alpaka::core::vectorization 1u; #endif }; + // Number of elements of the given type that can be processed in parallel in a vector register. template<> struct GetVectorizationSizeElems @@ -172,6 +175,7 @@ namespace alpaka::core::vectorization 1u; #endif }; + // Number of elements of the given type that can be processed in parallel in a vector register. template<> struct GetVectorizationSizeElems @@ -205,6 +209,7 @@ namespace alpaka::core::vectorization 1u; #endif }; + // Number of elements of the given type that can be processed in parallel in a vector register. template<> struct GetVectorizationSizeElems @@ -238,6 +243,7 @@ namespace alpaka::core::vectorization 1u; #endif }; + // Number of elements of the given type that can be processed in parallel in a vector register. template<> struct GetVectorizationSizeElems @@ -266,6 +272,7 @@ namespace alpaka::core::vectorization 1u; #endif }; + // Number of elements of the given type that can be processed in parallel in a vector register. template<> struct GetVectorizationSizeElems @@ -294,6 +301,7 @@ namespace alpaka::core::vectorization 1u; #endif }; + // Number of elements of the given type that can be processed in parallel in a vector register. template<> struct GetVectorizationSizeElems @@ -320,6 +328,7 @@ namespace alpaka::core::vectorization 1u; #endif }; + // Number of elements of the given type that can be processed in parallel in a vector register. template<> struct GetVectorizationSizeElems diff --git a/include/alpaka/dev/DevCpu.hpp b/include/alpaka/dev/DevCpu.hpp index 82a385606f70..7a91652518ec 100644 --- a/include/alpaka/dev/DevCpu.hpp +++ b/include/alpaka/dev/DevCpu.hpp @@ -29,15 +29,17 @@ namespace alpaka { class DevCpu; + namespace cpu { using ICpuQueue = IGenericThreadsQueue; - } + } // namespace cpu + namespace trait { template struct GetDevByIdx; - } + } // namespace trait struct PlatformCpu; //! The CPU device. @@ -64,6 +66,7 @@ namespace alpaka { return true; } + auto operator!=(DevCpu const& rhs) const -> bool { return !((*this) == rhs); @@ -173,6 +176,7 @@ namespace alpaka using type = PlatformCpu; }; } // namespace trait + using QueueCpuNonBlocking = QueueGenericThreadsNonBlocking; using QueueCpuBlocking = QueueGenericThreadsBlocking; diff --git a/include/alpaka/dev/DevUniformCudaHipRt.hpp b/include/alpaka/dev/DevUniformCudaHipRt.hpp index 3048c6639697..632b49829dd2 100644 --- a/include/alpaka/dev/DevUniformCudaHipRt.hpp +++ b/include/alpaka/dev/DevUniformCudaHipRt.hpp @@ -30,13 +30,13 @@ namespace alpaka { template struct GetDevByIdx; - } + } // namespace trait namespace uniform_cuda_hip::detail { template class QueueUniformCudaHipRt; - } + } // namespace uniform_cuda_hip::detail template using QueueUniformCudaHipRtBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt; @@ -70,6 +70,7 @@ namespace alpaka { return m_iDevice == rhs.m_iDevice; } + ALPAKA_FN_HOST auto operator!=(DevUniformCudaHipRt const& rhs) const -> bool { return !((*this) == rhs); @@ -98,6 +99,7 @@ namespace alpaka , m_QueueRegistry(std::make_shared>()) { } + int m_iDevice; std::shared_ptr> m_QueueRegistry; diff --git a/include/alpaka/dev/cpu/SysInfo.hpp b/include/alpaka/dev/cpu/SysInfo.hpp index 6eeb280c5120..1dc989ff21cb 100644 --- a/include/alpaka/dev/cpu/SysInfo.hpp +++ b/include/alpaka/dev/cpu/SysInfo.hpp @@ -78,7 +78,7 @@ namespace alpaka::cpu::detail { // Get extended ids. std::uint32_t ex[4] = {0}; - cpuid(0x80000000, 0, ex); + cpuid(0x8000'0000, 0, ex); std::uint32_t const nExIds(ex[0]); if(!nExIds) @@ -96,20 +96,20 @@ namespace alpaka::cpu::detail #if BOOST_ARCH_X86 // Get the information associated with each extended ID. char cpuBrandString[0x40] = {0}; - for(std::uint32_t i(0x80000000); i <= nExIds; ++i) + for(std::uint32_t i(0x8000'0000); i <= nExIds; ++i) { cpuid(i, 0, ex); // Interpret CPU brand string and cache information. - if(i == 0x80000002) + if(i == 0x8000'0002) { std::memcpy(cpuBrandString, ex, sizeof(ex)); } - else if(i == 0x80000003) + else if(i == 0x8000'0003) { std::memcpy(cpuBrandString + 16, ex, sizeof(ex)); } - else if(i == 0x80000004) + else if(i == 0x8000'0004) { std::memcpy(cpuBrandString + 32, ex, sizeof(ex)); } diff --git a/include/alpaka/event/EventCpu.hpp b/include/alpaka/event/EventCpu.hpp index 3ab3e156df40..d8836210534c 100644 --- a/include/alpaka/event/EventCpu.hpp +++ b/include/alpaka/event/EventCpu.hpp @@ -10,4 +10,4 @@ namespace alpaka { using EventCpu = EventGenericThreads; -} +} // namespace alpaka diff --git a/include/alpaka/event/EventGenericThreads.hpp b/include/alpaka/event/EventGenericThreads.hpp index aee20cf51b2c..b5888395b09b 100644 --- a/include/alpaka/event/EventGenericThreads.hpp +++ b/include/alpaka/event/EventGenericThreads.hpp @@ -33,6 +33,7 @@ namespace alpaka EventGenericThreadsImpl(TDev dev) noexcept : m_dev(std::move(dev)) { } + EventGenericThreadsImpl(EventGenericThreadsImpl const&) = delete; auto operator=(EventGenericThreadsImpl const&) -> EventGenericThreadsImpl& = delete; @@ -78,10 +79,12 @@ namespace alpaka : m_spEventImpl(std::make_shared>(dev)) { } + auto operator==(EventGenericThreads const& rhs) const -> bool { return (m_spEventImpl == rhs.m_spEventImpl); } + auto operator!=(EventGenericThreads const& rhs) const -> bool { return !((*this) == rhs); @@ -90,6 +93,7 @@ namespace alpaka public: std::shared_ptr> m_spEventImpl; }; + namespace trait { //! The CPU device event device type trait specialization. @@ -98,6 +102,7 @@ namespace alpaka { using type = TDev; }; + //! The CPU device event device get trait specialization. template struct GetDev> @@ -158,6 +163,7 @@ namespace alpaka }); } }; + //! The CPU non-blocking device queue enqueue trait specialization. template struct Enqueue, EventGenericThreads> @@ -171,6 +177,7 @@ namespace alpaka alpaka::enqueue(*queue.m_spQueueImpl, event); } }; + //! The CPU blocking device queue enqueue trait specialization. template struct Enqueue, EventGenericThreads> @@ -205,6 +212,7 @@ namespace alpaka promise.set_value(); } }; + //! The CPU blocking device queue enqueue trait specialization. template struct Enqueue, EventGenericThreads> @@ -219,6 +227,7 @@ namespace alpaka } }; } // namespace trait + namespace trait { namespace generic @@ -258,6 +267,7 @@ namespace alpaka wait(*event.m_spEventImpl); } }; + //! The CPU device event implementation thread wait trait specialization. //! //! Waits until the event itself and therefore all tasks preceding it in the queue it is enqueued to have been @@ -276,6 +286,7 @@ namespace alpaka eventImpl.wait(enqueueCount, lk); } }; + //! The CPU non-blocking device queue event wait trait specialization. template struct WaiterWaitFor< @@ -302,6 +313,7 @@ namespace alpaka } } }; + //! The CPU non-blocking device queue event wait trait specialization. template struct WaiterWaitFor, EventGenericThreads> @@ -313,6 +325,7 @@ namespace alpaka wait(*queue.m_spQueueImpl, event); } }; + //! The CPU blocking device queue event wait trait specialization. template struct WaiterWaitFor, EventGenericThreads> @@ -325,6 +338,7 @@ namespace alpaka wait(*event.m_spEventImpl); } }; + //! The CPU blocking device queue event wait trait specialization. template struct WaiterWaitFor, EventGenericThreads> @@ -336,6 +350,7 @@ namespace alpaka wait(*queue.m_spQueueImpl, event); } }; + //! The CPU non-blocking device event wait trait specialization. //! //! Any future work submitted in any queue of this device will wait for event to complete before beginning diff --git a/include/alpaka/event/EventUniformCudaHipRt.hpp b/include/alpaka/event/EventUniformCudaHipRt.hpp index 6d699d88553a..63f1f2fab778 100644 --- a/include/alpaka/event/EventUniformCudaHipRt.hpp +++ b/include/alpaka/event/EventUniformCudaHipRt.hpp @@ -51,8 +51,10 @@ namespace alpaka &m_UniformCudaHipEvent, (bBusyWait ? TApi::eventDefault : TApi::eventBlockingSync) | TApi::eventDisableTiming)); } + EventUniformCudaHipImpl(EventUniformCudaHipImpl const&) = delete; auto operator=(EventUniformCudaHipImpl const&) -> EventUniformCudaHipImpl& = delete; + ALPAKA_FN_HOST ~EventUniformCudaHipImpl() { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -89,10 +91,12 @@ namespace alpaka { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; } + ALPAKA_FN_HOST auto operator==(EventUniformCudaHipRt const& rhs) const -> bool { return (m_spEventImpl == rhs.m_spEventImpl); } + ALPAKA_FN_HOST auto operator!=(EventUniformCudaHipRt const& rhs) const -> bool { return !((*this) == rhs); @@ -106,6 +110,7 @@ namespace alpaka public: std::shared_ptr> m_spEventImpl; }; + namespace trait { //! The CUDA/HIP RT device event device type trait specialization. @@ -114,6 +119,7 @@ namespace alpaka { using type = DevUniformCudaHipRt; }; + //! The CUDA/HIP RT device event device get trait specialization. template struct GetDev> @@ -154,6 +160,7 @@ namespace alpaka ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::eventRecord(event.getNativeHandle(), queue.getNativeHandle())); } }; + //! The CUDA/HIP RT queue enqueue trait specialization. template struct Enqueue, EventUniformCudaHipRt> @@ -183,6 +190,7 @@ namespace alpaka ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::eventSynchronize(event.getNativeHandle())); } }; + //! The CUDA/HIP RT queue event wait trait specialization. template struct WaiterWaitFor, EventUniformCudaHipRt> @@ -197,6 +205,7 @@ namespace alpaka TApi::streamWaitEvent(queue.getNativeHandle(), event.getNativeHandle(), 0)); } }; + //! The CUDA/HIP RT queue event wait trait specialization. template struct WaiterWaitFor, EventUniformCudaHipRt> @@ -211,6 +220,7 @@ namespace alpaka TApi::streamWaitEvent(queue.getNativeHandle(), event.getNativeHandle(), 0)); } }; + //! The CUDA/HIP RT device event wait trait specialization. //! //! Any future work submitted in any queue of this device will wait for event to complete before beginning @@ -237,6 +247,7 @@ namespace alpaka } } }; + //! The CUDA/HIP RT event native handle trait specialization. template struct NativeHandle> diff --git a/include/alpaka/extent/Traits.hpp b/include/alpaka/extent/Traits.hpp index 2ed3869cc9cd..61f3c2aa58a6 100644 --- a/include/alpaka/extent/Traits.hpp +++ b/include/alpaka/extent/Traits.hpp @@ -23,129 +23,133 @@ namespace alpaka //! //! If not specialized explicitly it returns 1. template - struct [[deprecated("Specialize GetExtents instead")]] GetExtent{ - ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC static auto getExtent(TExtent const&) - ->Idx{return static_cast>(1); + struct [[deprecated("Specialize GetExtents instead")]] GetExtent + { + ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC static auto getExtent(TExtent const&) -> Idx + { + return static_cast>(1); + } // namespace trait + }; // namespace alpaka + + //! The GetExtents trait for getting the extents of an object as an alpaka::Vec. + template + struct GetExtents; } // namespace trait -}; // namespace alpaka - -//! The GetExtents trait for getting the extents of an object as an alpaka::Vec. -template -struct GetExtents; -} // namespace trait - -//! \return The extent in the given dimension. -ALPAKA_NO_HOST_ACC_WARNING -template -[[deprecated("use getExtents(extent)[Tidx] instead")]] ALPAKA_FN_HOST_ACC auto getExtent( - TExtent const& extent = TExtent()) -> Idx -{ + + //! \return The extent in the given dimension. + ALPAKA_NO_HOST_ACC_WARNING + template + [[deprecated("use getExtents(extent)[Tidx] instead")]] ALPAKA_FN_HOST_ACC auto getExtent( + TExtent const& extent = TExtent()) -> Idx + { #if BOOST_COMP_CLANG || BOOST_COMP_GNUC # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif - return trait::GetExtent, TExtent>::getExtent(extent); + return trait::GetExtent, TExtent>::getExtent(extent); #if BOOST_COMP_CLANG || BOOST_COMP_GNUC # pragma GCC diagnostic pop #endif -} - -//! \return The extents of the given object. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC auto getExtents(T const& object) -> Vec, Idx> -{ - return trait::GetExtents{}(object); -} - -//! \tparam T has to specialize GetExtent. -//! \return The extents of the given object. -ALPAKA_NO_HOST_ACC_WARNING -template -[[deprecated("use getExtents() instead")]] ALPAKA_FN_HOST_ACC auto constexpr getExtentVec(T const& object = {}) - -> Vec, Idx> -{ - return getExtents(object); -} - -//! \tparam T has to specialize GetExtent. -//! \return The extent but only the last TDim elements. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC auto constexpr getExtentVecEnd(T const& object = {}) -> Vec> -{ - static_assert(TDim::value <= Dim::value, "Cannot get more items than the extent holds"); + } - [[maybe_unused]] auto const e = getExtents(object); - Vec> v{}; - if constexpr(TDim::value > 0) + //! \return The extents of the given object. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC auto getExtents(T const& object) -> Vec, Idx> { - for(unsigned i = 0; i < TDim::value; i++) - v[i] = e[(Dim::value - TDim::value) + i]; + return trait::GetExtents{}(object); } - return v; -} -//! \return The width. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC auto getWidth(TExtent const& extent = TExtent()) -> Idx -{ - if constexpr(Dim::value >= 1) - return getExtents(extent)[Dim::value - 1u]; - else - return 1; -} -//! \return The height. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC auto getHeight(TExtent const& extent = TExtent()) -> Idx -{ - if constexpr(Dim::value >= 2) - return getExtents(extent)[Dim::value - 2u]; - else - return 1; -} -//! \return The depth. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC auto getDepth(TExtent const& extent = TExtent()) -> Idx -{ - if constexpr(Dim::value >= 3) - return getExtents(extent)[Dim::value - 3u]; - else - return 1; -} - -//! \return The product of the extents of the given object. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC auto getExtentProduct(T const& object) -> Idx -{ - return getExtents(object).prod(); -} + //! \tparam T has to specialize GetExtent. + //! \return The extents of the given object. + ALPAKA_NO_HOST_ACC_WARNING + template + [[deprecated("use getExtents() instead")]] ALPAKA_FN_HOST_ACC constexpr auto getExtentVec(T const& object = {}) + -> Vec, Idx> + { + return getExtents(object); + } -namespace trait -{ - //! The Vec extent get trait specialization. - template - struct GetExtents> + //! \tparam T has to specialize GetExtent. + //! \return The extent but only the last TDim elements. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC constexpr auto getExtentVecEnd(T const& object = {}) -> Vec> { - ALPAKA_NO_HOST_ACC_WARNING - ALPAKA_FN_HOST_ACC constexpr auto operator()(Vec const& extent) const -> Vec + static_assert(TDim::value <= Dim::value, "Cannot get more items than the extent holds"); + + [[maybe_unused]] auto const e = getExtents(object); + Vec> v{}; + if constexpr(TDim::value > 0) { - return extent; + for(unsigned i = 0; i < TDim::value; i++) + v[i] = e[(Dim::value - TDim::value) + i]; } - }; + return v; + } + + //! \return The width. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC auto getWidth(TExtent const& extent = TExtent()) -> Idx + { + if constexpr(Dim::value >= 1) + return getExtents(extent)[Dim::value - 1u]; + else + return 1; + } + + //! \return The height. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC auto getHeight(TExtent const& extent = TExtent()) -> Idx + { + if constexpr(Dim::value >= 2) + return getExtents(extent)[Dim::value - 2u]; + else + return 1; + } + + //! \return The depth. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC auto getDepth(TExtent const& extent = TExtent()) -> Idx + { + if constexpr(Dim::value >= 3) + return getExtents(extent)[Dim::value - 3u]; + else + return 1; + } - template - struct GetExtents>> + //! \return The product of the extents of the given object. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC auto getExtentProduct(T const& object) -> Idx + { + return getExtents(object).prod(); + } + + namespace trait { - ALPAKA_NO_HOST_ACC_WARNING - ALPAKA_FN_HOST_ACC auto operator()(Integral i) const + //! The Vec extent get trait specialization. + template + struct GetExtents> { - return Vec{i}; - } - }; -} // namespace trait + ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_HOST_ACC constexpr auto operator()(Vec const& extent) const -> Vec + { + return extent; + } + }; + + template + struct GetExtents>> + { + ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_HOST_ACC auto operator()(Integral i) const + { + return Vec{i}; + } + }; + } // namespace trait } // namespace alpaka diff --git a/include/alpaka/idx/Accessors.hpp b/include/alpaka/idx/Accessors.hpp index 8f790900d59e..f329728ca208 100644 --- a/include/alpaka/idx/Accessors.hpp +++ b/include/alpaka/idx/Accessors.hpp @@ -24,6 +24,7 @@ namespace alpaka { return trait::GetIdx::getIdx(idx, workDiv); } + //! Get the indices requested. ALPAKA_NO_HOST_ACC_WARNING template @@ -39,6 +40,7 @@ namespace alpaka struct GetIdx { using ImplementationBase = concepts::ImplementationBase; + //! \return The index of the current thread in the grid. ALPAKA_NO_HOST_ACC_WARNING template @@ -54,6 +56,7 @@ namespace alpaka struct GetIdx { using ImplementationBase = concepts::ImplementationBase; + //! \return The index of the current thread in the grid. ALPAKA_NO_HOST_ACC_WARNING template @@ -79,6 +82,7 @@ namespace alpaka } }; } // namespace trait + //! Get the index of the first element this thread computes. ALPAKA_NO_HOST_ACC_WARNING template @@ -89,6 +93,7 @@ namespace alpaka { return gridThreadIdx * threadElemExtent; } + //! Get the index of the first element this thread computes. ALPAKA_NO_HOST_ACC_WARNING template @@ -98,6 +103,7 @@ namespace alpaka auto const threadElemExtent(alpaka::getWorkDiv(idxWorkDiv)); return getIdxThreadFirstElem(idxWorkDiv, gridThreadIdx, threadElemExtent); } + //! Get the index of the first element this thread computes. ALPAKA_NO_HOST_ACC_WARNING template diff --git a/include/alpaka/idx/Traits.hpp b/include/alpaka/idx/Traits.hpp index 116544393a69..88e2365e2b89 100644 --- a/include/alpaka/idx/Traits.hpp +++ b/include/alpaka/idx/Traits.hpp @@ -12,6 +12,7 @@ namespace alpaka struct ConceptIdxBt { }; + struct ConceptIdxGb { }; diff --git a/include/alpaka/idx/bt/IdxBtRefThreadIdMap.hpp b/include/alpaka/idx/bt/IdxBtRefThreadIdMap.hpp index 37ae482c2729..4d94d0f39d0a 100644 --- a/include/alpaka/idx/bt/IdxBtRefThreadIdMap.hpp +++ b/include/alpaka/idx/bt/IdxBtRefThreadIdMap.hpp @@ -30,6 +30,7 @@ namespace alpaka : m_threadToIndexMap(mThreadToIndices) { } + ALPAKA_FN_HOST IdxBtRefThreadIdMap(IdxBtRefThreadIdMap const&) = delete; ALPAKA_FN_HOST auto operator=(IdxBtRefThreadIdMap const&) -> IdxBtRefThreadIdMap& = delete; diff --git a/include/alpaka/kernel/TaskKernelGpuCudaRt.hpp b/include/alpaka/kernel/TaskKernelGpuCudaRt.hpp index 98b968e9c542..59aa4761cef7 100644 --- a/include/alpaka/kernel/TaskKernelGpuCudaRt.hpp +++ b/include/alpaka/kernel/TaskKernelGpuCudaRt.hpp @@ -13,6 +13,6 @@ namespace alpaka { template using TaskKernelGpuCudaRt = TaskKernelGpuUniformCudaHipRt; -} +} // namespace alpaka #endif // ALPAKA_ACC_GPU_CUDA_ENABLED diff --git a/include/alpaka/kernel/TaskKernelGpuHipRt.hpp b/include/alpaka/kernel/TaskKernelGpuHipRt.hpp index 6d0a96c5accb..b4b284c679da 100644 --- a/include/alpaka/kernel/TaskKernelGpuHipRt.hpp +++ b/include/alpaka/kernel/TaskKernelGpuHipRt.hpp @@ -13,6 +13,6 @@ namespace alpaka { template using TaskKernelGpuHipRt = TaskKernelGpuUniformCudaHipRt; -} +} // namespace alpaka #endif // ALPAKA_ACC_GPU_HIP_ENABLED diff --git a/include/alpaka/kernel/Traits.hpp b/include/alpaka/kernel/Traits.hpp index 384b82873759..d9720479c816 100644 --- a/include/alpaka/kernel/Traits.hpp +++ b/include/alpaka/kernel/Traits.hpp @@ -216,6 +216,7 @@ namespace alpaka } }; } // namespace detail + //! Creates a kernel execution task. //! //! \tparam TAcc The accelerator type. @@ -251,8 +252,8 @@ namespace alpaka "The idx type of TAcc and the idx type of TWorkDiv have to be identical!"); #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL - std::cout << __func__ << " workDiv: " << workDiv - << ", kernelFnObj: " << core::demangled << std::endl; + std::cout << __func__ << " workDiv: " << workDiv << ", kernelFnObj: " << core::demangled + << std::endl; #endif return trait::CreateTaskKernel::createTaskKernel( workDiv, diff --git a/include/alpaka/math/MathStdLib.hpp b/include/alpaka/math/MathStdLib.hpp index 5a3080637c40..e74380f300d2 100644 --- a/include/alpaka/math/MathStdLib.hpp +++ b/include/alpaka/math/MathStdLib.hpp @@ -263,9 +263,8 @@ namespace alpaka::math if constexpr(std::is_integral_v && std::is_integral_v) return max(x, y); else if constexpr( - is_decayed_v< - Tx, - float> || is_decayed_v || is_decayed_v || is_decayed_v) + is_decayed_v || is_decayed_v || is_decayed_v + || is_decayed_v) return fmax(x, y); else static_assert(!sizeof(Tx), "Unsupported data type"); @@ -286,9 +285,8 @@ namespace alpaka::math if constexpr(std::is_integral_v && std::is_integral_v) return min(x, y); else if constexpr( - is_decayed_v< - Tx, - float> || is_decayed_v || is_decayed_v || is_decayed_v) + is_decayed_v || is_decayed_v || is_decayed_v + || is_decayed_v) return fmin(x, y); else static_assert(!sizeof(Tx), "Unsupported data type"); diff --git a/include/alpaka/math/MathUniformCudaHipBuiltIn.hpp b/include/alpaka/math/MathUniformCudaHipBuiltIn.hpp index 60ac5647d028..ef89423f7c62 100644 --- a/include/alpaka/math/MathUniformCudaHipBuiltIn.hpp +++ b/include/alpaka/math/MathUniformCudaHipBuiltIn.hpp @@ -933,9 +933,8 @@ namespace alpaka::math else if constexpr(is_decayed_v && is_decayed_v) return ::fmaxf(x, y); else if constexpr( - is_decayed_v< - Tx, - double> || is_decayed_v || (is_decayed_v && std::is_integral_v) + is_decayed_v || is_decayed_v + || (is_decayed_v && std::is_integral_v) || (std::is_integral_v && is_decayed_v) ) return ::fmax(x, y); else @@ -967,9 +966,8 @@ namespace alpaka::math else if constexpr(is_decayed_v && is_decayed_v) return ::fminf(x, y); else if constexpr( - is_decayed_v< - Tx, - double> || is_decayed_v || (is_decayed_v && std::is_integral_v) + is_decayed_v || is_decayed_v + || (is_decayed_v && std::is_integral_v) || (std::is_integral_v && is_decayed_v) ) return ::fmin(x, y); else @@ -1076,7 +1074,6 @@ namespace alpaka::math } }; - //! The CUDA round trait specialization. template struct Round>> diff --git a/include/alpaka/math/Traits.hpp b/include/alpaka/math/Traits.hpp index 17aa0eeb6615..c63b662acbbc 100644 --- a/include/alpaka/math/Traits.hpp +++ b/include/alpaka/math/Traits.hpp @@ -1333,6 +1333,7 @@ namespace alpaka::math using ImplementationBase = concepts::ImplementationBase; return trait::Round{}(round_ctx, arg); } + //! Computes the nearest integer value to arg (in integer format), rounding halfway cases away from zero, //! regardless of the current rounding mode. //! @@ -1347,6 +1348,7 @@ namespace alpaka::math using ImplementationBase = concepts::ImplementationBase; return trait::Lround{}(lround_ctx, arg); } + //! Computes the nearest integer value to arg (in integer format), rounding halfway cases away from zero, //! regardless of the current rounding mode. //! @@ -1424,7 +1426,6 @@ namespace alpaka::math trait::SinCos{}(sincos_ctx, arg, result_sin, result_cos); } - //! Computes the square root of arg. //! //! Valid real arguments are non-negative. For other values the result diff --git a/include/alpaka/mem/buf/BufCpu.hpp b/include/alpaka/mem/buf/BufCpu.hpp index e1fa094846e0..4bfc91c73332 100644 --- a/include/alpaka/mem/buf/BufCpu.hpp +++ b/include/alpaka/mem/buf/BufCpu.hpp @@ -64,8 +64,10 @@ namespace alpaka << std::endl; #endif } + BufCpuImpl(BufCpuImpl&&) = delete; auto operator=(BufCpuImpl&&) -> BufCpuImpl& = delete; + ALPAKA_FN_HOST ~BufCpuImpl() { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -106,6 +108,7 @@ namespace alpaka { using type = DevCpu; }; + //! The BufCpu device get trait specialization. template struct GetDev> @@ -148,11 +151,13 @@ namespace alpaka { return buf.m_spBufCpuImpl->m_pMem; } + ALPAKA_FN_HOST static auto getPtrNative(BufCpu& buf) -> TElem* { return buf.m_spBufCpuImpl->m_pMem; } }; + //! The BufCpu pointer on device get trait specialization. template struct GetPtrDev, DevCpu> @@ -169,6 +174,7 @@ namespace alpaka throw std::runtime_error("The buffer is not accessible from the given device!"); } } + ALPAKA_FN_HOST static auto getPtrDev(BufCpu& buf, DevCpu const& dev) -> TElem* { if(dev == getDev(buf)) @@ -212,6 +218,7 @@ namespace alpaka return BufCpu(dev, memPtr, std::move(deleter), extent); } }; + //! The BufCpu stream-ordered memory allocation trait specialization. template struct AsyncBufAlloc diff --git a/include/alpaka/mem/buf/BufCpuSycl.hpp b/include/alpaka/mem/buf/BufCpuSycl.hpp index d9bf620f62c2..d63eebf540ca 100644 --- a/include/alpaka/mem/buf/BufCpuSycl.hpp +++ b/include/alpaka/mem/buf/BufCpuSycl.hpp @@ -13,6 +13,6 @@ namespace alpaka { template using BufCpuSycl = BufGenericSycl; -} +} // namespace alpaka #endif diff --git a/include/alpaka/mem/buf/BufCudaRt.hpp b/include/alpaka/mem/buf/BufCudaRt.hpp index c34885f4735d..a5e0020bdb62 100644 --- a/include/alpaka/mem/buf/BufCudaRt.hpp +++ b/include/alpaka/mem/buf/BufCudaRt.hpp @@ -13,6 +13,6 @@ namespace alpaka { template using BufCudaRt = BufUniformCudaHipRt; -} +} // namespace alpaka #endif // ALPAKA_ACC_GPU_CUDA_ENABLED diff --git a/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp b/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp index 6a46a4ef9a4c..2dca26f1984f 100644 --- a/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp +++ b/include/alpaka/mem/buf/BufFpgaSyclIntel.hpp @@ -13,6 +13,6 @@ namespace alpaka { template using BufFpgaSyclIntel = BufGenericSycl; -} +} // namespace alpaka #endif diff --git a/include/alpaka/mem/buf/BufGenericSycl.hpp b/include/alpaka/mem/buf/BufGenericSycl.hpp index 98f3458e3775..b4a5fd94ed54 100644 --- a/include/alpaka/mem/buf/BufGenericSycl.hpp +++ b/include/alpaka/mem/buf/BufGenericSycl.hpp @@ -252,6 +252,7 @@ namespace alpaka::trait { return getPtrNative(buf); } + static auto getPtrDev(BufCpu& buf, DevGenericSycl const&) -> TElem* { return getPtrNative(buf); diff --git a/include/alpaka/mem/buf/BufGpuSyclIntel.hpp b/include/alpaka/mem/buf/BufGpuSyclIntel.hpp index 907ec4488e7d..dd20f8a39648 100644 --- a/include/alpaka/mem/buf/BufGpuSyclIntel.hpp +++ b/include/alpaka/mem/buf/BufGpuSyclIntel.hpp @@ -13,6 +13,6 @@ namespace alpaka { template using BufGpuSyclIntel = BufGenericSycl; -} +} // namespace alpaka #endif diff --git a/include/alpaka/mem/buf/BufHipRt.hpp b/include/alpaka/mem/buf/BufHipRt.hpp index 9ffacf2a92ce..4a59bc46e5d5 100644 --- a/include/alpaka/mem/buf/BufHipRt.hpp +++ b/include/alpaka/mem/buf/BufHipRt.hpp @@ -13,6 +13,6 @@ namespace alpaka { template using BufHipRt = BufUniformCudaHipRt; -} +} // namespace alpaka #endif // ALPAKA_ACC_GPU_HIP_ENABLED diff --git a/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp b/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp index 554077562aaf..04dcbb10afd3 100644 --- a/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp +++ b/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp @@ -140,6 +140,7 @@ namespace alpaka { return buf.m_spMem.get(); } + ALPAKA_FN_HOST static auto getPtrNative(BufUniformCudaHipRt& buf) -> TElem* { return buf.m_spMem.get(); @@ -163,6 +164,7 @@ namespace alpaka throw std::runtime_error("The buffer is not accessible from the given device!"); } } + ALPAKA_FN_HOST static auto getPtrDev( BufUniformCudaHipRt& buf, DevUniformCudaHipRt const& dev) -> TElem* @@ -393,6 +395,7 @@ namespace alpaka return pDev; } + ALPAKA_FN_HOST static auto getPtrDev(BufCpu& buf, DevUniformCudaHipRt const&) -> TElem* { diff --git a/include/alpaka/mem/buf/Traits.hpp b/include/alpaka/mem/buf/Traits.hpp index d3e5c097535a..5494b7fa5619 100644 --- a/include/alpaka/mem/buf/Traits.hpp +++ b/include/alpaka/mem/buf/Traits.hpp @@ -92,7 +92,7 @@ namespace alpaka //! \tparam TDev The type of device to allocate the buffer on. //! \tparam TDim The dimensionality of the buffer to allocate. template - constexpr inline bool hasAsyncBufSupport = trait::HasAsyncBufSupport::value; + inline constexpr bool hasAsyncBufSupport = trait::HasAsyncBufSupport::value; #if BOOST_COMP_CLANG # pragma clang diagnostic pop #endif @@ -153,7 +153,7 @@ namespace alpaka //! //! \tparam TPlatform The platform from which the buffer is accessible. template - constexpr inline bool hasMappedBufSupport = trait::HasMappedBufSupport::value; + inline constexpr bool hasMappedBufSupport = trait::HasMappedBufSupport::value; #if BOOST_COMP_CLANG # pragma clang diagnostic pop #endif diff --git a/include/alpaka/mem/buf/cpu/Copy.hpp b/include/alpaka/mem/buf/cpu/Copy.hpp index 238001de6cca..dd707bdaa227 100644 --- a/include/alpaka/mem/buf/cpu/Copy.hpp +++ b/include/alpaka/mem/buf/cpu/Copy.hpp @@ -17,7 +17,7 @@ namespace alpaka { class DevCpu; -} +} // namespace alpaka namespace alpaka { diff --git a/include/alpaka/mem/buf/sycl/Set.hpp b/include/alpaka/mem/buf/sycl/Set.hpp index b11d2508b933..17187fd0fa84 100644 --- a/include/alpaka/mem/buf/sycl/Set.hpp +++ b/include/alpaka/mem/buf/sycl/Set.hpp @@ -192,7 +192,6 @@ namespace alpaka } // namespace detail - namespace trait { //! The SYCL device memory set trait specialization. diff --git a/include/alpaka/mem/view/Traits.hpp b/include/alpaka/mem/view/Traits.hpp index 091799e832c3..8493a2d343b9 100644 --- a/include/alpaka/mem/view/Traits.hpp +++ b/include/alpaka/mem/view/Traits.hpp @@ -32,7 +32,7 @@ namespace alpaka { //! Calculate the pitches purely from the extents. template - ALPAKA_FN_HOST_ACC constexpr inline auto calculatePitchesFromExtents(Vec const& extent) + ALPAKA_FN_HOST_ACC inline constexpr auto calculatePitchesFromExtents(Vec const& extent) { Vec pitchBytes{}; constexpr auto dim = TIdx{TDim::value}; @@ -66,13 +66,13 @@ namespace alpaka { using ViewIdx = Idx; - ALPAKA_FN_HOST static auto getPitchBytes(TView const& view)->ViewIdx + ALPAKA_FN_HOST static auto getPitchBytes(TView const& view) -> ViewIdx { return getPitchBytesDefault(view); } private: - static auto getPitchBytesDefault(TView const& view)->ViewIdx + static auto getPitchBytesDefault(TView const& view) -> ViewIdx { constexpr auto idx = TIdx::value; constexpr auto viewDim = Dim::value; @@ -353,6 +353,7 @@ namespace alpaka os << rowSuffix; } }; + template struct Print::value - 1u>, TView> { @@ -385,6 +386,7 @@ namespace alpaka } }; } // namespace detail + //! Prints the content of the view to the given queue. // \TODO: Add precision flag. // \TODO: Add column alignment flag. @@ -535,14 +537,12 @@ namespace alpaka constexpr ByteIndexedAccessor() noexcept = default; - ALPAKA_FN_HOST_ACC - constexpr data_handle_type offset(data_handle_type p, size_t i) const noexcept + ALPAKA_FN_HOST_ACC constexpr data_handle_type offset(data_handle_type p, size_t i) const noexcept { return p + i; } - ALPAKA_FN_HOST_ACC - constexpr reference access(data_handle_type p, size_t i) const noexcept + ALPAKA_FN_HOST_ACC constexpr reference access(data_handle_type p, size_t i) const noexcept { assert(i % alignof(ElementType) == 0); # if BOOST_COMP_GNUC diff --git a/include/alpaka/mem/view/ViewAccessOps.hpp b/include/alpaka/mem/view/ViewAccessOps.hpp index 3ec262d3292a..27056678dd17 100644 --- a/include/alpaka/mem/view/ViewAccessOps.hpp +++ b/include/alpaka/mem/view/ViewAccessOps.hpp @@ -28,7 +28,8 @@ namespace alpaka::internal Dim, decltype(getPtrNative(std::declval())), decltype(getPitchesInBytes(std::declval())), - decltype(getExtents(std::declval()))>> = true; + decltype(getExtents(std::declval()))>> + = true; template struct ViewAccessOps diff --git a/include/alpaka/mem/view/ViewPlainPtr.hpp b/include/alpaka/mem/view/ViewPlainPtr.hpp index c3f225a9dc51..532545c9694c 100644 --- a/include/alpaka/mem/view/ViewPlainPtr.hpp +++ b/include/alpaka/mem/view/ViewPlainPtr.hpp @@ -85,8 +85,7 @@ namespace alpaka template struct GetExtents> { - ALPAKA_FN_HOST - auto operator()(ViewPlainPtr const& view) const + ALPAKA_FN_HOST auto operator()(ViewPlainPtr const& view) const { return view.m_extentElements; } @@ -100,6 +99,7 @@ namespace alpaka { return view.m_pMem; } + static auto getPtrNative(ViewPlainPtr& view) -> TElem* { return view.m_pMem; @@ -225,8 +225,7 @@ namespace alpaka template struct GetOffsets> { - ALPAKA_FN_HOST - auto operator()(ViewPlainPtr const&) const -> Vec + ALPAKA_FN_HOST auto operator()(ViewPlainPtr const&) const -> Vec { return Vec::zeros(); } diff --git a/include/alpaka/mem/view/ViewStdArray.hpp b/include/alpaka/mem/view/ViewStdArray.hpp index 123652e8f436..de01ec879378 100644 --- a/include/alpaka/mem/view/ViewStdArray.hpp +++ b/include/alpaka/mem/view/ViewStdArray.hpp @@ -67,6 +67,7 @@ namespace alpaka::trait { return std::data(view); } + ALPAKA_FN_HOST static auto getPtrNative(std::array& view) -> TElem* { return std::data(view); diff --git a/include/alpaka/mem/view/ViewStdVector.hpp b/include/alpaka/mem/view/ViewStdVector.hpp index cc10d214eb17..e09b370c1a22 100644 --- a/include/alpaka/mem/view/ViewStdVector.hpp +++ b/include/alpaka/mem/view/ViewStdVector.hpp @@ -65,6 +65,7 @@ namespace alpaka::trait { return std::data(view); } + ALPAKA_FN_HOST static auto getPtrNative(std::vector& view) -> TElem* { return std::data(view); diff --git a/include/alpaka/mem/view/ViewSubView.hpp b/include/alpaka/mem/view/ViewSubView.hpp index 514d76143246..88857b08cec1 100644 --- a/include/alpaka/mem/view/ViewSubView.hpp +++ b/include/alpaka/mem/view/ViewSubView.hpp @@ -163,6 +163,7 @@ namespace alpaka { return view.m_nativePtr; } + ALPAKA_FN_HOST static auto getPtrNative(ViewSubView& view) -> TElem* { return view.m_nativePtr; diff --git a/include/alpaka/meta/Apply.hpp b/include/alpaka/meta/Apply.hpp index 95023ce3488f..bcffe8cbc4e3 100644 --- a/include/alpaka/meta/Apply.hpp +++ b/include/alpaka/meta/Apply.hpp @@ -10,6 +10,7 @@ namespace alpaka::meta { template class TApplicant> struct ApplyImpl; + template class TList, template class TApplicant, typename... T> struct ApplyImpl, TApplicant> { diff --git a/include/alpaka/meta/CartesianProduct.hpp b/include/alpaka/meta/CartesianProduct.hpp index 5731e447fa0e..dc1a1d68d407 100644 --- a/include/alpaka/meta/CartesianProduct.hpp +++ b/include/alpaka/meta/CartesianProduct.hpp @@ -15,30 +15,35 @@ namespace alpaka::meta { template struct CartesianProductImplHelper; + // Stop condition. template class TList, typename... Ts> struct CartesianProductImplHelper> { using type = TList; }; + // Catches first empty tuple. template class TList, typename... Ts> struct CartesianProductImplHelper>, Ts...> { using type = TList<>; }; + // Catches any empty tuple except first. template class TList, typename... Ts, typename... Rests> struct CartesianProductImplHelper, TList<>, Rests...> { using type = TList<>; }; + template class TList, typename... X, typename H, typename... Rests> struct CartesianProductImplHelper, TList, Rests...> { using type1 = TList>...>; using type = typename CartesianProductImplHelper::type; }; + template< template class TList, @@ -58,12 +63,14 @@ namespace alpaka::meta template class TList, typename... Ts> struct CartesianProductImpl; + // The base case for no input returns an empty sequence. template class TList> struct CartesianProductImpl { using type = TList<>; }; + // R is the return type, Head is the first input list template class TList, template class Head, typename... Ts, typename... Tail> struct CartesianProductImpl, Tail...> diff --git a/include/alpaka/meta/Concatenate.hpp b/include/alpaka/meta/Concatenate.hpp index f639e74c7f19..9133eb659587 100644 --- a/include/alpaka/meta/Concatenate.hpp +++ b/include/alpaka/meta/Concatenate.hpp @@ -10,17 +10,20 @@ namespace alpaka::meta { template struct ConcatenateImpl; + template struct ConcatenateImpl { using type = T; }; + template class TList, typename... As, typename... Bs, typename... TRest> struct ConcatenateImpl, TList, TRest...> { using type = typename ConcatenateImpl, TRest...>::type; }; } // namespace detail + template using Concatenate = typename detail::ConcatenateImpl::type; } // namespace alpaka::meta diff --git a/include/alpaka/meta/CudaVectorArrayWrapper.hpp b/include/alpaka/meta/CudaVectorArrayWrapper.hpp index a2f1a261d47a..57010ef27669 100644 --- a/include/alpaka/meta/CudaVectorArrayWrapper.hpp +++ b/include/alpaka/meta/CudaVectorArrayWrapper.hpp @@ -133,7 +133,8 @@ namespace alpaka::meta struct CudaVectorArrayWrapper : public detail::CudaVectorArrayTypeTraits::type { using value_type = TScalar; - constexpr static unsigned size = 4; + static constexpr unsigned size = 4; + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(std::initializer_list init) { auto it = std::begin(init); @@ -142,6 +143,7 @@ namespace alpaka::meta this->z = *it++; this->w = *it++; } + template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(Other const& o) { @@ -154,6 +156,7 @@ namespace alpaka::meta this->z = o[2]; this->w = o[3]; } + ALPAKA_FN_HOST_ACC constexpr operator std::array() const { std::array ret; @@ -181,7 +184,8 @@ namespace alpaka::meta struct CudaVectorArrayWrapper : public detail::CudaVectorArrayTypeTraits::type { using value_type = TScalar; - constexpr static unsigned size = 3; + static constexpr unsigned size = 3; + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(std::initializer_list init) { auto it = std::begin(init); @@ -189,6 +193,7 @@ namespace alpaka::meta this->y = *it++; this->z = *it++; } + template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(Other const& o) { @@ -200,6 +205,7 @@ namespace alpaka::meta this->y = o[1]; this->z = o[2]; } + ALPAKA_FN_HOST_ACC constexpr operator std::array() const { std::array ret; @@ -226,13 +232,15 @@ namespace alpaka::meta struct CudaVectorArrayWrapper : public detail::CudaVectorArrayTypeTraits::type { using value_type = TScalar; - constexpr static unsigned size = 2; + static constexpr unsigned size = 2; + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(std::initializer_list init) { auto it = std::begin(init); this->x = *it++; this->y = *it++; } + template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(Other const& o) { @@ -243,6 +251,7 @@ namespace alpaka::meta this->x = o[0]; this->y = o[1]; } + ALPAKA_FN_HOST_ACC constexpr operator std::array() const { std::array ret; @@ -268,12 +277,14 @@ namespace alpaka::meta struct CudaVectorArrayWrapper : public detail::CudaVectorArrayTypeTraits::type { using value_type = TScalar; - constexpr static unsigned size = 1; + static constexpr unsigned size = 1; + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(std::initializer_list init) { auto it = std::begin(init); this->x = *it; } + template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE CudaVectorArrayWrapper(Other const& o) { @@ -283,6 +294,7 @@ namespace alpaka::meta "Can only convert between vectors of same element type."); this->x = o[0]; } + ALPAKA_FN_HOST_ACC constexpr operator std::array() const { std::array ret; diff --git a/include/alpaka/meta/Filter.hpp b/include/alpaka/meta/Filter.hpp index 79a7ec96063f..4a260777a4e5 100644 --- a/include/alpaka/meta/Filter.hpp +++ b/include/alpaka/meta/Filter.hpp @@ -14,11 +14,13 @@ namespace alpaka::meta { template class TList, template class TPred, typename... Ts> struct FilterImplHelper; + template class TList, template class TPred> struct FilterImplHelper { using type = TList<>; }; + template class TList, template class TPred, typename T, typename... Ts> struct FilterImplHelper { @@ -30,6 +32,7 @@ namespace alpaka::meta template class TPred> struct FilterImpl; + template class TList, template class TPred, typename... Ts> struct FilterImpl, TPred> { diff --git a/include/alpaka/meta/Fold.hpp b/include/alpaka/meta/Fold.hpp index c91c71e4dff1..1a258f4d8d67 100644 --- a/include/alpaka/meta/Fold.hpp +++ b/include/alpaka/meta/Fold.hpp @@ -14,6 +14,7 @@ namespace alpaka::meta { return t; } + ALPAKA_NO_HOST_ACC_WARNING template ALPAKA_FN_HOST_ACC constexpr auto foldr(TFnObj const& f, T0 const& t0, T1 const& t1, Ts const&... ts) diff --git a/include/alpaka/meta/ForEachType.hpp b/include/alpaka/meta/ForEachType.hpp index 6a33410fd1c6..030851f70cc0 100644 --- a/include/alpaka/meta/ForEachType.hpp +++ b/include/alpaka/meta/ForEachType.hpp @@ -14,6 +14,7 @@ namespace alpaka::meta { template struct ForEachTypeHelper; + template class TList> struct ForEachTypeHelper> { @@ -23,6 +24,7 @@ namespace alpaka::meta { } }; + template class TList, typename T, typename... Ts> struct ForEachTypeHelper> { diff --git a/include/alpaka/meta/Functional.hpp b/include/alpaka/meta/Functional.hpp index ca050f919c59..0a5d8486be1e 100644 --- a/include/alpaka/meta/Functional.hpp +++ b/include/alpaka/meta/Functional.hpp @@ -12,8 +12,7 @@ namespace alpaka::meta struct min { ALPAKA_NO_HOST_ACC_WARNING - ALPAKA_FN_HOST_ACC - constexpr auto operator()(const T& lhs, const T& rhs) const + ALPAKA_FN_HOST_ACC constexpr auto operator()(T const& lhs, T const& rhs) const { return (lhs < rhs) ? lhs : rhs; } @@ -23,8 +22,7 @@ namespace alpaka::meta struct max { ALPAKA_NO_HOST_ACC_WARNING - ALPAKA_FN_HOST_ACC - constexpr auto operator()(const T& lhs, const T& rhs) const + ALPAKA_FN_HOST_ACC constexpr auto operator()(T const& lhs, T const& rhs) const { return (lhs > rhs) ? lhs : rhs; } diff --git a/include/alpaka/meta/IntegerSequence.hpp b/include/alpaka/meta/IntegerSequence.hpp index 3ffba8db4ccf..bc8bfac39049 100644 --- a/include/alpaka/meta/IntegerSequence.hpp +++ b/include/alpaka/meta/IntegerSequence.hpp @@ -16,12 +16,14 @@ namespace alpaka::meta { template struct ConvertIntegerSequence; + template struct ConvertIntegerSequence> { using type = std::integer_sequence(Tvals)...>; }; } // namespace detail + template using ConvertIntegerSequence = typename detail::ConvertIntegerSequence::type; @@ -32,6 +34,7 @@ namespace alpaka::meta { static_assert(!TisSizeNegative, "MakeIntegerSequence requires N to be non-negative."); }; + template struct MakeIntegerSequenceHelper< false, @@ -43,6 +46,7 @@ namespace alpaka::meta { using type = std::integer_sequence; }; + template struct MakeIntegerSequenceHelper< false, @@ -81,6 +85,7 @@ namespace alpaka::meta //! Checks if the values in the index sequence are unique. template struct IntegerSequenceValuesUnique; + //! Checks if the values in the index sequence are unique. template struct IntegerSequenceValuesUnique> @@ -91,12 +96,14 @@ namespace alpaka::meta //! Checks if the integral values are within the given range. template struct IntegralValuesInRange; + //! Checks if the integral values are within the given range. template struct IntegralValuesInRange { static constexpr bool value = true; }; + //! Checks if the integral values are within the given range. template struct IntegralValuesInRange @@ -108,6 +115,7 @@ namespace alpaka::meta //! Checks if the values in the index sequence are within the given range. template struct IntegerSequenceValuesInRange; + //! Checks if the values in the index sequence are within the given range. template struct IntegerSequenceValuesInRange, T, Tmin, Tmax> diff --git a/include/alpaka/meta/Integral.hpp b/include/alpaka/meta/Integral.hpp index 0aa82beb6d62..48f486713336 100644 --- a/include/alpaka/meta/Integral.hpp +++ b/include/alpaka/meta/Integral.hpp @@ -15,11 +15,11 @@ namespace alpaka::meta std::is_integral_v && std::is_integral_v && ( // If the signdness is equal, the sizes have to be greater or equal to be a superset. - ((std::is_unsigned_v == std::is_unsigned_v) - && (sizeof(TSuperset) >= sizeof(TSubset))) + ((std::is_unsigned_v + == std::is_unsigned_v) &&(sizeof(TSuperset) >= sizeof(TSubset))) // If the signdness is non-equal, the superset has to have at least one bit more. - || ((std::is_unsigned_v != std::is_unsigned_v) - && (sizeof(TSuperset) > sizeof(TSubset))))>; + || ((std::is_unsigned_v != std::is_unsigned_v) &&( + sizeof(TSuperset) > sizeof(TSubset))))>; //! The type that has the higher max value. template diff --git a/include/alpaka/meta/Set.hpp b/include/alpaka/meta/Set.hpp index 57841c0c02f9..a4e387cf2ef5 100644 --- a/include/alpaka/meta/Set.hpp +++ b/include/alpaka/meta/Set.hpp @@ -18,11 +18,13 @@ namespace alpaka::meta template struct IsParameterPackSetImpl; + template<> struct IsParameterPackSetImpl<> { static constexpr bool value = true; }; + // Based on code by Roland Bock: https://gist.github.com/rbock/ad8eedde80c060132a18 // Linearly inherits from empty and checks if it has already inherited from this type. template @@ -35,6 +37,7 @@ namespace alpaka::meta static constexpr bool value = Base::value && !std::is_base_of_v, Base>; }; } // namespace detail + //! Trait that tells if the parameter pack contains only unique (no equal) types. template using IsParameterPackSet = detail::IsParameterPackSetImpl; @@ -43,12 +46,14 @@ namespace alpaka::meta { template struct IsSetImpl; + template class TList, typename... Ts> struct IsSetImpl> { static constexpr bool value = IsParameterPackSet::value; }; } // namespace detail + //! Trait that tells if the template contains only unique (no equal) types. template using IsSet = detail::IsSetImpl; diff --git a/include/alpaka/meta/Transform.hpp b/include/alpaka/meta/Transform.hpp index d763175edca1..d7d079a2ea10 100644 --- a/include/alpaka/meta/Transform.hpp +++ b/include/alpaka/meta/Transform.hpp @@ -10,6 +10,7 @@ namespace alpaka::meta { template class TOp> struct TransformImpl; + template class TList, typename... Ts, template class TOp> struct TransformImpl, TOp> { diff --git a/include/alpaka/offset/Traits.hpp b/include/alpaka/offset/Traits.hpp index 1f0f0a6f3e7e..c2edb3bc3d5c 100644 --- a/include/alpaka/offset/Traits.hpp +++ b/include/alpaka/offset/Traits.hpp @@ -20,109 +20,113 @@ namespace alpaka //! //! If not specialized explicitly it returns 0. template - struct [[deprecated("Specialize GetOffsets instead")]] GetOffset{ - ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC static auto getOffset(TOffsets const&) - ->Idx{return static_cast>(0); - } // namespace trait -}; // namespace alpaka + struct [[deprecated("Specialize GetOffsets instead")]] GetOffset + { + ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC static auto getOffset(TOffsets const&) -> Idx + { + return static_cast>(0); + } // namespace trait + }; // namespace alpaka -//! The GetOffsets trait for getting the offsets of an object as an alpaka::Vec. -template -struct GetOffsets; -} // namespace trait + //! The GetOffsets trait for getting the offsets of an object as an alpaka::Vec. + template + struct GetOffsets; + } // namespace trait -//! \return The offset in the given dimension. -ALPAKA_NO_HOST_ACC_WARNING -template -[[deprecated("use getOffsets(offsets)[Tidx] instead")]] ALPAKA_FN_HOST_ACC auto getOffset(TOffsets const& offsets) - -> Idx -{ + //! \return The offset in the given dimension. + ALPAKA_NO_HOST_ACC_WARNING + template + [[deprecated("use getOffsets(offsets)[Tidx] instead")]] ALPAKA_FN_HOST_ACC auto getOffset(TOffsets const& offsets) + -> Idx + { #if BOOST_COMP_CLANG || BOOST_COMP_GNUC # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif - return trait::GetOffset, TOffsets>::getOffset(offsets); + return trait::GetOffset, TOffsets>::getOffset(offsets); #if BOOST_COMP_CLANG || BOOST_COMP_GNUC # pragma GCC diagnostic pop #endif -} + } -//! \return The extents of the given object. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC auto getOffsets(T const& object) -> Vec, Idx> -{ - return trait::GetOffsets{}(object); -} + //! \return The extents of the given object. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC auto getOffsets(T const& object) -> Vec, Idx> + { + return trait::GetOffsets{}(object); + } -//! \tparam T has to specialize GetOffsets. -//! \return The offset vector. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC constexpr auto getOffsetVec(T const& object = {}) -> Vec, Idx> -{ - return getOffsets(object); -} + //! \tparam T has to specialize GetOffsets. + //! \return The offset vector. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC constexpr auto getOffsetVec(T const& object = {}) -> Vec, Idx> + { + return getOffsets(object); + } -//! \tparam T has to specialize GetOffsets. -//! \return The offset vector but only the last TDim elements. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC constexpr auto getOffsetVecEnd(T const& object = {}) -> Vec> -{ - static_assert(TDim::value <= Dim::value, "Cannot get more items than the offsets hold"); + //! \tparam T has to specialize GetOffsets. + //! \return The offset vector but only the last TDim elements. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC constexpr auto getOffsetVecEnd(T const& object = {}) -> Vec> + { + static_assert(TDim::value <= Dim::value, "Cannot get more items than the offsets hold"); - auto const o = getOffsets(object); - Vec> v; - for(unsigned i = 0; i < TDim::value; i++) - v[i] = o[(Dim::value - TDim::value) + i]; - return v; -} + auto const o = getOffsets(object); + Vec> v; + for(unsigned i = 0; i < TDim::value; i++) + v[i] = o[(Dim::value - TDim::value) + i]; + return v; + } -//! \return The offset in x dimension. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC auto getOffsetX(TOffsets const& offsets = TOffsets()) -> Idx -{ - return getOffsets(offsets)[Dim::value - 1u]; -} -//! \return The offset in y dimension. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC auto getOffsetY(TOffsets const& offsets = TOffsets()) -> Idx -{ - return getOffsets(offsets)[Dim::value - 2u]; -} -//! \return The offset in z dimension. -ALPAKA_NO_HOST_ACC_WARNING -template -ALPAKA_FN_HOST_ACC auto getOffsetZ(TOffsets const& offsets = TOffsets()) -> Idx -{ - return getOffsets(offsets)[Dim::value - 3u]; -} + //! \return The offset in x dimension. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC auto getOffsetX(TOffsets const& offsets = TOffsets()) -> Idx + { + return getOffsets(offsets)[Dim::value - 1u]; + } -namespace trait -{ - //! The Vec offset get trait specialization. - template - struct GetOffsets> + //! \return The offset in y dimension. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC auto getOffsetY(TOffsets const& offsets = TOffsets()) -> Idx { - ALPAKA_NO_HOST_ACC_WARNING - ALPAKA_FN_HOST_ACC constexpr auto operator()(Vec const& offsets) const -> Vec - { - return offsets; - } - }; + return getOffsets(offsets)[Dim::value - 2u]; + } - //! The unsigned integral x offset get trait specialization. - template - struct GetOffsets>> + //! \return The offset in z dimension. + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_HOST_ACC auto getOffsetZ(TOffsets const& offsets = TOffsets()) -> Idx { - ALPAKA_NO_HOST_ACC_WARNING - ALPAKA_FN_HOST_ACC constexpr auto operator()(TIntegral const& i) const + return getOffsets(offsets)[Dim::value - 3u]; + } + + namespace trait + { + //! The Vec offset get trait specialization. + template + struct GetOffsets> { - return Vec{i}; - } - }; -} // namespace trait + ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_HOST_ACC constexpr auto operator()(Vec const& offsets) const -> Vec + { + return offsets; + } + }; + + //! The unsigned integral x offset get trait specialization. + template + struct GetOffsets>> + { + ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_HOST_ACC constexpr auto operator()(TIntegral const& i) const + { + return Vec{i}; + } + }; + } // namespace trait } // namespace alpaka diff --git a/include/alpaka/queue/Properties.hpp b/include/alpaka/queue/Properties.hpp index 4340895d9a9e..d3e3b554f4c7 100644 --- a/include/alpaka/queue/Properties.hpp +++ b/include/alpaka/queue/Properties.hpp @@ -15,5 +15,6 @@ namespace alpaka //! The caller is NOT waiting until the enqueued task is finished struct NonBlocking; } // namespace property + using namespace property; } // namespace alpaka diff --git a/include/alpaka/queue/QueueCpuBlocking.hpp b/include/alpaka/queue/QueueCpuBlocking.hpp index 3d94b86faf92..8cf4746ced7e 100644 --- a/include/alpaka/queue/QueueCpuBlocking.hpp +++ b/include/alpaka/queue/QueueCpuBlocking.hpp @@ -10,4 +10,4 @@ namespace alpaka { using QueueCpuBlocking = QueueGenericThreadsBlocking; -} +} // namespace alpaka diff --git a/include/alpaka/queue/QueueCpuNonBlocking.hpp b/include/alpaka/queue/QueueCpuNonBlocking.hpp index 27c9799d54df..78eb02895841 100644 --- a/include/alpaka/queue/QueueCpuNonBlocking.hpp +++ b/include/alpaka/queue/QueueCpuNonBlocking.hpp @@ -10,4 +10,4 @@ namespace alpaka { using QueueCpuNonBlocking = QueueGenericThreadsNonBlocking; -} +} // namespace alpaka diff --git a/include/alpaka/queue/QueueCpuSyclBlocking.hpp b/include/alpaka/queue/QueueCpuSyclBlocking.hpp index a2e1fa893d63..63dc39fc0c16 100644 --- a/include/alpaka/queue/QueueCpuSyclBlocking.hpp +++ b/include/alpaka/queue/QueueCpuSyclBlocking.hpp @@ -12,6 +12,6 @@ namespace alpaka { using QueueCpuSyclBlocking = QueueGenericSyclBlocking; -} +} // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp b/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp index be7cc2dc06ae..d3fab4dcfbdb 100644 --- a/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp +++ b/include/alpaka/queue/QueueCpuSyclNonBlocking.hpp @@ -12,6 +12,6 @@ namespace alpaka { using QueueCpuSyclNonBlocking = QueueGenericSyclNonBlocking; -} +} // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp b/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp index 09a0d2658f74..9ff2e58dc48d 100644 --- a/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp +++ b/include/alpaka/queue/QueueFpgaSyclIntelBlocking.hpp @@ -12,6 +12,6 @@ namespace alpaka { using QueueFpgaSyclIntelBlocking = QueueGenericSyclBlocking; -} +} // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp b/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp index 92a6501ed573..20ea0bb83e81 100644 --- a/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp +++ b/include/alpaka/queue/QueueFpgaSyclIntelNonBlocking.hpp @@ -12,6 +12,6 @@ namespace alpaka { using QueueFpgaSyclIntelNonBlocking = QueueGenericSyclNonBlocking; -} +} // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueGenericSyclBlocking.hpp b/include/alpaka/queue/QueueGenericSyclBlocking.hpp index 24f9697666e5..bb743226c5d3 100644 --- a/include/alpaka/queue/QueueGenericSyclBlocking.hpp +++ b/include/alpaka/queue/QueueGenericSyclBlocking.hpp @@ -12,6 +12,6 @@ namespace alpaka { template using QueueGenericSyclBlocking = detail::QueueGenericSyclBase; -} +} // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp b/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp index 024a432a9ebd..b5dcbe84004c 100644 --- a/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp +++ b/include/alpaka/queue/QueueGenericSyclNonBlocking.hpp @@ -12,6 +12,6 @@ namespace alpaka { template using QueueGenericSyclNonBlocking = detail::QueueGenericSyclBase; -} +} // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueGenericThreadsBlocking.hpp b/include/alpaka/queue/QueueGenericThreadsBlocking.hpp index c4dcd3bc6fa3..65361bd0a0ad 100644 --- a/include/alpaka/queue/QueueGenericThreadsBlocking.hpp +++ b/include/alpaka/queue/QueueGenericThreadsBlocking.hpp @@ -42,6 +42,7 @@ namespace alpaka , m_bCurrentlyExecutingTask(false) { } + QueueGenericThreadsBlockingImpl(QueueGenericThreadsBlockingImpl const&) = delete; auto operator=(QueueGenericThreadsBlockingImpl const&) -> QueueGenericThreadsBlockingImpl& = delete; @@ -79,10 +80,12 @@ namespace alpaka dev.registerQueue(m_spQueueImpl); } + auto operator==(QueueGenericThreadsBlocking const& rhs) const -> bool { return (m_spQueueImpl == rhs.m_spQueueImpl); } + auto operator!=(QueueGenericThreadsBlocking const& rhs) const -> bool { return !((*this) == rhs); @@ -100,6 +103,7 @@ namespace alpaka { using type = TDev; }; + //! The CPU blocking device queue device get trait specialization. template struct GetDev> @@ -133,6 +137,7 @@ namespace alpaka queue.m_spQueueImpl->m_bCurrentlyExecutingTask = false; } }; + //! The CPU blocking device queue test trait specialization. template struct Empty> diff --git a/include/alpaka/queue/QueueGenericThreadsNonBlocking.hpp b/include/alpaka/queue/QueueGenericThreadsNonBlocking.hpp index 2687f638badb..4e02a911a383 100644 --- a/include/alpaka/queue/QueueGenericThreadsNonBlocking.hpp +++ b/include/alpaka/queue/QueueGenericThreadsNonBlocking.hpp @@ -45,12 +45,14 @@ namespace alpaka explicit QueueGenericThreadsNonBlockingImpl(TDev dev) : m_dev(std::move(dev)) { } + QueueGenericThreadsNonBlockingImpl(QueueGenericThreadsNonBlockingImpl const&) = delete; QueueGenericThreadsNonBlockingImpl(QueueGenericThreadsNonBlockingImpl&&) = delete; auto operator=(QueueGenericThreadsNonBlockingImpl const&) -> QueueGenericThreadsNonBlockingImpl& = delete; auto operator=(QueueGenericThreadsNonBlockingImpl&&) -> QueueGenericThreadsNonBlockingImpl& = delete; + ~QueueGenericThreadsNonBlockingImpl() override { } @@ -87,10 +89,12 @@ namespace alpaka dev.registerQueue(m_spQueueImpl); } + auto operator==(QueueGenericThreadsNonBlocking const& rhs) const -> bool { return (m_spQueueImpl == rhs.m_spQueueImpl); } + auto operator!=(QueueGenericThreadsNonBlocking const& rhs) const -> bool { return !((*this) == rhs); @@ -108,6 +112,7 @@ namespace alpaka { using type = TDev; }; + //! The CPU non-blocking device queue device get trait specialization. template struct GetDev> @@ -135,6 +140,7 @@ namespace alpaka queue.m_spQueueImpl->m_workerThread.submit(task); } }; + //! The CPU non-blocking device queue test trait specialization. template struct Empty> diff --git a/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp b/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp index 28b52f22e353..358513e1e2fc 100644 --- a/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp +++ b/include/alpaka/queue/QueueGpuSyclIntelBlocking.hpp @@ -12,6 +12,6 @@ namespace alpaka { using QueueGpuSyclIntelBlocking = QueueGenericSyclBlocking; -} +} // namespace alpaka #endif diff --git a/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp b/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp index 63fd4d66db5a..f3be15c9dcb2 100644 --- a/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp +++ b/include/alpaka/queue/QueueGpuSyclIntelNonBlocking.hpp @@ -12,6 +12,6 @@ namespace alpaka { using QueueGpuSyclIntelNonBlocking = QueueGenericSyclNonBlocking; -} +} // namespace alpaka #endif diff --git a/include/alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp b/include/alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp index cd29ec5377a2..50d2285c1fcd 100644 --- a/include/alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp +++ b/include/alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp @@ -60,8 +60,10 @@ namespace alpaka ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( TApi::streamCreateWithFlags(&m_UniformCudaHipQueue, TApi::streamNonBlocking)); } + QueueUniformCudaHipRtImpl(QueueUniformCudaHipRtImpl&&) = default; auto operator=(QueueUniformCudaHipRtImpl&&) -> QueueUniformCudaHipRtImpl& = delete; + ALPAKA_FN_HOST ~QueueUniformCudaHipRtImpl() { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -99,10 +101,12 @@ namespace alpaka { dev.registerQueue(m_spQueueImpl); } + ALPAKA_FN_HOST auto operator==(QueueUniformCudaHipRt const& rhs) const -> bool { return (m_spQueueImpl == rhs.m_spQueueImpl); } + ALPAKA_FN_HOST auto operator!=(QueueUniformCudaHipRt const& rhs) const -> bool { return !((*this) == rhs); @@ -112,6 +116,7 @@ namespace alpaka { return m_spQueueImpl->getNativeHandle(); } + auto getCallbackThread() -> core::CallbackThread& { return m_spQueueImpl->m_callbackThread; diff --git a/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp b/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp index ca62f7250831..38fed8159f82 100644 --- a/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp +++ b/include/alpaka/queue/sycl/QueueGenericSyclBase.hpp @@ -202,7 +202,7 @@ namespace alpaka { template class EventGenericSycl; -} +} // namespace alpaka namespace alpaka::trait { diff --git a/include/alpaka/rand/Philox/MultiplyAndSplit64to32.hpp b/include/alpaka/rand/Philox/MultiplyAndSplit64to32.hpp index 2f71313a0de8..c9518b1b9fef 100644 --- a/include/alpaka/rand/Philox/MultiplyAndSplit64to32.hpp +++ b/include/alpaka/rand/Philox/MultiplyAndSplit64to32.hpp @@ -11,15 +11,15 @@ namespace alpaka::rand { /// Get high 32 bits of a 64-bit number - ALPAKA_FN_HOST_ACC constexpr static auto high32Bits(std::uint64_t const x) -> std::uint32_t + ALPAKA_FN_HOST_ACC static constexpr auto high32Bits(std::uint64_t const x) -> std::uint32_t { return static_cast(x >> 32); } /// Get low 32 bits of a 64-bit number - ALPAKA_FN_HOST_ACC constexpr static auto low32Bits(std::uint64_t const x) -> std::uint32_t + ALPAKA_FN_HOST_ACC static constexpr auto low32Bits(std::uint64_t const x) -> std::uint32_t { - return static_cast(x & 0xffffffff); + return static_cast(x & 0xffff'ffff); } /** Multiply two 64-bit numbers and split the result into high and low 32 bits, also known as "mulhilo32" @@ -30,7 +30,7 @@ namespace alpaka::rand * @param resultLow low 32 bits of the product a*b */ // TODO: See single-instruction implementations in original Philox source code - ALPAKA_FN_HOST_ACC constexpr static void multiplyAndSplit64to32( + ALPAKA_FN_HOST_ACC static constexpr void multiplyAndSplit64to32( std::uint64_t const a, std::uint64_t const b, std::uint32_t& resultHigh, diff --git a/include/alpaka/rand/Philox/PhiloxBaseCommon.hpp b/include/alpaka/rand/Philox/PhiloxBaseCommon.hpp index df6866ab4fed..3e790fabed6c 100644 --- a/include/alpaka/rand/Philox/PhiloxBaseCommon.hpp +++ b/include/alpaka/rand/Philox/PhiloxBaseCommon.hpp @@ -8,7 +8,6 @@ #include - namespace alpaka::rand::engine { /** Common class for Philox family engines diff --git a/include/alpaka/rand/Philox/PhiloxBaseTraits.hpp b/include/alpaka/rand/Philox/PhiloxBaseTraits.hpp index 9887f0decf51..8c782fb93f9a 100644 --- a/include/alpaka/rand/Philox/PhiloxBaseTraits.hpp +++ b/include/alpaka/rand/Philox/PhiloxBaseTraits.hpp @@ -15,17 +15,17 @@ namespace alpaka { template class AccGpuUniformCudaHipRt; -} +} // namespace alpaka #endif namespace alpaka::rand::engine::trait { template - constexpr inline bool isGPU = false; + inline constexpr bool isGPU = false; #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED) template - constexpr inline bool isGPU> = true; + inline constexpr bool isGPU> = true; #endif /** Selection of default backend diff --git a/include/alpaka/rand/Philox/PhiloxConstants.hpp b/include/alpaka/rand/Philox/PhiloxConstants.hpp index 64f274f896b3..f6000d6ceff8 100644 --- a/include/alpaka/rand/Philox/PhiloxConstants.hpp +++ b/include/alpaka/rand/Philox/PhiloxConstants.hpp @@ -9,7 +9,6 @@ #include #include - namespace alpaka::rand::engine { /** Constants used in the Philox algorithm @@ -34,17 +33,19 @@ namespace alpaka::rand::engine public: static constexpr std::uint64_t WEYL_64_0() { - return 0x9E3779B97F4A7C15; ///< First Weyl sequence parameter: the golden ratio + return 0x9E37'79B9'7F4A'7C15; ///< First Weyl sequence parameter: the golden ratio } + static constexpr std::uint64_t WEYL_64_1() { - return 0xBB67AE8584CAA73B; ///< Second Weyl sequence parameter: \f$ \sqrt{3}-1 \f$ + return 0xBB67'AE85'84CA'A73B; ///< Second Weyl sequence parameter: \f$ \sqrt{3}-1 \f$ } static constexpr std::uint32_t WEYL_32_0() { return high32Bits(WEYL_64_0()); ///< 1st Weyl sequence parameter, 32 bits } + static constexpr std::uint32_t WEYL_32_1() { return high32Bits(WEYL_64_1()); ///< 2nd Weyl sequence parameter, 32 bits @@ -52,11 +53,12 @@ namespace alpaka::rand::engine static constexpr std::uint32_t MULTIPLITER_4x32_0() { - return 0xCD9E8D57; ///< First Philox S-box multiplier + return 0xCD9E'8D57; ///< First Philox S-box multiplier } + static constexpr std::uint32_t MULTIPLITER_4x32_1() { - return 0xD2511F53; ///< Second Philox S-box multiplier + return 0xD251'1F53; ///< Second Philox S-box multiplier } }; } // namespace alpaka::rand::engine diff --git a/include/alpaka/rand/Philox/PhiloxStateless.hpp b/include/alpaka/rand/Philox/PhiloxStateless.hpp index ab8db93b0fef..094b9d3de117 100644 --- a/include/alpaka/rand/Philox/PhiloxStateless.hpp +++ b/include/alpaka/rand/Philox/PhiloxStateless.hpp @@ -10,7 +10,6 @@ #include - namespace alpaka::rand::engine { /** Philox algorithm parameters @@ -42,10 +41,12 @@ namespace alpaka::rand::engine { return TParams::rounds; } + static constexpr unsigned vectorSize() { return TParams::counterSize; } + static constexpr unsigned numberWidth() { return TParams::width; diff --git a/include/alpaka/rand/Philox/PhiloxStatelessVector.hpp b/include/alpaka/rand/Philox/PhiloxStatelessVector.hpp index dc2866640325..49a7fa1b1923 100644 --- a/include/alpaka/rand/Philox/PhiloxStatelessVector.hpp +++ b/include/alpaka/rand/Philox/PhiloxStatelessVector.hpp @@ -8,7 +8,6 @@ #include - namespace alpaka::rand::engine { /** Philox-stateless engine generating a vector of numbers diff --git a/include/alpaka/rand/Philox/PhiloxVector.hpp b/include/alpaka/rand/Philox/PhiloxVector.hpp index b20f0c2d875f..648399caa022 100644 --- a/include/alpaka/rand/Philox/PhiloxVector.hpp +++ b/include/alpaka/rand/Philox/PhiloxVector.hpp @@ -9,7 +9,6 @@ #include - namespace alpaka::rand::engine { /** Philox state for vector generator diff --git a/include/alpaka/rand/RandDefault.hpp b/include/alpaka/rand/RandDefault.hpp index c19613d288fc..6cb1701810b2 100644 --- a/include/alpaka/rand/RandDefault.hpp +++ b/include/alpaka/rand/RandDefault.hpp @@ -31,6 +31,7 @@ namespace alpaka::rand { using type = std::uint32_t; }; + template<> struct BitsType { @@ -173,6 +174,7 @@ namespace alpaka::rand return {acc}; } }; + //! The GPU device random number float uniform distribution get trait specialization. template struct CreateUniformReal>> @@ -182,6 +184,7 @@ namespace alpaka::rand return {}; } }; + //! The GPU device random number integer uniform distribution get trait specialization. template struct CreateUniformUint>> diff --git a/include/alpaka/rand/RandGenericSycl.hpp b/include/alpaka/rand/RandGenericSycl.hpp index c5edd6721df2..c114a4fd7ca6 100644 --- a/include/alpaka/rand/RandGenericSycl.hpp +++ b/include/alpaka/rand/RandGenericSycl.hpp @@ -89,10 +89,12 @@ namespace alpaka::rand { return std::numeric_limits::min(); } + ALPAKA_FN_HOST_ACC static result_type max() { return std::numeric_limits::max(); } + result_type operator()() { oneapi::dpl::uniform_real_distribution distr; diff --git a/include/alpaka/rand/RandPhilox.hpp b/include/alpaka/rand/RandPhilox.hpp index 85cda832bb75..72cf99b297b0 100644 --- a/include/alpaka/rand/RandPhilox.hpp +++ b/include/alpaka/rand/RandPhilox.hpp @@ -54,14 +54,17 @@ namespace alpaka::rand // STL UniformRandomBitGenerator concept // https://en.cppreference.com/w/cpp/named_req/UniformRandomBitGenerator using result_type = std::uint32_t; + ALPAKA_FN_HOST_ACC constexpr auto min() -> result_type { return 0; } + ALPAKA_FN_HOST_ACC constexpr auto max() -> result_type { return std::numeric_limits::max(); } + ALPAKA_FN_HOST_ACC auto operator()() -> result_type { return engineVariant(); @@ -110,14 +113,17 @@ namespace alpaka::rand using ResultInt = std::uint32_t; using ResultVec = decltype(std::declval()()); + ALPAKA_FN_HOST_ACC constexpr auto min() -> ResultInt { return 0; } + ALPAKA_FN_HOST_ACC constexpr auto max() -> ResultInt { return std::numeric_limits::max(); } + ALPAKA_FN_HOST_ACC auto operator()() -> ResultVec { return engineVariant(); diff --git a/include/alpaka/rand/RandStdLib.hpp b/include/alpaka/rand/RandStdLib.hpp index c9b7e1c01696..ec507e0bf9ef 100644 --- a/include/alpaka/rand/RandStdLib.hpp +++ b/include/alpaka/rand/RandStdLib.hpp @@ -19,6 +19,7 @@ namespace alpaka::rand class TinyMersenneTwister : public concepts::Implements { }; + using RandStdLib = TinyMersenneTwister; //! The standard library mersenne twister implementation. @@ -54,14 +55,17 @@ namespace alpaka::rand // STL UniformRandomBitGenerator concept interface using result_type = std::mt19937::result_type; - ALPAKA_FN_HOST constexpr static auto min() -> result_type + + ALPAKA_FN_HOST static constexpr auto min() -> result_type { return std::mt19937::min(); } - ALPAKA_FN_HOST constexpr static auto max() -> result_type + + ALPAKA_FN_HOST static constexpr auto max() -> result_type { return std::mt19937::max(); } + ALPAKA_FN_HOST auto operator()() -> result_type { return state(); @@ -96,14 +100,17 @@ namespace alpaka::rand // STL UniformRandomBitGenerator concept interface using result_type = TinyMTengine::result_type; - ALPAKA_FN_HOST constexpr static auto min() -> result_type + + ALPAKA_FN_HOST static constexpr auto min() -> result_type { return TinyMTengine::min(); } - ALPAKA_FN_HOST constexpr static auto max() -> result_type + + ALPAKA_FN_HOST static constexpr auto max() -> result_type { return TinyMTengine::max(); } + ALPAKA_FN_HOST auto operator()() -> result_type { return state(); @@ -129,14 +136,17 @@ namespace alpaka::rand // STL UniformRandomBitGenerator concept interface using result_type = std::random_device::result_type; - ALPAKA_FN_HOST constexpr static auto min() -> result_type + + ALPAKA_FN_HOST static constexpr auto min() -> result_type { return std::random_device::min(); } - ALPAKA_FN_HOST constexpr static auto max() -> result_type + + ALPAKA_FN_HOST static constexpr auto max() -> result_type { return std::random_device::max(); } + ALPAKA_FN_HOST auto operator()() -> result_type { return state(); @@ -202,6 +212,7 @@ namespace alpaka::rand return {}; } }; + //! The CPU device random number float uniform distribution get trait specialization. template struct CreateUniformReal>> @@ -211,6 +222,7 @@ namespace alpaka::rand return {}; } }; + //! The CPU device random number integer uniform distribution get trait specialization. template struct CreateUniformUint>> diff --git a/include/alpaka/rand/RandUniformCudaHipRand.hpp b/include/alpaka/rand/RandUniformCudaHipRand.hpp index 23a463f735f9..97090be3ed6d 100644 --- a/include/alpaka/rand/RandUniformCudaHipRand.hpp +++ b/include/alpaka/rand/RandUniformCudaHipRand.hpp @@ -23,7 +23,7 @@ # pragma clang diagnostic ignored "-Wduplicate-decl-specifier" # endif -# if HIP_VERSION >= 50200000 +# if HIP_VERSION >= 50'200'000 # include # else # include @@ -113,14 +113,16 @@ namespace alpaka::rand # else using result_type = decltype(hiprand(&state)); # endif - ALPAKA_FN_HOST_ACC constexpr static result_type min() + ALPAKA_FN_HOST_ACC static constexpr result_type min() { return std::numeric_limits::min(); } - ALPAKA_FN_HOST_ACC constexpr static result_type max() + + ALPAKA_FN_HOST_ACC static constexpr result_type max() { return std::numeric_limits::max(); } + __device__ result_type operator()() { # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED diff --git a/include/alpaka/rand/TinyMT/Engine.hpp b/include/alpaka/rand/TinyMT/Engine.hpp index 915128e0c2ca..9f5d05ea4b45 100644 --- a/include/alpaka/rand/TinyMT/Engine.hpp +++ b/include/alpaka/rand/TinyMT/Engine.hpp @@ -23,9 +23,9 @@ namespace alpaka::rand::engine::cpu void seed(result_type value = default_seed()) { // parameters from TinyMT/jump/sample.c - prng.mat1 = 0x8f7011ee; - prng.mat2 = 0xfc78ff1f; - prng.tmat = 0x3793fdff; + prng.mat1 = 0x8f70'11ee; + prng.mat2 = 0xfc78'ff1f; + prng.tmat = 0x3793'fdff; tinymt32_init(&prng, value); } diff --git a/include/alpaka/rand/Traits.hpp b/include/alpaka/rand/Traits.hpp index 87d2b0e69cf3..1ccd1ba479c3 100644 --- a/include/alpaka/rand/Traits.hpp +++ b/include/alpaka/rand/Traits.hpp @@ -45,6 +45,7 @@ namespace alpaka::rand using ImplementationBase = concepts::ImplementationBase; return trait::CreateNormalReal::createNormalReal(rand); } + //! \return A uniform floating point distribution [0.0, 1.0). ALPAKA_NO_HOST_ACC_WARNING template @@ -55,6 +56,7 @@ namespace alpaka::rand using ImplementationBase = concepts::ImplementationBase; return trait::CreateUniformReal::createUniformReal(rand); } + //! \return A uniform integer distribution [0, UINT_MAX]. ALPAKA_NO_HOST_ACC_WARNING template @@ -79,6 +81,7 @@ namespace alpaka::rand template struct CreateDefault; } // namespace trait + //! \return A default random number generator engine. Its type is guaranteed to be trivially copyable. //! Except HIP accelerator for HIP versions below 5.2 as its internal state was not trivially copyable. //! The limitation was discussed in PR #1778. diff --git a/include/alpaka/test/event/EventHostManualTrigger.hpp b/include/alpaka/test/event/EventHostManualTrigger.hpp index 3c4cf51406ae..4ce363907475 100644 --- a/include/alpaka/test/event/EventHostManualTrigger.hpp +++ b/include/alpaka/test/event/EventHostManualTrigger.hpp @@ -47,6 +47,7 @@ namespace alpaka::test , m_bIsReady(true) { } + EventHostManualTriggerCpuImpl(EventHostManualTriggerCpuImpl const& other) = delete; auto operator=(EventHostManualTriggerCpuImpl const&) -> EventHostManualTriggerCpuImpl& = delete; @@ -84,11 +85,13 @@ namespace alpaka::test : m_spEventImpl(std::make_shared>(dev)) { } + //! Equality comparison operator. ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCpu const& rhs) const -> bool { return (m_spEventImpl == rhs.m_spEventImpl); } + //! Inequality comparison operator. ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCpu const& rhs) const -> bool { @@ -241,7 +244,6 @@ namespace alpaka::trait # include "alpaka/core/Cuda.hpp" - namespace alpaka::test { namespace uniform_cuda_hip::detail @@ -266,8 +268,10 @@ namespace alpaka::test ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaMemset(m_devMem, static_cast(0u), static_cast(sizeof(int32_t)))); } + EventHostManualTriggerCudaImpl(EventHostManualTriggerCudaImpl const&) = delete; auto operator=(EventHostManualTriggerCudaImpl const&) -> EventHostManualTriggerCudaImpl& = delete; + ALPAKA_FN_HOST ~EventHostManualTriggerCudaImpl() { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -309,10 +313,12 @@ namespace alpaka::test { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; } + ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCuda const& rhs) const -> bool { return (m_spEventImpl == rhs.m_spEventImpl); } + ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCuda const& rhs) const -> bool { return !((*this) == rhs); @@ -336,6 +342,7 @@ namespace alpaka::test { using type = test::EventHostManualTriggerCuda; }; + //! The CPU event host manual trigger support get trait specialization. template<> struct IsEventHostManualTriggerSupported @@ -426,10 +433,11 @@ namespace alpaka::trait ALPAKA_CUDA_DRV_CHECK(detail::streamWaitValue( static_cast(queue.getNativeHandle()), reinterpret_cast(event.m_spEventImpl->m_devMem), - 0x01010101u, + 0x0101'0101u, CU_STREAM_WAIT_VALUE_GEQ)); } }; + template<> struct Enqueue { @@ -458,7 +466,7 @@ namespace alpaka::trait ALPAKA_CUDA_DRV_CHECK(detail::streamWaitValue( static_cast(queue.getNativeHandle()), reinterpret_cast(event.m_spEventImpl->m_devMem), - 0x01010101u, + 0x0101'0101u, CU_STREAM_WAIT_VALUE_GEQ)); } }; @@ -497,8 +505,10 @@ namespace alpaka::test ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( hipMemset(m_devMem, static_cast(0u), static_cast(sizeof(int32_t)))); } + EventHostManualTriggerHipImpl(EventHostManualTriggerHipImpl const&) = delete; auto operator=(EventHostManualTriggerHipImpl const&) -> EventHostManualTriggerHipImpl& = delete; + ALPAKA_FN_HOST ~EventHostManualTriggerHipImpl() { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; @@ -540,10 +550,12 @@ namespace alpaka::test { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; } + ALPAKA_FN_HOST auto operator==(EventHostManualTriggerHip const& rhs) const -> bool { return (m_spEventImpl == rhs.m_spEventImpl); } + ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerHip const& rhs) const -> bool { return !((*this) == rhs); @@ -641,7 +653,7 @@ namespace alpaka::trait std::cerr << "[Workaround] polling of device-located value in stream, as hipStreamWaitValue32 is not " "available.\n"; # endif - while(hostMem < 0x01010101) + while(hostMem < 0x0101'0101) { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipMemcpyDtoHAsync( &hostMem, @@ -689,7 +701,7 @@ namespace alpaka::trait std::this_thread::sleep_for(std::chrono::milliseconds(10u)); ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( hipMemcpy(&hmem, event.m_spEventImpl->m_devMem, sizeof(std::uint32_t), hipMemcpyDefault)); - } while(hmem < 0x01010101u); + } while(hmem < 0x0101'0101u); } }; } // namespace alpaka::trait diff --git a/include/alpaka/test/queue/Queue.hpp b/include/alpaka/test/queue/Queue.hpp index a181cb660d64..22432fc719b4 100644 --- a/include/alpaka/test/queue/Queue.hpp +++ b/include/alpaka/test/queue/Queue.hpp @@ -156,6 +156,7 @@ namespace alpaka::test # endif #endif } // namespace trait + //! The queue type that should be used for the given accelerator. template using IsBlockingQueue = trait::IsBlockingQueue; diff --git a/include/alpaka/vec/Traits.hpp b/include/alpaka/vec/Traits.hpp index f9a4e7d329d5..531fe04847ce 100644 --- a/include/alpaka/vec/Traits.hpp +++ b/include/alpaka/vec/Traits.hpp @@ -43,6 +43,7 @@ namespace alpaka { return trait::SubVecFromIndices::subVecFromIndices(vec); } + //! \tparam TVec has to specialize SubVecFromIndices. //! \return The sub-vector consisting of the first N elements of the source vector. ALPAKA_NO_HOST_ACC_WARNING @@ -57,6 +58,7 @@ namespace alpaka using IdxSubSequence = std::make_integer_sequence; return subVecFromIndices(vec); } + //! \tparam TVec has to specialize SubVecFromIndices. //! \return The sub-vector consisting of the last N elements of the source vector. ALPAKA_NO_HOST_ACC_WARNING diff --git a/include/alpaka/warp/WarpUniformCudaHipBuiltIn.hpp b/include/alpaka/warp/WarpUniformCudaHipBuiltIn.hpp index 04f3b2aa5cfc..1e7d98d82d7d 100644 --- a/include/alpaka/warp/WarpUniformCudaHipBuiltIn.hpp +++ b/include/alpaka/warp/WarpUniformCudaHipBuiltIn.hpp @@ -126,6 +126,7 @@ namespace alpaka::warp return __shfl(val, srcLane, width); # endif } + //------------------------------------------------------------- __device__ static auto shfl( [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp, diff --git a/include/alpaka/workdiv/Traits.hpp b/include/alpaka/workdiv/Traits.hpp index 50383c93f79a..211d6889ee86 100644 --- a/include/alpaka/workdiv/Traits.hpp +++ b/include/alpaka/workdiv/Traits.hpp @@ -49,6 +49,7 @@ namespace alpaka * alpaka::getWorkDiv(workDiv); } }; + //! The work div grid element extent trait specialization. template struct GetWorkDiv @@ -60,6 +61,7 @@ namespace alpaka * alpaka::getWorkDiv(workDiv); } }; + //! The work div block element extent trait specialization. template struct GetWorkDiv diff --git a/include/alpaka/workdiv/WorkDivHelpers.hpp b/include/alpaka/workdiv/WorkDivHelpers.hpp index b7b8bbbda3e5..99f136f4b38a 100644 --- a/include/alpaka/workdiv/WorkDivHelpers.hpp +++ b/include/alpaka/workdiv/WorkDivHelpers.hpp @@ -51,6 +51,7 @@ namespace alpaka --divisor; return divisor; } + //! \param val The value to find divisors of. //! \param maxDivisor The maximum. //! \return A list of all divisors less then or equal to the given maximum. @@ -370,6 +371,7 @@ namespace alpaka return true; } + //! \tparam TAcc The accelerator to test the validity on. //! \param dev The device to test the work division for validity on. //! \param workDiv The work division to test for validity. diff --git a/include/alpaka/workdiv/WorkDivMembers.hpp b/include/alpaka/workdiv/WorkDivMembers.hpp index f651c97c472e..f81a15676ab1 100644 --- a/include/alpaka/workdiv/WorkDivMembers.hpp +++ b/include/alpaka/workdiv/WorkDivMembers.hpp @@ -20,6 +20,7 @@ namespace alpaka { public: ALPAKA_FN_HOST_ACC WorkDivMembers() = delete; + ALPAKA_NO_HOST_ACC_WARNING template ALPAKA_FN_HOST_ACC explicit WorkDivMembers( @@ -31,6 +32,7 @@ namespace alpaka , m_threadElemExtent(getExtentVecEnd(threadElemExtent)) { } + ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC WorkDivMembers(WorkDivMembers const& other) : m_gridBlockExtent(other.m_gridBlockExtent) @@ -38,6 +40,7 @@ namespace alpaka , m_threadElemExtent(other.m_threadElemExtent) { } + ALPAKA_NO_HOST_ACC_WARNING template ALPAKA_FN_HOST_ACC explicit WorkDivMembers(TWorkDiv const& other) @@ -50,6 +53,7 @@ namespace alpaka WorkDivMembers(WorkDivMembers&&) = default; auto operator=(WorkDivMembers const&) -> WorkDivMembers& = default; auto operator=(WorkDivMembers&&) -> WorkDivMembers& = default; + ALPAKA_NO_HOST_ACC_WARNING template ALPAKA_FN_HOST_ACC auto operator=(TWorkDiv const& other) -> WorkDivMembers& diff --git a/test/analysis/headerCheck/src/main.cpp b/test/analysis/headerCheck/src/main.cpp index 7611ccf3b5bb..4a2d28170421 100644 --- a/test/analysis/headerCheck/src/main.cpp +++ b/test/analysis/headerCheck/src/main.cpp @@ -4,7 +4,6 @@ #include - TEST_CASE("headerCheckMain", "[headerCheck]") { REQUIRE(true); diff --git a/test/integ/mandelbrot/src/mandelbrot.cpp b/test/integ/mandelbrot/src/mandelbrot.cpp index 9cfeac4bdacf..b56b11ca5802 100644 --- a/test/integ/mandelbrot/src/mandelbrot.cpp +++ b/test/integ/mandelbrot/src/mandelbrot.cpp @@ -25,26 +25,31 @@ class SimpleComplex ALPAKA_FN_HOST_ACC SimpleComplex(T const& a, T const& b) : r(a), i(b) { } + ALPAKA_NO_HOST_ACC_WARNING [[nodiscard]] ALPAKA_FN_INLINE ALPAKA_FN_HOST_ACC auto absSq() const -> T { return r * r + i * i; } + ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto operator*(SimpleComplex const& a) -> SimpleComplex { return SimpleComplex(r * a.r - i * a.i, i * a.r + r * a.i); } + ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto operator*(float const& a) -> SimpleComplex { return SimpleComplex(r * a, i * a); } + ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto operator+(SimpleComplex const& a) -> SimpleComplex { return SimpleComplex(r + a.r, i + a.i); } + ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto operator+(float const& a) -> SimpleComplex { @@ -127,6 +132,7 @@ class MandelbrotKernel #endif } } + //! \return The number of iterations until the Mandelbrot iteration with the given Value reaches the absolute value //! of 2. //! Only does maxIterations steps and returns maxIterations if the value would be higher. @@ -150,7 +156,7 @@ class MandelbrotKernel std::uint32_t const& g, std::uint32_t const& b) -> std::uint32_t { - return 0xFF000000 | (r << 16) | (g << 8) | b; + return 0xFF00'0000 | (r << 16) | (g << 8) | b; } #ifdef ALPAKA_MANDELBROT_TEST_CONTINOUS_COLOR_MAPPING diff --git a/test/unit/acc/src/AccTagTest.cpp b/test/unit/acc/src/AccTagTest.cpp index 930463f254a3..49024fc73a96 100644 --- a/test/unit/acc/src/AccTagTest.cpp +++ b/test/unit/acc/src/AccTagTest.cpp @@ -103,18 +103,19 @@ std::string specialized_function_2(TTag) // is required because of -Werror=missing-declarations std::string specialized_function_2(alpaka::TagCpuSerial); + std::string specialized_function_2(alpaka::TagCpuSerial) { return "Serial"; } std::string specialized_function_2(alpaka::TagGpuCudaRt); + std::string specialized_function_2(alpaka::TagGpuCudaRt) { return "CUDA"; } - TEMPLATE_LIST_TEST_CASE("specialization of functions with tags", "[acc][tag]", TestAccs) { using TestAcc = TestType; diff --git a/test/unit/core/src/ConceptsTest.cpp b/test/unit/core/src/ConceptsTest.cpp index 1d28ab49c2ca..801f44655f45 100644 --- a/test/unit/core/src/ConceptsTest.cpp +++ b/test/unit/core/src/ConceptsTest.cpp @@ -11,6 +11,7 @@ struct ConceptExample { }; + struct ConceptNonMatchingExample { }; diff --git a/test/unit/kernel/src/KernelLambda.cpp b/test/unit/kernel/src/KernelLambda.cpp index 77e45398bc4e..d07ba66b74b8 100644 --- a/test/unit/kernel/src/KernelLambda.cpp +++ b/test/unit/kernel/src/KernelLambda.cpp @@ -77,7 +77,6 @@ struct TestTemplateCapture } }; - TEST_CASE("lambdaKernelIsWorking", "[kernel]") { alpaka::meta::forEachType(TestTemplateLambda()); diff --git a/test/unit/kernel/src/KernelWithOmpSchedule.cpp b/test/unit/kernel/src/KernelWithOmpSchedule.cpp index c65aff5ab0ad..217d960b3841 100644 --- a/test/unit/kernel/src/KernelWithOmpSchedule.cpp +++ b/test/unit/kernel/src/KernelWithOmpSchedule.cpp @@ -54,6 +54,7 @@ struct KernelWithStaticMemberOmpScheduleChunkSize : KernelWithMemberOmpScheduleK { static int const ompScheduleChunkSize; }; + // In this case, the member has to be defined externally template int const KernelWithStaticMemberOmpScheduleChunkSize::ompScheduleChunkSize = 2; diff --git a/test/unit/kernel/src/KernelWithoutTemplatedAccParam.cpp b/test/unit/kernel/src/KernelWithoutTemplatedAccParam.cpp index 58ade4557d1a..64d11308711c 100644 --- a/test/unit/kernel/src/KernelWithoutTemplatedAccParam.cpp +++ b/test/unit/kernel/src/KernelWithoutTemplatedAccParam.cpp @@ -38,8 +38,7 @@ using AccGpu = alpaka::AccGpuCudaRt; #if defined(ALPAKA_ACC_CPU_SERIAL_ENABLED) struct KernelNoTemplateCpu { - ALPAKA_FN_ACC - auto operator()(AccCpu const& acc, bool* success) const -> void + ALPAKA_FN_ACC auto operator()(AccCpu const& acc, bool* success) const -> void { ALPAKA_CHECK( *success, diff --git a/test/unit/math/src/Buffer.hpp b/test/unit/math/src/Buffer.hpp index 734be6f75a8b..2bfc7f156ba5 100644 --- a/test/unit/math/src/Buffer.hpp +++ b/test/unit/math/src/Buffer.hpp @@ -86,20 +86,17 @@ namespace alpaka alpaka::memcpy(queue, hostBuffer, devBuffer); } - ALPAKA_FN_ACC - auto operator()(size_t idx, TAcc const& /* acc */) const -> TData& + ALPAKA_FN_ACC auto operator()(size_t idx, TAcc const& /* acc */) const -> TData& { return pDevBuffer[idx]; } - ALPAKA_FN_HOST - auto operator()(size_t idx) const -> TData& + ALPAKA_FN_HOST auto operator()(size_t idx) const -> TData& { return pHostBuffer[idx]; } - ALPAKA_FN_HOST - friend auto operator<<(std::ostream& os, Buffer const& buffer) -> std::ostream& + ALPAKA_FN_HOST friend auto operator<<(std::ostream& os, Buffer const& buffer) -> std::ostream& { os << "capacity: " << capacity << "\n"; for(size_t i = 0; i < capacity; ++i) diff --git a/test/unit/math/src/TestTemplate.hpp b/test/unit/math/src/TestTemplate.hpp index 740783b0b26d..aa62afab8d2e 100644 --- a/test/unit/math/src/TestTemplate.hpp +++ b/test/unit/math/src/TestTemplate.hpp @@ -65,9 +65,8 @@ struct TestTemplate auto const seed = rd(); INFO( "testing" - << " acc:" << alpaka::core::demangled << " data type:" - << alpaka::core::demangled << " functor:" << alpaka::core::demangled << " seed:" - << seed); + << " acc:" << alpaka::core::demangled << " data type:" << alpaka::core::demangled + << " functor:" << alpaka::core::demangled << " seed:" << seed); // SETUP (defines and initialising) // DevAcc is defined in Buffer.hpp too. diff --git a/test/unit/math/src/mathADL.cpp b/test/unit/math/src/mathADL.cpp index 8c9b0ce9aedb..59acf839cff3 100644 --- a/test/unit/math/src/mathADL.cpp +++ b/test/unit/math/src/mathADL.cpp @@ -68,204 +68,238 @@ namespace custom }; ALPAKA_FN_HOST_ACC auto abs(Custom c); + ALPAKA_FN_HOST_ACC auto abs(Custom c) { return Custom::Abs | c; } ALPAKA_FN_HOST_ACC auto acos(Custom c); + ALPAKA_FN_HOST_ACC auto acos(Custom c) { return Custom::Acos | c; } ALPAKA_FN_HOST_ACC auto acosh(Custom c); + ALPAKA_FN_HOST_ACC auto acosh(Custom c) { return Custom::Acosh | c; } ALPAKA_FN_HOST_ACC auto arg(Custom c); + ALPAKA_FN_HOST_ACC auto arg(Custom c) { return Custom::Arg | c; } ALPAKA_FN_HOST_ACC auto asin(Custom c); + ALPAKA_FN_HOST_ACC auto asin(Custom c) { return Custom::Asin | c; } ALPAKA_FN_HOST_ACC auto asinh(Custom c); + ALPAKA_FN_HOST_ACC auto asinh(Custom c) { return Custom::Asinh | c; } ALPAKA_FN_HOST_ACC auto atan(Custom c); + ALPAKA_FN_HOST_ACC auto atan(Custom c) { return Custom::Atan | c; } ALPAKA_FN_HOST_ACC auto atanh(Custom c); + ALPAKA_FN_HOST_ACC auto atanh(Custom c) { return Custom::Atanh | c; } ALPAKA_FN_HOST_ACC auto atan2(Custom a, Custom b); + ALPAKA_FN_HOST_ACC auto atan2(Custom a, Custom b) { return Custom::Atan2 | a | b; } ALPAKA_FN_HOST_ACC auto cbrt(Custom c); + ALPAKA_FN_HOST_ACC auto cbrt(Custom c) { return Custom::Cbrt | c; } ALPAKA_FN_HOST_ACC auto ceil(Custom c); + ALPAKA_FN_HOST_ACC auto ceil(Custom c) { return Custom::Ceil | c; } ALPAKA_FN_HOST_ACC auto conj(Custom c); + ALPAKA_FN_HOST_ACC auto conj(Custom c) { return Custom::Conj | c; } ALPAKA_FN_HOST_ACC auto copysign(Custom a, Custom b); + ALPAKA_FN_HOST_ACC auto copysign(Custom a, Custom b) { return Custom::Copysign | a | b; } ALPAKA_FN_HOST_ACC auto cos(Custom c); + ALPAKA_FN_HOST_ACC auto cos(Custom c) { return Custom::Cos | c; } ALPAKA_FN_HOST_ACC auto cosh(Custom c); + ALPAKA_FN_HOST_ACC auto cosh(Custom c) { return Custom::Cosh | c; } ALPAKA_FN_HOST_ACC auto erf(Custom c); + ALPAKA_FN_HOST_ACC auto erf(Custom c) { return Custom::Erf | c; } ALPAKA_FN_HOST_ACC auto exp(Custom c); + ALPAKA_FN_HOST_ACC auto exp(Custom c) { return Custom::Exp | c; } ALPAKA_FN_HOST_ACC auto floor(Custom c); + ALPAKA_FN_HOST_ACC auto floor(Custom c) { return Custom::Floor | c; } ALPAKA_FN_HOST_ACC auto fma(Custom a, Custom b, Custom c); + ALPAKA_FN_HOST_ACC auto fma(Custom a, Custom b, Custom c) { return Custom::Fma | a | b | c; } ALPAKA_FN_HOST_ACC auto fmod(Custom a, Custom b); + ALPAKA_FN_HOST_ACC auto fmod(Custom a, Custom b) { return Custom::Fmod | a | b; } ALPAKA_FN_HOST_ACC auto log(Custom c); + ALPAKA_FN_HOST_ACC auto log(Custom c) { return Custom::Log | c; } ALPAKA_FN_HOST_ACC auto log2(Custom c); + ALPAKA_FN_HOST_ACC auto log2(Custom c) { return Custom::Log2 | c; } ALPAKA_FN_HOST_ACC auto log10(Custom c); + ALPAKA_FN_HOST_ACC auto log10(Custom c) { return Custom::Log10 | c; } ALPAKA_FN_HOST_ACC auto max(Custom a, Custom b); + ALPAKA_FN_HOST_ACC auto max(Custom a, Custom b) { return Custom::Max | a | b; } ALPAKA_FN_HOST_ACC auto min(Custom a, Custom b); + ALPAKA_FN_HOST_ACC auto min(Custom a, Custom b) { return Custom::Min | a | b; } ALPAKA_FN_HOST_ACC auto pow(Custom a, Custom b); + ALPAKA_FN_HOST_ACC auto pow(Custom a, Custom b) { return Custom::Pow | a | b; } ALPAKA_FN_HOST_ACC auto remainder(Custom a, Custom b); + ALPAKA_FN_HOST_ACC auto remainder(Custom a, Custom b) { return Custom::Remainder | a | b; } ALPAKA_FN_HOST_ACC auto round(Custom c); + ALPAKA_FN_HOST_ACC auto round(Custom c) { return Custom::Round | c; } ALPAKA_FN_HOST_ACC auto lround(Custom c); + ALPAKA_FN_HOST_ACC auto lround(Custom c) { return Custom::Lround | c; } ALPAKA_FN_HOST_ACC auto llround(Custom c); + ALPAKA_FN_HOST_ACC auto llround(Custom c) { return Custom::Llround | c; } ALPAKA_FN_HOST_ACC auto rsqrt(Custom c); + ALPAKA_FN_HOST_ACC auto rsqrt(Custom c) { return Custom::Rsqrt | c; } ALPAKA_FN_HOST_ACC auto sin(Custom c); + ALPAKA_FN_HOST_ACC auto sin(Custom c) { return Custom::Sin | c; } ALPAKA_FN_HOST_ACC auto sinh(Custom c); + ALPAKA_FN_HOST_ACC auto sinh(Custom c) { return Custom::Sinh | c; } ALPAKA_FN_HOST_ACC void sincos(Custom c, Custom& a, Custom& b); + ALPAKA_FN_HOST_ACC void sincos(Custom c, Custom& a, Custom& b) { a = static_cast(Custom::Sincos | c | Custom::Arg2); @@ -273,24 +307,28 @@ namespace custom } ALPAKA_FN_HOST_ACC auto sqrt(Custom c); + ALPAKA_FN_HOST_ACC auto sqrt(Custom c) { return Custom::Sqrt | c; } ALPAKA_FN_HOST_ACC auto tan(Custom c); + ALPAKA_FN_HOST_ACC auto tan(Custom c) { return Custom::Tan | c; } ALPAKA_FN_HOST_ACC auto tanh(Custom c); + ALPAKA_FN_HOST_ACC auto tanh(Custom c) { return Custom::Tanh | c; } ALPAKA_FN_HOST_ACC auto trunc(Custom c); + ALPAKA_FN_HOST_ACC auto trunc(Custom c) { return Custom::Trunc | c; diff --git a/test/unit/math/src/sincos.cpp b/test/unit/math/src/sincos.cpp index 5b480e048ad5..55edd467a94c 100644 --- a/test/unit/math/src/sincos.cpp +++ b/test/unit/math/src/sincos.cpp @@ -15,7 +15,6 @@ #include - class SinCosTestKernel { public: diff --git a/test/unit/meta/src/CudaVectorArrayWrapperTest.cpp b/test/unit/meta/src/CudaVectorArrayWrapperTest.cpp index e54a279057a3..d325fc838045 100644 --- a/test/unit/meta/src/CudaVectorArrayWrapperTest.cpp +++ b/test/unit/meta/src/CudaVectorArrayWrapperTest.cpp @@ -109,7 +109,6 @@ TEMPLATE_LIST_TEST_CASE("cudaVectorArrayWrapperDevice", "[meta]", alpaka::test:: REQUIRE(fixture(kernelDouble)); } - TEST_CASE("cudaVectorArrayWrapperHost", "[meta]") { // TODO: It would be nice to check all possible type vs. size combinations. diff --git a/test/unit/meta/src/IntegralTest.cpp b/test/unit/meta/src/IntegralTest.cpp index 2deeb2ab3690..28c139329c45 100644 --- a/test/unit/meta/src/IntegralTest.cpp +++ b/test/unit/meta/src/IntegralTest.cpp @@ -860,6 +860,7 @@ TEST_CASE("higherMin", "[meta]") std::is_same_v, std::uint64_t>, "alpaka::meta::HigherMin failed!"); } + TEST_CASE("lowerMin", "[meta]") { static_assert( diff --git a/test/unit/meta/src/IsStrictBaseTest.cpp b/test/unit/meta/src/IsStrictBaseTest.cpp index fb64ff800c6b..96662fae9eea 100644 --- a/test/unit/meta/src/IsStrictBaseTest.cpp +++ b/test/unit/meta/src/IsStrictBaseTest.cpp @@ -12,9 +12,11 @@ class A { }; + class B : A { }; + class C { }; diff --git a/test/unit/queue/src/CollectiveQueue.cpp b/test/unit/queue/src/CollectiveQueue.cpp index b124804d5c75..6c2cc5d57ca4 100644 --- a/test/unit/queue/src/CollectiveQueue.cpp +++ b/test/unit/queue/src/CollectiveQueue.cpp @@ -56,7 +56,7 @@ TEST_CASE("queueCollective", "[queue]") using WorkDiv = alpaka::WorkDivMembers; WorkDiv const workDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); -# pragma omp parallel num_threads(static_cast (results.size())) +# pragma omp parallel num_threads(static_cast(results.size())) { // The kernel will be performed collectively. // OpenMP will distribute the work between the threads from the parallel region @@ -96,7 +96,7 @@ TEST_CASE("TestCollectiveMemcpy", "[queue]") using WorkDiv = alpaka::WorkDivMembers; WorkDiv const workDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); -# pragma omp parallel num_threads(static_cast (results.size())) +# pragma omp parallel num_threads(static_cast(results.size())) { int threadId = omp_get_thread_num(); diff --git a/test/unit/vec/src/VecTest.cpp b/test/unit/vec/src/VecTest.cpp index 171ec013187f..8480b0967c2a 100644 --- a/test/unit/vec/src/VecTest.cpp +++ b/test/unit/vec/src/VecTest.cpp @@ -59,6 +59,7 @@ TEST_CASE("basicVecTraits", "[vec]") return 5; } } s; + STATIC_REQUIRE(std::is_convertible_v); [[maybe_unused]] constexpr Vec v(s, s, s); @@ -331,6 +332,7 @@ struct NonAlpakaVec return result; } + auto operator[](TIdx /*idx*/) const -> TIdx { return static_cast(0);