diff --git a/example/heatEquation2D/src/heatEquation2D.cpp b/example/heatEquation2D/src/heatEquation2D.cpp
index d356af0e6f1..8631c3617ea 100644
--- a/example/heatEquation2D/src/heatEquation2D.cpp
+++ b/example/heatEquation2D/src/heatEquation2D.cpp
@@ -51,12 +51,12 @@ auto example(TAccTag const&) -> int

     // simulation defines
     // {Y, X}
-    constexpr alpaka::Vec numNodes{256, 256};
+    constexpr alpaka::Vec numNodes{64, 64};
     constexpr alpaka::Vec haloSize{2, 2};
     constexpr alpaka::Vec extent = numNodes + haloSize;

-    constexpr uint32_t numTimeSteps = 6000;
-    constexpr double tMax = 0.02;
+    constexpr uint32_t numTimeSteps = 4000;
+    constexpr double tMax = 0.1;

     // x, y in [0, 1], t in [0, tMax]
     constexpr double dx = 1.0 / static_cast(extent[1] - 1);
@@ -100,8 +100,8 @@ auto example(TAccTag const&) -> int
     constexpr alpaka::Vec elemPerThread{1, 1};

     // Appropriate chunk size to split your problem for your Acc
-    constexpr Idx xSize = 64u;
-    constexpr Idx ySize = 64u;
+    constexpr Idx xSize = 16u;
+    constexpr Idx ySize = 16u;
     constexpr Idx halo = 2u;
     constexpr alpaka::Vec chunkSize{ySize, xSize};
     constexpr auto sharedMemSize = (ySize + halo) * (xSize + halo);
@@ -136,8 +136,6 @@ auto example(TAccTag const&) -> int
         = maxThreadsPerBlock < chunkSize.prod() ? alpaka::Vec{maxThreadsPerBlock, 1} : chunkSize;

     alpaka::WorkDivMembers workDiv_manual{numChunks, threadsPerBlock, elemPerThread};
-    // Timing start
-    auto startTime = std::chrono::high_resolution_clock::now();

     // Simulate
     for(uint32_t step = 1; step <= numTimeSteps; ++step)
@@ -169,21 +167,19 @@ auto example(TAccTag const&) -> int
             dy,
             dt);

-// #ifdef PNGWRITER_ENABLED
-//         if((step - 1) % 100 == 0)
-//         {
-//             alpaka::wait(computeQueue);
-//             alpaka::memcpy(dumpQueue, uBufHost, uCurrBufAcc);
-//             alpaka::wait(dumpQueue);
-//             writeImage(step - 1, uBufHost);
-//         }
-// #endif
+#ifdef PNGWRITER_ENABLED
+        if((step - 1) % 100 == 0)
+        {
+            alpaka::wait(computeQueue);
+            alpaka::memcpy(dumpQueue, uBufHost, uCurrBufAcc);
+            alpaka::wait(dumpQueue);
+            writeImage(step - 1, uBufHost);
+        }
+#endif

         // So we just swap next and curr (shallow copy)
         std::swap(uNextBufAcc, uCurrBufAcc);
     }
-    auto endTime = std::chrono::high_resolution_clock::now();
-    std::cout << "Simulation took " << (endTime - startTime).count() << " nano seconds." << std::endl;

     // Copy device -> host
     alpaka::wait(computeQueue);
diff --git a/include/alpaka/vec/Vec.hpp b/include/alpaka/vec/Vec.hpp
index d327f60ff45..376d08787d4 100644
--- a/include/alpaka/vec/Vec.hpp
+++ b/include/alpaka/vec/Vec.hpp
@@ -227,6 +227,11 @@ namespace alpaka
             return m_data[Dim::value - 4];
         }

+        ALPAKA_FN_HOST_ACC constexpr static decltype(auto) size()
+        {
+            return Dim::value;
+        }
+
         //! @}

         //! Value reference accessor at the given non-unsigned integer index.
diff --git a/test/unit/vec/src/VecTest.cpp b/test/unit/vec/src/VecTest.cpp
index d559b47a5d3..a407dc87998 100644
--- a/test/unit/vec/src/VecTest.cpp
+++ b/test/unit/vec/src/VecTest.cpp
@@ -313,6 +313,31 @@ TEST_CASE("basicVecTraits", "[vec]")
         STATIC_REQUIRE(Vec{1, 2, 3}.front() == 1); // non-const overload
         STATIC_REQUIRE(Vec{1, 2, 3}.back() == 3); // non-const overload
     }
+    {
+        constexpr alpaka::Vec vec4(static_cast(47u), static_cast(8u), static_cast(3u));
+        // compile time tests
+        STATIC_REQUIRE(vec4.size() == 3);
+        STATIC_REQUIRE((alpaka::Vec{4, 8, 3}).size() == 3);
+        STATIC_REQUIRE((alpaka::Vec{4, 8}).size() == 2);
+        STATIC_REQUIRE((alpaka::Vec{4}).size() == 1);
+
+
+        STATIC_REQUIRE(decltype(vec4)::size() == 3);
+        STATIC_REQUIRE(decltype(alpaka::Vec{4, 8, 3})::size() == 3);
+
+        using Vec3DType = alpaka::Vec;
+        STATIC_REQUIRE(Vec3DType::size() == 3);
+
+        constexpr alpaka::Vec vec5{8, 3};
+        STATIC_REQUIRE(vec5.size() == 2);
+
+        // runtime tests
+        REQUIRE(vec4.size() == 3);
+        REQUIRE((alpaka::Vec{4, 8, 3}).size() == 3);
+        REQUIRE((alpaka::Vec{4, 8}).size() == 2);
+        REQUIRE((alpaka::Vec{4}).size() == 1);
+        REQUIRE(vec5.size() == 2);
+    }
 }

 template