Skip to content

Commit

Permalink
fix getFunctionAttributes for the SYCL backend
Browse files Browse the repository at this point in the history
  • Loading branch information
AuroraPerego authored and psychocoderHPC committed Aug 7, 2024
1 parent f36e115 commit 106a497
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 27 deletions.
31 changes: 31 additions & 0 deletions include/alpaka/kernel/TaskKernelCpuSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,37 @@ namespace alpaka

//! Alias template: the kernel task for the CPU SYCL accelerator is TaskKernelGenericSycl instantiated with
//! AccCpuSycl<TDim, TIdx> as its accelerator type.
template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
using TaskKernelCpuSycl = TaskKernelGenericSycl<AccCpuSycl<TDim, TIdx>, TDim, TIdx, TKernelFnObj, TArgs...>;

namespace trait
{
//! \brief Specialisation of the class template FunctionAttributes for the CPU SYCL accelerator.
//! \tparam TDev The device type.
//! \tparam TDim The dimensionality of the accelerator device properties.
//! \tparam TIdx The idx type of the accelerator device properties.
//! \tparam TKernelFn Kernel function object type.
//! \tparam TArgs Kernel function object argument types as a parameter pack.
template<typename TDev, typename TDim, typename TIdx, typename TKernelFn, typename... TArgs>
struct FunctionAttributes<AccCpuSycl<TDim, TIdx>, TDev, KernelBundle<TKernelFn, TArgs...>>
{
    //! \param dev The device instance whose accelerator properties are queried.
    //! \param kernelBundle Kernel bundled with its arguments. It is not inspected by this specialisation.
    //! \return KernelFunctionAttributes instance whose maxThreadsPerBlock is taken from the device property
    //! m_blockThreadCountMax (converted to int). All other fields keep their default-initialised values.
    ALPAKA_FN_HOST static auto getFunctionAttributes(
        TDev const& dev,
        [[maybe_unused]] KernelBundle<TKernelFn, TArgs...> const& kernelBundle)
        -> alpaka::KernelFunctionAttributes
    {
        using Acc = AccCpuSycl<TDim, TIdx>;

        alpaka::KernelFunctionAttributes attributes;

        // The per-kernel limit is reported as the device-wide block thread limit.
        attributes.maxThreadsPerBlock = static_cast<int>(alpaka::getAccDevProps<Acc>(dev).m_blockThreadCountMax);

        return attributes;
    }
};
} // namespace trait
} // namespace alpaka

#endif
31 changes: 31 additions & 0 deletions include/alpaka/kernel/TaskKernelFpgaSyclIntel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,37 @@ namespace alpaka
//! Alias template: the kernel task for the Intel FPGA SYCL accelerator is TaskKernelGenericSycl instantiated with
//! AccFpgaSyclIntel<TDim, TIdx> as its accelerator type.
template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
using TaskKernelFpgaSyclIntel
= TaskKernelGenericSycl<AccFpgaSyclIntel<TDim, TIdx>, TDim, TIdx, TKernelFnObj, TArgs...>;

namespace trait
{
//! \brief Specialisation of the class template FunctionAttributes for the Intel FPGA SYCL accelerator.
//! \tparam TDev The device type.
//! \tparam TDim The dimensionality of the accelerator device properties.
//! \tparam TIdx The idx type of the accelerator device properties.
//! \tparam TKernelFn Kernel function object type.
//! \tparam TArgs Kernel function object argument types as a parameter pack.
template<typename TDev, typename TDim, typename TIdx, typename TKernelFn, typename... TArgs>
struct FunctionAttributes<AccFpgaSyclIntel<TDim, TIdx>, TDev, KernelBundle<TKernelFn, TArgs...>>
{
    //! \param dev The device instance whose accelerator properties are queried.
    //! \param kernelBundle Kernel bundled with its arguments. It is not inspected by this specialisation.
    //! \return KernelFunctionAttributes instance whose maxThreadsPerBlock is taken from the device property
    //! m_blockThreadCountMax (converted to int). All other fields keep their default-initialised values.
    ALPAKA_FN_HOST static auto getFunctionAttributes(
        TDev const& dev,
        [[maybe_unused]] KernelBundle<TKernelFn, TArgs...> const& kernelBundle)
        -> alpaka::KernelFunctionAttributes
    {
        using Acc = AccFpgaSyclIntel<TDim, TIdx>;

        alpaka::KernelFunctionAttributes attributes;

        // The per-kernel limit is reported as the device-wide block thread limit.
        attributes.maxThreadsPerBlock = static_cast<int>(alpaka::getAccDevProps<Acc>(dev).m_blockThreadCountMax);

        return attributes;
    }
};
} // namespace trait
} // namespace alpaka

#endif
27 changes: 0 additions & 27 deletions include/alpaka/kernel/TaskKernelGenericSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,33 +279,6 @@ namespace alpaka::trait
using type = TIdx;
};

//! \brief Specialisation of the class template FunctionAttributes for AccGenericSycl.
//! NOTE(review): a partial specialisation is selected only when the accelerator type is exactly
//! AccGenericSycl<TDim, TIdx>; presumably the concrete SYCL accelerators are distinct types, in which case this
//! specialisation would not be chosen for them - confirm.
//! \tparam TDev The device type.
//! \tparam TDim The dimensionality of the accelerator device properties.
//! \tparam TIdx The idx type of the accelerator device properties.
//! \tparam TKernelFn Kernel function object type.
//! \tparam TArgs Kernel function object argument types as a parameter pack.
template<typename TDev, typename TDim, typename TIdx, typename TKernelFn, typename... TArgs>
struct FunctionAttributes<AccGenericSycl<TDim, TIdx>, TDev, KernelBundle<TKernelFn, TArgs...>>
{
    //! \param dev The device instance whose accelerator properties are queried.
    //! \param kernelBundle Kernel bundled with its arguments. It is not inspected by this specialisation.
    //! \return KernelFunctionAttributes instance whose maxThreadsPerBlock is taken from the device property
    //! m_blockThreadCountMax (converted to int). All other fields keep their default-initialised values.
    ALPAKA_FN_HOST static auto getFunctionAttributes(
        TDev const& dev,
        [[maybe_unused]] KernelBundle<TKernelFn, TArgs...> const& kernelBundle) -> alpaka::KernelFunctionAttributes
    {
        using Acc = AccGenericSycl<TDim, TIdx>;

        alpaka::KernelFunctionAttributes attributes;

        // The per-kernel limit is reported as the device-wide block thread limit.
        attributes.maxThreadsPerBlock = static_cast<int>(alpaka::getAccDevProps<Acc>(dev).m_blockThreadCountMax);

        return attributes;
    }
};

} // namespace alpaka::trait

# undef LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS
Expand Down
31 changes: 31 additions & 0 deletions include/alpaka/kernel/TaskKernelGpuSyclIntel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,37 @@ namespace alpaka
//! Alias template: the kernel task for the Intel GPU SYCL accelerator is TaskKernelGenericSycl instantiated with
//! AccGpuSyclIntel<TDim, TIdx> as its accelerator type.
template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
using TaskKernelGpuSyclIntel
= TaskKernelGenericSycl<AccGpuSyclIntel<TDim, TIdx>, TDim, TIdx, TKernelFnObj, TArgs...>;

namespace trait
{
//! \brief Specialisation of the class template FunctionAttributes for the Intel GPU SYCL accelerator.
//! \tparam TDev The device type.
//! \tparam TDim The dimensionality of the accelerator device properties.
//! \tparam TIdx The idx type of the accelerator device properties.
//! \tparam TKernelFn Kernel function object type.
//! \tparam TArgs Kernel function object argument types as a parameter pack.
template<typename TDev, typename TDim, typename TIdx, typename TKernelFn, typename... TArgs>
struct FunctionAttributes<AccGpuSyclIntel<TDim, TIdx>, TDev, KernelBundle<TKernelFn, TArgs...>>
{
    //! \param dev The device instance whose accelerator properties are queried.
    //! \param kernelBundle Kernel bundled with its arguments. It is not inspected by this specialisation.
    //! \return KernelFunctionAttributes instance whose maxThreadsPerBlock is taken from the device property
    //! m_blockThreadCountMax (converted to int). All other fields keep their default-initialised values.
    ALPAKA_FN_HOST static auto getFunctionAttributes(
        TDev const& dev,
        [[maybe_unused]] KernelBundle<TKernelFn, TArgs...> const& kernelBundle)
        -> alpaka::KernelFunctionAttributes
    {
        using Acc = AccGpuSyclIntel<TDim, TIdx>;

        alpaka::KernelFunctionAttributes attributes;

        // The per-kernel limit is reported as the device-wide block thread limit.
        attributes.maxThreadsPerBlock = static_cast<int>(alpaka::getAccDevProps<Acc>(dev).m_blockThreadCountMax);

        return attributes;
    }
};
} // namespace trait
} // namespace alpaka

#endif

0 comments on commit 106a497

Please sign in to comment.