Skip to content

Commit

Permalink
Add warp-level primitives (#592)
Browse files Browse the repository at this point in the history
- Add wavefront/warp-level ops:
  - `activelane`, `ballot`, `activemask`, `bpermute`, `permute`.
  - `shfl`, `shfl_up`, `shfl_down`, `shfl_xor`.
  - `ballot_sync`, `any_sync`, `all_sync`, `shfl_sync`, `shfl_up_sync`, `shfl_down_sync`, `shfl_xor_sync`.
- Bump LLVM dep to allow Julia 1.11.
- Add doctests.
  • Loading branch information
pxl-th authored Feb 19, 2024
1 parent 417d2b6 commit 0176b9d
Show file tree
Hide file tree
Showing 15 changed files with 633 additions and 44 deletions.
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ GPUArrays = "10"
GPUCompiler = "0.25"
HIP_jll = "5.4"
KernelAbstractions = "0.9.2"
LLD_jll = "14, 15"
LLD_jll = "14, 15, 16"
LLVM = "6"
LLVM_jll = "14, 15"
LLVM_jll = "14, 15, 16"
Preferences = "1"
ROCmDeviceLibs_jll = "5.6.1"
Random123 = "1.6"
Expand Down
18 changes: 11 additions & 7 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@ function main()
ci = get(ENV, "CI", "") == "true"

@info "Generating Documenter site"
makedocs(
DocMeta.setdocmeta!(AMDGPU, :DocTestSetup, :(using AMDGPU); recursive=true)
makedocs(;
modules=[AMDGPU],
sitename="AMDGPU.jl",
format = Documenter.HTML(
format=Documenter.HTML(
# Use clean URLs on CI
prettyurls = ci,
canonical = dst,
assets = ["assets/favicon.ico"],
analytics = "UA-154489943-2",
),
pages = [
pages=[
"Home" => "index.md",
"Quick Start" => "quickstart.md",
"Devices" => "devices.md",
Expand All @@ -32,13 +34,15 @@ function main()
"Printing" => "printing.md",
"Logging" => "logging.md",
"API Reference" => "api.md"
]
],
doctest=true,
warnonly=[:missing_docs],
)
if ci
@info "Deploying to GitHub"
deploydocs(
repo = "github.com/JuliaGPU/AMDGPU.jl.git",
push_preview = true
deploydocs(;
repo="github.com/JuliaGPU/AMDGPU.jl.git",
push_preview=true,
)
end
end
Expand Down
14 changes: 12 additions & 2 deletions docs/src/devices.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ If one is not specified via `@roc` or an equivalent interface,
then the default device is used for those operations,
which affects compilation and kernel launch.

The device bound to a current Julia task is accessible via [`AMDGPU.device()`](@ref).
The list of available devices can be queried with [`AMDGPU.devices`](@ref).
The device bound to the current Julia task is accessible via the [`AMDGPU.device`](@ref) method.
The list of available devices can be queried with the [`AMDGPU.devices`](@ref) method.

If you have a `HIPDevice` object, you can also switch
the device with [`AMDGPU.device!`](@ref).
Expand All @@ -35,3 +35,13 @@ AMDGPU.device!
AMDGPU.device_id
AMDGPU.device_id!
```

## Device Properties

```@docs
AMDGPU.HIP.name
AMDGPU.HIP.wavefrontsize
AMDGPU.HIP.gcn_arch
AMDGPU.HIP.device_id
AMDGPU.HIP.properties
```
28 changes: 28 additions & 0 deletions docs/src/kernel_programming.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,31 @@ indices = ROCArray(rand(1:bins, n))
target = ROCArray(zeros(UInt32, bins))
@roc groupsize=256 gridsize=4 ker_atomic_max!(target, source, indices)
```

## Device Intrinsics

### Wavefront-Level Primitives

```@docs
AMDGPU.Device.wavefrontsize
AMDGPU.Device.activelane
AMDGPU.Device.ballot
AMDGPU.Device.ballot_sync
AMDGPU.Device.activemask
AMDGPU.Device.bpermute
AMDGPU.Device.permute
AMDGPU.Device.shfl
AMDGPU.Device.shfl_sync
AMDGPU.Device.shfl_up
AMDGPU.Device.shfl_up_sync
AMDGPU.Device.shfl_down
AMDGPU.Device.shfl_down_sync
AMDGPU.Device.shfl_xor
AMDGPU.Device.shfl_xor_sync
AMDGPU.Device.any_sync
AMDGPU.Device.all_sync
```
2 changes: 1 addition & 1 deletion src/compiler/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ function compiler_config(
)
dev_isa, features = parse_llvm_features(HIP.gcn_arch(dev))
target = GCNCompilerTarget(; dev_isa, features)
params = HIPCompilerParams(HIP.wavefront_size(dev) == 64)
params = HIPCompilerParams(HIP.wavefrontsize(dev) == 64)
CompilerConfig(target, params; kernel, name, always_inline)
end

Expand Down
3 changes: 2 additions & 1 deletion src/device/gcn.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
include(joinpath("gcn", "helpers.jl"))
include(joinpath("gcn", "assertion.jl"))
include(joinpath("gcn", "array.jl"))
include(joinpath("gcn", "math.jl"))
include(joinpath("gcn", "wavefront.jl"))
include(joinpath("gcn", "wavefront_sync.jl"))
include(joinpath("gcn", "indexing.jl"))
include(joinpath("gcn", "assertion.jl"))
include(joinpath("gcn", "synchronization.jl"))
include(joinpath("gcn", "memory_static.jl"))
include(joinpath("gcn", "execution_control.jl"))
Expand Down
1 change: 0 additions & 1 deletion src/device/gcn/execution_control.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,3 @@ signal_completion(value::Int64) = device_signal_store!(_completion_signal(), val
@inline sendmsg(x1, x2=Int32(0)) = ccall("llvm.amdgcn.s.sendmsg", llvmcall, Cvoid, (Int32, Int32), x1, x2)
@inline sendmsghalt(x1, x2=Int32(0)) = ccall("llvm.amdgcn.s.sendmsghalt", llvmcall, Cvoid, (Int32, Int32), x1, x2)
@inline endpgm() = ccall("llvm.amdgcn.endpgm", llvmcall, Cvoid, ())
@inline readfirstlane(x::Int32) = ccall("llvm.amdgcn.readfirstlane", llvmcall, Int32, (Int32,), x)
Loading

0 comments on commit 0176b9d

Please sign in to comment.