From 900f4fd21010fddf34fc20d8afe754e3e6d2f49d Mon Sep 17 00:00:00 2001 From: Julian Samaroo Date: Mon, 6 Jul 2020 13:16:02 -0500 Subject: [PATCH] Changes for the package merge (#1) * Update docs and README --- Manifest.toml | 12 ++++++------ Project.toml | 4 ++-- README.md | 32 ++++++++++++++++---------------- deps/deps.jl | 8 +++++++- deps/hsa/build.jl | 3 --- deps/rocm-external/build.jl | 2 +- docs/make.jl | 2 +- docs/src/index.md | 29 +++++++++++++++++++++++++++++ src/AMDGPU.jl | 26 ++++++++++++++------------ src/array.jl | 8 ++++---- src/device/gcn/hostcall.jl | 4 ++-- src/executable.jl | 2 +- src/execution.jl | 2 +- test/hsa/global.jl | 1 - 14 files changed, 84 insertions(+), 51 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 51e71634d..00adb361b 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -49,11 +49,11 @@ version = "2.0.1" [[GPUCompiler]] deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "95aa07bfda5c80ccd57b038570ef79f663ce531f" +git-tree-sha1 = "3226928dacf00271c95f8d4325cbae34d60878d5" repo-rev = "jps/gcn-workaround-allocas" repo-url = "https://github.com/JuliaGPU/GPUCompiler.jl.git" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "0.4.0" +version = "0.5.0" [[InteractiveUtils]] deps = ["Markdown"] @@ -61,9 +61,9 @@ uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[LLVM]] deps = ["CEnum", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "d9c6e1efcaa6c2fcd043da812a62b3e489a109a3" +git-tree-sha1 = "a662366a5d485dee882077e8da3e1a95a86d097f" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "1.7.0" +version = "2.0.0" [[Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" @@ -86,9 +86,9 @@ deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[OrderedCollections]] -git-tree-sha1 = "12ce190210d278e12644bcadf5b21cbdcf225cd3" +git-tree-sha1 = "293b70ac1780f9584c89268a6e2a560d938a7065" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.2.0" +version = "1.3.0" [[Printf]] deps = ["Unicode"] diff --git a/Project.toml b/Project.toml index 0ef5b5279..1fedcbe55 100644 --- a/Project.toml +++ b/Project.toml @@ -21,8 +21,8 @@ Adapt = "0.4, 1.0, 2.0" BinaryProvider = "0.5" CEnum = "0.2, 0.3, 0.4" GPUArrays = "2" -GPUCompiler = "0.4" -LLVM = "1.5" +GPUCompiler = "0.4, 0.5" +LLVM = "2" Requires = "1" Setfield = "0.5, 0.6" julia = "1.4" diff --git a/README.md b/README.md index 9a2345c00..fa3428c3c 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,19 @@ -# AMDGPUnative.jl +# AMDGPU.jl -*Support for compiling and executing native Julia kernels on AMD GPUs.* +*AMD GPU (ROCm) programming in Julia* | **Documentation** | **Build Status** | |:---------------------------------------:|:-------------------------------------------------------------:| | [![][docs-master-img]][docs-master-url] | [![][gitlab-img]][gitlab-url] [![][codecov-img]][codecov-url] | -[gitlab-img]: https://gitlab.com/JuliaGPU/AMDGPUnative.jl/badges/master/pipeline.svg -[gitlab-url]: https://gitlab.com/JuliaGPU/AMDGPUnative.jl/commits/master +[gitlab-img]: https://gitlab.com/JuliaGPU/AMDGPU.jl/badges/master/pipeline.svg +[gitlab-url]: https://gitlab.com/JuliaGPU/AMDGPU.jl/commits/master -[codecov-img]: https://codecov.io/gh/JuliaGPU/AMDGPUnative.jl/branch/master/graph/badge.svg -[codecov-url]: https://codecov.io/gh/JuliaGPU/AMDGPUnative.jl +[codecov-img]: https://codecov.io/gh/JuliaGPU/AMDGPU.jl/branch/master/graph/badge.svg +[codecov-url]: https://codecov.io/gh/JuliaGPU/AMDGPU.jl [docs-master-img]: 
https://img.shields.io/badge/docs-master-blue.svg
-[docs-master-url]: https://juliagpu.gitlab.io/AMDGPUnative.jl/
+[docs-master-url]: https://juliagpu.gitlab.io/AMDGPU.jl/

@@ -23,24 +23,24 @@
 The package can be installed with the Julia package manager.
 From the Julia REPL, type `]` to enter the Pkg REPL mode and run:
 
 ```
-pkg> add AMDGPUnative
+pkg> add AMDGPU
 ```
 
 Or, equivalently, via the `Pkg` API:
 
 ```julia
-julia> import Pkg; Pkg.add("AMDGPUnative")
+julia> import Pkg; Pkg.add("AMDGPU")
 ```
 
 ## Project Status
 
-The package is tested against, and being developed for, Julia `1.3` and above.
+The package is tested against, and being developed for, Julia `1.4` and above.
 Only 64-bit Linux is supported and working at this time, until ROCm is ported
 to other platforms. It is recommended to use a version of Julia with LLVM 9.0
 or higher.
 
 This package is under active maintenance and is reasonably complete; however,
 not all features (and especially performance) are up to par
-with CUDAnative.
+with CUDA.jl.
 
 ### Supported Functionality

@@ -48,8 +48,8 @@
 |:---|:---:|:---|
 | Host-side kernel launches | :heavy_check_mark: | See #58 |
 | Dynamic parallelism | :x: |
-| Local (shared) memory | :x: |
-| Coarse-grained memory | :x: |
+| Local (shared) memory | :heavy_check_mark: |
+| Coarse-grained memory | :heavy_check_mark: |
 | Page-locked (pinned) memory | :x: |
 
 ## Questions and Contributions
 
 Usage questions can be posted on the [Julia Discourse
 forum](https://discourse.julialang.org/c/domain/gpu) under the GPU domain
 and/or in the #gpu channel of the [Julia Slack](https://julialang.org/community/).
 
 Contributions are very welcome, as are feature requests and suggestions. Please open an
-[issue](https://github.com/JuliaGPU/AMDGPUnative.jl/issues) if you encounter any problems.
+[issue](https://github.com/JuliaGPU/AMDGPU.jl/issues) if you encounter any problems.
 
 ## Acknowledgment
 
-AMDGPUnative would not have been possible without the work by Tim Besard and
+AMDGPU would not have been possible without the work by Tim Besard and
 contributors to [CUDAnative.jl](https://github.com/JuliaGPU/CUDAnative.jl) and
 [LLVM.jl](https://github.com/maleadt/LLVM.jl).
 
 ## License
 
-AMDGPUnative.jl is licensed under the [MIT License](LICENSE.md).
+AMDGPU.jl is licensed under the [MIT License](LICENSE.md).
diff --git a/deps/deps.jl b/deps/deps.jl
index da543b85f..2a4d69434 100644
--- a/deps/deps.jl
+++ b/deps/deps.jl
@@ -4,6 +4,9 @@ const hsa_ext = joinpath(@__DIR__, "hsa", "ext.jl")
 if isfile(hsa_ext)
     include(hsa_ext)
 end
+if !isdefined(@__MODULE__, :hsa_configured)
+    const hsa_configured = false
+end
 if !hsa_configured
     const hsa_configured = false
     const libhsaruntime_version = v"0.0"
@@ -19,6 +22,9 @@ const libs_ext = joinpath(@__DIR__, "rocm-external", "ext.jl")
 if isfile(libs_ext)
     include(libs_ext)
 end
+if !isdefined(@__MODULE__, :ext_libs_configured)
+    const ext_libs_configured = false
+end
 if !ext_libs_configured
     # default (non-functional) values for critical variables,
     # making it possible to _load_ the package at all times.
@@ -30,4 +36,4 @@ if !ext_libs_configured
     const libmiopen = nothing
 end
 
-const configured = hsa_configured && ext_libs_configured
+const configured = hsa_configured
diff --git a/deps/hsa/build.jl b/deps/hsa/build.jl
index 2d6dd6022..8757799f1 100644
--- a/deps/hsa/build.jl
+++ b/deps/hsa/build.jl
@@ -146,9 +146,6 @@ function main()
     end
 
     # find the ld.lld program for linking kernels
-    # NOTE: This isn't needed by HSARuntime.jl directly, but other packages
-    # (like AMDGPUnative.jl) will want it to be available, so we find it for
-    # them
     ld_path = find_ld_lld()
     if ld_path == ""
         build_error("Couldn't find ld.lld.")
diff --git a/deps/rocm-external/build.jl b/deps/rocm-external/build.jl
index c5fa4c16e..883c09bcd 100644
--- a/deps/rocm-external/build.jl
+++ b/deps/rocm-external/build.jl
@@ -28,7 +28,7 @@ end
 
 ## main
 
-config_path = joinpath(@__DIR__, "ext.jl")
+const config_path = joinpath(@__DIR__, "ext.jl")
 const previous_config_path = config_path * ".bak"
 
 function write_ext(config)
diff --git a/docs/make.jl b/docs/make.jl
index dc99dd7db..d37937248 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,4 +1,4 @@
-using Documenter, AMDGPUnative
+using Documenter, AMDGPU
 
 makedocs(
     sitename="AMDGPU.jl",
diff --git a/docs/src/index.md b/docs/src/index.md
index 765eb5fea..337e38b9b 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -28,3 +28,32 @@ Julia support for programming AMD GPUs is currently provided by the [AMDGPU.jl p
 * An interface for compiling and running kernels written in Julia through LLVM's AMDGPU backend.
 * An array type implementing the [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl) interface, providing high-level array operations.
+## Requirements
+* [ROCR](https://github.com/RadeonOpenCompute/ROCR-Runtime)
+* [ROCT](https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface)
+* Recent Linux kernel with AMDGPU and HSA enabled
+
+### Setup Instructions
+Currently, the requirements for getting everything working properly are poorly
+documented in the upstream docs for any distro other than Ubuntu, so here is a
+list of the requirements found through the process of making this work:
+
+Make sure /dev/kfd has a group other than root that you can add your user to.
+I recommend adding your user to the "video" group and setting the ownership of
+/dev/kfd to root:video with 660 permissions (see the sketch at the end of this
+section).
+
+You will need the correct libraries in your LD_LIBRARY_PATH or standard
+library locations:
+* libhsa-runtime64.so
+* libhsakmt.so
+
+In terms of Linux kernel versions, pick the newest one you can. If building
+your own kernel, make sure all the regular AMDGPU and HSA options are enabled.
+
+You will also need `ld.lld` installed on your system (provided by LLVM/Clang);
+if you built Julia from source, you should have a copy somewhere in
+`deps/scratch/llvm-*/*/bin/` that you can add to your PATH.
+
+Once all of this is set up properly, you should be able to `] build AMDGPU`
+successfully; after that, if you have a supported GPU attached and enabled,
+`] test AMDGPU` should work exactly as you might expect.
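+For reference, here is a minimal sketch of the permission setup described
+above. It assumes a distro where the "video" group already exists; group and
+device names may differ on your system:
+
+```
+# Add the current user to the "video" group (takes effect at next login).
+sudo usermod -aG video $USER
+
+# Give /dev/kfd root:video ownership with 660 permissions.
+sudo chown root:video /dev/kfd
+sudo chmod 660 /dev/kfd
+
+# Check that the HSA runtime libraries are visible to the loader.
+ldconfig -p | grep -E 'libhsa-runtime64|libhsakmt'
+```
+
+Permissions set this way do not persist across reboots; a udev rule (for
+example, `KERNEL=="kfd", GROUP="video", MODE="0660"` in a file under
+/etc/udev/rules.d/) is the usual way to make them permanent.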
diff --git a/src/AMDGPU.jl b/src/AMDGPU.jl
index c04701500..312b1ed43 100644
--- a/src/AMDGPU.jl
+++ b/src/AMDGPU.jl
@@ -100,21 +100,15 @@ atexit() do
     configured && HSA.shut_down()
 end
 function __init__()
-    # We want to always be able to load the package
-    if !configured
-        @warn("AMDGPU.jl has not been successfully built, and will not work properly.")
-        @warn("Please run Pkg.build(\"AMDGPU\") and restart Julia.")
-        return
-    end
-
-    # Try to load deps if possible
-    try
-        check_deps()
-    catch err
-        @warn """
+    deps_failed() = @warn """
         AMDGPU dependencies have not been built, some functionality may be missing.
         Please run Pkg.build("AMDGPU") and reload AMDGPU.
         """
+
+    # We want to always be able to load the package
+    if !configured
+        deps_failed()
+        return
     end
 
     # Make sure we load the library found by the last `] build`
@@ -130,6 +124,14 @@ function __init__()
         DEFAULT_AGENT[] = first(agents)
     end
 
+    try
+        # Try to load device libs if possible
+        check_deps()
+        @assert configured
+    catch err
+        deps_failed()
+    end
+
     # Load optional OpenCL integrations
     @require OpenCL="08131aa3-fb12-5dee-8b74-c09406e224a2" include("opencl.jl")
 
diff --git a/src/array.jl b/src/array.jl
index 07105de39..953512f92 100644
--- a/src/array.jl
+++ b/src/array.jl
@@ -152,13 +152,13 @@ take ownership of the memory, calling `free` when the array is no longer referen
 """
 function Base.unsafe_wrap(::Union{Type{ROCArray},Type{ROCArray{T}},Type{ROCArray{T,N}}},
                           p::Ptr{T}, dims::NTuple{N,Int};
-                          own::Bool=false, agent::HSAAgent=HSARuntime.get_default_agent()) where {T,N}
+                          own::Bool=false, agent::HSAAgent=get_default_agent()) where {T,N}
     buf = Mem.Buffer(convert(Ptr{Cvoid}, p), prod(dims) * sizeof(T), agent)
     return ROCArray{T, length(dims)}(buf, dims; own=own)
 end
 function Base.unsafe_wrap(Atype::Union{Type{ROCArray},Type{ROCArray{T}},Type{ROCArray{T,1}}},
                           p::Ptr{T}, dim::Integer;
-                          own::Bool=false, agent::HSAAgent=HSARuntime.get_default_agent()) where {T}
+                          own::Bool=false, agent::HSAAgent=get_default_agent()) where {T}
     unsafe_wrap(Atype, p, (dim,); own=own, agent=agent)
 end
 
@@ -221,14 +221,14 @@ Base.cconvert(::Type{Ptr{T}}, x::ROCArray{T}) where T = buffer(x)
 Base.cconvert(::Type{Ptr{Nothing}}, x::ROCArray) = buffer(x)
 
-## interop with AMDGPUnative
+## interop with AMDGPU
 
 function Base.convert(::Type{ROCDeviceArray{T,N,AS.Global}}, a::ROCArray{T,N}) where {T,N}
     ptr = Base.unsafe_convert(Ptr{T}, a.buf)
     ROCDeviceArray{T,N,AS.Global}(a.dims, DevicePtr{T,AS.Global}(ptr+a.offset))
 end
 
-Adapt.adapt_storage(::AMDGPUnative.Adaptor, xs::ROCArray{T,N}) where {T,N} =
+Adapt.adapt_storage(::AMDGPU.Adaptor, xs::ROCArray{T,N}) where {T,N} =
     convert(ROCDeviceArray{T,N,AS.Global}, xs)
 
diff --git a/src/device/gcn/hostcall.jl b/src/device/gcn/hostcall.jl
index dad0d6520..308ae9d0a 100644
--- a/src/device/gcn/hostcall.jl
+++ b/src/device/gcn/hostcall.jl
@@ -37,7 +37,7 @@ function HostCall(RT::Type, AT::Type{<:Tuple}, signal::S;
         buf_len += sizeof(T)
     end
     buf_len = max(sizeof(UInt64), buf_len) # make room for return buffer pointer
-    buf = Mem.alloc(agent, buf_len)
+    buf = Mem.alloc(agent, buf_len; coherent=true)
     buf_ptr = DevicePtr{UInt8,AS.Global}(Base.unsafe_convert(Ptr{UInt8}, buf))
     HostCall{S,RT,AT}(signal, host_sentinel, device_sentinel, buf_ptr, buf_len)
 end
@@ -213,7 +213,7 @@ function HostCall(func, rettype, argtypes; return_task=false,
         @debug "Hostcall: Host function returning value of type $(typeof(ret))"
         try
             ret_len = sizeof(ret)
-            ret_buf = Mem.alloc(agent, ret_len)
+            ret_buf = Mem.alloc(agent, ret_len; coherent=true) # FIXME: Don't be coherent
             ret_buf_ptr = Base.unsafe_convert(Ptr{typeof(ret)}, ret_buf)
             Base.unsafe_store!(ret_buf_ptr, ret)
             ret_buf_ptr = Base.unsafe_convert(Ptr{UInt64}, ret_buf)
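As a quick illustration of the `unsafe_wrap` methods touched above, here is a
hypothetical host-side sketch. It assumes a configured system, and that
`Mem.alloc`, `get_default_agent`, and the `Ptr` conversion on buffers behave
as they are used elsewhere in this patch:

```julia
using AMDGPU

# Allocate a raw 16-byte buffer on the default agent.
agent = get_default_agent()
buf = Mem.alloc(agent, 4 * sizeof(Float32))

# Wrap it as a 4-element ROCArray. With own=true the array takes
# ownership, so the buffer is freed once the array is unreferenced.
ptr = Base.unsafe_convert(Ptr{Float32}, buf)
arr = unsafe_wrap(ROCArray, ptr, (4,); own=true, agent=agent)
```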
diff --git a/src/executable.jl b/src/executable.jl
index 93932c1c2..38ee7f727 100644
--- a/src/executable.jl
+++ b/src/executable.jl
@@ -46,7 +46,7 @@ function HSAExecutable(agent::HSAAgent, data::Vector{UInt8}, symbol::String; glo
     _globals = Dict{Symbol,Any}()
     for (gbl,sz) in globals
-        gbl_buf = Mem.alloc(agent, sz)
+        gbl_buf = Mem.alloc(agent, sz; coherent=true)
         HSA.executable_agent_global_variable_define(executable[], agent.agent,
                                                     string(gbl), gbl_buf.ptr) |> check
         _globals[gbl] = gbl_buf
diff --git a/src/execution.jl b/src/execution.jl
index 1f3ec5545..5edd50bf7 100644
--- a/src/execution.jl
+++ b/src/execution.jl
@@ -257,7 +257,7 @@ AbstractKernel
     call_args = Union{Expr,Symbol}[x[1] for x in zip(args, to_pass) if x[2]]
 
     # replace non-isbits arguments (they should be unused, or compilation would have failed)
-    # alternatively, make HSARuntime allow `launch` with non-isbits arguments.
+    # alternatively, allow `launch` with non-isbits arguments.
     for (i,dt) in enumerate(call_t)
         if !isbitstype(dt)
             call_t[i] = Ptr{Any}
diff --git a/test/hsa/global.jl b/test/hsa/global.jl
index 96f958428..49268a371 100644
--- a/test/hsa/global.jl
+++ b/test/hsa/global.jl
@@ -10,7 +10,6 @@ hk = AMDGPU.rocfunction(kernel, Tuple{Int32})
 exe = hk.mod.exe
 gbl = AMDGPU.get_global(exe.exe, :myglobal)
 gbl_ptr = Base.unsafe_convert(Ptr{Float32}, gbl.ptr)
-@show gbl_ptr
 @test Base.unsafe_load(gbl_ptr) == 0f0
 Base.unsafe_store!(gbl_ptr, 2f0)
 @test Base.unsafe_load(gbl_ptr) == 2f0
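For context, the full pattern this test exercises — compiling a kernel with
`AMDGPU.rocfunction`, looking up one of its globals with `AMDGPU.get_global`,
and accessing it from the host — reads roughly as follows. This is a sketch
assembled from the hunk above; it assumes, as in the full test file, a
`kernel` that declares a `Float32` global named `myglobal`:

```julia
using AMDGPU, Test

# Compile the kernel for an Int32 argument; `kernel` is assumed to be
# defined earlier in the test file and to reference the global `myglobal`.
hk = AMDGPU.rocfunction(kernel, Tuple{Int32})

# Look up the global in the compiled executable.
exe = hk.mod.exe
gbl = AMDGPU.get_global(exe.exe, :myglobal)

# Globals are now allocated coherently (see the executable.jl hunk above),
# so the host can read and write them directly through a raw pointer.
gbl_ptr = Base.unsafe_convert(Ptr{Float32}, gbl.ptr)
@test Base.unsafe_load(gbl_ptr) == 0f0
Base.unsafe_store!(gbl_ptr, 2f0)
@test Base.unsafe_load(gbl_ptr) == 2f0
```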