Skip to content

Commit

Permalink
Move to stateless LLVM contexts
Browse files (browse the repository at this point in the history)
  • Loading branch information
jpsamaroo committed Aug 21, 2020
1 parent b8b140d commit 6810f0a
Show file tree
Hide file tree
Showing 14 changed files with 524 additions and 486 deletions.
10 changes: 4 additions & 6 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,19 @@ version = "5.1.0"

[[GPUCompiler]]
deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "f0fdbfcd2bcd380aa50c43bdab753780f248581e"
repo-rev = "master"
repo-url = "https://github.com/JuliaGPU/GPUCompiler.jl.git"
git-tree-sha1 = "10b1a3aa52de30e9219f3ed147cb09e72cf6d2e8"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.6.0"
version = "0.7.0"

[[InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[[LLVM]]
deps = ["CEnum", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "a662366a5d485dee882077e8da3e1a95a86d097f"
git-tree-sha1 = "d57affa9580f5e9fb44260e8f9366dc977f01a60"
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
version = "2.0.0"
version = "3.0.0"

[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Expand Down
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ Adapt = "0.4, 1.0, 2.0"
BinaryProvider = "0.5"
CEnum = "0.2, 0.3, 0.4"
GPUArrays = "2, 3, 4, 5"
GPUCompiler = "0.4, 0.5, 0.6"
LLVM = "2"
GPUCompiler = "0.7"
LLVM = "3"
Requires = "1"
Setfield = "0.5, 0.6, 0.7"
julia = "1.4"
Expand Down
59 changes: 30 additions & 29 deletions src/device/gcn/assertion.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,38 +37,39 @@ end

assert_counter = 0

@generated function rocassert_fail(::Val{msg}, ::Val{file}, ::Val{line}) where
{msg, file, line}
T_void = LLVM.VoidType(JuliaContext())
T_int32 = LLVM.Int32Type(JuliaContext())
T_pint8 = LLVM.PointerType(LLVM.Int8Type(JuliaContext()))
@generated function rocassert_fail(::Val{msg}, ::Val{file}, ::Val{line}) where {msg, file, line}
JuliaContext() do ctx
T_void = LLVM.VoidType(ctx)
T_int32 = LLVM.Int32Type(ctx)
T_pint8 = LLVM.PointerType(LLVM.Int8Type(ctx))

# create function
llvm_f, _ = create_function()
mod = LLVM.parent(llvm_f)
# create function
llvm_f, _ = create_function()
mod = LLVM.parent(llvm_f)

# generate IR
Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
position!(builder, entry)
global assert_counter
assert_counter += 1
message = globalstring_ptr!(builder, String(msg), "assert_message_$(assert_counter)")
file = globalstring_ptr!(builder, String(file), "assert_file_$(assert_counter)")
line = ConstantInt(T_int32, line)
func = globalstring_ptr!(builder, "unknown", "assert_function_$(assert_counter)")
charSize = ConstantInt(Csize_t(1), JuliaContext())
# generate IR
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
position!(builder, entry)
global assert_counter
assert_counter += 1
message = globalstring_ptr!(builder, String(msg), "assert_message_$(assert_counter)")
file = globalstring_ptr!(builder, String(file), "assert_file_$(assert_counter)")
line = ConstantInt(T_int32, line)
func = globalstring_ptr!(builder, "unknown", "assert_function_$(assert_counter)")
charSize = ConstantInt(Csize_t(1), ctx)

# invoke __assertfail and return
# TODO: mark noreturn since we don't use ptxas?
assertfail_typ =
LLVM.FunctionType(T_void,
[T_pint8, T_pint8, T_int32, T_pint8, llvmtype(charSize)])
assertfail = LLVM.Function(mod, "__assertfail", assertfail_typ)
call!(builder, assertfail, [message, file, line, func, charSize])
ret!(builder)
end
# invoke __assertfail and return
# TODO: mark noreturn since we don't use ptxas?
assertfail_typ =
LLVM.FunctionType(T_void,
[T_pint8, T_pint8, T_int32, T_pint8, llvmtype(charSize)])
assertfail = LLVM.Function(mod, "__assertfail", assertfail_typ)
call!(builder, assertfail, [message, file, line, func, charSize])
ret!(builder)
end

call_function(llvm_f, Nothing, Tuple{})
call_function(llvm_f, Nothing, Tuple{})
end
end

62 changes: 33 additions & 29 deletions src/device/gcn/atomics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,28 @@
# > that points to either the global address space or the shared address space.

@generated function llvm_atomic_op(::Val{binop}, ptr::DevicePtr{T,A}, val::T) where {binop, T, A}
T_val = convert(LLVMType, T)
T_ptr = convert(LLVMType, DevicePtr{T,A})
T_actual_ptr = LLVM.PointerType(T_val)
JuliaContext() do ctx
T_val = convert(LLVMType, T, ctx)
T_ptr = convert(LLVMType, DevicePtr{T,A}, ctx)
T_actual_ptr = LLVM.PointerType(T_val)

llvm_f, _ = create_function(T_val, [T_ptr, T_val])
llvm_f, _ = create_function(T_val, [T_ptr, T_val])

Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
position!(builder, entry)
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
position!(builder, entry)

actual_ptr = inttoptr!(builder, parameters(llvm_f)[1], T_actual_ptr)
actual_ptr = inttoptr!(builder, parameters(llvm_f)[1], T_actual_ptr)

rv = atomic_rmw!(builder, binop,
actual_ptr, parameters(llvm_f)[2],
atomic_acquire_release, #=single_threaded=# false)
rv = atomic_rmw!(builder, binop,
actual_ptr, parameters(llvm_f)[2],
atomic_acquire_release, #=single_threaded=# false)

ret!(builder, rv)
end
ret!(builder, rv)
end

call_function(llvm_f, T, Tuple{DevicePtr{T,A}, T}, :((ptr,val)))
call_function(llvm_f, T, Tuple{DevicePtr{T,A}, T}, :((ptr,val)))
end
end

const binops = Dict(
Expand Down Expand Up @@ -82,28 +84,30 @@ for T in (Int32, Int64, UInt32, UInt64)
end

@generated function llvm_atomic_cas(ptr::DevicePtr{T,A}, cmp::T, val::T) where {T, A}
T_val = convert(LLVMType, T)
T_ptr = convert(LLVMType, DevicePtr{T,A})
T_actual_ptr = LLVM.PointerType(T_val)
JuliaContext() do ctx
T_val = convert(LLVMType, T, ctx)
T_ptr = convert(LLVMType, DevicePtr{T,A}, ctx)
T_actual_ptr = LLVM.PointerType(T_val)

llvm_f, _ = create_function(T_val, [T_ptr, T_val, T_val])
llvm_f, _ = create_function(T_val, [T_ptr, T_val, T_val])

Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
position!(builder, entry)
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
position!(builder, entry)

actual_ptr = inttoptr!(builder, parameters(llvm_f)[1], T_actual_ptr)
actual_ptr = inttoptr!(builder, parameters(llvm_f)[1], T_actual_ptr)

res = atomic_cmpxchg!(builder, actual_ptr, parameters(llvm_f)[2],
parameters(llvm_f)[3], atomic_acquire_release, atomic_acquire,
#=single threaded=# false)
res = atomic_cmpxchg!(builder, actual_ptr, parameters(llvm_f)[2],
parameters(llvm_f)[3], atomic_acquire_release, atomic_acquire,
#=single threaded=# false)

rv = extract_value!(builder, res, 0)
rv = extract_value!(builder, res, 0)

ret!(builder, rv)
end
ret!(builder, rv)
end

call_function(llvm_f, T, Tuple{DevicePtr{T,A}, T, T}, :((ptr,cmp,val)))
call_function(llvm_f, T, Tuple{DevicePtr{T,A}, T, T}, :((ptr,cmp,val)))
end
end

for T in (Int32, Int64, UInt32, UInt64)
Expand Down
56 changes: 29 additions & 27 deletions src/device/gcn/execution_control.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,36 @@
const completion_signal_base = _packet_offsets[findfirst(x->x==:completion_signal,_packet_names)]

@generated function _completion_signal()
T_int8 = LLVM.Int8Type(JuliaContext())
T_int64 = LLVM.Int64Type(JuliaContext())
_as = convert(Int, AS.Constant)
T_ptr_i8 = LLVM.PointerType(T_int8, _as)
T_ptr_i64 = LLVM.PointerType(T_int64, _as)

# create function
llvm_f, _ = create_function(T_int64)
mod = LLVM.parent(llvm_f)

# generate IR
Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
position!(builder, entry)

# get the kernel dispatch pointer
intr_typ = LLVM.FunctionType(T_ptr_i8)
intr = LLVM.Function(mod, "llvm.amdgcn.dispatch.ptr", intr_typ)
ptr = call!(builder, intr)

# load the index
signal_ptr_i8 = inbounds_gep!(builder, ptr, [ConstantInt(completion_signal_base, JuliaContext())])
signal_ptr = bitcast!(builder, signal_ptr_i8, T_ptr_i64)
signal = load!(builder, signal_ptr)
ret!(builder, signal)
JuliaContext() do ctx
T_int8 = LLVM.Int8Type(ctx)
T_int64 = LLVM.Int64Type(ctx)
_as = convert(Int, AS.Constant)
T_ptr_i8 = LLVM.PointerType(T_int8, _as)
T_ptr_i64 = LLVM.PointerType(T_int64, _as)

# create function
llvm_f, _ = create_function(T_int64)
mod = LLVM.parent(llvm_f)

# generate IR
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
position!(builder, entry)

# get the kernel dispatch pointer
intr_typ = LLVM.FunctionType(T_ptr_i8)
intr = LLVM.Function(mod, "llvm.amdgcn.dispatch.ptr", intr_typ)
ptr = call!(builder, intr)

# load the index
signal_ptr_i8 = inbounds_gep!(builder, ptr, [ConstantInt(completion_signal_base, ctx)])
signal_ptr = bitcast!(builder, signal_ptr_i8, T_ptr_i64)
signal = load!(builder, signal_ptr)
ret!(builder, signal)
end

call_function(llvm_f, UInt64)
end

call_function(llvm_f, UInt64)
end

signal_completion(value::Int64) = device_signal_store!(_completion_signal(), value)
Expand Down
98 changes: 51 additions & 47 deletions src/device/gcn/hostcall.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,66 +45,70 @@ end
## device signal functions
# TODO: device_signal_load, device_signal_add!, etc.
@inline @generated function device_signal_store!(signal::UInt64, value::Int64)
T_nothing = convert(LLVMType, Nothing)
T_i32 = LLVM.Int32Type(JuliaContext())
T_i64 = LLVM.Int64Type(JuliaContext())
JuliaContext() do ctx
T_nothing = convert(LLVMType, Nothing, ctx)
T_i32 = LLVM.Int32Type(ctx)
T_i64 = LLVM.Int64Type(ctx)

# create a function
llvm_f, _ = create_function(T_nothing, [T_i64, T_i64])
mod = LLVM.parent(llvm_f)
# create a function
llvm_f, _ = create_function(T_nothing, [T_i64, T_i64])
mod = LLVM.parent(llvm_f)

# generate IR
Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
position!(builder, entry)
# generate IR
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
position!(builder, entry)

T_signal_store = LLVM.FunctionType(T_nothing, [T_i64, T_i64, T_i32])
signal_store = LLVM.Function(mod, "__ockl_hsa_signal_store", T_signal_store)
call!(builder, signal_store, [parameters(llvm_f)[1],
parameters(llvm_f)[2],
# __ATOMIC_RELEASE == 3
ConstantInt(Int32(3), JuliaContext())])
T_signal_store = LLVM.FunctionType(T_nothing, [T_i64, T_i64, T_i32])
signal_store = LLVM.Function(mod, "__ockl_hsa_signal_store", T_signal_store)
call!(builder, signal_store, [parameters(llvm_f)[1],
parameters(llvm_f)[2],
# __ATOMIC_RELEASE == 3
ConstantInt(Int32(3), ctx)])

ret!(builder)
end
ret!(builder)
end

call_function(llvm_f, Nothing, Tuple{UInt64,Int64}, :((signal,value)))
call_function(llvm_f, Nothing, Tuple{UInt64,Int64}, :((signal,value)))
end
end
@inline @generated function device_signal_wait(signal::UInt64, value::Int64)
T_nothing = convert(LLVMType, Nothing)
T_i32 = LLVM.Int32Type(JuliaContext())
T_i64 = LLVM.Int64Type(JuliaContext())
JuliaContext() do ctx
T_nothing = convert(LLVMType, Nothing, ctx)
T_i32 = LLVM.Int32Type(ctx)
T_i64 = LLVM.Int64Type(ctx)

# create a function
llvm_f, _ = create_function(T_nothing, [T_i64, T_i64])
mod = LLVM.parent(llvm_f)
# create a function
llvm_f, _ = create_function(T_nothing, [T_i64, T_i64])
mod = LLVM.parent(llvm_f)

# generate IR
Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
signal_match = BasicBlock(llvm_f, "signal_match", JuliaContext())
signal_miss = BasicBlock(llvm_f, "signal_miss", JuliaContext())
# generate IR
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
signal_match = BasicBlock(llvm_f, "signal_match", ctx)
signal_miss = BasicBlock(llvm_f, "signal_miss", ctx)

position!(builder, entry)
br!(builder, signal_miss)
position!(builder, entry)
br!(builder, signal_miss)

position!(builder, signal_miss)
T_sleep = LLVM.FunctionType(T_nothing, [T_i32])
sleep_f = LLVM.Function(mod, "llvm.amdgcn.s.sleep", T_sleep)
call!(builder, sleep_f, [ConstantInt(Int32(1), JuliaContext())])
T_signal_load = LLVM.FunctionType(T_i64, [T_i64, T_i32])
signal_load = LLVM.Function(mod, "__ockl_hsa_signal_load", T_signal_load)
loaded_value = call!(builder, signal_load, [parameters(llvm_f)[1],
# __ATOMIC_ACQUIRE == 2
ConstantInt(Int32(2), JuliaContext())])
cond = icmp!(builder, LLVM.API.LLVMIntEQ, loaded_value, parameters(llvm_f)[2])
br!(builder, cond, signal_match, signal_miss)
position!(builder, signal_miss)
T_sleep = LLVM.FunctionType(T_nothing, [T_i32])
sleep_f = LLVM.Function(mod, "llvm.amdgcn.s.sleep", T_sleep)
call!(builder, sleep_f, [ConstantInt(Int32(1), ctx)])
T_signal_load = LLVM.FunctionType(T_i64, [T_i64, T_i32])
signal_load = LLVM.Function(mod, "__ockl_hsa_signal_load", T_signal_load)
loaded_value = call!(builder, signal_load, [parameters(llvm_f)[1],
# __ATOMIC_ACQUIRE == 2
ConstantInt(Int32(2), ctx)])
cond = icmp!(builder, LLVM.API.LLVMIntEQ, loaded_value, parameters(llvm_f)[2])
br!(builder, cond, signal_match, signal_miss)

position!(builder, signal_match)
ret!(builder)
end
position!(builder, signal_match)
ret!(builder)
end

call_function(llvm_f, Nothing, Tuple{UInt64,Int64}, :((signal,value)))
call_function(llvm_f, Nothing, Tuple{UInt64,Int64}, :((signal,value)))
end
end
"Calls the host function stored in `hc` with arguments `args`."
@inline @generated function hostcall!(hc::HostCall{UInt64,RT,AT}, args...) where {RT,AT}
Expand Down
Loading

0 comments on commit 6810f0a

Please sign in to comment.