ForceInline ByteAddressBuffer operations in stdlib (#4003)
* ForceInline ByteAddressBuffer operations in stdlib

* fixup
sriramm-nv authored Apr 23, 2024
1 parent 22fbca5 commit 484c1e6
Showing 1 changed file with 38 additions and 13 deletions.
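
The diff below marks the ByteAddressBuffer load/store wrappers and the texture sampling entry points in hlsl.meta.slang with [ForceInline], and tidies a few duplicated or misplaced attributes along the way. In Slang, [ForceInline] tells the compiler to expand the function body at every call site during lowering, so these thin stdlib wrappers are not left behind as real calls in the generated target code. As a rough user-side sketch only (the buffer, struct, and entry-point names are illustrative and not part of this commit), the generic Load&lt;T&gt; touched here is typically used like this:

    ByteAddressBuffer gParticleData;

    struct Particle
    {
        float3 position;
        float3 velocity;
    };

    [shader("compute")]
    [numthreads(64, 1, 1)]
    void readParticles(uint3 tid : SV_DispatchThreadID)
    {
        // Load<T> is one of the wrappers this diff marks [ForceInline]; its body,
        // which just forwards to the __byteAddressBufferLoad intrinsic, is expanded
        // here instead of being emitted as a separate function. Stride is 24 bytes
        // (two float3 fields).
        Particle p = gParticleData.Load<Particle>(int(tid.x * 24));
        // ... use p ...
    }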
51 changes: 38 additions & 13 deletions source/slang/hlsl.meta.slang
@@ -145,6 +145,7 @@ struct ByteAddressBuffer
uint4 Load4(int location, out uint status);

[__readNone]
[ForceInline]
T Load<T>(int location)
{
return __byteAddressBufferLoad<T>(this, location);
@@ -325,8 +326,8 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format&gt;
__intrinsic_op($(kIROp_CombinedTextureSamplerGetSampler))
SamplerComparisonState __getComparisonSampler();

[ForceInline]
[__readNone]
[ForceInline]
[require(glsl_hlsl_spirv, texture_querylod)]
float CalculateLevelOfDetail(TextureCoord location)
{
@@ -346,8 +347,8 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format&gt;
}
}

[ForceInline]
[__readNone]
[ForceInline]
[require(glsl_hlsl_spirv, texture_querylod)]
float CalculateLevelOfDetailUnclamped(TextureCoord location)
{
@@ -368,6 +369,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format&gt;
}

[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(vector<float, Shape.dimensions+isArray> location)
{
@@ -417,6 +419,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format&gt;
}

[__readNone]
[ForceInline]
__glsl_extension(GL_ARB_sparse_texture_clamp)
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(vector<float, Shape.dimensions+isArray> location, vector<int, Shape.planeDimensions> offset, float clamp)
@@ -439,6 +442,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format&gt;
}

[__readNone]
[ForceInline]
__target_intrinsic(hlsl)
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(vector<float, Shape.dimensions+isArray> location, vector<int, Shape.planeDimensions> offset, float clamp, out uint status)
@@ -448,6 +452,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format&gt;
}

[__readNone]
[ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T SampleBias(vector<float, Shape.dimensions+isArray> location, float bias)
{
@@ -469,6 +474,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format&gt;
}

[__readNone]
[ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T SampleBias(vector<float, Shape.dimensions+isArray> location, float bias, constexpr vector<int, Shape.planeDimensions> offset)
{
@@ -599,6 +605,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format&gt;
}

[__readNone]
[ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY)
{
@@ -620,6 +627,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format&gt;
}

[__readNone]
[ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset)
{
@@ -639,8 +647,9 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format&gt;
}
}

__glsl_extension(GL_ARB_sparse_texture_clamp)
[__readNone]
[ForceInline]
__glsl_extension(GL_ARB_sparse_texture_clamp)
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset, float lodClamp)
{
@@ -785,6 +794,7 @@ __generic&lt;T:IFloat, Shape: __ITextureShape, let isArray:int, let isMS:int, let s
extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
{
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location)
{
@@ -837,6 +847,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}

[__readNone]
[ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset)
{
@@ -858,6 +869,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}

[__readNone]
[ForceInline]
__glsl_extension(GL_ARB_sparse_texture_clamp)
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset, float clamp)
@@ -880,15 +892,17 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}
}

[__readNone]
__target_intrinsic(hlsl)
[__readNone]
[ForceInline]
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset, float clamp, out uint status)
{
status = 0;
return Sample(s, location, offset, clamp);
}

[__readNone]
[ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T SampleBias(SamplerState s, vector<float, Shape.dimensions+isArray> location, float bias)
{
@@ -910,6 +924,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}

[__readNone]
[ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T SampleBias(SamplerState s, vector<float, Shape.dimensions+isArray> location, float bias, constexpr vector<int, Shape.planeDimensions> offset)
{
@@ -930,7 +945,8 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}
}

[__readNone] [ForceInline]
[__readNone]
[ForceInline]
[require(glsl_hlsl_spirv, texture_shadowlod)]
float SampleCmp(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue)
{
@@ -960,7 +976,8 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}
}

[__readNone] [ForceInline]
[__readNone]
[ForceInline]
[require(glsl_hlsl_spirv, texture_shadowlod)]
float SampleCmpLevelZero(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue)
{
@@ -987,7 +1004,8 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}
}

[__readNone] [ForceInline]
[__readNone]
[ForceInline]
[require(glsl_hlsl_spirv, texture_shadowlod)]
float SampleCmp(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue, constexpr vector<int, Shape.planeDimensions> offset)
{
@@ -1013,7 +1031,8 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}
}

[__readNone] [ForceInline]
[__readNone]
[ForceInline]
[require(glsl_hlsl_spirv, texture_shadowlod)]
float SampleCmpLevelZero(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue, constexpr vector<int, Shape.planeDimensions> offset)
{
@@ -1041,6 +1060,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}

[__readNone]
[ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY)
{
@@ -1062,6 +1082,7 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}

[__readNone]
[ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset)
{
@@ -1083,8 +1104,9 @@ extension __TextureImpl&lt;T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format&gt;
}
}

__glsl_extension(GL_ARB_sparse_texture_clamp)
[__readNone]
[ForceInline]
__glsl_extension(GL_ARB_sparse_texture_clamp)
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset, float lodClamp)
{
@@ -2813,7 +2835,6 @@ ${{{{
[__requiresNVAPI]
[ForceInline]
__cuda_sm_version(2.0)
[ForceInline]
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)]
void InterlockedAddF32(uint byteAddress, float valueToAdd)
{
@@ -2834,7 +2855,6 @@ ${{{{
// Int64 Add
[ForceInline]
__cuda_sm_version(6.0)
[ForceInline]
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)]
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue)
{
@@ -2858,15 +2878,13 @@ ${{{{
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)]
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd);

[ForceInline]
__specialized_for_target(hlsl)
[ForceInline]
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd)
{
__atomicAdd(this, byteAddress, __asuint2(valueToAdd));
}

[ForceInline]
__specialized_for_target(glsl)
__specialized_for_target(spirv)
[ForceInline]
@@ -2906,6 +2924,7 @@ ${{{{
uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value);

__specialized_for_target(hlsl)
[ForceInline]
uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicMax(this, byteAddress, __asuint2(value))); }

__specialized_for_target(glsl)
@@ -2965,6 +2984,7 @@ ${{{{
uint64_t InterlockedMinU64(uint byteAddress, uint64_t value);

__specialized_for_target(hlsl)
[ForceInline]
uint64_t InterlockedMinU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicMin(this, byteAddress, __asuint2(value))); }

__specialized_for_target(glsl)
@@ -3024,6 +3044,7 @@ ${{{{
uint64_t InterlockedAndU64(uint byteAddress, uint64_t value);

__specialized_for_target(hlsl)
[ForceInline]
uint64_t InterlockedAndU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicAnd(this, byteAddress, __asuint2(value))); }

__specialized_for_target(glsl)
@@ -3063,6 +3084,7 @@ ${{{{
uint64_t InterlockedOrU64(uint byteAddress, uint64_t value);

__specialized_for_target(hlsl)
[ForceInline]
uint64_t InterlockedOrU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicOr(this, byteAddress, __asuint2(value))); }

__specialized_for_target(glsl)
@@ -3102,6 +3124,7 @@ ${{{{
uint64_t InterlockedXorU64(uint byteAddress, uint64_t value);

__specialized_for_target(hlsl)
[ForceInline]
uint64_t InterlockedXorU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicXor(this, byteAddress, __asuint2(value))); }

__specialized_for_target(glsl)
@@ -3140,6 +3163,7 @@ ${{{{
uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value);

__specialized_for_target(hlsl)
[ForceInline]
uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicExchange(this, byteAddress, __asuint2(value))); }

__specialized_for_target(glsl)
@@ -3255,6 +3279,7 @@ ${{{{
return;
}
}

[ForceInline]
void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue)
{

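The second half of the diff above gives the same treatment to the 64-bit and NVAPI-backed atomic extensions on the byte-address buffer types (InterlockedAddF32, InterlockedAddI64, InterlockedMaxU64, and so on), and removes a few doubled [ForceInline] attributes in that section. A minimal sketch of how one of these overloads is called from shader code, assuming a target with 64-bit atomic support; the buffer and kernel names are illustrative, not taken from the commit:

    RWByteAddressBuffer gCounters;

    [shader("compute")]
    [numthreads(64, 1, 1)]
    void bumpCounter(uint3 tid : SV_DispatchThreadID)
    {
        // Matches the InterlockedAddI64 overload shown in the diff that returns
        // the original value through an out parameter; with [ForceInline] the
        // target-specialized body is expanded at this call site.
        int64_t previous;
        gCounters.InterlockedAddI64(0, 1, previous);
    }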