diff --git a/data/shader/ao/rtao.csh b/data/shader/ao/rtao.csh index 1140d13aa..1e5681ece 100644 --- a/data/shader/ao/rtao.csh +++ b/data/shader/ao/rtao.csh @@ -91,11 +91,13 @@ void main() { ray.hitID = -1; ray.hitDistance = 0.0; - //float hit = 1.0 - HitAnyShadow(ray, 0.0, uniforms.radius); - //ao += hit; - +#ifdef OPACITY_CHECK + float hit = 1.0 - HitAnyTransparency(ray, 0.0, uniforms.radius); + ao += hit; +#else bool hit = HitAny(ray, 0.0, uniforms.radius); ao += hit ? 1.0 : 0.0; +#endif } diff --git a/data/shader/pathtracer/rayHit.csh b/data/shader/pathtracer/rayHit.csh index 077e1f6ea..f0a84a668 100644 --- a/data/shader/pathtracer/rayHit.csh +++ b/data/shader/pathtracer/rayHit.csh @@ -252,7 +252,7 @@ float CheckVisibility(Surface surface, float lightDistance) { ray.direction = surface.L; ray.origin = surface.P + surface.N * EPSILON; ray.inverseDirection = 1.0 / ray.direction; - return HitAnyShadow(ray, 0.0, lightDistance - 2.0 * EPSILON); + return HitAnyTransparency(ray, 0.0, lightDistance - 2.0 * EPSILON); } else { return 0.0; diff --git a/data/shader/raytracer/buffers.hsh b/data/shader/raytracer/buffers.hsh index 1c9031cf7..140f956b6 100644 --- a/data/shader/raytracer/buffers.hsh +++ b/data/shader/raytracer/buffers.hsh @@ -28,6 +28,6 @@ layout(std430, set = 2, binding = 18) buffer RayBinOffsets { uint rayBinOffsets[]; }; -layout(std430, set = 2, binding = 21) buffer BvhInstances { - BVHInstance bvhInstances[]; +layout(std430, set = 2, binding = 21) buffer Instances { + Instance bvhInstances[]; }; \ No newline at end of file diff --git a/data/shader/raytracer/bvh.hsh b/data/shader/raytracer/bvh.hsh index d298e6202..45e48811f 100644 --- a/data/shader/raytracer/bvh.hsh +++ b/data/shader/raytracer/bvh.hsh @@ -1,4 +1,13 @@ -#extension GL_ARB_shader_ballot : require +#ifdef AE_HARDWARE_RAYTRACING +#extension GL_EXT_ray_tracing : enable +#extension GL_EXT_ray_query : enable + +layout(set = 2, binding = 23) uniform accelerationStructureEXT topLevelAS; + 
+layout(std430, set = 2, binding = 22) buffer GeometryTriangleOffsets { + uint geometryTriangleOffsets[]; +}; +#endif #include #include @@ -7,7 +16,7 @@ #include #define STACK_SIZE 32 -#define TLAS_INVALID 1000000 +#define TLAS_INVALID (STACK_SIZE + 2) struct PackedBVHNode { vec4 data0; @@ -37,6 +46,8 @@ layout (std430, set = 2, binding = 9) buffer BVHTriangles { PackedBVHTriangle bvhTriangles[]; }; +#ifndef AE_HARDWARE_RAYTRACING + layout(std430, set = 2, binding = 10) buffer BlasNodes { PackedBVHNode blasNodes[]; }; @@ -127,6 +138,38 @@ bool CheckLeaf(inout Ray ray, int nodePtr, float tmin, float tmax) { } +void CheckLeafClosestTransparency(inout Ray ray, int nodePtr, float tmin, float tmax) { + + int triPtr = ~nodePtr; + bool endOfNode = false; + + vec3 sol, v0, v1, v2, n; + + while (!endOfNode) { + Triangle tri = UnpackTriangle(triangles[triPtr]); + v0 = tri.v0.xyz; + v1 = tri.v1.xyz; + v2 = tri.v2.xyz; + endOfNode = tri.endOfNode; + float d = 0.0; +#ifdef BACKFACE_CULLING + n = cross(v0 - v1, v0 - v2); + d = dot(n, ray.direction); +#endif + bool intersect = IntersectTriangle(ray, v0, v1, v2, sol); + if (intersect && sol.x > tmin && sol.x < tmax && d <= 0.0 && sol.x < ray.hitDistance) { + float opacity = GetOpacity(tri, sol.yz, 0); + if (opacity > 0.0) { + ray.hitDistance = sol.x; + ray.hitID = triPtr; + ray.hitInstanceID = ray.currentInstanceID; + } + } + triPtr++; + } + +} + float CheckLeafTransparency(inout Ray ray, int nodePtr, float tmin, float tmax, float transparency) { int triPtr = ~nodePtr; @@ -163,7 +206,7 @@ void CheckInstance(inout Ray ray, inout int nodePtr) { int instancePtr = ~nodePtr; - BVHInstance instance = bvhInstances[instancePtr]; + Instance instance = bvhInstances[instancePtr]; // We don't normalize the direction in case there is a scale in the // matrix. 
In that case the normalization leads to wrong scales in closest hit distance @@ -257,6 +300,87 @@ void HitClosest(inout Ray ray, float tMin, float tMax) { } +void HitClosestTransparency(inout Ray ray, float tMin, float tMax) { + + uint stackPtr = 1u; + int nodePtr = 0; + uint threadID = gl_LocalInvocationIndex; + stack[0][threadID] = nodePtr; + + vec3 originalRayOrigin = ray.origin; + vec3 originalRayDirection = ray.direction; + + ray.hitDistance = tMax; + + if (isnan3(ray.direction)) + return; + + uint tlasIndex = TLAS_INVALID; + + while (stackPtr != 0u) { + if (tlasIndex == TLAS_INVALID || stackPtr < tlasIndex) { + if (tlasIndex != TLAS_INVALID) { + ray.origin = originalRayOrigin; + ray.direction = originalRayDirection; + ray.inverseDirection = 1.0 / ray.direction; + } + tlasIndex = TLAS_INVALID; + if (nodePtr < 0) { + tlasIndex = stackPtr; + + CheckInstance(ray, nodePtr); + } + else { + BVHNode node = UnpackNode(tlasNodes[nodePtr]); + + float hitL = 0.0, hitR = 0.0; + bool intersectL = IntersectAABB(ray, + node.leftAABB, tMin, ray.hitDistance, hitL); + bool intersectR = IntersectAABB(ray, + node.rightAABB, tMin, ray.hitDistance, hitR); + + bool noIntersection = !intersectL && !intersectR; + nodePtr = hitL <= hitR ? node.leftPtr : node.rightPtr; + nodePtr = noIntersection ? stack[--stackPtr][threadID] : nodePtr; + int stackIdx = hitL <= hitR ? node.rightPtr : node.leftPtr; + + if (intersectR && intersectL) { + stack[stackPtr++][threadID] = stackIdx; + } + } + } + else { + if(nodePtr < 0) { + CheckLeafClosestTransparency(ray, nodePtr, tMin, ray.hitDistance); + nodePtr = stack[--stackPtr][threadID]; + } + else { + BVHNode node = UnpackNode(blasNodes[nodePtr]); + + float hitL = 0.0, hitR = 0.0; + bool intersectL = IntersectAABB(ray, + node.leftAABB, tMin, ray.hitDistance, hitL); + bool intersectR = IntersectAABB(ray, + node.rightAABB, tMin, ray.hitDistance, hitR); + + bool noIntersection = !intersectL && !intersectR; + nodePtr = hitL <= hitR ? 
node.leftPtr : node.rightPtr; + nodePtr = noIntersection ? stack[--stackPtr][threadID] : nodePtr; + int stackIdx = hitL <= hitR ? node.rightPtr : node.leftPtr; + + if (intersectR && intersectL) { + stack[stackPtr++][threadID] = stackIdx; + } + } + } + } + + ray.origin = originalRayOrigin; + ray.direction = originalRayDirection; + ray.inverseDirection = 1.0 / ray.direction; + +} + bool HitAny(inout Ray ray, float tMin, float tMax) { if (isnan3(ray.direction)) @@ -339,7 +463,7 @@ bool HitAny(inout Ray ray, float tMin, float tMax) { } -float HitAnyShadow(inout Ray ray, float tMin, float tMax) { +float HitAnyTransparency(inout Ray ray, float tMin, float tMax) { if (isnan3(ray.direction)) return 1.0; @@ -420,4 +544,145 @@ float HitAnyShadow(inout Ray ray, float tMin, float tMax) { return transparency; -} \ No newline at end of file +} + +#else + +void HitClosest(inout Ray ray, float tMin, float tMax) { + + ray.hitDistance = tMax; + + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, topLevelAS, gl_RayFlagsNoneEXT, 0xFF, + ray.origin, tMin, ray.direction, tMax); + + // Start traversal: return false if traversal is complete + while(rayQueryProceedEXT(rayQuery)) { + if (rayQueryGetIntersectionTypeEXT(rayQuery, false) == gl_RayQueryCandidateIntersectionTriangleEXT) { + rayQueryConfirmIntersectionEXT(rayQuery); + } + } + + // Returns type of committed (true) intersection + if(rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionNoneEXT) { + ray.hitDistance = rayQueryGetIntersectionTEXT(rayQuery, true); + ray.hitInstanceID = rayQueryGetIntersectionInstanceIdEXT(rayQuery, true); + + int geometryOffset = rayQueryGetIntersectionGeometryIndexEXT(rayQuery, true); + int idx = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true) + geometryOffset; + + int triangleOffset = int(geometryTriangleOffsets[idx]); + ray.hitID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true) + triangleOffset; + } +} + +bool HitAny(inout Ray ray, float 
tMin, float tMax) { + + ray.hitDistance = tMax; + + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, topLevelAS, gl_RayFlagsTerminateOnFirstHitEXT, 0xFF, + ray.origin, tMin, ray.direction, tMax); + + // Start traversal: return false if traversal is complete + while(rayQueryProceedEXT(rayQuery)) { + if (rayQueryGetIntersectionTypeEXT(rayQuery, false) == gl_RayQueryCandidateIntersectionTriangleEXT) { + rayQueryConfirmIntersectionEXT(rayQuery); + } + } + + // Returns type of committed (true) intersection + if(rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionNoneEXT) + { + return true; + } + + return false; + +} + +void HitClosestTransparency(inout Ray ray, float tMin, float tMax) { + + ray.hitDistance = tMax; + + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, topLevelAS, gl_RayFlagsNoneEXT, 0xFF, + ray.origin, tMin, ray.direction, tMax); + + // Start traversal: return false if traversal is complete + bool proceed = true; + while(rayQueryProceedEXT(rayQuery)) { + + if (rayQueryGetIntersectionTypeEXT(rayQuery, false) == gl_RayQueryCandidateIntersectionTriangleEXT) { + int geometryOffset = rayQueryGetIntersectionGeometryIndexEXT(rayQuery, false); + int idx = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, false) + geometryOffset; + + int triangleOffset = int(geometryTriangleOffsets[idx]); + int hitID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, false) + triangleOffset; + + vec2 barrycentric = rayQueryGetIntersectionBarycentricsEXT(rayQuery, false); + + Triangle tri = UnpackTriangle(triangles[hitID]); + + if (GetOpacity(tri, barrycentric, 0) > 0.0) { + rayQueryConfirmIntersectionEXT(rayQuery); + } + + } + + } + + if (rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionNoneEXT) { + ray.hitDistance = rayQueryGetIntersectionTEXT(rayQuery, true); + ray.hitInstanceID = rayQueryGetIntersectionInstanceIdEXT(rayQuery, true); + + int geometryOffset = 
rayQueryGetIntersectionGeometryIndexEXT(rayQuery, true); + int idx = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true) + geometryOffset; + + int triangleOffset = int(geometryTriangleOffsets[idx]); + ray.hitID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true) + triangleOffset; + } + +} + +float HitAnyTransparency(inout Ray ray, float tMin, float tMax) { + + float transparency = 1.0; + + ray.hitDistance = tMax; + + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, topLevelAS, gl_RayFlagsTerminateOnFirstHitEXT, 0xFF, + ray.origin, tMin, ray.direction, tMax); + + while(rayQueryProceedEXT(rayQuery)) { + + if (rayQueryGetIntersectionTypeEXT(rayQuery, false) == gl_RayQueryCandidateIntersectionTriangleEXT) { + int geometryOffset = rayQueryGetIntersectionGeometryIndexEXT(rayQuery, false); + int idx = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, false) + geometryOffset; + + int triangleOffset = int(geometryTriangleOffsets[idx]); + int hitID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, false) + triangleOffset; + + vec2 barrycentric = rayQueryGetIntersectionBarycentricsEXT(rayQuery, false); + + Triangle tri = UnpackTriangle(triangles[hitID]); + transparency *= (1.0 - GetOpacity(tri, barrycentric, 0)); + + if (transparency < 0.001) { + rayQueryTerminateEXT(rayQuery); + return 0.0; + } + + } + + } + + if (rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionNoneEXT) { + return 0.0; + } + + return transparency; + +} +#endif \ No newline at end of file diff --git a/data/shader/raytracer/common.hsh b/data/shader/raytracer/common.hsh index e7e0b2277..68dc288ff 100644 --- a/data/shader/raytracer/common.hsh +++ b/data/shader/raytracer/common.hsh @@ -95,18 +95,19 @@ Light UnpackLight(PackedLight compressed) { Light light; - light.P = compressed.data0.xyz; - light.N = vec3(compressed.N); + light.P = compressed.P.xyz; + light.N = compressed.N.xyz; - light.radiance = vec3(compressed.data0.w, - compressed.data1.w, 
compressed.N.w); + light.radiance = compressed.color.rgb; - uint data = floatBitsToUint(compressed.data1.x); + uint data = floatBitsToUint(compressed.data.x); light.type = ((data & 0xF0000000u) >> 28u); - light.idx = (data & 0x0FFFFFFFu); - light.pdf = compressed.data1.y; - light.area = compressed.data1.z; + light.triangleIdx = int(data & 0x0FFFFFFFu); + light.instanceIdx = floatBitsToInt(compressed.data.w); + + light.pdf = compressed.data.y; + light.area = compressed.data.z; light.brightness = dot(light.radiance, vec3(0.33333)); return light; diff --git a/data/shader/raytracer/direct.hsh b/data/shader/raytracer/direct.hsh index 0a9d36ae2..f407788d7 100644 --- a/data/shader/raytracer/direct.hsh +++ b/data/shader/raytracer/direct.hsh @@ -1,5 +1,6 @@ #include #include +#include #include <../common/random.hsh> int GetLightCount() { @@ -56,7 +57,9 @@ void SampleLight(Light light, inout Surface surface, float seed0, float seed1, float r1 = random(seed0, seed1); if (light.type == uint(TRIANGLE_LIGHT)) { - Triangle tri = UnpackTriangle(triangles[light.idx]); + Instance instance = bvhInstances[light.instanceIdx]; + Triangle tri = UnpackTriangle(triangles[light.triangleIdx + instance.triangleOffset]); + TransformTriangle(tri, instance); lightSample = SampleTriangleLight(light, tri, surface, r0, r1); } diff --git a/data/shader/raytracer/structures.hsh b/data/shader/raytracer/structures.hsh index 2f5b3ede7..607db57fb 100644 --- a/data/shader/raytracer/structures.hsh +++ b/data/shader/raytracer/structures.hsh @@ -130,9 +130,10 @@ struct RaytraceMaterial { }; struct PackedLight { - vec4 data0; - vec4 data1; + vec4 P; vec4 N; + vec4 color; + vec4 data; }; struct Light { @@ -142,7 +143,8 @@ struct Light { vec3 radiance; uint type; - uint idx; + int triangleIdx; + int instanceIdx; float pdf; float area; @@ -156,14 +158,14 @@ struct LightSample { float pdf; }; -struct BVHInstance { +struct Instance { mat3x4 inverseMatrix; int blasOffset; - + int triangleOffset; + int padding0; int 
padding1; - int padding2; }; layout(push_constant) uniform constants { diff --git a/data/shader/raytracer/surface.hsh b/data/shader/raytracer/surface.hsh index bbef719d5..4c8003124 100644 --- a/data/shader/raytracer/surface.hsh +++ b/data/shader/raytracer/surface.hsh @@ -27,7 +27,7 @@ Material GetTriangleMaterial(Triangle tri, out RaytraceMaterial rayMat) { return mat; } -void TransformTriangle(inout Triangle tri, BVHInstance instance) { +void TransformTriangle(inout Triangle tri, Instance instance) { mat4 matrix = inverse(mat4(transpose(instance.inverseMatrix))); @@ -50,7 +50,7 @@ Surface GetSurfaceParameters(Triangle tri, Ray ray, bool useNormalMaps, out bool RaytraceMaterial rayMat; Material mat = GetTriangleMaterial(tri, rayMat); - BVHInstance instance = bvhInstances[ray.hitInstanceID]; + Instance instance = bvhInstances[ray.hitInstanceID]; TransformTriangle(tri, instance); // The ray doesn't provide us with the barrycentric coordinates diff --git a/data/shader/raytracer/traceClosest.csh b/data/shader/raytracer/traceClosest.csh index cf166fa13..7388a1d63 100644 --- a/data/shader/raytracer/traceClosest.csh +++ b/data/shader/raytracer/traceClosest.csh @@ -20,7 +20,11 @@ void main() { ray.hitID = -1; ray.hitDistance = 0.0; // Find any triangle in the BVH +#ifdef OPACITY_CHECK + HitClosestTransparency(ray, offset, INF); +#else HitClosest(ray, offset, INF); +#endif uint writeOffset = PushConstants.rayBufferOffset * PushConstants.rayBufferSize; rays[index + writeOffset] = PackRay(ray); diff --git a/data/shader/reflection/rtreflection.csh b/data/shader/reflection/rtreflection.csh index aa9d6ee10..b629f8fb2 100644 --- a/data/shader/reflection/rtreflection.csh +++ b/data/shader/reflection/rtreflection.csh @@ -51,7 +51,7 @@ layout(std140, set = 3, binding = 9) uniform UniformBuffer { vec3 EvaluateHit(inout Ray ray); vec3 EvaluateDirectLight(inout Surface surface); -bool CheckVisibility(Surface surface, float lightDistance); +float CheckVisibility(Surface surface, float 
lightDistance); void main() { @@ -127,7 +127,12 @@ void main() { vec3 radiance = vec3(0.0); if (material.roughness < 0.9) { - HitClosest(ray, 0.0, INF); +#ifdef OPACITY_CHECK + HitClosestTransparency(ray, 10e-9, INF); +#else + HitClosest(ray, 10e-9, INF); +#endif + radiance = EvaluateHit(ray); } else { @@ -211,24 +216,24 @@ vec3 EvaluateDirectLight(inout Surface surface) { radiance *= CalculateShadowWorldSpace(uniforms.shadow, cascadeMaps, surface.P, surface.geometryNormal, saturate(dot(surface.L, surface.geometryNormal))); #else - radiance *= CheckVisibility(surface, lightDistance) ? 1.0 : 0.0; + radiance *= CheckVisibility(surface, lightDistance); #endif return reflectance * radiance * surface.NdotL / lightPdf; } -bool CheckVisibility(Surface surface, float lightDistance) { +float CheckVisibility(Surface surface, float lightDistance) { if (surface.NdotL > 0.0) { Ray ray; ray.direction = surface.L; - ray.origin = surface.P + surface.N * 2.0 * EPSILON; + ray.origin = surface.P + surface.N * EPSILON; ray.inverseDirection = 1.0 / ray.direction; - return HitAny(ray, 0.0, lightDistance - 4.0 * EPSILON) == false; + return HitAnyTransparency(ray, 0.0, lightDistance - 2.0 * EPSILON); } else { - return false; + return 0.0; } } \ No newline at end of file diff --git a/src/demo/App.cpp b/src/demo/App.cpp index 2db9f125a..d3878d886 100644 --- a/src/demo/App.cpp +++ b/src/demo/App.cpp @@ -352,6 +352,7 @@ void App::Render(float deltaTime) { ImGui::Checkbox("Visualize probes##DDGI", &volume->debug); ImGui::Checkbox("Sample emissives##DDGI", &volume->sampleEmissives); ImGui::Checkbox("Use shadow map##DDGI", &volume->useShadowMap); + ImGui::Checkbox("Opacity check##DDGI", &volume->opacityCheck); if (ImGui::IsItemHovered(ImGuiHoveredFlags_AllowWhenDisabled)) { ImGui::SetTooltip("Uses the shadow map to calculate shadows in reflections. 
\ This is only possible when cascaded shadow maps are not used."); @@ -436,6 +437,7 @@ void App::Render(float deltaTime) { ImGui::Checkbox("Debug##Ao", &debugAo); ImGui::Checkbox("Enable ambient occlusion##Ao", &ao->enable); ImGui::Checkbox("Enable raytracing (preview)##Ao", &ao->rt); + ImGui::Checkbox("Opacity check##Ao", &ao->opacityCheck); ImGui::SliderFloat("Radius##Ao", &ao->radius, 0.0f, 10.0f); ImGui::SliderFloat("Strength##Ao", &ao->strength, 0.0f, 20.0f, "%.3f", ImGuiSliderFlags_Logarithmic); //ImGui::SliderInt("Sample count##Ao", &ao->s, 0.0f, 20.0f, "%.3f", 2.0f); @@ -450,6 +452,7 @@ void App::Render(float deltaTime) { This is only possible when cascaded shadow maps are not used."); } ImGui::Checkbox("Enable GI in reflection", &reflection->gi); + ImGui::Checkbox("Opacity check##Reflection", &reflection->opacityCheck); // ImGui::SliderInt("Sample count", &reflection->sampleCount, 1, 32); ImGui::SliderFloat("Radiance Limit##Reflection", &reflection->radianceLimit, 0.0f, 10.0f); ImGui::SliderFloat("Bias##Reflection", &reflection->bias, 0.0f, 1.0f); @@ -787,7 +790,7 @@ bool App::LoadScene() { "sponza/sponza.obj", ModelLoader::LoadMesh, false, transform, 2048 ); meshes.push_back(mesh); - + transform = glm::scale(glm::mat4(1.0f), glm::vec3(1.f)); mesh = Atlas::ResourceManager::GetResourceWithLoaderAsync( "metallicwall.gltf", ModelLoader::LoadMesh, false, transform, 2048 @@ -799,6 +802,7 @@ bool App::LoadScene() { "chromesphere.gltf", ModelLoader::LoadMesh, false, transform, 2048 ); meshes.push_back(mesh); + // Other scene related settings apart from the mesh directionalLight->direction = glm::vec3(0.0f, -1.0f, 0.33f); @@ -977,8 +981,6 @@ bool App::LoadScene() { ); meshes.push_back(mesh); - sky = Atlas::Texture::Cubemap("environment.hdr", 2048); - // Other scene related settings apart from the mesh directionalLight->direction = glm::vec3(0.0f, -1.0f, 0.33f); directionalLight->intensity = 100.0f; @@ -1091,7 +1093,7 @@ void App::CheckLoadScene() { for (auto& mesh 
: meshes) { mesh->invertUVs = true; - mesh->cullBackFaces = false; + mesh->cullBackFaces = true; } if (sceneSelection == CORNELL) { diff --git a/src/engine/buffer/IndexBuffer.cpp b/src/engine/buffer/IndexBuffer.cpp index 1ccd7f2d1..ef7e44465 100644 --- a/src/engine/buffer/IndexBuffer.cpp +++ b/src/engine/buffer/IndexBuffer.cpp @@ -49,11 +49,17 @@ namespace Atlas { auto sizeInBytes = elementCount * elementSize; Graphics::BufferDesc desc { - .usageFlags = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .usageFlags = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT + | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, .domain = Graphics::BufferDomain::Device, .data = data, .size = sizeInBytes }; + + if (device->support.hardwareRayTracing) { + desc.usageFlags |= VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR; + } + buffer = device->CreateBuffer(desc); } diff --git a/src/engine/buffer/VertexBuffer.cpp b/src/engine/buffer/VertexBuffer.cpp index 0f4a8c48e..c425a83c6 100644 --- a/src/engine/buffer/VertexBuffer.cpp +++ b/src/engine/buffer/VertexBuffer.cpp @@ -46,11 +46,17 @@ namespace Atlas { auto sizeInBytes = elementCount * elementSize; Graphics::BufferDesc desc { - .usageFlags = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .usageFlags = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT + | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, .domain = Graphics::BufferDomain::Device, .data = data, .size = sizeInBytes }; + + if (device->support.hardwareRayTracing) { + desc.usageFlags |= VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR; + } + buffer = device->CreateBuffer(desc); } diff --git a/src/engine/graphics/ASBuilder.cpp b/src/engine/graphics/ASBuilder.cpp new file mode 100644 index 000000000..beea37542 --- /dev/null +++ b/src/engine/graphics/ASBuilder.cpp @@ -0,0 +1,248 @@ +#include "ASBuilder.h" + +#include "GraphicsDevice.h" + +namespace Atlas { + + 
namespace Graphics { + + BLASDesc ASBuilder::GetBLASDescForTriangleGeometry(Ref vertexBuffer, Ref indexBuffer, + size_t vertexCount, size_t vertexSize, size_t indexSize, std::vector regions) { + + VkDeviceAddress vertexAddress = vertexBuffer->GetDeviceAddress(); + VkDeviceAddress indexAddress = indexBuffer->GetDeviceAddress(); + + VkAccelerationStructureGeometryTrianglesDataKHR trianglesData = {}; + trianglesData.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR; + // Vertex data + trianglesData.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT; + trianglesData.vertexData.deviceAddress = vertexAddress; + trianglesData.vertexStride = vertexSize; + trianglesData.maxVertex = vertexCount; + // Index data + trianglesData.indexType = indexSize == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; + trianglesData.indexData.deviceAddress = indexAddress; + + BLASDesc desc; + + for (auto& region : regions) { + VkAccelerationStructureGeometryKHR geometry = {}; + geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + geometry.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR; + geometry.flags = region.opaque ? 
VK_GEOMETRY_OPAQUE_BIT_KHR + : VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR; + geometry.geometry.triangles = trianglesData; + + uint32_t triangleCount = uint32_t(region.indexCount / 3); + + VkAccelerationStructureBuildRangeInfoKHR buildRange; + buildRange.firstVertex = 0; + buildRange.primitiveCount = triangleCount; + buildRange.primitiveOffset = uint32_t(region.indexOffset * indexSize); + buildRange.transformOffset = 0; + + desc.geometries.push_back(geometry); + desc.buildRanges.push_back(buildRange); + } + + return desc; + + } + + void ASBuilder::BuildBLAS(std::vector> &blases) { + + auto device = GraphicsDevice::DefaultDevice; + + size_t maxScratchSize = 0; + size_t compactionCount = 0; + + for(size_t i = 0; i < blases.size(); i++) { + maxScratchSize = std::max(maxScratchSize, size_t(blases[i]->sizesInfo.buildScratchSize)); + compactionCount += (blases[i]->buildGeometryInfo.flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) ? 1 : 0; + } + + auto scratchBufferDesc = BufferDesc { + .usageFlags = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .domain = BufferDomain::Device, + .size = maxScratchSize, + .alignment = device->accelerationStructureProperties.minAccelerationStructureScratchOffsetAlignment + }; + auto scratchBuffer = device->CreateBuffer(scratchBufferDesc); + + Ref queryPool = nullptr; + if (compactionCount == blases.size()) { + auto queryPoolDesc = QueryPoolDesc{ + .queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, + .queryCount = uint32_t(compactionCount) + }; + queryPool = device->CreateQueryPool(queryPoolDesc); + } + + size_t batchSize = 0; + size_t batchSizeLimit = 256000000; + + std::vector batchIndices; + for (size_t i = 0; i < blases.size(); i++) { + + batchIndices.push_back(uint32_t(i)); + batchSize += blases[i]->sizesInfo.accelerationStructureSize; + + if (batchSize >= batchSizeLimit || i == blases.size() - 1) { + + if (queryPool) { + queryPool->Reset(); + } + + 
BuildBLASBatch(batchIndices, blases, scratchBuffer, queryPool); + + if (queryPool) { + CompactBLASBatch(batchIndices, blases, queryPool); + } + + batchIndices.clear(); + batchSize = 0; + + } + + } + + } + + Ref ASBuilder::BuildTLAS(Ref &tlas, + std::vector &instances) { + + auto device = GraphicsDevice::DefaultDevice; + + auto commandList = device->GetCommandList(GraphicsQueue); + + commandList->BeginCommands(); + + BufferDesc desc = { + .usageFlags = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR + | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + .domain = BufferDomain::Host, + .data = instances.data(), + .size = sizeof(VkAccelerationStructureInstanceKHR) * instances.size(), + }; + auto instanceBuffer = device->CreateBuffer(desc); + + tlas->Allocate(instanceBuffer->GetDeviceAddress(), uint32_t(instances.size()), false); + + commandList->MemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); + + auto scratchBufferDesc = BufferDesc { + .usageFlags = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .domain = BufferDomain::Device, + .size = tlas->sizesInfo.buildScratchSize, + .alignment = device->accelerationStructureProperties.minAccelerationStructureScratchOffsetAlignment + }; + auto scratchBuffer = device->CreateBuffer(scratchBufferDesc); + + auto buildInfo = tlas->buildGeometryInfo; + buildInfo.srcAccelerationStructure = VK_NULL_HANDLE; + buildInfo.dstAccelerationStructure = tlas->accelerationStructure; + buildInfo.scratchData.deviceAddress = scratchBuffer->GetDeviceAddress(); + + commandList->BuildTLAS(tlas, buildInfo); + + commandList->EndCommands(); + + device->SubmitCommandList(commandList); + + return instanceBuffer; + + } + + void ASBuilder::BuildBLASBatch(const std::vector &batchIndices, + std::vector> &blases, Ref& scratchBuffer, Ref& queryPool) { + + auto device = 
GraphicsDevice::DefaultDevice; + + auto commandList = device->GetCommandList(GraphicsQueue, true); + + commandList->BeginCommands(); + + VkDeviceAddress scratchAddress = scratchBuffer->GetDeviceAddress(); + + uint32_t poolCounter = 0; + for (const auto idx : batchIndices) { + auto& blas = blases[idx]; + + blas->Allocate(); + + auto buildInfo = blas->buildGeometryInfo; + buildInfo.dstAccelerationStructure = blas->accelerationStructure; + buildInfo.scratchData.deviceAddress = scratchAddress; + + commandList->BuildBLAS(blas, buildInfo); + + commandList->MemoryBarrier(VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, + VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); + + if (queryPool) { + vkCmdWriteAccelerationStructuresPropertiesKHR(commandList->commandBuffer, 1, + &blas->accelerationStructure, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, + queryPool->pool, poolCounter++); + } + } + + commandList->EndCommands(); + + device->FlushCommandList(commandList); + + } + + void ASBuilder::CompactBLASBatch(const std::vector& batchIndices, + std::vector>& blases, Ref& queryPool) { + + auto device = GraphicsDevice::DefaultDevice; + + auto commandList = device->GetCommandList(GraphicsQueue, true); + + commandList->BeginCommands(); + + std::vector compactSizes(batchIndices.size()); + queryPool->GetResult(0, uint32_t(batchIndices.size()), batchIndices.size() * sizeof(size_t), + compactSizes.data(), sizeof(size_t), VK_QUERY_RESULT_WAIT_BIT); + + uint32_t poolCounter = 0; + for (const auto idx : batchIndices) { + auto& blas = blases[idx]; + auto compactedSize = compactSizes[poolCounter++]; + + BLASDesc desc; + desc.geometries = blas->geometries; + desc.buildRanges = blas->buildRanges; + desc.flags = blas->flags; + + auto compactedBlas = device->CreateBLAS(desc); + + compactedBlas->sizesInfo.accelerationStructureSize = compactedSize; + + 
compactedBlas->Allocate(); + + // Copy generated BLAS into compacted one + VkCopyAccelerationStructureInfoKHR copyInfo = {}; + copyInfo.sType = VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR; + copyInfo.src = blas->accelerationStructure; + copyInfo.dst = compactedBlas->accelerationStructure; + copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR; + + vkCmdCopyAccelerationStructureKHR(commandList->commandBuffer, &copyInfo); + + blases[idx] = compactedBlas; + + } + + commandList->EndCommands(); + + device->FlushCommandList(commandList); + + } + + } + +} \ No newline at end of file diff --git a/src/engine/graphics/ASBuilder.h b/src/engine/graphics/ASBuilder.h new file mode 100644 index 000000000..862e9a4f3 --- /dev/null +++ b/src/engine/graphics/ASBuilder.h @@ -0,0 +1,46 @@ +#ifndef GRAPHICSASBUILDER_H +#define GRAPHICSASBUILDER_H + +#include "Common.h" + +#include "BLAS.h" +#include "TLAS.h" +#include "QueryPool.h" + +namespace Atlas { + + namespace Graphics { + + struct ASGeometryRegion { + size_t indexCount = 0; + size_t indexOffset; + + bool opaque = true; + }; + + class ASBuilder { + + public: + ASBuilder() = default; + + BLASDesc GetBLASDescForTriangleGeometry(Ref vertexBuffer, Ref indexBuffer, + size_t vertexCount, size_t vertexSize, size_t indexSize, std::vector regions); + + void BuildBLAS(std::vector>& blases); + + Ref BuildTLAS(Ref& tlas, std::vector& instances); + + private: + void BuildBLASBatch(const std::vector& batchIndices, + std::vector>& blases, Ref& scratchBuffer, Ref& queryPool); + + void CompactBLASBatch(const std::vector& batchIndices, + std::vector>& blases, Ref& queryPool); + + }; + + } + +} + +#endif \ No newline at end of file diff --git a/src/engine/graphics/BLAS.cpp b/src/engine/graphics/BLAS.cpp new file mode 100644 index 000000000..c5fdd9aa7 --- /dev/null +++ b/src/engine/graphics/BLAS.cpp @@ -0,0 +1,69 @@ +#include "BLAS.h" + +#include "GraphicsDevice.h" + +namespace Atlas { + + namespace Graphics { + + 
BLAS::BLAS(GraphicsDevice* device, BLASDesc desc) : device(device), geometries(desc.geometries), + buildRanges(desc.buildRanges), flags(desc.flags) { + + buildGeometryInfo = {}; + buildGeometryInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + buildGeometryInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + buildGeometryInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + buildGeometryInfo.flags = desc.flags | VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR; + buildGeometryInfo.geometryCount = uint32_t(geometries.size()); + buildGeometryInfo.pGeometries = geometries.data(); + + sizesInfo = {}; + sizesInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + + rangeInfo = buildRanges.data(); + + std::vector maxPrimitivesCount; + for (auto& range : buildRanges) { + maxPrimitivesCount.push_back(range.primitiveCount); + } + + vkGetAccelerationStructureBuildSizesKHR(device->device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, + &buildGeometryInfo, maxPrimitivesCount.data(), &sizesInfo); + + } + + BLAS::~BLAS() { + + vkDestroyAccelerationStructureKHR(device->device, accelerationStructure, nullptr); + + } + + void BLAS::Allocate() { + + BufferDesc desc = { + .usageFlags = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR + | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + .domain = BufferDomain::Device, + .size = sizesInfo.accelerationStructureSize + }; + buffer = device->CreateBuffer(desc); + + VkAccelerationStructureCreateInfoKHR createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR; + createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + createInfo.size = sizesInfo.accelerationStructureSize; + createInfo.buffer = buffer->buffer; + + VK_CHECK(vkCreateAccelerationStructureKHR(device->device, &createInfo, nullptr, &accelerationStructure)) + + } + + VkDeviceAddress BLAS::GetDeviceAddress() { + + return buffer->GetDeviceAddress(); + + 
} + + } + +} \ No newline at end of file diff --git a/src/engine/graphics/BLAS.h b/src/engine/graphics/BLAS.h new file mode 100644 index 000000000..a8257e05f --- /dev/null +++ b/src/engine/graphics/BLAS.h @@ -0,0 +1,59 @@ +#ifndef AE_GRAPHICSBLAS_H +#define AE_GRAPHICSBLAS_H + +#include "Common.h" +#include "Buffer.h" + +#include + +namespace Atlas { + + namespace Graphics { + + class GraphicsDevice; + class MemoryManager; + class ASBuilder; + + struct BLASDesc { + VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + + std::vector geometries; + std::vector buildRanges; + }; + + class BLAS { + + friend GraphicsDevice; + friend ASBuilder; + + public: + BLAS(GraphicsDevice* device, BLASDesc desc); + + ~BLAS(); + + void Allocate(); + + VkDeviceAddress GetDeviceAddress(); + + VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo; + VkAccelerationStructureBuildSizesInfoKHR sizesInfo; + VkAccelerationStructureBuildRangeInfoKHR* rangeInfo; + + Ref buffer; + VkAccelerationStructureKHR accelerationStructure = VK_NULL_HANDLE; + + private: + GraphicsDevice* device; + + std::vector geometries; + std::vector buildRanges; + + VkBuildAccelerationStructureFlagsKHR flags; + + }; + + } + +} + +#endif \ No newline at end of file diff --git a/src/engine/graphics/Buffer.cpp b/src/engine/graphics/Buffer.cpp index 5eae8b8d3..2d7f2489b 100644 --- a/src/engine/graphics/Buffer.cpp +++ b/src/engine/graphics/Buffer.cpp @@ -8,7 +8,7 @@ namespace Atlas { namespace Graphics { Buffer::Buffer(GraphicsDevice *device, const BufferDesc& desc) : usageFlags(desc.usageFlags), - domain(desc.domain), size(desc.size), memoryManager(device->memoryManager) { + domain(desc.domain), size(desc.size), alignment(desc.alignment), memoryManager(device->memoryManager) { VkBufferCreateInfo bufferInfo = {}; bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; @@ -26,8 +26,14 @@ namespace Atlas { allocationCreateInfo.usage = 
VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; } - VK_CHECK(vmaCreateBuffer(memoryManager->allocator, &bufferInfo, - &allocationCreateInfo, &buffer, &allocation, nullptr)) + if (alignment == 0) { + VK_CHECK(vmaCreateBuffer(memoryManager->allocator, &bufferInfo, + &allocationCreateInfo, &buffer, &allocation, nullptr)) + } + else { + VK_CHECK(vmaCreateBufferWithAlignment(memoryManager->allocator, &bufferInfo, + &allocationCreateInfo, alignment, &buffer, &allocation, nullptr)) + } if (desc.data) SetData(desc.data, 0, desc.size); @@ -80,11 +86,21 @@ namespace Atlas { } + VkDeviceAddress Buffer::GetDeviceAddress() { + + VkBufferDeviceAddressInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; + info.buffer = buffer; + + return vkGetBufferDeviceAddress(memoryManager->device->device, &info); + + } + size_t Buffer::GetAlignedSize(size_t size) { auto device = GraphicsDevice::DefaultDevice; - size_t minUboAlignment = device->deviceProperties.limits.minUniformBufferOffsetAlignment; + size_t minUboAlignment = device->deviceProperties.properties.limits.minUniformBufferOffsetAlignment; size_t alignedSize = size; if (minUboAlignment > 0) { alignedSize = (alignedSize + minUboAlignment - 1) & ~(minUboAlignment - 1); diff --git a/src/engine/graphics/Buffer.h b/src/engine/graphics/Buffer.h index 239731fe8..8749cadc6 100644 --- a/src/engine/graphics/Buffer.h +++ b/src/engine/graphics/Buffer.h @@ -31,6 +31,7 @@ namespace Atlas { void* data = nullptr; size_t size; + size_t alignment = 0; }; struct BufferAllocation { @@ -53,6 +54,8 @@ namespace Atlas { void Unmap(); + VkDeviceAddress GetDeviceAddress(); + VkBuffer buffer; VmaAllocation allocation; VkAccessFlags accessMask = VK_ACCESS_MEMORY_READ_BIT | @@ -62,6 +65,7 @@ namespace Atlas { BufferDomain domain; const size_t size = 0; + const size_t alignment = 0; private: MemoryManager* memoryManager; diff --git a/src/engine/graphics/CommandList.cpp b/src/engine/graphics/CommandList.cpp index 1bedf4d59..86d0629f1 100644 --- 
a/src/engine/graphics/CommandList.cpp +++ b/src/engine/graphics/CommandList.cpp @@ -395,6 +395,7 @@ namespace Atlas { descriptorBindingData.buffers[set][binding] = nullptr; descriptorBindingData.sampledImages[set][binding] = {nullptr, nullptr}; descriptorBindingData.images[set][binding] = nullptr; + descriptorBindingData.tlases[set][binding] = nullptr; descriptorBindingData.changed[set] = true; } else { @@ -406,6 +407,7 @@ namespace Atlas { descriptorBindingData.dynamicBuffers[set][binding] = {nullptr, 0}; descriptorBindingData.sampledImages[set][binding] = {nullptr, nullptr}; descriptorBindingData.images[set][binding] = nullptr; + descriptorBindingData.tlases[set][binding] = nullptr; descriptorBindingData.changed[set] = true; } @@ -427,6 +429,7 @@ namespace Atlas { descriptorBindingData.buffers[set][binding] = nullptr; descriptorBindingData.sampledImages[set][binding] = {nullptr, nullptr}; descriptorBindingData.images[set][binding] = nullptr; + descriptorBindingData.tlases[set][binding] = nullptr; } @@ -446,6 +449,7 @@ namespace Atlas { descriptorBindingData.buffers[set][binding] = nullptr; descriptorBindingData.sampledImages[set][binding] = {nullptr, nullptr}; descriptorBindingData.images[set][binding] = nullptr; + descriptorBindingData.tlases[set][binding] = nullptr; descriptorBindingData.changed[set] = true; } else { @@ -457,6 +461,7 @@ namespace Atlas { descriptorBindingData.dynamicBuffers[set][binding] = {nullptr, 0}; descriptorBindingData.sampledImages[set][binding] = {nullptr, nullptr}; descriptorBindingData.images[set][binding] = nullptr; + descriptorBindingData.tlases[set][binding] = nullptr; descriptorBindingData.changed[set] = true; } } @@ -477,6 +482,7 @@ namespace Atlas { descriptorBindingData.buffers[set][binding] = nullptr; descriptorBindingData.sampledImages[set][binding] = {nullptr, nullptr}; descriptorBindingData.images[set][binding] = nullptr; + descriptorBindingData.tlases[set][binding] = nullptr; } @@ -492,6 +498,7 @@ namespace Atlas { 
descriptorBindingData.buffers[set][binding] = nullptr; descriptorBindingData.dynamicBuffers[set][binding] = {nullptr, 0}; descriptorBindingData.sampledImages[set][binding] = { nullptr, nullptr }; + descriptorBindingData.tlases[set][binding] = nullptr; descriptorBindingData.changed[set] = true; } @@ -509,6 +516,24 @@ namespace Atlas { descriptorBindingData.buffers[set][binding] = nullptr; descriptorBindingData.dynamicBuffers[set][binding] = {nullptr, 0}; descriptorBindingData.images[set][binding] = nullptr; + descriptorBindingData.tlases[set][binding] = nullptr; + descriptorBindingData.changed[set] = true; + + } + + void CommandList::BindTLAS(const Ref &tlas, uint32_t set, uint32_t binding) { + + assert(set < DESCRIPTOR_SET_COUNT && "Descriptor set not allowed for use"); + assert(binding < BINDINGS_PER_DESCRIPTOR_SET && "The binding point is not allowed for use"); + + if (descriptorBindingData.tlases[set][binding] == tlas.get()) + return; + + descriptorBindingData.tlases[set][binding] = tlas.get(); + descriptorBindingData.buffers[set][binding] = nullptr; + descriptorBindingData.dynamicBuffers[set][binding] = {nullptr, 0}; + descriptorBindingData.images[set][binding] = nullptr; + descriptorBindingData.sampledImages[set][binding] = { nullptr, nullptr }; descriptorBindingData.changed[set] = true; } @@ -598,6 +623,19 @@ namespace Atlas { } + void CommandList::MemoryBarrier(VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask, + VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask) { + + VkMemoryBarrier barrier = {}; + barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + barrier.srcAccessMask = srcAccessMask; + barrier.dstAccessMask = dstAccessMask; + + vkCmdPipelineBarrier(commandBuffer, srcStageMask, dstStageMask, 0, 1, + &barrier, 0, nullptr, 0, nullptr); + + } + void CommandList::PipelineBarrier(VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask) { vkCmdPipelineBarrier(commandBuffer, srcStageMask, dstStageMask, 0, 0, @@ -770,11 
+808,24 @@ namespace Atlas { } + void CommandList::BuildBLAS(const Ref &blas, VkAccelerationStructureBuildGeometryInfoKHR& buildInfo) { + + vkCmdBuildAccelerationStructuresKHR(commandBuffer, 1, &buildInfo, &blas->rangeInfo); + + } + + void CommandList::BuildTLAS(const Ref &tlas, VkAccelerationStructureBuildGeometryInfoKHR& buildInfo) { + + vkCmdBuildAccelerationStructuresKHR(commandBuffer, 1, &buildInfo, &tlas->rangeInfo); + + } + void CommandList::BindDescriptorSets() { VkWriteDescriptorSet setWrites[2 * BINDINGS_PER_DESCRIPTOR_SET]; VkDescriptorBufferInfo bufferInfos[2 * BINDINGS_PER_DESCRIPTOR_SET]; VkDescriptorImageInfo imageInfos[2 * BINDINGS_PER_DESCRIPTOR_SET]; + VkWriteDescriptorSetAccelerationStructureKHR tlasInfos[2 * BINDINGS_PER_DESCRIPTOR_SET]; uint32_t dynamicOffsets[2 * BINDINGS_PER_DESCRIPTOR_SET]; @@ -924,6 +975,35 @@ namespace Atlas { setWrite.pImageInfo = &imageInfo; } + // TOP-LEVEL ACCELERATION STRUCTURES + for (uint32_t j = 0; j < BINDINGS_PER_DESCRIPTOR_SET; j++) { + if (!descriptorBindingData.tlases[i][j]) continue; + const auto& binding = shader->sets[i].bindings[j]; + // This probably is an old binding, which isn't used by this shader + if (!binding.valid) continue; + // Check that the descriptor types match up + const auto descriptorType = binding.layoutBinding.descriptorType; + if (descriptorType != VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) + continue; + + auto tlas = descriptorBindingData.tlases[i][j]; + + auto& tlasInfo = tlasInfos[bindingCounter]; + tlasInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; + tlasInfo.pNext = nullptr; + tlasInfo.accelerationStructureCount = 1; + tlasInfo.pAccelerationStructures = &tlas->accelerationStructure; + + auto& setWrite = setWrites[bindingCounter++]; + setWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + setWrite.dstBinding = j; + setWrite.dstArrayElement = binding.arrayElement; + setWrite.dstSet = descriptorBindingData.sets[i]; + setWrite.descriptorCount = 
1; + setWrite.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + setWrite.pNext = &tlasInfo; + } + vkUpdateDescriptorSets(device, bindingCounter, setWrites, 0, nullptr); } diff --git a/src/engine/graphics/CommandList.h b/src/engine/graphics/CommandList.h index adfa74acc..0cee54744 100644 --- a/src/engine/graphics/CommandList.h +++ b/src/engine/graphics/CommandList.h @@ -90,6 +90,8 @@ namespace Atlas { void BindImage(const Ref& image, const Ref& sampler, uint32_t set, uint32_t binding); + void BindTLAS(const Ref& tlas, uint32_t set, uint32_t binding); + void ResetBindings(); void ImageMemoryBarrier(const Ref& image, VkImageLayout newLayout, VkAccessFlags newAccessMask, @@ -113,6 +115,10 @@ namespace Atlas { void BufferMemoryBarrier(BufferBarrier& barrier, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask); + void MemoryBarrier(VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask, + VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + void PipelineBarrier(VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); @@ -146,6 +152,10 @@ namespace Atlas { void GenerateMipMap(const Ref& image); + void BuildBLAS(const Ref& blas, VkAccelerationStructureBuildGeometryInfoKHR& buildInfo); + + void BuildTLAS(const Ref& tlas, VkAccelerationStructureBuildGeometryInfoKHR& buildInfo); + VkCommandPool commandPool; VkCommandBuffer commandBuffer; VkFence fence; @@ -172,6 +182,7 @@ namespace Atlas { std::pair dynamicBuffers[DESCRIPTOR_SET_COUNT][BINDINGS_PER_DESCRIPTOR_SET]; Image* images[DESCRIPTOR_SET_COUNT][BINDINGS_PER_DESCRIPTOR_SET]; std::pair sampledImages[DESCRIPTOR_SET_COUNT][BINDINGS_PER_DESCRIPTOR_SET]; + TLAS* tlases[DESCRIPTOR_SET_COUNT][BINDINGS_PER_DESCRIPTOR_SET]; VkDescriptorSet sets[DESCRIPTOR_SET_COUNT]; bool changed[DESCRIPTOR_SET_COUNT]; @@ 
-191,6 +202,7 @@ namespace Atlas { dynamicBuffers[i][j] = { nullptr, 0u }; images[i][j] = nullptr; sampledImages[i][j] = { nullptr, nullptr }; + tlases[i][j] = nullptr; } sets[i] = nullptr; changed[i] = true; @@ -203,6 +215,7 @@ namespace Atlas { dynamicBuffers[set][j] = { nullptr, 0u }; images[set][j] = nullptr; sampledImages[set][j] = { nullptr, nullptr }; + tlases[set][j] = nullptr; } sets[set] = nullptr; changed[set] = true; diff --git a/src/engine/graphics/GraphicsDevice.cpp b/src/engine/graphics/GraphicsDevice.cpp index 1378ebfb9..5a42f64a4 100644 --- a/src/engine/graphics/GraphicsDevice.cpp +++ b/src/engine/graphics/GraphicsDevice.cpp @@ -1,5 +1,6 @@ #include "GraphicsDevice.h" #include "Instance.h" +#include "StructureChainBuilder.h" #include "../EngineInstance.h" #include @@ -27,14 +28,19 @@ namespace Atlas { }; std::vector optionalExtensions = { + VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, + VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, + VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME, + VK_KHR_RAY_QUERY_EXTENSION_NAME #ifdef AE_BUILDTYPE_DEBUG - VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME + , VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME #endif }; SelectPhysicalDevice(instance->instance, surface->GetNativeSurface(), requiredExtensions, optionalExtensions); - vkGetPhysicalDeviceProperties(physicalDevice, &deviceProperties); + + GetPhysicalDeviceProperties(physicalDevice); auto optionalExtensionOverlap = CheckDeviceOptionalExtensionSupport(physicalDevice, optionalExtensions); requiredExtensions.insert(requiredExtensions.end(), optionalExtensionOverlap.begin(), @@ -44,39 +50,12 @@ namespace Atlas { BuildPhysicalDeviceFeatures(physicalDevice); - VkDeviceCreateInfo createInfo{}; - createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - createInfo.pQueueCreateInfos = queueCreateInfos.data(); - createInfo.queueCreateInfoCount = uint32_t(queueCreateInfos.size()); - - createInfo.pEnabledFeatures = nullptr; - createInfo.enabledExtensionCount = 
uint32_t(requiredExtensions.size()); - createInfo.ppEnabledExtensionNames = requiredExtensions.data(); - - if (enableValidationLayers) { - createInfo.enabledLayerCount = uint32_t(instance->layerNames.size()); - createInfo.ppEnabledLayerNames = instance->layerNames.data(); - } else { - createInfo.enabledLayerCount = 0; - } - #ifdef AE_OS_MACOS - VkPhysicalDevicePortabilitySubsetFeaturesKHR portabilityFeatures = {}; - // This is hacked since I can't get it to work otherwise - // See VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_FEATURES_KHR in vulkan_core.h - portabilityFeatures.sType = static_cast(1000163000); - portabilityFeatures.mutableComparisonSamplers = VK_TRUE; - portabilityFeatures.pNext = &features; - - // This feature struct is the last one in the pNext chain for now - createInfo.pNext = &portabilityFeatures; - setenv("MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS", "1", 1); -#else - createInfo.pNext = &features; #endif - VK_CHECK(vkCreateDevice(physicalDevice, &createInfo, nullptr, &device)) + // Uses the physical device structures generated above + CreateDevice(queueCreateInfos, requiredExtensions, enableValidationLayers); for (auto& queueFamily : queueFamilyIndices.families) { for (auto& queue : queueFamily.queues) { @@ -110,6 +89,16 @@ namespace Atlas { // so delete all of the memoryManager content before cleaning the rest memoryManager->DestroyAllImmediate(); + for (auto& tlasRef : tlases) { + assert(tlasRef.use_count() == 1 && "TLAS wasn't deallocated or allocated wrongly"); + tlasRef.reset(); + } + + for (auto& blasRef : blases) { + assert(blasRef.use_count() == 1 && "BLAS wasn't deallocated or allocated wrongly"); + blasRef.reset(); + } + for (auto& pipelineRef : pipelines) { assert(pipelineRef.use_count() == 1 && "Pipeline wasn't deallocated or allocated wrongly"); pipelineRef.reset(); @@ -316,6 +305,26 @@ namespace Atlas { } + Ref GraphicsDevice::CreateBLAS(BLASDesc desc) { + + auto blas = std::make_shared(this, desc); + + blases.push_back(blas); 
+
+            return blas;
+
+        }
+
+        Ref GraphicsDevice::CreateTLAS(TLASDesc desc) {
+
+            auto tlas = std::make_shared(this, desc);
+
+            tlases.push_back(tlas);
+
+            return tlas;
+
+        }
+
         CommandList* GraphicsDevice::GetCommandList(QueueType queueType, bool frameIndependentList) {
             if (frameIndependentList) {
@@ -877,6 +886,100 @@ namespace Atlas {
         }
+        // Fills deviceProperties for the given physical device, with the ray tracing pipeline and
+        // acceleration structure property structs chained behind it via pNext
+        void GraphicsDevice::GetPhysicalDeviceProperties(VkPhysicalDevice device) {
+
+            StructureChainBuilder propertiesBuilder(deviceProperties);
+
+            accelerationStructureProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR;
+            rayTracingPipelineProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR;
+            deviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+
+            propertiesBuilder.Append(rayTracingPipelineProperties);
+            propertiesBuilder.Append(accelerationStructureProperties);
+
+            vkGetPhysicalDeviceProperties2(device, &deviceProperties);
+
+        }
+
+        // Creates the logical device. The ray tracing feature structs are only chained in (and
+        // support.hardwareRayTracing set) when all four related extension names are in `extensions`
+        void GraphicsDevice::CreateDevice(const std::vector& queueCreateInfos,
+            const std::vector& extensions, bool enableValidationLayers) {
+
+            VkDeviceCreateInfo createInfo{};
+            createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+            createInfo.pQueueCreateInfos = queueCreateInfos.data();
+            createInfo.queueCreateInfoCount = uint32_t(queueCreateInfos.size());
+
+            createInfo.pEnabledFeatures = nullptr;
+            createInfo.enabledExtensionCount = uint32_t(extensions.size());
+            createInfo.ppEnabledExtensionNames = extensions.data();
+
+            if (enableValidationLayers) {
+                createInfo.enabledLayerCount = uint32_t(instance->layerNames.size());
+                createInfo.ppEnabledLayerNames = instance->layerNames.data();
+            }
+            else {
+                createInfo.enabledLayerCount = 0;
+            }
+
+            std::set availableExtensions;
+
+            for (auto extensionName : extensions) {
+                availableExtensions.insert(extensionName);
+            }
+
+            StructureChainBuilder featureBuilder(createInfo);
+
+            VkPhysicalDeviceAccelerationStructureFeaturesKHR accelerationStructureFeature = {};
+            accelerationStructureFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR;
+
+            VkPhysicalDeviceRayTracingPipelineFeaturesKHR rtPipelineFeature = {};
+            rtPipelineFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR;
+
+            VkPhysicalDeviceRayQueryFeaturesKHR rayQueryFeature = {};
+            rayQueryFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR;
+
+            // Check for ray tracing extension support
+            if (availableExtensions.contains(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME) &&
+                availableExtensions.contains(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME) &&
+                availableExtensions.contains(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME) &&
+                availableExtensions.contains(VK_KHR_RAY_QUERY_EXTENSION_NAME)) {
+
+                accelerationStructureFeature.accelerationStructure = VK_TRUE;
+                rtPipelineFeature.rayTracingPipeline = VK_TRUE;
+                rayQueryFeature.rayQuery = VK_TRUE;
+
+                featureBuilder.Append(accelerationStructureFeature);
+                featureBuilder.Append(rtPipelineFeature);
+                featureBuilder.Append(rayQueryFeature);
+
+                support.hardwareRayTracing = true;
+            }
+
+            if (availableExtensions.contains(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME)) {
+                support.shaderPrintf = true;
+            }
+
+#ifdef AE_OS_MACOS
+            VkPhysicalDevicePortabilitySubsetFeaturesKHR portabilityFeatures = {};
+            // This is hacked since I can't get it to work otherwise
+            // See VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_FEATURES_KHR in vulkan_core.h
+            portabilityFeatures.sType = static_cast(1000163000);
+            portabilityFeatures.mutableComparisonSamplers = VK_TRUE;
+
+            // Not the chain tail: 'features' is appended right after. Append() overwrites the tail's pNext,
+            // so 'features' presumably has to stay last to keep its own chain (confirm in BuildPhysicalDeviceFeatures)
+            featureBuilder.Append(portabilityFeatures);
+#endif
+            featureBuilder.Append(features);
+
+            VK_CHECK(vkCreateDevice(physicalDevice, &createInfo, nullptr, &device))
+
+        }
+
         bool GraphicsDevice::CheckForWindowResize() {
             auto nativeWindow = surface->GetNativeWindow();
@@ -1020,6 +1123,26 @@ namespace Atlas {
                 }
             }
+            for 
(size_t i = 0; i < blases.size(); i++) { + auto& blasRef = blases[i]; + if (blasRef.use_count() == 1) { + blasRef.swap(blases.back()); + memoryManager->DestroyAllocation(blases.back()); + blases.pop_back(); + i--; + } + } + + for (size_t i = 0; i < tlases.size(); i++) { + auto& tlasRef = tlases[i]; + if (tlasRef.use_count() == 1) { + tlasRef.swap(tlases.back()); + memoryManager->DestroyAllocation(tlases.back()); + tlases.pop_back(); + i--; + } + } + } CommandList* GraphicsDevice::GetOrCreateCommandList(QueueType queueType, std::mutex &mutex, diff --git a/src/engine/graphics/GraphicsDevice.h b/src/engine/graphics/GraphicsDevice.h index 83fc3db41..0c8c71aed 100644 --- a/src/engine/graphics/GraphicsDevice.h +++ b/src/engine/graphics/GraphicsDevice.h @@ -13,6 +13,8 @@ #include "Sampler.h" #include "Descriptor.h" #include "QueryPool.h" +#include "BLAS.h" +#include "TLAS.h" #include "Framebuffer.h" #include "MemoryManager.h" @@ -29,6 +31,11 @@ namespace Atlas { class Instance; class ImguiWrapper; + struct DeviceSupport { + bool hardwareRayTracing = false; + bool shaderPrintf = false; + }; + struct CommandListSubmission { CommandList* cmd; @@ -113,6 +120,10 @@ namespace Atlas { Ref CreateQueryPool(QueryPoolDesc desc); + Ref CreateBLAS(BLASDesc desc); + + Ref CreateTLAS(TLASDesc desc); + CommandList* GetCommandList(QueueType queueType = QueueType::GraphicsQueue, bool frameIndependentList = false); @@ -138,12 +149,17 @@ namespace Atlas { VkPhysicalDevice physicalDevice; VkDevice device; - VkPhysicalDeviceProperties deviceProperties; + + VkPhysicalDeviceProperties2 deviceProperties = {}; + VkPhysicalDeviceRayTracingPipelinePropertiesKHR rayTracingPipelineProperties = {}; + VkPhysicalDeviceAccelerationStructurePropertiesKHR accelerationStructureProperties = {}; VkPhysicalDeviceFeatures2 features = {}; VkPhysicalDeviceVulkan11Features features11 = {}; VkPhysicalDeviceVulkan12Features features12 = {}; + DeviceSupport support; + bool isComplete = false; static GraphicsDevice* 
DefaultDevice; @@ -194,6 +210,11 @@ namespace Atlas { void BuildPhysicalDeviceFeatures(VkPhysicalDevice device); + void GetPhysicalDeviceProperties(VkPhysicalDevice device); + + void CreateDevice(const std::vector& queueCreateInfos, + const std::vector& extensions, bool enableValidationLayers); + bool CheckForWindowResize(); void CreateFrameData(); @@ -223,6 +244,8 @@ namespace Atlas { std::vector> samplers; std::vector> descriptorPools; std::vector> queryPools; + std::vector> blases; + std::vector> tlases; std::mutex commandListsMutex; std::vector commandLists; diff --git a/src/engine/graphics/MemoryManager.cpp b/src/engine/graphics/MemoryManager.cpp index 0484b4aaa..c5e9c81c7 100644 --- a/src/engine/graphics/MemoryManager.cpp +++ b/src/engine/graphics/MemoryManager.cpp @@ -23,6 +23,8 @@ namespace Atlas { allocatorInfo.instance = device->instance->GetNativeInstance(); allocatorInfo.pVulkanFunctions = &vulkanFunctions; allocatorInfo.vulkanApiVersion = VK_API_VERSION_1_2; + allocatorInfo.flags = VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT; + VK_CHECK(vmaCreateAllocator(&allocatorInfo, &allocator)) vkGetPhysicalDeviceProperties(device->physicalDevice, &deviceProperties); @@ -111,6 +113,20 @@ namespace Atlas { } + void MemoryManager::DestroyAllocation(Ref& allocation) { + + deleteBLASAllocations + .emplace_back(DeleteResource { allocation, frameIndex + framesToDeletion }); + + } + + void MemoryManager::DestroyAllocation(Ref& allocation) { + + deleteTLASAllocations + .emplace_back(DeleteResource { allocation, frameIndex + framesToDeletion }); + + } + void MemoryManager::DestroyRawAllocation(std::function destroyLambda) { deleteRawAllocations.push_back(DeleteLambda { destroyLambda, frameIndex + framesToDeletion } ); @@ -143,6 +159,8 @@ namespace Atlas { deleteRawAllocations.pop_front(); } + DeleteAllocations(deleteTLASAllocations); + DeleteAllocations(deleteBLASAllocations); DeleteAllocations(deletePipelineAllocations); 
DeleteAllocations(deleteFrameBufferAllocations); DeleteAllocations(deleteRenderPassAllocations); diff --git a/src/engine/graphics/MemoryManager.h b/src/engine/graphics/MemoryManager.h index 879a05542..e69832b91 100644 --- a/src/engine/graphics/MemoryManager.h +++ b/src/engine/graphics/MemoryManager.h @@ -11,6 +11,8 @@ #include "Descriptor.h" #include "QueryPool.h" #include "Framebuffer.h" +#include "BLAS.h" +#include "TLAS.h" #include "MemoryTransferManager.h" #define VMA_STATS_STRING_ENABLED 0 @@ -74,6 +76,10 @@ namespace Atlas { void DestroyAllocation(Ref& allocation); + void DestroyAllocation(Ref& allocation); + + void DestroyAllocation(Ref& allocation); + void DestroyRawAllocation(std::function destroyLambda); void DestroyAllImmediate(); @@ -118,6 +124,8 @@ namespace Atlas { std::deque> deleteSamplerAllocations; std::deque> deleteDescriptorPoolAllocations; std::deque> deleteQueryPoolAllocations; + std::deque> deleteBLASAllocations; + std::deque> deleteTLASAllocations; }; diff --git a/src/engine/graphics/Sampler.cpp b/src/engine/graphics/Sampler.cpp index b13e96e15..64de6d834 100644 --- a/src/engine/graphics/Sampler.cpp +++ b/src/engine/graphics/Sampler.cpp @@ -11,7 +11,7 @@ namespace Atlas { desc.mode, desc.mipmapMode, desc.maxLod, desc.mipLodBias); if (desc.anisotropicFiltering) { samplerInfo.anisotropyEnable = VK_TRUE; - samplerInfo.maxAnisotropy = device->deviceProperties.limits.maxSamplerAnisotropy; + samplerInfo.maxAnisotropy = device->deviceProperties.properties.limits.maxSamplerAnisotropy; } if (desc.compareEnabled) { samplerInfo.compareEnable = VK_TRUE; diff --git a/src/engine/graphics/Shader.cpp b/src/engine/graphics/Shader.cpp index d020af014..9a238c026 100644 --- a/src/engine/graphics/Shader.cpp +++ b/src/engine/graphics/Shader.cpp @@ -20,6 +20,8 @@ namespace Atlas { const std::string ShaderStageFile::GetGlslCode(const std::vector& macros) const { + auto device = GraphicsDevice::DefaultDevice; + std::string glslCode = ""; glslCode.append("#version 
460\n\n"); @@ -27,6 +29,14 @@ namespace Atlas { glslCode.append("#define AE_TEXTURE_SHADOW_LOD\n"); } + if (device->support.shaderPrintf) { + glslCode.append("#define AE_SHADER_PRINTF\n"); + } + + if (device->support.hardwareRayTracing) { + glslCode.append("#define AE_HARDWARE_RAYTRACING\n"); + } + // Extensions have to come first for (auto& extension : extensions) { for (auto& ifdef : extension.ifdefs) diff --git a/src/engine/graphics/ShaderCompiler.cpp b/src/engine/graphics/ShaderCompiler.cpp index 3f33be66a..316f97494 100644 --- a/src/engine/graphics/ShaderCompiler.cpp +++ b/src/engine/graphics/ShaderCompiler.cpp @@ -1,4 +1,5 @@ #include "ShaderCompiler.h" +#include "GraphicsDevice.h" #include "Log.h" #include @@ -28,6 +29,8 @@ namespace Atlas { std::vector ShaderCompiler::Compile(const ShaderStageFile& shaderFile, const std::vector& macros, bool& success) { + auto device = GraphicsDevice::DefaultDevice; + std::vector spirvBinary; TBuiltInResource Resources = {}; @@ -36,6 +39,14 @@ namespace Atlas { EShLanguage stage = FindLanguage(shaderFile.shaderStage); glslang::TShader shader(stage); + if (device->support.hardwareRayTracing) { + // Mac struggles with Spv 1.4, so use only when necessary + shader.setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_4); + } + else { + shader.setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_0); + } + // Enable SPIR-V and Vulkan rules when parsing GLSL EShMessages messages = (EShMessages)(EShMsgSpvRules | EShMsgVulkanRules); @@ -57,7 +68,7 @@ namespace Atlas { success = false; return spirvBinary; } - + glslang::SpvOptions options; glslang::GlslangToSpv(*program.getIntermediate(stage), spirvBinary, &options); diff --git a/src/engine/graphics/StructureChainBuilder.h b/src/engine/graphics/StructureChainBuilder.h new file mode 100644 index 000000000..aaecd1490 --- /dev/null +++ b/src/engine/graphics/StructureChainBuilder.h @@ -0,0 +1,41 @@ +#ifndef AE_GRAPHICSSTRUCTURECHAINBUILDER_H +#define 
AE_GRAPHICSSTRUCTURECHAINBUILDER_H
+
+#include "Common.h"
+
+namespace Atlas {
+
+    namespace Graphics {
+
+        class StructureChainBuilder {
+
+        public:
+            StructureChainBuilder() = delete;
+
+            template
+            StructureChainBuilder(T& structure) {
+
+                chainTail = reinterpret_cast(&structure);
+
+            }
+
+            template
+            void Append(T& structure) {
+
+                auto chainNext = reinterpret_cast(&structure);
+
+                chainTail->pNext = chainNext;
+                chainTail = chainNext;
+
+            }
+
+        private:
+            VkBaseInStructure* chainTail = nullptr;
+
+        };
+
+    }
+
+}
+
+#endif
\ No newline at end of file
diff --git a/src/engine/graphics/TLAS.cpp b/src/engine/graphics/TLAS.cpp
new file mode 100644
index 000000000..aca292aed
--- /dev/null
+++ b/src/engine/graphics/TLAS.cpp
@@ -0,0 +1,86 @@
+#include "TLAS.h"
+
+#include "GraphicsDevice.h"
+
+namespace Atlas {
+
+    namespace Graphics {
+
+        // Sets up the instance geometry and the build info for a top-level build. No Vulkan
+        // object is created here, that only happens in Allocate() with update == false
+        TLAS::TLAS(GraphicsDevice* device, TLASDesc desc) : device(device) {
+
+            geometry = {};
+            geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
+            geometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
+
+            buildGeometryInfo = {};
+            buildGeometryInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
+            buildGeometryInfo.flags = desc.flags;
+            buildGeometryInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
+            buildGeometryInfo.srcAccelerationStructure = VK_NULL_HANDLE;
+
+            buildRange = {};
+
+            rangeInfo = &buildRange;
+
+        }
+
+        // The handle is VK_NULL_HANDLE if nothing was allocated, which the destroy call accepts
+        TLAS::~TLAS() {
+
+            vkDestroyAccelerationStructureKHR(device->device, accelerationStructure, nullptr);
+
+        }
+
+        // Recomputes the build sizes and range for instancesCount instances located at instancesAddress.
+        // With update == false the backing buffer and the acceleration structure are (re)created,
+        // with update == true the existing ones are kept and only the build info is refreshed
+        void TLAS::Allocate(VkDeviceAddress instancesAddress, uint32_t instancesCount, bool update) {
+
+            VkAccelerationStructureGeometryInstancesDataKHR instances = {};
+            instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
+            instances.data.deviceAddress = instancesAddress;
+
+            geometry.geometry.instances = instances;
+
+            buildGeometryInfo.geometryCount = 1;
+            buildGeometryInfo.pGeometries = &geometry;
+            buildGeometryInfo.mode = update ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR
+                : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
+
+            sizesInfo = {};
+            sizesInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
+            vkGetAccelerationStructureBuildSizesKHR(device->device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
+                &buildGeometryInfo, &instancesCount, &sizesInfo);
+
+            buildRange.primitiveCount = instancesCount;
+
+            // An update reuses the existing buffer and structure, so nothing is reallocated. The new instancesCount
+            // has to be less than or equal to the count used when this TLAS was last fully allocated
+            if (update)
+                return;
+
+            BufferDesc desc = {
+                .usageFlags = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR
+                    | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                .domain = BufferDomain::Device,
+                .size = sizesInfo.accelerationStructureSize
+            };
+            buffer = device->CreateBuffer(desc);
+
+            VkAccelerationStructureCreateInfoKHR createInfo = {};
+            createInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR;
+            createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
+            createInfo.size = sizesInfo.accelerationStructureSize;
+            createInfo.buffer = buffer->buffer;
+
+            // NOTE(review): a repeated non-update call overwrites accelerationStructure without destroying
+            // the previous handle - looks like a leak, confirm whether it needs a (deferred) destroy first
+            VK_CHECK(vkCreateAccelerationStructureKHR(device->device, &createInfo, nullptr, &accelerationStructure))
+
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git a/src/engine/graphics/TLAS.h b/src/engine/graphics/TLAS.h
new file mode 100644
index 000000000..08fd817c2
--- /dev/null
+++ b/src/engine/graphics/TLAS.h
@@ -0,0 +1,50 @@
+#ifndef AE_GRAPHICSTLAS_H
+#define AE_GRAPHICSTLAS_H
+
+#include "Common.h"
+#include "Buffer.h"
+
+namespace Atlas {
+
+    namespace Graphics {
+
+        class GraphicsDevice;
+        class MemoryManager;
+
+        struct TLASDesc {
+            VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR;
+        };
+
+        // Wraps a Vulkan top-level acceleration structure handle together with its backing buffer
+        class TLAS {
+
+        public:
+            TLAS(GraphicsDevice* device, TLASDesc desc);
+
+            ~TLAS();
+
+            // Refreshes sizes and build range; creates the buffer and the structure unless update is true.
+            // instancesAddress is the device address of the instance data used for the build
+            void Allocate(VkDeviceAddress instancesAddress, uint32_t instancesCount, bool update);
+
+            VkAccelerationStructureBuildGeometryInfoKHR buildGeometryInfo;
+            VkAccelerationStructureBuildSizesInfoKHR sizesInfo;
+            VkAccelerationStructureBuildRangeInfoKHR* rangeInfo;
+
+            Ref buffer;
+            // Stays null until a non-update Allocate() call, so the destructor never sees an indeterminate handle
+            VkAccelerationStructureKHR accelerationStructure = VK_NULL_HANDLE;
+
+        private:
+            GraphicsDevice* device;
+
+            VkAccelerationStructureGeometryKHR geometry;
+            VkAccelerationStructureBuildRangeInfoKHR buildRange;
+
+        };
+
+    }
+
+}
+
+#endif
\ No newline at end of file
diff --git a/src/engine/lighting/AO.h b/src/engine/lighting/AO.h
index ae724b3b0..ed72c5dd4 100644
--- a/src/engine/lighting/AO.h
+++ b/src/engine/lighting/AO.h
@@ -23,6 +23,7 @@ namespace Atlas {
             bool enable = true;
             bool rt = false;
+            bool opacityCheck = false;
             Texture::Texture2D noiseTexture;
             std::vector samples;
diff --git a/src/engine/lighting/IrradianceVolume.h b/src/engine/lighting/IrradianceVolume.h
index 997a7d989..04d114c91 100644
--- a/src/engine/lighting/IrradianceVolume.h
+++ b/src/engine/lighting/IrradianceVolume.h
@@ -100,6 +100,7 @@ namespace Atlas {
             bool optimizeProbes = true;
             bool useShadowMap = false;
             bool lowerResMoments = false;
+            bool opacityCheck = false;
             InternalIrradianceVolume internal;
diff --git a/src/engine/lighting/Reflection.h b/src/engine/lighting/Reflection.h
index 5b5f14609..cfdf2e0b7 100644
--- a/src/engine/lighting/Reflection.h
+++ b/src/engine/lighting/Reflection.h
@@ -30,6 +30,7 @@ namespace Atlas {
             bool rt = false;
             bool gi = true;
             bool useShadowMap = false;
+            bool opacityCheck = false;
         };
diff --git a/src/engine/mesh/Mesh.cpp b/src/engine/mesh/Mesh.cpp
index 9133a272b..b476b4b79 100644
--- a/src/engine/mesh/Mesh.cpp
+++ b/src/engine/mesh/Mesh.cpp
@@ -27,13 +27,13 @@ namespace Atlas {
             if (data.indices.ContainsData()) {
                 auto type = data.indices.GetElementSize() == 2 ? 
VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; - Buffer::IndexBuffer buffer(type, data.GetIndexCount(), data.indices.GetConvertedVoid()); - vertexArray.AddIndexComponent(buffer); + indexBuffer = Buffer::IndexBuffer(type, data.GetIndexCount(), data.indices.GetConvertedVoid()); + vertexArray.AddIndexComponent(indexBuffer); } if (data.vertices.ContainsData()) { - Buffer::VertexBuffer buffer(data.vertices.GetFormat(), data.GetVertexCount(), + vertexBuffer = Buffer::VertexBuffer(data.vertices.GetFormat(), data.GetVertexCount(), data.vertices.GetConvertedVoid()); - vertexArray.AddComponent(0, buffer); + vertexArray.AddComponent(0, vertexBuffer); } if (data.normals.ContainsData()) { Buffer::VertexBuffer buffer(data.normals.GetFormat(), data.GetVertexCount(), diff --git a/src/engine/mesh/Mesh.h b/src/engine/mesh/Mesh.h index ab995c8ca..bdca763bd 100644 --- a/src/engine/mesh/Mesh.h +++ b/src/engine/mesh/Mesh.h @@ -10,6 +10,7 @@ #include "Impostor.h" #include "../graphics/Buffer.h" +#include "../graphics/BLAS.h" namespace Atlas { @@ -38,7 +39,12 @@ namespace Atlas { MeshData data; MeshMobility mobility = MeshMobility::Stationary; + Buffer::VertexArray vertexArray; + Buffer::IndexBuffer indexBuffer; + Buffer::VertexBuffer vertexBuffer; + + Ref blas = nullptr; Impostor* impostor = nullptr; diff --git a/src/engine/mesh/MeshData.cpp b/src/engine/mesh/MeshData.cpp index add4cf340..24c9fb776 100644 --- a/src/engine/mesh/MeshData.cpp +++ b/src/engine/mesh/MeshData.cpp @@ -117,6 +117,9 @@ namespace Atlas { void MeshData::BuildBVH() { + auto device = Graphics::GraphicsDevice::DefaultDevice; + bool hardwareRayTracing = device->support.hardwareRayTracing; + struct Triangle { vec3 v0; vec3 v1; @@ -193,10 +196,18 @@ namespace Atlas { } - // Generate BVH - auto bvh = Volume::BVH(aabbs, bvhTriangles); + Volume::BVH bvh; + if (!hardwareRayTracing) { + // Generate BVH + bvh = Volume::BVH(aabbs, bvhTriangles); + + bvhTriangles.clear(); + bvhTriangles.shrink_to_fit(); + } + + auto& data = 
hardwareRayTracing ? bvhTriangles : bvh.data; - for (auto& bvhTriangle : bvh.data) { + for (auto& bvhTriangle : data) { auto& triangle = triangles[bvhTriangle.idx]; @@ -256,30 +267,34 @@ namespace Atlas { gpuTriangles.push_back(gpuTriangle); - BVHTriangle gpuBvhTriangle; - gpuBvhTriangle.v0 = vec4(triangle.v0, bvhTriangle.endOfNode ? 1.0f : -1.0f); - gpuBvhTriangle.v1 = vec4(triangle.v1, reinterpret_cast(triangle.materialIdx)); - gpuBvhTriangle.v2 = vec4(triangle.v2, 0.0f); + if (!hardwareRayTracing) { + BVHTriangle gpuBvhTriangle; + gpuBvhTriangle.v0 = vec4(triangle.v0, bvhTriangle.endOfNode ? 1.0f : -1.0f); + gpuBvhTriangle.v1 = vec4(triangle.v1, reinterpret_cast(triangle.materialIdx)); + gpuBvhTriangle.v2 = vec4(triangle.v2, 0.0f); - gpuBvhTriangles.push_back(gpuBvhTriangle); + gpuBvhTriangles.push_back(gpuBvhTriangle); + } } - triangles.clear(); - triangles.shrink_to_fit(); + if (!hardwareRayTracing) { + triangles.clear(); + triangles.shrink_to_fit(); - auto& nodes = bvh.GetTree(); - gpuBvhNodes = std::vector(nodes.size()); - // Copy to GPU format - for (size_t i = 0; i < nodes.size(); i++) { - gpuBvhNodes[i].leftPtr = nodes[i].leftPtr; - gpuBvhNodes[i].rightPtr = nodes[i].rightPtr; + auto& nodes = bvh.GetTree(); + gpuBvhNodes = std::vector(nodes.size()); + // Copy to GPU format + for (size_t i = 0; i < nodes.size(); i++) { + gpuBvhNodes[i].leftPtr = nodes[i].leftPtr; + gpuBvhNodes[i].rightPtr = nodes[i].rightPtr; - gpuBvhNodes[i].leftAABB.min = nodes[i].leftAABB.min; - gpuBvhNodes[i].leftAABB.max = nodes[i].leftAABB.max; + gpuBvhNodes[i].leftAABB.min = nodes[i].leftAABB.min; + gpuBvhNodes[i].leftAABB.max = nodes[i].leftAABB.max; - gpuBvhNodes[i].rightAABB.min = nodes[i].rightAABB.min; - gpuBvhNodes[i].rightAABB.max = nodes[i].rightAABB.max; + gpuBvhNodes[i].rightAABB.min = nodes[i].rightAABB.min; + gpuBvhNodes[i].rightAABB.max = nodes[i].rightAABB.max; + } } } diff --git a/src/engine/renderer/AORenderer.cpp b/src/engine/renderer/AORenderer.cpp index 
493ba7606..09ffe8df5 100644 --- a/src/engine/renderer/AORenderer.cpp +++ b/src/engine/renderer/AORenderer.cpp @@ -79,6 +79,8 @@ namespace Atlas { groupCount.x += ((groupCount.x * 8 == res.x) ? 0 : 1); groupCount.y += ((groupCount.y * 4 == res.y) ? 0 : 1); + rtaoPipelineConfig.ManageMacro("OPACITY_CHECK", ao->opacityCheck); + auto pipeline = PipelineManager::GetPipeline(rtaoPipelineConfig); auto uniforms = RTUniforms { diff --git a/src/engine/renderer/DDGIRenderer.cpp b/src/engine/renderer/DDGIRenderer.cpp index 0628dbc31..92c161493 100644 --- a/src/engine/renderer/DDGIRenderer.cpp +++ b/src/engine/renderer/DDGIRenderer.cpp @@ -130,7 +130,7 @@ namespace Atlas { commandList->BindImage(lastMomentsArray.image, lastMomentsArray.sampler, 2, 25); auto rayHitPipeline = PipelineManager::GetPipeline(rayHitPipelineConfig); - helper.DispatchHitClosest(commandList, rayHitPipeline, false, + helper.DispatchHitClosest(commandList, rayHitPipeline, false, volume->opacityCheck, [&]() { RayHitUniforms uniforms; uniforms.seed = Common::Random::SampleUniformFloat(); diff --git a/src/engine/renderer/MainRenderer.cpp b/src/engine/renderer/MainRenderer.cpp index 0dbca63da..7abe2a659 100644 --- a/src/engine/renderer/MainRenderer.cpp +++ b/src/engine/renderer/MainRenderer.cpp @@ -305,7 +305,7 @@ namespace Atlas { commandList->BeginRenderPass(device->swapChain, true); textureRenderer.RenderTexture2D(commandList, viewport, &target->texture, - 0.0f, 0.0f, float(viewport->width), float(viewport->height), false, true); + 0.0f, 0.0f, float(viewport->width), float(viewport->height), 0.0f, 1.0f, false, true); commandList->EndRenderPass(); diff --git a/src/engine/renderer/PathTracingRenderer.cpp b/src/engine/renderer/PathTracingRenderer.cpp index 7f995a18c..cad42956b 100644 --- a/src/engine/renderer/PathTracingRenderer.cpp +++ b/src/engine/renderer/PathTracingRenderer.cpp @@ -139,7 +139,7 @@ namespace Atlas { Graphics::Profiler::EndAndBeginQuery("Bounce " + std::to_string(i)); - 
helper.DispatchHitClosest(commandList, PipelineManager::GetPipeline(rayHitPipelineConfig), false, + helper.DispatchHitClosest(commandList, PipelineManager::GetPipeline(rayHitPipelineConfig), false, true, [=]() { commandList->BindBufferOffset(rayHitUniformBuffer.Get(), rayHitUniformBuffer.GetAlignedOffset(i), 3, 4); diff --git a/src/engine/renderer/RTReflectionRenderer.cpp b/src/engine/renderer/RTReflectionRenderer.cpp index cc45bdc8f..994d64cf6 100644 --- a/src/engine/renderer/RTReflectionRenderer.cpp +++ b/src/engine/renderer/RTReflectionRenderer.cpp @@ -103,6 +103,7 @@ namespace Atlas { auto ddgiEnabled = scene->irradianceVolume && scene->irradianceVolume->enable; rtrPipelineConfig.ManageMacro("USE_SHADOW_MAP", reflection->useShadowMap); rtrPipelineConfig.ManageMacro("GI", reflection->gi && ddgiEnabled); + rtrPipelineConfig.ManageMacro("OPACITY_CHECK", reflection->opacityCheck); auto pipeline = PipelineManager::GetPipeline(rtrPipelineConfig); diff --git a/src/engine/renderer/helper/RayTracingHelper.cpp b/src/engine/renderer/helper/RayTracingHelper.cpp index b50a7045c..9a22a5d90 100644 --- a/src/engine/renderer/helper/RayTracingHelper.cpp +++ b/src/engine/renderer/helper/RayTracingHelper.cpp @@ -16,7 +16,7 @@ namespace Atlas { RayTracingHelper::RayTracingHelper() { - const size_t lightCount = 512; + const size_t lightCount = 128; indirectDispatchBuffer = Buffer::Buffer(Buffer::BufferUsageBits::IndirectBufferBit, 3 * sizeof(uint32_t), 0); @@ -89,7 +89,7 @@ namespace Atlas { std::vector weights; weights.reserve(lights.size()); for (auto& light : lights) { - weights.push_back(light.data1.y); + weights.push_back(light.data.y); } auto piecewiseDistribution = Common::Piecewise1D(weights); @@ -103,12 +103,12 @@ namespace Atlas { } for (auto& light : selectedLights) { - light.data1.y *= float(selectedLights.size()); + light.data.y *= float(selectedLights.size()); } } else { for (auto light : lights) { - light.data1.y = 1.0f; + light.data.y = 1.0f; 
selectedLights.push_back(light); } } @@ -156,11 +156,18 @@ namespace Atlas { rtData.materialBuffer.Bind(commandList, 2, 7); rtData.triangleBuffer.Bind(commandList, 2, 8); - rtData.bvhTriangleBuffer.Bind(commandList, 2, 9); - rtData.blasNodeBuffer.Bind(commandList, 2, 10); rtData.bvhInstanceBuffer.Bind(commandList, 2, 21); - rtData.tlasNodeBuffer.Bind(commandList, 2, 22); lightBuffer.Bind(commandList, 2, 11); + + if (rtData.hardwareRayTracing) { + commandList->BindTLAS(rtData.tlas, 2, 23); + rtData.geometryTriangleOffsetBuffer.Bind(commandList, 2, 22); + } + else { + rtData.bvhTriangleBuffer.Bind(commandList, 2, 9); + rtData.blasNodeBuffer.Bind(commandList, 2, 10); + rtData.tlasNodeBuffer.Bind(commandList, 2, 22); + } } // Execute shader @@ -197,7 +204,7 @@ namespace Atlas { std::vector weights; weights.reserve(lights.size()); for (auto& light : lights) { - weights.push_back(light.data1.y); + weights.push_back(light.data.y); } auto piecewiseDistribution = Common::Piecewise1D(weights); @@ -211,12 +218,12 @@ namespace Atlas { } for (auto& light : selectedLights) { - light.data1.y *= float(selectedLights.size()); + light.data.y *= float(selectedLights.size()); } } else { for (auto light : lights) { - light.data1.y = 1.0f; + light.data.y = 1.0f; selectedLights.push_back(light); } } @@ -257,7 +264,7 @@ namespace Atlas { void RayTracingHelper::DispatchHitClosest(Graphics::CommandList* commandList, const Ref& hitPipeline, bool binning, - std::function prepare) { + bool opacityCheck, std::function prepare) { auto& rtData = scene->rtData; if (!rtData.IsValid()) return; @@ -302,11 +309,18 @@ namespace Atlas { rtData.materialBuffer.Bind(commandList, 2, 7); rtData.triangleBuffer.Bind(commandList, 2, 8); - rtData.bvhTriangleBuffer.Bind(commandList, 2, 9); - rtData.blasNodeBuffer.Bind(commandList, 2, 10); rtData.bvhInstanceBuffer.Bind(commandList, 2, 21); - rtData.tlasNodeBuffer.Bind(commandList, 2, 22); lightBuffer.Bind(commandList, 2, 11); + + if (rtData.hardwareRayTracing) { + 
commandList->BindTLAS(rtData.tlas, 2, 23); + rtData.geometryTriangleOffsetBuffer.Bind(commandList, 2, 22); + } + else { + rtData.bvhTriangleBuffer.Bind(commandList, 2, 9); + rtData.blasNodeBuffer.Bind(commandList, 2, 10); + rtData.tlasNodeBuffer.Bind(commandList, 2, 22); + } } Graphics::Profiler::BeginQuery("Setup command buffer"); @@ -400,7 +414,11 @@ namespace Atlas { // Trace rays for closest intersection { - auto pipeline = PipelineManager::GetPipeline(traceClosestPipelineConfig); + auto pipelineConfig = traceClosestPipelineConfig; + if (opacityCheck) { + pipelineConfig.AddMacro("OPACITY_CHECK"); + } + auto pipeline = PipelineManager::GetPipeline(pipelineConfig); commandList->BindPipeline(pipeline); PushConstants constants; @@ -519,9 +537,10 @@ namespace Atlas { auto cd = reinterpret_cast(data); GPULight gpuLight; - gpuLight.data0 = vec4(P, radiance.r); - gpuLight.data1 = vec4(cd, weight, area, radiance.g); - gpuLight.N = vec4(N, radiance.b); + gpuLight.P = vec4(P, 1.0f); + gpuLight.N = vec4(N, 0.0f); + gpuLight.color = vec4(radiance, 0.0f); + gpuLight.data = vec4(cd, weight, area, 0.0f); lights.push_back(gpuLight); } @@ -534,7 +553,7 @@ namespace Atlas { // Find the maximum weight auto maxWeight = 0.0f; for (auto& light : lights) { - maxWeight = glm::max(maxWeight, light.data1.y); + maxWeight = glm::max(maxWeight, light.data.y); } // Calculate min weight and adjust lights based on it @@ -543,12 +562,12 @@ namespace Atlas { auto totalWeight = 0.0f; for (auto& light : lights) { - light.data1.y = glm::max(light.data1.y, minWeight); - totalWeight += light.data1.y; + light.data.y = glm::max(light.data.y, minWeight); + totalWeight += light.data.y; } for (auto& light : lights) { - light.data1.y /= totalWeight; + light.data.y /= totalWeight; } } diff --git a/src/engine/renderer/helper/RayTracingHelper.h b/src/engine/renderer/helper/RayTracingHelper.h index e172c0d65..eb4a8a1b7 100644 --- a/src/engine/renderer/helper/RayTracingHelper.h +++ 
b/src/engine/renderer/helper/RayTracingHelper.h @@ -35,7 +35,7 @@ namespace Atlas { void DispatchHitClosest(Graphics::CommandList* commandList, const Ref& hitPipeline, bool binning, - std::function prepare); + bool opacityCheck, std::function prepare); void DispatchHitAny(Graphics::CommandList* commandList, const Ref& hitPipeline, std::function prepare); diff --git a/src/engine/scene/RTData.cpp b/src/engine/scene/RTData.cpp index 3db3d8c52..1940e8e77 100644 --- a/src/engine/scene/RTData.cpp +++ b/src/engine/scene/RTData.cpp @@ -3,6 +3,7 @@ #include "../mesh/MeshData.h" #include "../volume/BVH.h" +#include "../graphics/ASBuilder.h" namespace Atlas { @@ -10,9 +11,14 @@ namespace Atlas { RTData::RTData(Scene* scene) : scene(scene) { + auto device = Graphics::GraphicsDevice::DefaultDevice; + + hardwareRayTracing = device->support.hardwareRayTracing; + triangleBuffer = Buffer::Buffer(Buffer::BufferUsageBits::StorageBufferBit, sizeof(GPUTriangle)); bvhTriangleBuffer = Buffer::Buffer(Buffer::BufferUsageBits::StorageBufferBit, sizeof(BVHTriangle)); blasNodeBuffer = Buffer::Buffer(Buffer::BufferUsageBits::StorageBufferBit, sizeof(GPUBVHNode)); + geometryTriangleOffsetBuffer = Buffer::Buffer(Buffer::BufferUsageBits::StorageBufferBit, sizeof(uint32_t)); auto bufferUsage = Buffer::BufferUsageBits::StorageBufferBit | Buffer::BufferUsageBits::HostAccessBit | Buffer::BufferUsageBits::MultiBufferedBit; @@ -27,137 +33,21 @@ namespace Atlas { isValid = false; - std::vector gpuTriangles; - std::vector gpuBvhTriangles; - std::vector gpuBvhNodes; - - auto meshes = scene->GetMeshes(); - - materialAccess.clear(); - - int32_t materialCount = 0; - for (auto& mesh : meshes) { - if (!mesh.IsLoaded()) - continue; - - // Not all meshes might have a bvh - if (!mesh->data.gpuTriangles.size()) - continue; - - auto triangleOffset = int32_t(gpuTriangles.size()); - auto nodeOffset = int32_t(gpuBvhNodes.size()); - - for (size_t i = 0; i < mesh->data.gpuBvhNodes.size(); i++) { - auto gpuBvhNode = 
mesh->data.gpuBvhNodes[i]; - - auto leftPtr = gpuBvhNode.leftPtr; - auto rightPtr = gpuBvhNode.rightPtr; - - gpuBvhNode.leftPtr = leftPtr < 0 ? ~((~leftPtr) + triangleOffset) : leftPtr + nodeOffset; - gpuBvhNode.rightPtr = rightPtr < 0 ? ~((~rightPtr) + triangleOffset) : rightPtr + nodeOffset; - - gpuBvhNodes.push_back(gpuBvhNode); - } - - // Subtract and reassign material offset - for (size_t i = 0; i < mesh->data.gpuTriangles.size(); i++) { - auto gpuTriangle = mesh->data.gpuTriangles[i]; - auto gpuBvhTriangle = mesh->data.gpuBvhTriangles[i]; - - auto localMaterialIdx = reinterpret_cast(gpuTriangle.d0.w); - auto materialIdx = localMaterialIdx + materialCount; - - gpuTriangle.d0.w = reinterpret_cast(materialIdx); - gpuBvhTriangle.v1.w = reinterpret_cast(materialIdx); - - gpuTriangles.push_back(gpuTriangle); - gpuBvhTriangles.push_back(gpuBvhTriangle); - } - - for (auto& material : mesh->data.materials) { - materialAccess[&material] = materialCount++; - } - - GPUMesh gpuMesh = { - .nodeOffset = nodeOffset, - .triangleOffset = triangleOffset - }; - meshInfo[mesh.GetID()] = gpuMesh; - + if (hardwareRayTracing) { + BuildForHardwareRayTracing(); + } + else { + BuildForSoftwareRayTracing(); } - - if (!gpuTriangles.size()) - return; - - // Upload triangles - triangleBuffer.SetSize(gpuTriangles.size()); - triangleBuffer.SetData(gpuTriangles.data(), 0, gpuTriangles.size()); - - bvhTriangleBuffer.SetSize(gpuBvhTriangles.size()); - bvhTriangleBuffer.SetData(gpuBvhTriangles.data(), 0, gpuBvhTriangles.size()); - - blasNodeBuffer.SetSize(gpuBvhNodes.size()); - blasNodeBuffer.SetData(gpuBvhNodes.data(), 0, gpuBvhNodes.size()); std::vector materials; UpdateMaterials(materials, true); - triangleLights.clear(); - - // Triangle lights - for (size_t i = 0; i < gpuTriangles.size(); i++) { - auto& triangle = gpuTriangles[i]; - auto idx = reinterpret_cast(triangle.d0.w); - auto& material = materials[idx]; - - auto radiance = material.emissiveColor; - auto brightness = dot(radiance, 
vec3(0.3333f)); - - if (brightness > 0.0f) { - // Extract normal information again - auto cn0 = reinterpret_cast(triangle.v0.w); - auto cn1 = reinterpret_cast(triangle.v1.w); - auto cn2 = reinterpret_cast(triangle.v2.w); - - auto n0 = vec3(Common::Packing::UnpackSignedVector3x10_1x2(cn0)); - auto n1 = vec3(Common::Packing::UnpackSignedVector3x10_1x2(cn1)); - auto n2 = vec3(Common::Packing::UnpackSignedVector3x10_1x2(cn2)); - - // Compute neccessary information - auto P = (vec3(triangle.v0) + vec3(triangle.v1) + vec3(triangle.v2)) / 3.0f; - auto N = (n0 + n1 + n2) / 3.0f; - - auto a = glm::distance(vec3(triangle.v1), vec3(triangle.v0)); - auto b = glm::distance(vec3(triangle.v2), vec3(triangle.v0)); - auto c = glm::distance(vec3(triangle.v1), vec3(triangle.v2)); - auto p = 0.5f * (a + b + c); - auto area = glm::sqrt(p * (p - a) * (p - b) * (p - c)); - - auto weight = area * brightness; - - // Compress light - auto pn = Common::Packing::PackSignedVector3x10_1x2(vec4(N, 0.0f)); - auto cn = reinterpret_cast(pn); - - uint32_t data = 0; - data |= (1 << 28u); // Type TRIANGLE_LIGHT (see RayTracingHelper.cpp) - data |= uint32_t(i); - auto cd = reinterpret_cast(data); - - GPULight light; - light.data0 = vec4(P, radiance.r); - light.data1 = vec4(cd, weight, area, radiance.g); - light.N = vec4(N, radiance.b); - - triangleLights.push_back(light); - } - } - isValid = true; } - void RTData::Update() { + void RTData::Update(bool updateTriangleLights) { auto actors = scene->GetMeshActors(); @@ -168,55 +58,56 @@ namespace Atlas { std::vector gpuBvhInstances; std::vector actorAABBs; + for (auto& [_, meshInfo] : meshInfos) { + meshInfo.instanceIndices.clear(); + meshInfo.matrices.clear(); + } + for (auto& actor : actors) { if (!actor->mesh.IsLoaded()) continue; - if (!meshInfo.contains(actor->mesh.GetID())) + if (!meshInfos.contains(actor->mesh.GetID())) continue; actorAABBs.push_back(actor->aabb); - auto mesh = meshInfo[actor->mesh.GetID()]; + auto& meshInfo = 
meshInfos[actor->mesh.GetID()]; + + auto inverseMatrix = mat3x4(glm::transpose(glm::inverse(actor->globalMatrix))); GPUBVHInstance gpuBvhInstance = { - .inverseMatrix = mat3x4(glm::transpose(glm::inverse(actor->globalMatrix))), - .blasOffset = mesh.nodeOffset + .inverseMatrix = inverseMatrix, + .blasOffset = meshInfo.offset, + .triangleOffset = meshInfo.triangleOffset }; + meshInfo.matrices.push_back(actor->globalMatrix); + meshInfo.instanceIndices.push_back(uint32_t(gpuBvhInstances.size())); gpuBvhInstances.push_back(gpuBvhInstance); + if (hardwareRayTracing) { + + } } if (!gpuBvhInstances.size()) return; - auto bvh = Volume::BVH(actorAABBs); - - auto& nodes = bvh.GetTree(); - auto gpuBvhNodes = std::vector(nodes.size()); - // Copy to GPU format - for (size_t i = 0; i < nodes.size(); i++) { - gpuBvhNodes[i].leftPtr = nodes[i].leftPtr; - gpuBvhNodes[i].rightPtr = nodes[i].rightPtr; - - gpuBvhNodes[i].leftAABB.min = nodes[i].leftAABB.min; - gpuBvhNodes[i].leftAABB.max = nodes[i].leftAABB.max; - - gpuBvhNodes[i].rightAABB.min = nodes[i].rightAABB.min; - gpuBvhNodes[i].rightAABB.max = nodes[i].rightAABB.max; + if (hardwareRayTracing) { + UpdateForHardwareRayTracing(actors); } - - // Order after the BVH build to fit the node indices - std::vector orderedGpuBvhInstances(bvh.refs.size()); - for (size_t i = 0; i < bvh.refs.size(); i++) { - orderedGpuBvhInstances[i] = gpuBvhInstances[bvh.refs[i].idx]; + else { + gpuBvhInstances = UpdateForSoftwareRayTracing(gpuBvhInstances, actorAABBs); } - tlasNodeBuffer.SetSize(gpuBvhNodes.size()); - tlasNodeBuffer.SetData(gpuBvhNodes.data(), 0, gpuBvhNodes.size()); + std::vector materials; + UpdateMaterials(materials, false); - bvhInstanceBuffer.SetSize(orderedGpuBvhInstances.size()); - bvhInstanceBuffer.SetData(orderedGpuBvhInstances.data(), 0, orderedGpuBvhInstances.size()); + if (updateTriangleLights) + UpdateTriangleLights(); + + bvhInstanceBuffer.SetSize(gpuBvhInstances.size()); + bvhInstanceBuffer.SetData(gpuBvhInstances.data(), 
0, gpuBvhInstances.size()); } @@ -409,6 +300,331 @@ namespace Atlas { } + void RTData::BuildForSoftwareRayTracing() { + + std::vector gpuTriangles; + std::vector gpuBvhTriangles; + std::vector gpuBvhNodes; + + auto meshes = scene->GetMeshes(); + + materialAccess.clear(); + + int32_t materialCount = 0; + for (auto& mesh : meshes) { + if (!mesh.IsLoaded()) + continue; + + // Not all meshes might have a bvh + if (!mesh->data.gpuTriangles.size()) + continue; + + auto triangleOffset = int32_t(gpuTriangles.size()); + auto nodeOffset = int32_t(gpuBvhNodes.size()); + + for (size_t i = 0; i < mesh->data.gpuBvhNodes.size(); i++) { + auto gpuBvhNode = mesh->data.gpuBvhNodes[i]; + + auto leftPtr = gpuBvhNode.leftPtr; + auto rightPtr = gpuBvhNode.rightPtr; + + gpuBvhNode.leftPtr = leftPtr < 0 ? ~((~leftPtr) + triangleOffset) : leftPtr + nodeOffset; + gpuBvhNode.rightPtr = rightPtr < 0 ? ~((~rightPtr) + triangleOffset) : rightPtr + nodeOffset; + + gpuBvhNodes.push_back(gpuBvhNode); + } + + // Subtract and reassign material offset + for (size_t i = 0; i < mesh->data.gpuTriangles.size(); i++) { + auto gpuTriangle = mesh->data.gpuTriangles[i]; + auto gpuBvhTriangle = mesh->data.gpuBvhTriangles[i]; + + auto localMaterialIdx = reinterpret_cast(gpuTriangle.d0.w); + auto materialIdx = localMaterialIdx + materialCount; + + gpuTriangle.d0.w = reinterpret_cast(materialIdx); + gpuBvhTriangle.v1.w = reinterpret_cast(materialIdx); + + gpuTriangles.push_back(gpuTriangle); + gpuBvhTriangles.push_back(gpuBvhTriangle); + } + + for (auto& material : mesh->data.materials) { + materialAccess[&material] = materialCount++; + } + + MeshInfo meshInfo = { + .offset = nodeOffset, + .triangleOffset = triangleOffset + }; + meshInfos[mesh.GetID()] = meshInfo; + + BuildTriangleLightsForMesh(mesh); + + } + + if (!gpuTriangles.size()) + return; + + // Upload triangles + triangleBuffer.SetSize(gpuTriangles.size()); + triangleBuffer.SetData(gpuTriangles.data(), 0, gpuTriangles.size()); + + 
bvhTriangleBuffer.SetSize(gpuBvhTriangles.size()); + bvhTriangleBuffer.SetData(gpuBvhTriangles.data(), 0, gpuBvhTriangles.size()); + + blasNodeBuffer.SetSize(gpuBvhNodes.size()); + blasNodeBuffer.SetData(gpuBvhNodes.data(), 0, gpuBvhNodes.size()); + + } + + void RTData::BuildForHardwareRayTracing() { + + auto device = Graphics::GraphicsDevice::DefaultDevice; + + std::vector triangleOffsets; + std::vector gpuTriangles; + + Graphics::ASBuilder asBuilder; + + auto meshes = scene->GetMeshes(); + + materialAccess.clear(); + blases.clear(); + + int32_t materialCount = 0; + for (auto& mesh : meshes) { + if (!mesh.IsLoaded()) + continue; + + // Not all meshes might have a bvh + if (!mesh->data.gpuTriangles.size()) + continue; + + auto triangleOffset = int32_t(gpuTriangles.size()); + auto offset = int32_t(triangleOffsets.size()); + + // Subtract and reassign material offset + for (size_t i = 0; i < mesh->data.gpuTriangles.size(); i++) { + auto gpuTriangle = mesh->data.gpuTriangles[i]; + + auto localMaterialIdx = reinterpret_cast(gpuTriangle.d0.w); + auto materialIdx = localMaterialIdx + materialCount; + + gpuTriangle.d0.w = reinterpret_cast(materialIdx); + + gpuTriangles.push_back(gpuTriangle); + } + + for (auto& material : mesh->data.materials) { + materialAccess[&material] = materialCount++; + } + + std::vector geometryRegions; + for (auto& subData : mesh->data.subData) { + geometryRegions.emplace_back(Graphics::ASGeometryRegion{ + .indexCount = subData.indicesCount, + .indexOffset = subData.indicesOffset, + .opaque = !subData.material->HasOpacityMap() && subData.material->opacity == 1.0f + }); + } + + auto blasDesc = asBuilder.GetBLASDescForTriangleGeometry(mesh->vertexBuffer.buffer, mesh->indexBuffer.buffer, + mesh->vertexBuffer.elementCount, mesh->vertexBuffer.elementSize, + mesh->indexBuffer.elementSize, geometryRegions); + + blases.push_back(device->CreateBLAS(blasDesc)); + + MeshInfo meshInfo = { + .blas = blases.back(), + + .offset = offset, + .triangleOffset = 
triangleOffset + }; + meshInfos[mesh.GetID()] = meshInfo; + + for (auto& subData : mesh->data.subData) { + auto totalTriangleOffset = triangleOffset + subData.indicesOffset / 3; + triangleOffsets.push_back(totalTriangleOffset); + } + + BuildTriangleLightsForMesh(mesh); + + } + + if (!gpuTriangles.size()) + return; + + // Upload triangles + triangleBuffer.SetSize(gpuTriangles.size()); + triangleBuffer.SetData(gpuTriangles.data(), 0, gpuTriangles.size()); + + geometryTriangleOffsetBuffer.SetSize(triangleOffsets.size()); + geometryTriangleOffsetBuffer.SetData(triangleOffsets.data(), 0, triangleOffsets.size()); + + asBuilder.BuildBLAS(blases); + + } + + std::vector RTData::UpdateForSoftwareRayTracing(std::vector& gpuBvhInstances, + std::vector& actorAABBs) { + + auto bvh = Volume::BVH(actorAABBs); + + auto& nodes = bvh.GetTree(); + auto gpuBvhNodes = std::vector(nodes.size()); + // Copy to GPU format + for (size_t i = 0; i < nodes.size(); i++) { + gpuBvhNodes[i].leftPtr = nodes[i].leftPtr; + gpuBvhNodes[i].rightPtr = nodes[i].rightPtr; + + gpuBvhNodes[i].leftAABB.min = nodes[i].leftAABB.min; + gpuBvhNodes[i].leftAABB.max = nodes[i].leftAABB.max; + + gpuBvhNodes[i].rightAABB.min = nodes[i].rightAABB.min; + gpuBvhNodes[i].rightAABB.max = nodes[i].rightAABB.max; + } + + // Order after the BVH build to fit the node indices + std::vector orderedGpuBvhInstances(bvh.refs.size()); + for (size_t i = 0; i < bvh.refs.size(); i++) { + orderedGpuBvhInstances[i] = gpuBvhInstances[bvh.refs[i].idx]; + } + + tlasNodeBuffer.SetSize(gpuBvhNodes.size()); + tlasNodeBuffer.SetData(gpuBvhNodes.data(), 0, gpuBvhNodes.size()); + + return orderedGpuBvhInstances; + + } + + void RTData::UpdateForHardwareRayTracing(std::vector& actors) { + + auto device = Graphics::GraphicsDevice::DefaultDevice; + + Graphics::ASBuilder asBuilder; + + std::vector instances; + + for (auto actor : actors) { + if (!actor->mesh.IsLoaded()) + continue; + + if (!meshInfos.contains(actor->mesh.GetID())) + continue; + + 
auto& meshInfo = meshInfos[actor->mesh.GetID()]; + + VkAccelerationStructureInstanceKHR inst = {}; + VkTransformMatrixKHR transform; + + auto transposed = glm::transpose(actor->globalMatrix); + std::memcpy(&transform, &transposed, sizeof(VkTransformMatrixKHR)); + + inst.transform = transform; + inst.instanceCustomIndex = meshInfo.offset; + inst.accelerationStructureReference = meshInfo.blas->GetDeviceAddress(); + inst.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; + inst.mask = 0xFF; + inst.instanceShaderBindingTableRecordOffset = 0; + instances.push_back(inst); + } + + auto tlasDesc = Graphics::TLASDesc(); + tlas = device->CreateTLAS(tlasDesc); + + asBuilder.BuildTLAS(tlas, instances); + + } + + void RTData::BuildTriangleLightsForMesh(ResourceHandle &mesh) { + + auto& gpuTriangles = mesh->data.gpuTriangles; + auto& materials = mesh->data.materials; + + auto& meshInfo = meshInfos[mesh.GetID()]; + + // Triangle lights + for (size_t i = 0; i < gpuTriangles.size(); i++) { + auto& triangle = gpuTriangles[i]; + auto idx = reinterpret_cast(triangle.d0.w); + auto& material = materials[idx]; + + auto radiance = material.emissiveColor; + auto brightness = dot(radiance, vec3(0.3333f)); + + if (brightness > 0.0f) { + // Extract normal information again + auto cn0 = reinterpret_cast(triangle.v0.w); + auto cn1 = reinterpret_cast(triangle.v1.w); + auto cn2 = reinterpret_cast(triangle.v2.w); + + auto n0 = vec3(Common::Packing::UnpackSignedVector3x10_1x2(cn0)); + auto n1 = vec3(Common::Packing::UnpackSignedVector3x10_1x2(cn1)); + auto n2 = vec3(Common::Packing::UnpackSignedVector3x10_1x2(cn2)); + + // Compute necessary information + auto P = (vec3(triangle.v0) + vec3(triangle.v1) + vec3(triangle.v2)) / 3.0f; + auto N = (n0 + n1 + n2) / 3.0f; + + auto a = glm::distance(vec3(triangle.v1), vec3(triangle.v0)); + auto b = glm::distance(vec3(triangle.v2), vec3(triangle.v0)); + auto c = glm::distance(vec3(triangle.v1), vec3(triangle.v2)); + auto p = 0.5f * (a + b + 
c); + auto area = glm::sqrt(p * (p - a) * (p - b) * (p - c)); + + auto weight = area * brightness; + + uint32_t data = 0; + data |= (1 << 28u); // Type TRIANGLE_LIGHT (see RayTracingHelper.cpp) + data |= uint32_t(i); + auto cd = reinterpret_cast(data); + + GPULight light; + light.P = vec4(P, 1.0f); + light.N = vec4(N, 0.0f); + light.color = vec4(radiance, 0.0f); + light.data = vec4(cd, weight, area, 0.0f); + + meshInfo.triangleLights.push_back(light); + } + } + + } + + void RTData::UpdateTriangleLights() { + + triangleLights.clear(); + + for (auto& [meshIdx, meshInfo] : meshInfos) { + + for (auto& light : meshInfo.triangleLights) { + + for (size_t i = 0; i < meshInfo.instanceIndices.size(); i++) { + + auto instanceIdx = meshInfo.instanceIndices[i]; + auto& matrix = meshInfo.matrices[i]; + + vec3 P = matrix * vec4(vec3(light.P), 1.0f); + vec3 N = matrix * vec4(vec3(light.N), 0.0f); + + auto transformedLight = light; + + transformedLight.data.w = reinterpret_cast(instanceIdx); + + transformedLight.P = vec4(P, light.P.w); + transformedLight.N = vec4(N, light.N.w); + + triangleLights.push_back(transformedLight); + + } + + } + + } + + } + } } \ No newline at end of file diff --git a/src/engine/scene/RTData.h b/src/engine/scene/RTData.h index 2afb5f1b1..a7fba358f 100644 --- a/src/engine/scene/RTData.h +++ b/src/engine/scene/RTData.h @@ -32,7 +32,7 @@ namespace Atlas { void Build(); - void Update(); + void Update(bool updateTriangleLights); void UpdateMaterials(bool updateTextures = false); @@ -43,20 +43,48 @@ namespace Atlas { void Clear(); private: + struct MeshInfo { + Ref blas = nullptr; + + int32_t offset = 0; + int32_t triangleOffset = 0; + + std::vector triangleLights; + std::vector instanceIndices; + std::vector matrices; + }; + void UpdateMaterials(std::vector& materials, bool updateTextures); GPUTexture CreateGPUTextureStruct(std::vector slices); GPUTextureLevel CreateGPUTextureLevelStruct(Texture::TextureAtlas::Slice slice); + void BuildForSoftwareRayTracing(); 
+ + void BuildForHardwareRayTracing(); + + std::vector UpdateForSoftwareRayTracing(std::vector& gpuBvhInstances, + std::vector& actorAABBs); + + void UpdateForHardwareRayTracing(std::vector& actors); + + void BuildTriangleLightsForMesh(ResourceHandle& mesh); + + void UpdateTriangleLights(); + Scene* scene; + Ref tlas; + std::vector> blases; + Buffer::Buffer triangleBuffer; Buffer::Buffer bvhTriangleBuffer; Buffer::Buffer materialBuffer; Buffer::Buffer bvhInstanceBuffer; Buffer::Buffer tlasNodeBuffer; Buffer::Buffer blasNodeBuffer; + Buffer::Buffer geometryTriangleOffsetBuffer; Texture::TextureAtlas baseColorTextureAtlas; Texture::TextureAtlas opacityTextureAtlas; @@ -68,7 +96,9 @@ namespace Atlas { std::vector triangleLights; std::unordered_map materialAccess; - std::unordered_map meshInfo; + std::unordered_map meshInfos; + + bool hardwareRayTracing = false; std::atomic_bool isValid = false; std::mutex mutex; diff --git a/src/engine/scene/RTStructures.h b/src/engine/scene/RTStructures.h index 4760120b3..192d6e883 100644 --- a/src/engine/scene/RTStructures.h +++ b/src/engine/scene/RTStructures.h @@ -3,6 +3,8 @@ #include +#include "../graphics/BLAS.h" + namespace Atlas { struct GPUTriangle { @@ -71,19 +73,14 @@ namespace Atlas { vec3 max; }; - struct GPUMesh { - int32_t nodeOffset; - int32_t triangleOffset; - }; - struct GPUBVHInstance { mat3x4 inverseMatrix; int32_t blasOffset; + int32_t triangleOffset; int32_t padding0; int32_t padding1; - int32_t padding2; }; struct GPUBVHNode { @@ -97,9 +94,10 @@ namespace Atlas { }; struct GPULight { - vec4 data0; - vec4 data1; + vec4 P; vec4 N; + vec4 color; + vec4 data; }; } diff --git a/src/engine/scene/Scene.cpp b/src/engine/scene/Scene.cpp index 276bfe07c..72c102898 100644 --- a/src/engine/scene/Scene.cpp +++ b/src/engine/scene/Scene.cpp @@ -57,7 +57,7 @@ namespace Atlas { // Make sure this is changed just once at the start of a frame rtDataValid = rtData.IsValid(); if (rtDataValid) { - rtData.Update(); + 
rtData.Update(true); } }