Devsh-Graphics-Programming · alichraghi · Oct 4, 2024 · Oct 4, 2024 · Oct 4, 2024 · Oct 5, 2024
diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h
@@ -33,7 +33,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted
 				{
 					system::path absolutePath = {};
 					std::string contents = {};
-					std::array<uint64_t, 4> hash = {}; // TODO: we're not yet using IFile::getPrecomputedHash(), so for builtins we can maybe use that in the future
+					core::blake3_hash_t hash = {}; // TODO: we're not yet using IFile::getPrecomputedHash(), so for builtins we can maybe use that in the future
 					// Could be used in the future for early rejection of cache hit
 					//nbl::system::IFileBase::time_point_t lastWriteTime = {};
 
@@ -183,9 +183,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted
 
 			public:
 				// Used to check compatibility of Caches before reading
-				constexpr static inline std::string_view VERSION = "1.0.0";
+				constexpr static inline std::string_view VERSION = "1.1.0";
 
-				using hash_t = std::array<uint64_t,4>;
 				static auto const SHADER_BUFFER_SIZE_BYTES = sizeof(uint64_t) / sizeof(uint8_t); // It's obviously 8
 
 				struct SEntry
@@ -196,11 +195,9 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted
 					{
 						public:
 							// Perf note: hashing while preprocessor lexing is likely to be slower than just hashing the whole array like this 
-							inline SPreprocessingDependency(const system::path& _requestingSourceDir, const std::string_view& _identifier, const std::string_view& _contents, bool _standardInclude, std::array<uint64_t, 4> _hash) :
-								requestingSourceDir(_requestingSourceDir), identifier(_identifier), contents(_contents), standardInclude(_standardInclude), hash(_hash)
-							{
-								assert(!_contents.empty());
-							}
+							inline SPreprocessingDependency(const system::path& _requestingSourceDir, const std::string_view& _identifier, bool _standardInclude, core::blake3_hash_t _hash) :
+								requestingSourceDir(_requestingSourceDir), identifier(_identifier), standardInclude(_standardInclude), hash(_hash)
+							{}
 
 							inline SPreprocessingDependency(SPreprocessingDependency&) = default;
 							inline SPreprocessingDependency& operator=(SPreprocessingDependency&) = delete;
@@ -218,11 +215,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted
 							// path or identifier
 							system::path requestingSourceDir = "";
 							std::string identifier = "";
-							// file contents
-							// TODO: change to `core::vector<uint8_t>` a compressed blob of LZMA, and store all contents together in the `SEntry`
-							std::string contents = ""; 
 							// hash of the contents - used to check against a found_t
-							std::array<uint64_t, 4> hash = {};
+							core::blake3_hash_t hash = {};
 							// If true, then `getIncludeStandard` was used to find, otherwise `getIncludeRelative`
 							bool standardInclude = false;
 					};
@@ -248,6 +242,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted
 						private:
 							friend class SCompilerArgs;
 							friend class SEntry;
+							friend class CCache;
 							friend void to_json(nlohmann::json&, const SPreprocessorArgs&);
 							friend void from_json(const nlohmann::json&, SPreprocessorArgs&);
 
@@ -271,7 +266,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted
 							std::string sourceIdentifier;
 							std::vector<SMacroDefinition> extraDefines;
 					};
-					// TODO: SPreprocessorArgs could just be folded into `SCompilerArgs` to have less classes and operators
+					// TODO: SPreprocessorArgs could just be folded into `SCompilerArgs` to have fewer classes and operators
 					struct SCompilerArgs final
 					{
 						public:
@@ -290,6 +285,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted
 
 						private:
 							friend class SEntry;
+							friend class CCache;
 							friend void to_json(nlohmann::json&, const SCompilerArgs&);
 							friend void from_json(const nlohmann::json&, SCompilerArgs&);
 
@@ -351,33 +347,40 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted
 
 						// Now add the mainFileContents and produce both lookup and early equality rejection hashes
 						hashable.insert(hashable.end(), mainFileContents.begin(), mainFileContents.end());
-						hash = nbl::core::XXHash_256(hashable.data(), hashable.size());
-						lookupHash = hash[0];
-						for (auto i = 1u; i < 4; i++) {
-							core::hash_combine<uint64_t>(lookupHash, hash[i]);
-						}
+
+						core::blake3_hasher hasher;
+						hasher.update(hashable.data(), hashable.size());
+						hash = static_cast<core::blake3_hash_t>(hasher);
+						lookupHash = std::hash<core::blake3_hash_t>{}(hash);
 					}
 
 					// Needed to get the vector deserialization automatically
 					inline SEntry() {}
 
 					// Making the copy constructor deep-copy everything but the shader 
-					inline SEntry(const SEntry& other) 
-						: mainFileContents(other.mainFileContents), compilerArgs(other.compilerArgs), hash(other.hash), lookupHash(other.lookupHash), 
-						  dependencies(other.dependencies), cpuShader(other.cpuShader) {}
+					inline SEntry(const SEntry& other)
+						: mainFileContents(other.mainFileContents), compilerArgs(other.compilerArgs), hash(other.hash),
+						lookupHash(other.lookupHash), dependencies(other.dependencies), spirv(other.spirv),
+						uncompressedContentHash(other.uncompressedContentHash), uncompressedSize(other.uncompressedSize) {}
 
 					inline SEntry& operator=(SEntry& other) = delete;
 					inline SEntry(SEntry&& other) = default;
 					// Used for late initialization while looking up a cache, so as not to always initialize an entry even if caching was not requested
 					inline SEntry& operator=(SEntry&& other) = default;
 
+					void setContent(const asset::ICPUBuffer* uncompressedSpirvBuffer, dependency_container_t&& dependencies);
+
+					core::smart_refctd_ptr<ICPUShader> decompressShader() const;
+
 					// TODO: make some of these private
 					std::string mainFileContents;
 					SCompilerArgs compilerArgs;
-					std::array<uint64_t,4> hash;
+					core::blake3_hash_t hash;
 					size_t lookupHash;
 					dependency_container_t dependencies;
-					core::smart_refctd_ptr<asset::ICPUShader> cpuShader;
+					core::smart_refctd_ptr<asset::ICPUBuffer> spirv;
+					core::blake3_hash_t uncompressedContentHash;
+					size_t uncompressedSize;
 				};
 
 				inline void insert(SEntry&& entry)
@@ -429,48 +432,13 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted
 
 				};
 
-				using EntrySet = core::unordered_multiset<SEntry, Hash, KeyEqual>;
+				using EntrySet = core::unordered_set<SEntry, Hash, KeyEqual>;
 				EntrySet m_container;
 
 				NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder) const;
 		};
 
-		inline core::smart_refctd_ptr<ICPUShader> compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const
-		{
-			CCache::SEntry entry;
-			std::vector<CCache::SEntry::SPreprocessingDependency> dependencies;
-			if (options.readCache || options.writeCache)
-				entry = std::move(CCache::SEntry(code, options));
-
-			if (options.readCache)
-			{
-				auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder);
-				if (found != options.readCache->m_container.end())
-				{
-					if (options.writeCache)
-					{
-						CCache::SEntry writeEntry = *found;
-						options.writeCache->insert(std::move(writeEntry));
-					}
-					return found->cpuShader;
-				}
-			}
-
-			auto retVal = compileToSPIRV_impl(code, options, options.writeCache ? &dependencies : nullptr);
-			// compute the SPIR-V shader content hash
-			{
-				auto backingBuffer = retVal->getContent();
-				const_cast<ICPUBuffer*>(backingBuffer)->setContentHash(backingBuffer->computeContentHash());
-			}
-
-			if (options.writeCache)
-			{
-				entry.dependencies = std::move(dependencies);
-				entry.cpuShader = retVal;
-				options.writeCache->insert(std::move(entry));
-			}
-			return retVal;
-		}
+		core::smart_refctd_ptr<ICPUShader> compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const;
 
 		inline core::smart_refctd_ptr<ICPUShader> compileToSPIRV(const char* code, const SCompilerOptions& options) const
 		{

diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp
@@ -13,6 +13,9 @@
 #include <regex>
 #include <iterator>
 
+#include <lzma/C/LzmaEnc.h>
+#include <lzma/C/LzmaDec.h>
+
 using namespace nbl;
 using namespace nbl::asset;
 
@@ -22,6 +25,42 @@ IShaderCompiler::IShaderCompiler(core::smart_refctd_ptr<system::ISystem>&& syste
     m_defaultIncludeFinder = core::make_smart_refctd_ptr<CIncludeFinder>(core::smart_refctd_ptr(m_system));
 }
 
+core::smart_refctd_ptr<ICPUShader> nbl::asset::IShaderCompiler::compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const
+{
+    // Compiles `code` to SPIR-V: tries `options.readCache` first, and records the result in `options.writeCache` when one is set.
+    // NOTE: deliberately not `inline` - the header only declares this member, so this TU must emit the definition for callers elsewhere.
+    CCache::SEntry entry;
+    std::vector<CCache::SEntry::SPreprocessingDependency> dependencies;
+    if (options.readCache || options.writeCache)
+        entry = CCache::SEntry(code, options); // late init: only build the lookup entry when a cache is involved
+
+    if (options.readCache)
+    {
+        auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder);
+        if (found != options.readCache->m_container.end())
+        {
+            if (options.writeCache) // propagate the hit so the write cache ends up self-contained
+                options.writeCache->insert(CCache::SEntry(*found));
+            return found->decompressShader();
+        }
+    }
+
+    auto retVal = compileToSPIRV_impl(code, options, options.writeCache ? &dependencies : nullptr);
+    // NOTE(review): assumes `compileToSPIRV_impl` yields nullptr on failure - bail out before dereferencing it
+    if (!retVal)
+        return nullptr;
+    // compute the SPIR-V shader content hash (also read back by `SEntry::setContent` below)
+    auto backingBuffer = retVal->getContent();
+    const_cast<ICPUBuffer*>(backingBuffer)->setContentHash(backingBuffer->computeContentHash());
+
+    if (options.writeCache)
+    {
+        entry.setContent(retVal->getContent(), std::move(dependencies));
+        options.writeCache->insert(std::move(entry));
+    }
+    return retVal;
+}
+
 std::string IShaderCompiler::preprocessShader(
     system::IFile* sourcefile,
     IShader::E_SHADER_STAGE stage,
@@ -116,7 +155,10 @@ auto IShaderCompiler::CIncludeFinder::getIncludeStandard(const system::path& req
         retVal = std::move(contents);
     else retVal = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), includeName);
 
-    retVal.hash = nbl::core::XXHash_256((uint8_t*)(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t)));
+
+    core::blake3_hasher hasher;
+    hasher.update((uint8_t*)(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t)));
+    retVal.hash = static_cast<core::blake3_hash_t>(hasher);
     return retVal;
 }
 
@@ -129,7 +171,10 @@ auto IShaderCompiler::CIncludeFinder::getIncludeRelative(const system::path& req
     if (auto contents = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), includeName))
         retVal = std::move(contents);
     else retVal = std::move(trySearchPaths(includeName));
-    retVal.hash = nbl::core::XXHash_256((uint8_t*)(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t)));
+
+    core::blake3_hasher hasher;
+    hasher.update((uint8_t*)(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t)));
+    retVal.hash = static_cast<core::blake3_hash_t>(hasher);
     return retVal;
 }
 
@@ -218,7 +263,10 @@ auto IShaderCompiler::CIncludeFinder::tryIncludeGenerators(const std::string& in
 
 core::smart_refctd_ptr<asset::ICPUShader> IShaderCompiler::CCache::find(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const
 {
-    return find_impl(mainFile, finder)->cpuShader;
+    // A miss (find_impl returned end()) yields nullptr; a hit is decompressed into a new ICPUShader
+    if (const auto hit = find_impl(mainFile, finder); hit != m_container.end())
+        return hit->decompressShader();
+    return nullptr;
 }
 
 IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_impl(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const
@@ -238,7 +286,7 @@ IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_
             else
                 header = finder->getIncludeRelative(dependency.requestingSourceDir, dependency.identifier);
 
-            if (header.hash != dependency.hash || header.contents != dependency.contents)
+            if (header.hash != dependency.hash)
             {
                 allDependenciesMatch = false;
                 break;
@@ -267,10 +315,10 @@ core::smart_refctd_ptr<ICPUBuffer> IShaderCompiler::CCache::serialize() const
         // We keep a copy of the offsets and the sizes of each shader. This is so that later on, when we add the shaders to the buffer after json creation
         // (where the params array has been moved) we don't have to read the json to get the offsets again
         offsets[i] = shaderBufferSize;
-        sizes[i] = entry.cpuShader->getContent()->getSize();
+        sizes[i] = entry.spirv->getSize();
 
         // And add the params to the shader creation parameters array
-        shaderCreationParams.emplace_back(entry.cpuShader->getStage(), entry.cpuShader->getContentType(), entry.cpuShader->getFilepathHint(), sizes[i], shaderBufferSize);
+        shaderCreationParams.emplace_back(entry.compilerArgs.stage, entry.compilerArgs.preprocessorArgs.sourceIdentifier.data(), sizes[i], shaderBufferSize);
         // Enlarge the shader buffer by the size of the current shader
         shaderBufferSize += sizes[i];
         i++;
@@ -294,7 +342,7 @@ core::smart_refctd_ptr<ICPUBuffer> IShaderCompiler::CCache::serialize() const
     // Loop over entries again, adding each one's shader to the buffer. 
     i = 0u;
     for (auto& entry : m_container) {
-        memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES + offsets[i], entry.cpuShader->getContent()->getPointer(), sizes[i]);
+        memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES + offsets[i], entry.spirv->getPointer(), sizes[i]);
         i++;
     }
 
@@ -324,7 +372,6 @@ core::smart_refctd_ptr<IShaderCompiler::CCache> IShaderCompiler::CCache::deseria
             return nullptr;
         }
     }
-
 
     // Now retrieve two vectors, one with the entries and one with the extra data to recreate the CPUShaders
     std::vector<SEntry> entries;
@@ -337,13 +384,64 @@ core::smart_refctd_ptr<IShaderCompiler::CCache> IShaderCompiler::CCache::deseria
         // Create buffer to hold the code
         auto code = core::make_smart_refctd_ptr<ICPUBuffer>(shaderCreationParams[i].codeByteSize);
         // Copy the shader bytecode into the buffer
+
         memcpy(code->getPointer(), serializedCache.data() + SHADER_BUFFER_SIZE_BYTES + shaderCreationParams[i].offset, shaderCreationParams[i].codeByteSize);
         code->setContentHash(code->computeContentHash());
-        // Create the ICPUShader
-        entries[i].cpuShader = core::make_smart_refctd_ptr<ICPUShader>(std::move(code), shaderCreationParams[i].stage, shaderCreationParams[i].contentType, std::move(shaderCreationParams[i].filepathHint));
+        entries[i].spirv = std::move(code);
 
         retVal->insert(std::move(entries[i]));
     }
 
     return retVal;
-}
+}
+
+static void* SzAlloc(ISzAllocPtr, size_t size) { return _NBL_ALIGNED_MALLOC(size, _NBL_SIMD_ALIGNMENT); } // LZMA allocation hook; the allocator handle is unused
+static void SzFree(ISzAllocPtr, void* address) { _NBL_ALIGNED_FREE(address); } // LZMA free hook, counterpart of SzAlloc
+
+void nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBuffer* uncompressedSpirvBuffer, dependency_container_t&& dependencies)
+{
+    // Stores the dependency list and an LZMA-compressed copy of the SPIR-V, recording the original hash and size for `decompressShader`.
+    this->dependencies = std::move(dependencies); // `this->` is required: the parameter shadows the member (a bare assignment is a self-move)
+    uncompressedContentHash = uncompressedSpirvBuffer->getContentHash();
+    uncompressedSize = uncompressedSpirvBuffer->getSize();
+
+    // Scratch output = props header + stream. NOTE(review): size + size/3 + 128 is a heuristic worst-case bound - confirm against LZMA SDK guidance
+    size_t propsSize = LZMA_PROPS_SIZE;
+    size_t destLen = uncompressedSpirvBuffer->getSize() + uncompressedSpirvBuffer->getSize() / 3 + 128;
+    std::vector<unsigned char> compressedSpirv(propsSize + destLen);
+
+    CLzmaEncProps props;
+    LzmaEncProps_Init(&props);
+    props.dictSize = 1 << 16; // 64KB
+    props.writeEndMark = 1;
+
+    ISzAlloc alloc = { SzAlloc, SzFree };
+    int res = LzmaEncode(
+        compressedSpirv.data() + LZMA_PROPS_SIZE, &destLen,
+        reinterpret_cast<const unsigned char*>(uncompressedSpirvBuffer->getPointer()), uncompressedSpirvBuffer->getSize(),
+        &props, compressedSpirv.data(), &propsSize, props.writeEndMark,
+        nullptr, &alloc, &alloc);
+    assert(propsSize == LZMA_PROPS_SIZE);
+    assert(res == SZ_OK);
+    // Keep only props + produced stream; `destLen` was passed by pointer and is presumably now the number of bytes written
+    spirv = core::make_smart_refctd_ptr<ICPUBuffer>(propsSize + destLen);
+    memcpy(spirv->getPointer(), compressedSpirv.data(), spirv->getSize());
+}
+
+core::smart_refctd_ptr<ICPUShader> nbl::asset::IShaderCompiler::CCache::SEntry::decompressShader() const
+{
+    // `spirv` layout (as written by setContent): LZMA_PROPS_SIZE bytes of encoder props, then the LZMA stream
+    auto decoded = core::make_smart_refctd_ptr<ICPUBuffer>(uncompressedSize);
+    decoded->setContentHash(uncompressedContentHash); // hash recorded at compression time, not recomputed here
+    size_t decodedSize = decoded->getSize();
+
+    size_t packedSize = spirv->getSize() - LZMA_PROPS_SIZE;
+    const auto* const packed = reinterpret_cast<const unsigned char*>(spirv->getPointer());
+    auto* const unpacked = reinterpret_cast<unsigned char*>(decoded->getPointer());
+
+    ELzmaStatus decodeStatus;
+    ISzAlloc allocator = { SzAlloc, SzFree };
+    SRes result = LzmaDecode(unpacked, &decodedSize, packed + LZMA_PROPS_SIZE, &packedSize, packed, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &decodeStatus, &allocator);
+    assert(result == SZ_OK);
+    return core::make_smart_refctd_ptr<asset::ICPUShader>(std::move(decoded), compilerArgs.stage, IShader::E_CONTENT_TYPE::ECT_SPIRV, compilerArgs.preprocessorArgs.sourceIdentifier.data());
+}