diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index 1ed613f27..ac7ed5eb1 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -33,7 +33,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted { system::path absolutePath = {}; std::string contents = {}; - std::array hash = {}; // TODO: we're not yet using IFile::getPrecomputedHash(), so for builtins we can maybe use that in the future + core::blake3_hash_t hash = {}; // TODO: we're not yet using IFile::getPrecomputedHash(), so for builtins we can maybe use that in the future // Could be used in the future for early rejection of cache hit //nbl::system::IFileBase::time_point_t lastWriteTime = {}; @@ -183,9 +183,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted public: // Used to check compatibility of Caches before reading - constexpr static inline std::string_view VERSION = "1.0.0"; + constexpr static inline std::string_view VERSION = "1.1.0"; - using hash_t = std::array; static auto const SHADER_BUFFER_SIZE_BYTES = sizeof(uint64_t) / sizeof(uint8_t); // It's obviously 8 struct SEntry @@ -196,11 +195,9 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted { public: // Perf note: hashing while preprocessor lexing is likely to be slower than just hashing the whole array like this - inline SPreprocessingDependency(const system::path& _requestingSourceDir, const std::string_view& _identifier, const std::string_view& _contents, bool _standardInclude, std::array _hash) : - requestingSourceDir(_requestingSourceDir), identifier(_identifier), contents(_contents), standardInclude(_standardInclude), hash(_hash) - { - assert(!_contents.empty()); - } + inline SPreprocessingDependency(const system::path& _requestingSourceDir, const std::string_view& _identifier, bool _standardInclude, core::blake3_hash_t _hash) : + requestingSourceDir(_requestingSourceDir), identifier(_identifier), 
standardInclude(_standardInclude), hash(_hash) + {} inline SPreprocessingDependency(SPreprocessingDependency&) = default; inline SPreprocessingDependency& operator=(SPreprocessingDependency&) = delete; @@ -218,11 +215,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted // path or identifier system::path requestingSourceDir = ""; std::string identifier = ""; - // file contents - // TODO: change to `core::vector` a compressed blob of LZMA, and store all contents together in the `SEntry` - std::string contents = ""; // hash of the contents - used to check against a found_t - std::array hash = {}; + core::blake3_hash_t hash = {}; // If true, then `getIncludeStandard` was used to find, otherwise `getIncludeRelative` bool standardInclude = false; }; @@ -248,6 +242,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted private: friend class SCompilerArgs; friend class SEntry; + friend class CCache; friend void to_json(nlohmann::json&, const SPreprocessorArgs&); friend void from_json(const nlohmann::json&, SPreprocessorArgs&); @@ -271,7 +266,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted std::string sourceIdentifier; std::vector extraDefines; }; - // TODO: SPreprocessorArgs could just be folded into `SCompilerArgs` to have less classes and operators + // TODO: SPreprocessorArgs could just be folded into `SCompilerArgs` to have less classes and operators struct SCompilerArgs final { public: @@ -290,6 +285,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted private: friend class SEntry; + friend class CCache; friend void to_json(nlohmann::json&, const SCompilerArgs&); friend void from_json(const nlohmann::json&, SCompilerArgs&); @@ -351,33 +347,40 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted // Now add the mainFileContents and produce both lookup and early equality rejection hashes hashable.insert(hashable.end(), mainFileContents.begin(), mainFileContents.end()); - hash = 
nbl::core::XXHash_256(hashable.data(), hashable.size()); - lookupHash = hash[0]; - for (auto i = 1u; i < 4; i++) { - core::hash_combine(lookupHash, hash[i]); - } + + core::blake3_hasher hasher; + hasher.update(hashable.data(), hashable.size()); + hash = static_cast(hasher); + lookupHash = std::hash{}(hash); } // Needed to get the vector deserialization automatically inline SEntry() {} // Making the copy constructor deep-copy everything but the shader - inline SEntry(const SEntry& other) - : mainFileContents(other.mainFileContents), compilerArgs(other.compilerArgs), hash(other.hash), lookupHash(other.lookupHash), - dependencies(other.dependencies), cpuShader(other.cpuShader) {} + inline SEntry(const SEntry& other) + : mainFileContents(other.mainFileContents), compilerArgs(other.compilerArgs), hash(other.hash), + lookupHash(other.lookupHash), dependencies(other.dependencies), spirv(other.spirv), + uncompressedContentHash(other.uncompressedContentHash), uncompressedSize(other.uncompressedSize) {} inline SEntry& operator=(SEntry& other) = delete; inline SEntry(SEntry&& other) = default; // Used for late initialization while looking up a cache, so as not to always initialize an entry even if caching was not requested inline SEntry& operator=(SEntry&& other) = default; + bool setContent(const asset::ICPUBuffer* uncompressedSpirvBuffer, dependency_container_t&& dependencies); + + core::smart_refctd_ptr decompressShader() const; + // TODO: make some of these private std::string mainFileContents; SCompilerArgs compilerArgs; - std::array hash; + core::blake3_hash_t hash; size_t lookupHash; dependency_container_t dependencies; - core::smart_refctd_ptr cpuShader; + core::smart_refctd_ptr spirv; + core::blake3_hash_t uncompressedContentHash; + size_t uncompressedSize; }; inline void insert(SEntry&& entry) @@ -429,48 +432,13 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted }; - using EntrySet = core::unordered_multiset; + using EntrySet = core::unordered_set; 
EntrySet m_container; NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder) const; }; - inline core::smart_refctd_ptr compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const - { - CCache::SEntry entry; - std::vector dependencies; - if (options.readCache || options.writeCache) - entry = std::move(CCache::SEntry(code, options)); - - if (options.readCache) - { - auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder); - if (found != options.readCache->m_container.end()) - { - if (options.writeCache) - { - CCache::SEntry writeEntry = *found; - options.writeCache->insert(std::move(writeEntry)); - } - return found->cpuShader; - } - } - - auto retVal = compileToSPIRV_impl(code, options, options.writeCache ? &dependencies : nullptr); - // compute the SPIR-V shader content hash - { - auto backingBuffer = retVal->getContent(); - const_cast(backingBuffer)->setContentHash(backingBuffer->computeContentHash()); - } - - if (options.writeCache) - { - entry.dependencies = std::move(dependencies); - entry.cpuShader = retVal; - options.writeCache->insert(std::move(entry)); - } - return retVal; - } + core::smart_refctd_ptr compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const; inline core::smart_refctd_ptr compileToSPIRV(const char* code, const SCompilerOptions& options) const { diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index ba23ba628..f8c9e545d 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -13,6 +13,9 @@ #include #include +#include +#include + using namespace nbl; using namespace nbl::asset; @@ -22,6 +25,42 @@ IShaderCompiler::IShaderCompiler(core::smart_refctd_ptr&& syste m_defaultIncludeFinder = core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system)); } +core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(const 
std::string_view code, const SCompilerOptions& options) const +{ + CCache::SEntry entry; + std::vector dependencies; + if (options.readCache || options.writeCache) + entry = std::move(CCache::SEntry(code, options)); + + if (options.readCache) + { + auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder); + if (found != options.readCache->m_container.end()) + { + if (options.writeCache) + { + CCache::SEntry writeEntry = *found; + options.writeCache->insert(std::move(writeEntry)); + } + return found->decompressShader(); + } + } + + auto retVal = compileToSPIRV_impl(code, options, options.writeCache ? &dependencies : nullptr); + // compute the SPIR-V shader content hash + { + auto backingBuffer = retVal->getContent(); + const_cast(backingBuffer)->setContentHash(backingBuffer->computeContentHash()); + } + + if (options.writeCache) + { + if (entry.setContent(retVal->getContent(), std::move(dependencies))) + options.writeCache->insert(std::move(entry)); + } + return retVal; +} + std::string IShaderCompiler::preprocessShader( system::IFile* sourcefile, IShader::E_SHADER_STAGE stage, @@ -116,7 +155,10 @@ auto IShaderCompiler::CIncludeFinder::getIncludeStandard(const system::path& req retVal = std::move(contents); else retVal = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), includeName); - retVal.hash = nbl::core::XXHash_256((uint8_t*)(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); + + core::blake3_hasher hasher; + hasher.update((uint8_t*)(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); + retVal.hash = static_cast(hasher); return retVal; } @@ -129,7 +171,10 @@ auto IShaderCompiler::CIncludeFinder::getIncludeRelative(const system::path& req if (auto contents = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), includeName)) retVal = std::move(contents); else retVal = std::move(trySearchPaths(includeName)); - retVal.hash = 
nbl::core::XXHash_256((uint8_t*)(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); + + core::blake3_hasher hasher; + hasher.update((uint8_t*)(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); + retVal.hash = static_cast(hasher); return retVal; } @@ -218,7 +263,10 @@ auto IShaderCompiler::CIncludeFinder::tryIncludeGenerators(const std::string& in core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const { - return find_impl(mainFile, finder)->cpuShader; + const auto found = find_impl(mainFile, finder); + if (found==m_container.end()) + return nullptr; + return found->decompressShader(); } IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_impl(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const @@ -238,7 +286,7 @@ IShaderCompiler::CCache::EntrySet::const_iterator IShaderCompiler::CCache::find_ else header = finder->getIncludeRelative(dependency.requestingSourceDir, dependency.identifier); - if (header.hash != dependency.hash || header.contents != dependency.contents) + if (header.hash != dependency.hash) { allDependenciesMatch = false; break; @@ -267,10 +315,10 @@ core::smart_refctd_ptr IShaderCompiler::CCache::serialize() const // We keep a copy of the offsets and the sizes of each shader. 
This is so that later on, when we add the shaders to the buffer after json creation // (where the params array has been moved) we don't have to read the json to get the offsets again offsets[i] = shaderBufferSize; - sizes[i] = entry.cpuShader->getContent()->getSize(); + sizes[i] = entry.spirv->getSize(); // And add the params to the shader creation parameters array - shaderCreationParams.emplace_back(entry.cpuShader->getStage(), entry.cpuShader->getContentType(), entry.cpuShader->getFilepathHint(), sizes[i], shaderBufferSize); + shaderCreationParams.emplace_back(entry.compilerArgs.stage, entry.compilerArgs.preprocessorArgs.sourceIdentifier.data(), sizes[i], shaderBufferSize); // Enlarge the shader buffer by the size of the current shader shaderBufferSize += sizes[i]; i++; @@ -294,7 +342,7 @@ core::smart_refctd_ptr IShaderCompiler::CCache::serialize() const // Loop over entries again, adding each one's shader to the buffer. i = 0u; for (auto& entry : m_container) { - memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES + offsets[i], entry.cpuShader->getContent()->getPointer(), sizes[i]); + memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES + offsets[i], entry.spirv->getPointer(), sizes[i]); i++; } @@ -324,7 +372,6 @@ core::smart_refctd_ptr IShaderCompiler::CCache::deseria return nullptr; } } - // Now retrieve two vectors, one with the entries and one with the extra data to recreate the CPUShaders std::vector entries; @@ -337,13 +384,65 @@ core::smart_refctd_ptr IShaderCompiler::CCache::deseria // Create buffer to hold the code auto code = core::make_smart_refctd_ptr(shaderCreationParams[i].codeByteSize); // Copy the shader bytecode into the buffer + memcpy(code->getPointer(), serializedCache.data() + SHADER_BUFFER_SIZE_BYTES + shaderCreationParams[i].offset, shaderCreationParams[i].codeByteSize); code->setContentHash(code->computeContentHash()); - // Create the ICPUShader - entries[i].cpuShader = core::make_smart_refctd_ptr(std::move(code), shaderCreationParams[i].stage, 
shaderCreationParams[i].contentType, std::move(shaderCreationParams[i].filepathHint)); + entries[i].spirv = std::move(code); retVal->insert(std::move(entries[i])); } return retVal; -} \ No newline at end of file +} + +static void* SzAlloc(ISzAllocPtr p, size_t size) { p = p; return _NBL_ALIGNED_MALLOC(size, _NBL_SIMD_ALIGNMENT); } +static void SzFree(ISzAllocPtr p, void* address) { p = p; _NBL_ALIGNED_FREE(address); } + +bool nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBuffer* uncompressedSpirvBuffer, dependency_container_t&& dependencies) +{ + this->dependencies = std::move(dependencies); // `this->` is required: the parameter shadows the member of the same name + uncompressedContentHash = uncompressedSpirvBuffer->getContentHash(); + uncompressedSize = uncompressedSpirvBuffer->getSize(); + + size_t propsSize = LZMA_PROPS_SIZE; + size_t destLen = uncompressedSpirvBuffer->getSize() + uncompressedSpirvBuffer->getSize() / 3 + 128; + std::vector compressedSpirv = {}; + compressedSpirv.resize(propsSize + destLen); + + CLzmaEncProps props; + LzmaEncProps_Init(&props); + props.dictSize = 1 << 16; // 64KB + props.writeEndMark = 1; + + ISzAlloc alloc = { SzAlloc, SzFree }; + int res = LzmaEncode( + compressedSpirv.data() + LZMA_PROPS_SIZE, &destLen, + reinterpret_cast(uncompressedSpirvBuffer->getPointer()), uncompressedSpirvBuffer->getSize(), + &props, compressedSpirv.data(), &propsSize, props.writeEndMark, + nullptr, &alloc, &alloc); + + if (res != SZ_OK || propsSize != LZMA_PROPS_SIZE) return false; + + spirv = core::make_smart_refctd_ptr(propsSize + destLen); + memcpy(spirv->getPointer(), compressedSpirv.data(), spirv->getSize()); + + return true; +} + +core::smart_refctd_ptr nbl::asset::IShaderCompiler::CCache::SEntry::decompressShader() const +{ + auto uncompressedBuf = core::make_smart_refctd_ptr(uncompressedSize); + uncompressedBuf->setContentHash(uncompressedContentHash); + + size_t dstSize = uncompressedBuf->getSize(); + size_t srcSize = spirv->getSize() - LZMA_PROPS_SIZE; + ELzmaStatus status; + ISzAlloc alloc = { 
SzAlloc, SzFree }; + SRes res = LzmaDecode( + reinterpret_cast(uncompressedBuf->getPointer()), &dstSize, + reinterpret_cast(spirv->getPointer()) + LZMA_PROPS_SIZE, &srcSize, + reinterpret_cast(spirv->getPointer()), LZMA_PROPS_SIZE, + LZMA_FINISH_ANY, &status, &alloc); + assert(res == SZ_OK); + return core::make_smart_refctd_ptr(std::move(uncompressedBuf), compilerArgs.stage, IShader::E_CONTENT_TYPE::ECT_SPIRV, compilerArgs.preprocessorArgs.sourceIdentifier.data()); +} diff --git a/src/nbl/asset/utils/shaderCompiler_serialization.h b/src/nbl/asset/utils/shaderCompiler_serialization.h index cd964d568..ca81df640 100644 --- a/src/nbl/asset/utils/shaderCompiler_serialization.h +++ b/src/nbl/asset/utils/shaderCompiler_serialization.h @@ -116,8 +116,7 @@ inline void to_json(json& j, const SEntry::SPreprocessingDependency& dependency) j = json{ { "requestingSourceDir", dependency.requestingSourceDir }, { "identifier", dependency.identifier }, - { "contents", dependency.contents }, - { "hash", dependency.hash }, + { "hash", dependency.hash.data }, { "standardInclude", dependency.standardInclude }, }; } @@ -126,8 +125,7 @@ inline void from_json(const json& j, SEntry::SPreprocessingDependency& dependenc { j.at("requestingSourceDir").get_to(dependency.requestingSourceDir); j.at("identifier").get_to(dependency.identifier); - j.at("contents").get_to(dependency.contents); - j.at("hash").get_to(dependency.hash); + j.at("hash").get_to(dependency.hash.data); j.at("standardInclude").get_to(dependency.standardInclude); } @@ -135,13 +133,12 @@ inline void from_json(const json& j, SEntry::SPreprocessingDependency& dependenc struct CPUShaderCreationParams { IShader::E_SHADER_STAGE stage; - IShader::E_CONTENT_TYPE contentType; //I think this one could be skipped since it's always going to be SPIR-V std::string filepathHint; uint64_t codeByteSize = 0; uint64_t offset = 0; // Offset into the serialized .bin for the Cache where code starts - CPUShaderCreationParams(IShader::E_SHADER_STAGE 
_stage, IShader::E_CONTENT_TYPE _contentType, std::string_view _filepathHint, uint64_t _codeByteSize, uint64_t _offset) - : stage(_stage), contentType(_contentType), filepathHint(_filepathHint), codeByteSize(_codeByteSize), offset(_offset) + CPUShaderCreationParams(IShader::E_SHADER_STAGE _stage, std::string_view _filepathHint, uint64_t _codeByteSize, uint64_t _offset) + : stage(_stage), filepathHint(_filepathHint), codeByteSize(_codeByteSize), offset(_offset) {} CPUShaderCreationParams() {}; @@ -150,10 +147,8 @@ struct CPUShaderCreationParams { inline void to_json(json& j, const CPUShaderCreationParams& creationParams) { uint32_t stage = static_cast(creationParams.stage); - uint32_t contentType = static_cast(creationParams.contentType); j = json{ { "stage", stage }, - { "contentType", contentType }, { "filepathHint", creationParams.filepathHint }, { "codeByteSize", creationParams.codeByteSize }, { "offset", creationParams.offset }, @@ -162,14 +157,12 @@ inline void to_json(json& j, const CPUShaderCreationParams& creationParams) inline void from_json(const json& j, CPUShaderCreationParams& creationParams) { - uint32_t stage, contentType; + uint32_t stage; j.at("stage").get_to(stage); - j.at("contentType").get_to(contentType); j.at("filepathHint").get_to(creationParams.filepathHint); j.at("codeByteSize").get_to(creationParams.codeByteSize); j.at("offset").get_to(creationParams.offset); creationParams.stage = static_cast(stage); - creationParams.contentType = static_cast(stage); } // Serialize SEntry, keeping some fields as extra serialization to keep them separate on disk @@ -179,9 +172,11 @@ inline void to_json(json& j, const SEntry& entry) j = json{ { "mainFileContents", entry.mainFileContents }, { "compilerArgs", entry.compilerArgs }, - { "hash", entry.hash }, + { "hash", entry.hash.data }, { "lookupHash", entry.lookupHash }, { "dependencies", entry.dependencies }, + { "uncompressedContentHash", entry.uncompressedContentHash.data }, + { "uncompressedSize", 
entry.uncompressedSize }, }; } @@ -189,10 +184,12 @@ inline void from_json(const json& j, SEntry& entry) { j.at("mainFileContents").get_to(entry.mainFileContents); j.at("compilerArgs").get_to(entry.compilerArgs); - j.at("hash").get_to(entry.hash); + j.at("hash").get_to(entry.hash.data); j.at("lookupHash").get_to(entry.lookupHash); j.at("dependencies").get_to(entry.dependencies); - entry.cpuShader = nullptr; + j.at("uncompressedContentHash").get_to(entry.uncompressedContentHash.data); + j.at("uncompressedSize").get_to(entry.uncompressedSize); + entry.spirv = nullptr; } } diff --git a/src/nbl/asset/utils/waveContext.h b/src/nbl/asset/utils/waveContext.h index 696a2b9fb..a37439157 100644 --- a/src/nbl/asset/utils/waveContext.h +++ b/src/nbl/asset/utils/waveContext.h @@ -533,7 +533,7 @@ template<> inline bool boost::wave::impl::pp_iterator_functor