From 80fb6ee5adabd5acc96500e695e53d7af3916787 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Fri, 27 Sep 2024 14:24:23 -0400 Subject: [PATCH] Fix occasional GPU crash when a smaller descriptor set replaces a larger one. If a descriptor set with fewer than 64 descriptors replaced one with more, the resources it contained were not made resident to the GPU, due to MVKBitArray thrashing incorrectly when transitioning between static and dynamic memory allocations. - Overhaul MVKBitArray design: - Never downsize memory allocation unless reset() is called. - No longer support static allocation for smaller sizes, to avoid thrashing between dynamic and static allocs when frequently resizing up and down. - Use realloc() instead of malloc/copy to improve performance. - Simplify tracking of partially and fully disabled sections. - Rename setXX() and clearXX() functions to enableXX() & disableXX(). - Rename several internal functions. - getIndexOfFirstEnabledBit() & enumerateEnabledBits() no longer have an option to disable the bit. - MVKDescriptorPool tracks the highest descriptor set allocated, instead of querying MVKBitArray. 
--- .../Commands/MVKCommandEncoderState.mm | 4 +- .../MoltenVK/GPUObjects/MVKDescriptorSet.h | 7 +- .../MoltenVK/GPUObjects/MVKDescriptorSet.mm | 28 +- MoltenVK/MoltenVK/Utility/MVKBitArray.h | 284 ++++++++---------- 4 files changed, 136 insertions(+), 187 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 03f9f2a9e..7a0de21ae 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -645,7 +645,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl if (dsChanged) { auto& usageDirty = _metalUsageDirtyDescriptors[descSetIndex]; usageDirty.resize(descSet->getDescriptorCount()); - usageDirty.setAllBits(); + usageDirty.enableAllBits(); } // Update dynamic buffer offsets @@ -717,7 +717,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl MVKCommandEncoderState::markDirty(); if (_cmdEncoder->isUsingMetalArgumentBuffers()) { for (uint32_t dsIdx = 0; dsIdx < kMVKMaxDescriptorSetCount; dsIdx++) { - _metalUsageDirtyDescriptors[dsIdx].setAllBits(); + _metalUsageDirtyDescriptors[dsIdx].enableAllBits(); } } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h index 96a9cf841..00ed01307 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h @@ -305,9 +305,9 @@ class MVKDescriptorPool : public MVKVulkanAPIDeviceObject { MVKSmallVector _descriptorSets; MVKBitArray _descriptorSetAvailablility; - id _metalArgumentBuffer; - NSUInteger _nextMetalArgumentBufferOffset; MVKMTLBufferAllocator _mtlBufferAllocator; + id _metalArgumentBuffer = nil; + NSUInteger _nextMetalArgumentBufferOffset = 0; MVKDescriptorTypePool _uniformBufferDescriptors; MVKDescriptorTypePool _storageBufferDescriptors; @@ -322,7 +322,8 @@ class MVKDescriptorPool : public 
MVKVulkanAPIDeviceObject { MVKDescriptorTypePool _uniformTexelBufferDescriptors; MVKDescriptorTypePool _storageTexelBufferDescriptors; - VkDescriptorPoolCreateFlags _flags; + VkDescriptorPoolCreateFlags _flags = 0; + size_t _maxAllocDescSetCount = 0; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm index f692b08bd..8474559af 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm @@ -292,7 +292,7 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) { auto& dslBind = _bindings[bindIdx]; if (context.isResourceUsed(spvExecModels[stage], descSetIndex, dslBind.getBinding())) { - bindingUse.setBit(bindIdx); + bindingUse.enableBit(bindIdx); descSetIsUsed = true; } } @@ -628,9 +628,9 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf bool& dynamicAllocation, MVKDescriptorPool* pool) { VkResult errRslt = VK_ERROR_OUT_OF_POOL_MEMORY; - size_t availDescIdx = _availability.getIndexOfFirstSetBit(); + size_t availDescIdx = _availability.getIndexOfFirstEnabledBit(); if (availDescIdx < size()) { - _availability.clearBit(availDescIdx); // Mark the descriptor as taken + _availability.disableBit(availDescIdx); // Mark the descriptor as taken *pMVKDesc = &_descriptors[availDescIdx]; (*pMVKDesc)->reset(); // Reset descriptor before reusing. dynamicAllocation = false; @@ -657,7 +657,7 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf DescriptorClass* pFirstDesc = _descriptors.data(); int64_t descIdx = pDesc >= pFirstDesc ? 
pDesc - pFirstDesc : pFirstDesc - pDesc; if (descIdx >= 0 && descIdx < size()) { - _availability.setBit(descIdx); + _availability.enableBit(descIdx); } else { mvkDesc->destroy(); } @@ -666,13 +666,13 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf // Preallocated descriptors will be reset when they are reused template void MVKDescriptorTypePool::reset() { - _availability.setAllBits(); + _availability.enableAllBits(); } template size_t MVKDescriptorTypePool::getRemainingDescriptorCount() { size_t enabledCount = 0; - _availability.enumerateEnabledBits(false, [&](size_t bitIdx) { enabledCount++; return true; }); + _availability.enumerateEnabledBits([&](size_t bitIdx) { enabledCount++; return true; }); return enabledCount; } @@ -740,7 +740,7 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf uint64_t mtlArgBuffEncAlignedSize = mvkAlignByteCount(mtlArgBuffEncSize, getMetalFeatures().mtlBufferAlignment); size_t dsCnt = _descriptorSetAvailablility.size(); - _descriptorSetAvailablility.enumerateEnabledBits(true, [&](size_t dsIdx) { + _descriptorSetAvailablility.enumerateEnabledBits([&](size_t dsIdx) { bool isSpaceAvail = true; // If not using Metal arg buffers, space will always be available. 
MVKDescriptorSet* mvkDS = &_descriptorSets[dsIdx]; NSUInteger mtlArgBuffOffset = mvkDS->getMetalArgumentBuffer().getMetalArgumentBufferOffset(); @@ -772,11 +772,12 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf if (rslt) { freeDescriptorSet(mvkDS, false); } else { + _descriptorSetAvailablility.disableBit(dsIdx); + _maxAllocDescSetCount = std::max(_maxAllocDescSetCount, dsIdx + 1); *pVKDS = (VkDescriptorSet)mvkDS; } return false; } else { - _descriptorSetAvailablility.setBit(dsIdx); // We didn't consume this one after all, so it's still available return true; } }); @@ -800,7 +801,7 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf mvkDS->free(isPoolReset); if ( !isPoolReset ) { size_t dsIdx = mvkDS - _descriptorSets.data(); - _descriptorSetAvailablility.setBit(dsIdx); + _descriptorSetAvailablility.enableBit(dsIdx); } } else { reportError(VK_ERROR_INITIALIZATION_FAILED, "A descriptor set is being returned to a descriptor pool that did not allocate it."); @@ -810,11 +811,10 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf // Free allocated descriptor sets and reset descriptor pools. // Don't waste time freeing desc sets that were never allocated. 
VkResult MVKDescriptorPool::reset(VkDescriptorPoolResetFlags flags) { - size_t dsCnt = _descriptorSetAvailablility.getLowestNeverClearedBitIndex(); - for (uint32_t dsIdx = 0; dsIdx < dsCnt; dsIdx++) { + for (uint32_t dsIdx = 0; dsIdx < _maxAllocDescSetCount; dsIdx++) { freeDescriptorSet(&_descriptorSets[dsIdx], true); } - _descriptorSetAvailablility.setAllBits(); + _descriptorSetAvailablility.enableAllBits(); _uniformBufferDescriptors.reset(); _storageBufferDescriptors.reset(); @@ -830,6 +830,7 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf _storageTexelBufferDescriptors.reset(); _nextMetalArgumentBufferOffset = 0; + _maxAllocDescSetCount = 0; return VK_SUCCESS; } @@ -1003,9 +1004,6 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf } void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo* pCreateInfo) { - _metalArgumentBuffer = nil; - _nextMetalArgumentBufferOffset = 0; - if ( !isUsingMetalArgumentBuffers() ) { return; } auto& mtlFeats = getMetalFeatures(); diff --git a/MoltenVK/MoltenVK/Utility/MVKBitArray.h b/MoltenVK/MoltenVK/Utility/MVKBitArray.h index 6f9afa3dd..48c967a00 100755 --- a/MoltenVK/MoltenVK/Utility/MVKBitArray.h +++ b/MoltenVK/MoltenVK/Utility/MVKBitArray.h @@ -27,117 +27,84 @@ /** Represents an array of bits, optimized for reduced storage and fast scanning for bits that are set. */ class MVKBitArray { - static constexpr size_t SectionMaskSize = 6; // 64 bits - static constexpr size_t SectionBitCount = (size_t)1U << SectionMaskSize; - static constexpr size_t SectionByteCount = SectionBitCount / 8; - static constexpr uint64_t SectionMask = SectionBitCount - 1; - public: /** - * Returns the value of the bit, and optionally clears that bit if it was set. - * Returns false if the bitIndex is beyond the size of this array, returns false. + * Returns the value of the bit, and optionally disables that bit if it was enabled. 
+ * Returns false if the bitIndex is beyond the size of this array. */ - bool getBit(size_t bitIndex, bool shouldClear = false) { + bool getBit(size_t bitIndex, bool shouldDisable = false) { if (bitIndex >= _bitCount) { return false; } - bool val = mvkIsAnyFlagEnabled(getSection(getIndexOfSection(bitIndex)), getSectionSetMask(bitIndex)); - if (shouldClear && val) { clearBit(bitIndex); } + bool val = mvkIsAnyFlagEnabled(getSection(getIndexOfSection(bitIndex)), getBitPositionSectionMask(bitIndex)); + if (val && shouldDisable) { disableBit(bitIndex); } return val; } - /** Sets the value of the bit to the val (or to 1 by default). */ - void setBit(size_t bitIndex, bool val = true) { + /** Sets the value of the bit to the val. */ + void setBit(size_t bitIndex, bool val) { if (bitIndex >= _bitCount) { return; } - size_t secIdx = getIndexOfSection(bitIndex); + auto secIdx = getIndexOfSection(bitIndex); + auto& sectionData = getSection(secIdx); if (val) { - mvkEnableFlags(getSection(secIdx), getSectionSetMask(bitIndex)); - if (secIdx < _clearedSectionCount) { _clearedSectionCount = secIdx; } + mvkEnableFlags(getSection(secIdx), getBitPositionSectionMask(bitIndex)); } else { - mvkDisableFlags(getSection(secIdx), getSectionSetMask(bitIndex)); - if (secIdx == _clearedSectionCount && !getSection(secIdx)) { _clearedSectionCount++; } - _lowestNeverClearedBitIndex = std::max(_lowestNeverClearedBitIndex, bitIndex + 1); + mvkDisableFlags(getSection(secIdx), getBitPositionSectionMask(bitIndex)); } - } - - /** Sets the value of the bit to 0. */ - void clearBit(size_t bitIndex) { setBit(bitIndex, false); } - /** Sets all bits in the array to 1. 
*/ - void setAllBits() { - // Nothing to do if no bits have been cleared (also ensure _lowestNeverClearedBitIndex doesn't go negative) - if (_lowestNeverClearedBitIndex) { - size_t endSecIdx = getIndexOfSection(_lowestNeverClearedBitIndex - 1); - for (size_t secIdx = 0; secIdx <= endSecIdx; secIdx++) { - getSection(secIdx) = ~0; + // Adjust fully disabled tracker + if (isFullyDisabled(sectionData)) { + if (secIdx == _fullyDisabledSectionCount) { + auto secCnt = getSectionCount(); + while (++_fullyDisabledSectionCount < secCnt && isFullyDisabled(getSection(_fullyDisabledSectionCount))); } + } else { + _fullyDisabledSectionCount = std::min(_fullyDisabledSectionCount, (uint32_t)secIdx); } - _clearedSectionCount = 0; - _lowestNeverClearedBitIndex = 0; - } - /** Clears all bits in the array to 0. */ - void clearAllBits() { - size_t secCnt = getSectionCount(); - while (_clearedSectionCount < secCnt) { - getSection(_clearedSectionCount++) = 0; + // Adjust partially disabled tracker + if (isFullyEnabled(sectionData)) { + if (secIdx + 1 == _partiallyDisabledSectionCount) { + while (--_partiallyDisabledSectionCount > 0 && isFullyEnabled(getSection(_partiallyDisabledSectionCount - 1))); + } + } else { + _partiallyDisabledSectionCount = std::max(_partiallyDisabledSectionCount, (uint32_t)secIdx + 1); } - _lowestNeverClearedBitIndex = _bitCount; } - /** - * Returns the index of the first bit that is set, at or after the specified index, - * and optionally clears that bit. If no bits are set, returns the size() of this bit array. 
- */ - size_t getIndexOfFirstSetBit(size_t startIndex, bool shouldClear) { - size_t startSecIdx = getIndexOfSection(startIndex); - if (startSecIdx < _clearedSectionCount) { - startSecIdx = _clearedSectionCount; - startIndex = 0; - } - size_t bitIdx = startSecIdx << SectionMaskSize; - size_t secCnt = getSectionCount(); - for (size_t secIdx = startSecIdx; secIdx < secCnt; secIdx++) { - size_t lclBitIdx = getIndexOfFirstSetBitInSection(getSection(secIdx), getBitIndexInSection(startIndex)); - bitIdx += lclBitIdx; - if (lclBitIdx < SectionBitCount) { - if (startSecIdx == _clearedSectionCount && !getSection(startSecIdx)) { _clearedSectionCount = secIdx; } - if (shouldClear) { clearBit(bitIdx); } - return std::min(bitIdx, _bitCount); - } - startIndex = 0; + /** Enables the bit. */ + void enableBit(size_t bitIndex) { setBit(bitIndex, true); } + + /** Enables all bits in the array. */ + void enableAllBits() { + for (size_t secIdx = 0; secIdx < _partiallyDisabledSectionCount; secIdx++) { + getSection(secIdx) = FullyEnabledSectionMask; } - return std::min(bitIdx, _bitCount); + _partiallyDisabledSectionCount = 0; + _fullyDisabledSectionCount = 0; } - /** - * Returns the index of the first bit that is set, at or after the specified index. - * If no bits are set, returns the size() of this bit array. - */ - size_t getIndexOfFirstSetBit(size_t startIndex) { - return getIndexOfFirstSetBit(startIndex, false); - } + /** Disables the bit. */ + void disableBit(size_t bitIndex) { setBit(bitIndex, false); } - /** - * Returns the index of the first bit that is set and optionally clears that bit. - * If no bits are set, returns the size() of this bit array. - */ - size_t getIndexOfFirstSetBit(bool shouldClear) { - return getIndexOfFirstSetBit(0, shouldClear); + /** Disables all bits in the array. 
*/ + void disableAllBits() { + size_t secCnt = getSectionCount(); + for (size_t secIdx = _fullyDisabledSectionCount; secIdx < secCnt; secIdx++) { + getSection(secIdx) = 0; + } + _partiallyDisabledSectionCount = (uint32_t)secCnt; + _fullyDisabledSectionCount = (uint32_t)secCnt; } - /** - * Returns the index of the lowest bit that has never been cleared since the last time all the bits were set or cleared. - * In other words, this bit, and all above it, have never been cleared since the last time they were all set or cleared. - */ - size_t getLowestNeverClearedBitIndex() { return _lowestNeverClearedBitIndex; } - - /** - * Returns the index of the first bit that is set. - * If no bits are set, returns the size() of this bit array. - */ - size_t getIndexOfFirstSetBit() { - return getIndexOfFirstSetBit(0, false); + /** Returns the index of the first bit that is enabled, at or after the specified index. */ + size_t getIndexOfFirstEnabledBit(size_t startIndex = 0) { + size_t secIdx = getIndexOfSection(startIndex); + if (secIdx < _fullyDisabledSectionCount) { + secIdx = _fullyDisabledSectionCount; + startIndex = 0; + } + return std::min((secIdx * SectionBitCount) + getIndexOfFirstEnabledBitInSection(getSection(secIdx), getBitIndexInSection(startIndex)), _bitCount); } /** @@ -149,13 +116,11 @@ class MVKBitArray { * The custom function should return true to continue processing further bits, or false * to stop processing further bits. This function returns false if any of the invocations * of the custom function halted further invocations, and returns true otherwise. - * - * If shouldClear is true, each enabled bit is cleared before the custom function executes. 
*/ - bool enumerateEnabledBits(bool shouldClear, std::function func) { - for (size_t bitIdx = getIndexOfFirstSetBit(shouldClear); + bool enumerateEnabledBits(std::function func) { + for (size_t bitIdx = getIndexOfFirstEnabledBit(); bitIdx < _bitCount; - bitIdx = getIndexOfFirstSetBit(++bitIdx, shouldClear)) { + bitIdx = getIndexOfFirstEnabledBit(++bitIdx)) { if ( !func(bitIdx) ) { return false; } } @@ -166,7 +131,7 @@ class MVKBitArray { size_t size() const { return _bitCount; } /** Returns whether this array is empty. */ - bool empty() const { return !_bitCount; } + bool empty() const { return _bitCount == 0; } /** * Resize this array to the specified number of bits. @@ -174,57 +139,61 @@ class MVKBitArray { * The value of existing bits that fit within the new size are retained, and any * new bits that are added to accommodate the new size are set to the given value. * - * If the new size is larger than the existing size, new memory may be allocated. - * If the new size is less than the existing size, consumed memory is retained - * unless the size is set to zero. + * If the new size is larger than the existing size, new memory is allocated. + * If the new size is less than the existing size, consumed memory is retained. */ void resize(size_t size, bool val = false) { + assert(size < SectionBitCount * std::numeric_limits::max()); // Limited by _partially/fullyDisabledSectionCount + if (size == _bitCount) { return; } size_t oldBitCnt = _bitCount; size_t oldSecCnt = getSectionCount(); - size_t oldEndBitCnt = oldSecCnt << SectionMaskSize; + size_t oldEndBitCnt = oldSecCnt * SectionBitCount; - // Some magic here. If we need only one section, _data is used as that section, - // and it will be stomped on if we reallocate, so we cache it here. - uint64_t* oldData = _data; - uint64_t* pOldData = oldSecCnt > 1 ? oldData : (uint64_t*)&oldData; - - _bitCount = size; + _bitCount = size; // After here, functions refer to new data characteristics. 
+ // If the number of data sections is not growing, we retain the existing data memory, + // to avoid having to reallocate if this array is resized larger in the future. + // If the number of data sections is growing, we need to expand memory. size_t newSecCnt = getSectionCount(); - if (newSecCnt == 0) { - // Clear out the existing data - if (oldSecCnt > 1) { free(pOldData); } - _data = 0; - _clearedSectionCount = 0; - _lowestNeverClearedBitIndex = 0; - } else if (newSecCnt == oldSecCnt) { + if (newSecCnt == oldSecCnt) { + // The number of data sections is staying the same. // Keep the existing data, but fill any bits in the last section // that were beyond the old bit count with the new initial value. - for (size_t bitIdx = oldBitCnt; bitIdx < oldEndBitCnt; bitIdx++) { setBit(bitIdx, val); } + for (size_t bitIdx = oldBitCnt; bitIdx < _bitCount; bitIdx++) { setBit(bitIdx, val); } } else if (newSecCnt > oldSecCnt) { - size_t oldByteCnt = oldSecCnt * SectionByteCount; - size_t newByteCnt = newSecCnt * SectionByteCount; - - // If needed, allocate new memory. - if (newSecCnt > 1) { _data = (uint64_t*)malloc(newByteCnt); } - - // Fill the new memory with the new initial value, copy the old contents to - // the new memory, fill any bits in the old last section that were beyond - // the old bit count with the new initial value, and remove the old memory. - uint64_t* pNewData = getData(); - memset(pNewData, val ? ~0 : 0, newByteCnt); - memcpy(pNewData, pOldData, oldByteCnt); + // The number of data sections is growing. + // Reallocate new memory to keep the existing contents. + _data = (uint64_t*)realloc(_data, newSecCnt * SectionByteCount); + + // Fill any bits in the last section that were beyond the old bit count with the fill value. 
for (size_t bitIdx = oldBitCnt; bitIdx < oldEndBitCnt; bitIdx++) { setBit(bitIdx, val); } - if (oldSecCnt > 1) { free(pOldData); } - if (!val) { _lowestNeverClearedBitIndex = _bitCount; } // Cover additional sections - // If the entire old array and the new array are cleared, move the uncleared indicator to the new end. - if (_clearedSectionCount == oldSecCnt && !val) { _clearedSectionCount = newSecCnt; } + // Fill the additional sections with the fill value. + uint64_t* pExtraData = &_data[oldSecCnt]; + memset(pExtraData, val ? (uint8_t)FullyEnabledSectionMask : 0, (newSecCnt - oldSecCnt) * SectionByteCount); + + // If the additional sections have been cleared, extend the associated trackers. + if ( !val ) { + if (_partiallyDisabledSectionCount == oldSecCnt) { _partiallyDisabledSectionCount = (uint32_t)newSecCnt; } + if (_fullyDisabledSectionCount == oldSecCnt) { _fullyDisabledSectionCount = (uint32_t)newSecCnt; } + } + } else { + // The number of data sections is shrinking. + // Retain existing allocation, but ensure these values still fit. + _partiallyDisabledSectionCount = std::min(_partiallyDisabledSectionCount, (uint32_t)newSecCnt); + _fullyDisabledSectionCount = std::min(_fullyDisabledSectionCount, (uint32_t)newSecCnt); } - // If we shrank, ensure this value still fits - if (_lowestNeverClearedBitIndex > _bitCount) { _lowestNeverClearedBitIndex = _bitCount; } + } + + /** Resets back to zero size and frees all data. */ + void reset() { + free(_data); + _data = nullptr; + _bitCount = 0; + _partiallyDisabledSectionCount = 0; + _fullyDisabledSectionCount = 0; } /** Constructs an instance for the specified number of bits, and sets the initial value of all the bits. 
*/ @@ -232,68 +201,49 @@ class MVKBitArray { MVKBitArray(const MVKBitArray& other) { resize(other._bitCount); - memcpy(getData(), other.getData(), getSectionCount() * SectionByteCount); - _clearedSectionCount = other._clearedSectionCount; - _lowestNeverClearedBitIndex = other._lowestNeverClearedBitIndex; + memcpy(_data, other._data, getSectionCount() * SectionByteCount); } MVKBitArray& operator=(const MVKBitArray& other) { - resize(0); // Clear out the old memory resize(other._bitCount); - memcpy(getData(), other.getData(), getSectionCount() * SectionByteCount); - _clearedSectionCount = other._clearedSectionCount; - _lowestNeverClearedBitIndex = other._lowestNeverClearedBitIndex; + memcpy(_data, other._data, getSectionCount() * SectionByteCount); return *this; } - ~MVKBitArray() { resize(0); } + ~MVKBitArray() { reset(); } protected: - // Returns a pointer do the data. - // Some magic here. If we need only one section, _data is used as that section. - uint64_t* getData() const { - return getSectionCount() > 1 ? _data : (uint64_t*)&_data; - } - - // Returns a reference to the section. - uint64_t& getSection(size_t secIdx) { - return getData()[secIdx]; - } - - // Returns the number of sections. - size_t getSectionCount() const { - return _bitCount ? getIndexOfSection(_bitCount - 1) + 1 : 0; - } - - // Returns the index of the section that contains the specified bit. - static size_t getIndexOfSection(size_t bitIndex) { - return bitIndex >> SectionMaskSize; - } + uint64_t& getSection(size_t secIdx) { return _data[secIdx]; } + size_t getSectionCount() const { return _bitCount ? getIndexOfSection(_bitCount - 1) + 1 : 0; } - // Converts the bit index to a local bit index within a section, and returns that local bit index. 
- static size_t getBitIndexInSection(size_t bitIndex) { - return bitIndex & SectionMask; - } + static size_t getIndexOfSection(size_t bitIndex) { return bitIndex / SectionBitCount; } + static uint8_t getBitIndexInSection(size_t bitIndex) { return bitIndex & (SectionBitCount - 1); } + static bool isFullyEnabled(uint64_t sectionData) { return sectionData == FullyEnabledSectionMask; } + static bool isFullyDisabled(uint64_t sectionData) { return sectionData == 0; } // Returns a section mask containing a single 1 value in the bit in the section that // corresponds to the specified global bit index, and 0 values in all other bits. - static uint64_t getSectionSetMask(size_t bitIndex) { + static uint64_t getBitPositionSectionMask(size_t bitIndex) { return (uint64_t)1U << ((SectionBitCount - 1) - getBitIndexInSection(bitIndex)); } - // Returns the local index of the first set bit in the section, starting from the highest order bit. - // Clears all bits ahead of the start bit so they will be ignored, then counts the number of zeros - // ahead of the set bit. If there are no set bits, returns the number of bits in a section. - static size_t getIndexOfFirstSetBitInSection(uint64_t section, size_t lclStartBitIndex) { - uint64_t lclStartMask = ~(uint64_t)0; + // Returns the local index of the first enabled bit in the section, starting from the highest order bit. + // Disables all bits ahead of the start bit so they will be ignored, then counts the number of zeros + // ahead of the set bit. If there are no enabled bits, returns the number of bits in a section. + static uint8_t getIndexOfFirstEnabledBitInSection(uint64_t section, uint8_t lclStartBitIndex) { + uint64_t lclStartMask = FullyEnabledSectionMask; lclStartMask >>= lclStartBitIndex; section &= lclStartMask; return section ? 
__builtin_clzll(section) : SectionBitCount; } + static constexpr size_t SectionBitCount = 64; + static constexpr size_t SectionByteCount = SectionBitCount / 8; + static constexpr uint64_t FullyEnabledSectionMask = ~static_cast(0); + uint64_t* _data = nullptr; size_t _bitCount = 0; - size_t _clearedSectionCount = 0; // Tracks where to start looking for bits that are set - size_t _lowestNeverClearedBitIndex = 0; // Tracks the lowest bit that has never been cleared + uint32_t _partiallyDisabledSectionCount = 0; // Tracks where to stop filling when enabling all bits + uint32_t _fullyDisabledSectionCount = 0; // Tracks where to start looking for enabled bits };