From 7a7acd0ac9eac1b737191e20033c89ce021c9da3 Mon Sep 17 00:00:00 2001 From: Rory Neithinger Date: Tue, 20 Aug 2024 11:26:55 -0700 Subject: [PATCH] M-06 minimize calls to storage for bytes/string --- stylus-sdk/src/storage/bytes.rs | 347 +++++++++++++++++++++----------- 1 file changed, 235 insertions(+), 112 deletions(-) diff --git a/stylus-sdk/src/storage/bytes.rs b/stylus-sdk/src/storage/bytes.rs index d8e5af1..8f9354c 100644 --- a/stylus-sdk/src/storage/bytes.rs +++ b/stylus-sdk/src/storage/bytes.rs @@ -7,8 +7,8 @@ use alloc::{ string::{String, ToString}, vec::Vec, }; -use alloy_primitives::{U256, U8}; -use core::cell::OnceCell; +use alloy_primitives::{B256, U256, U8}; +use core::{borrow::Borrow, cell::OnceCell}; /// Accessor for storage-backed bytes. pub struct StorageBytes { @@ -45,17 +45,7 @@ impl StorageBytes { /// Gets the number of bytes stored. pub fn len(&self) -> usize { - let word = Storage::get_word(self.root); - - // check if the data is short - let slot: &[u8] = word.as_ref(); - if slot[31] & 1 == 0 { - return (slot[31] / 2) as usize; - } - - let word: U256 = word.into(); - let len = word / U256::from(2); - len.try_into().unwrap() + BytesRoot::new(self).len() } /// Overwrites the collection's length, moving bytes as needed. @@ -65,121 +55,113 @@ impl StorageBytes { /// May populate the vector with junk bytes from prior dirty operations. /// Note that [`StorageBytes`] has unlimited capacity, so all lengths are valid. pub unsafe fn set_len(&mut self, len: usize) { - let old = self.len(); + let mut root = BytesRoot::new_mut(self); + let old = root.len(); - // if representation hasn't changed, just update the length if (old < 32) == (len < 32) { - return self.write_len(len); - } - - // if shrinking, pull data in - if (len < 32) && (old > 32) { - let word = Storage::get_word(*self.base()); - Storage::set_word(self.root, word); - return self.write_len(len); - } - - // if growing, push data out - let mut word = Storage::get_word(self.root); - word[31] = 0; // clear len byte - Storage::set_word(*self.base(), word); - self.write_len(len) - } - - /// Updates the length while being conscious of representation. - unsafe fn write_len(&mut self, len: usize) { - if len < 32 { - // place the len in the last byte of the root with the long bit low - Storage::set_uint(self.root, 31, U8::from(len * 2)); + // if representation hasn't changed, just update the length + root.write_len(len); + } else if (len < 32) && (old > 32) { + // if shrinking, pull data in + root.word = Storage::get_word(*root.storage.base()); + root.write_len(len); } else { - // place the len in the root with the long bit high - Storage::set_word(self.root, U256::from(len * 2 + 1).into()) + // if growing, push data out + root.word[31] = 0; // clear len byte + Storage::set_word(*root.storage.base(), root.word); + root.write_len(len); } } /// Adds a byte to the end. - pub fn push(&mut self, b: u8) { - let index = self.len(); - let value = U8::from(b); - - macro_rules! assign { - ($slot:expr) => { - unsafe { - Storage::set_uint($slot, index % 32, value); // pack value - self.write_len(index + 1); - } - }; - } + pub fn push(&mut self, value: u8) { + let mut root = BytesRoot::new_mut(self); + let index = root.len(); if index < 31 { - return assign!(self.root); - } - - // convert to multi-word representation - if index == 31 { + // still short representation after adding a byte + // add the byte and update length + root.word[index] = value; + unsafe { + root.write_len(index + 1); + } + } else if index == 31 { + // convert to multi-word representation // copy content over (len byte will be overwritten) - let word = Storage::get_word(self.root); - unsafe { Storage::set_word(*self.base(), word) }; + root.word[index] = value; + unsafe { + Storage::set_word(*root.storage.base(), root.word); + root.write_len(index + 1); + } + } else { + // already long representation + // add the new byte and update length + let slot = root.storage.base() + U256::from(index / 32); + unsafe { + Storage::set_uint(slot, index % 32, U8::from(value)); + root.write_len(index + 1); + } } - - let slot = self.base() + U256::from(index / 32); - assign!(slot); } /// Removes and returns the last byte, if it exists. /// As an optimization, underlying storage slots are only erased when all bytes in /// a given word are freed when in the multi-word representation. pub fn pop(&mut self) -> Option { - let len = self.len(); + let mut root = BytesRoot::new_mut(self); + let len = root.len(); + let index = len - 1; + if len == 0 { return None; } - let index = len - 1; - let clean = index % 32 == 0; - let byte = self.get(index)?; - - let clear = |slot| unsafe { Storage::clear_word(slot) }; - // convert to single-word representation if len == 32 { // copy content over - let word = Storage::get_word(*self.base()); - unsafe { Storage::set_word(self.root, word) }; - clear(*self.base()); + let base = *root.storage.base(); + root.word = Storage::get_word(base); + let byte = root.word[index]; + unsafe { + root.write_len(index); + Storage::clear_word(base); + } + return Some(byte); } + let byte = root.get(index)?; + let clean = index % 32 == 0; + // clear distant word if len > 32 && clean { - clear(self.index_slot(len - 1).0); + unsafe { + Storage::clear_word(root.index_slot(len - 1).0); + } } // clear the value if len < 32 { - unsafe { Storage::set_byte(self.root, index, 0) }; + root.word[index] = 0; } // set the new length - unsafe { self.write_len(index) }; + unsafe { root.write_len(index) }; Some(byte) } /// Gets the byte at the given index, if it exists. pub fn get(&self, index: impl TryInto) -> Option { - let index = index.try_into().ok()?; - if index >= self.len() { - return None; - } - unsafe { Some(self.get_unchecked(index)) } + BytesRoot::new(self).get(index) } /// Gets a mutable accessor to the byte at the given index, if it exists. pub fn get_mut(&mut self, index: impl TryInto) -> Option> { + let root = BytesRoot::new_mut(self); let index = index.try_into().ok()?; - if index >= self.len() { + if index >= root.len() { return None; } - let (slot, offset) = self.index_slot(index); + let (slot, offset) = root.index_slot(index); let value = unsafe { StorageB8::new(slot, offset) }; Some(StorageGuardMut::new(value)) } @@ -190,19 +172,32 @@ impl StorageBytes { /// /// UB if index is out of bounds. pub unsafe fn get_unchecked(&self, index: usize) -> u8 { - let (slot, offset) = self.index_slot(index); - unsafe { Storage::get_byte(slot, offset.into()) } + BytesRoot::new(self).get_unchecked(index) } /// Gets the full contents of the collection. pub fn get_bytes(&self) -> Vec { - let len = self.len(); + let root = BytesRoot::new(self); + let len = root.len(); let mut bytes = Vec::with_capacity(len); - // TODO: efficient extraction - for i in 0..len { - let byte = unsafe { self.get_unchecked(i) }; - bytes.push(byte); + // for short representation, use appropriate number of bytes from root + if len < 32 { + bytes.extend_from_slice(&root.word[..len]); + return bytes; + } + + // for long representation, read one word at a time from storage + for idx in (0..len).step_by(32) { + let (slot, _) = root.index_slot(idx); + let word = Storage::get_word(slot); + if idx + 32 <= len { + // entire word is part of the byte array + bytes.extend(word.0); + } else { + // for the last word, only get remaining bytes + bytes.extend(&word.0[..len - idx]); + }; } bytes } @@ -213,15 +208,6 @@ impl StorageBytes { self.extend(bytes.as_ref()); } - /// Determines the slot and offset for the element at an index. - fn index_slot(&self, index: usize) -> (U256, u8) { - let slot = match self.len() { - 33.. => self.base() + U256::from(index / 32), - _ => self.root, - }; - (slot, (index % 32) as u8) - } - /// Determines where in storage indices start. Could be made `const` in the future. fn base(&self) -> &U256 { self.base @@ -231,33 +217,173 @@ impl StorageBytes { impl Erase for StorageBytes { fn erase(&mut self) { - let mut len = self.len() as isize; + let root = BytesRoot::new(self); + let mut len = root.len() as isize; + // clear any slots used in long storage if len > 31 { while len > 0 { - let slot = self.index_slot(len as usize - 1).0; + let slot = root.index_slot(len as usize - 1).0; unsafe { Storage::clear_word(slot) }; len -= 32; } } - unsafe { Storage::clear_word(self.root) }; + // set length and data in root storage to zero + unsafe { Storage::clear_word(root.storage.root) }; } } -// TODO: efficient bulk insertion impl Extend for StorageBytes { fn extend>(&mut self, iter: T) { - for elem in iter { - self.push(elem); + let mut root = BytesRoot::new_mut(self); + let old_len = root.len(); + let mut chunk = Vec::with_capacity(32); + let mut iter = iter.into_iter(); + let mut len = old_len; + + // get the current tail to begin writing. unused if resulting data length is < 32 + let mut slot = if old_len < 32 { + *root.storage.base() + } else { + let (slot, _) = root.index_slot(old_len - 1); + slot + }; + + // we want to work with word-aligned chunks, fill in first chunk to get there + if old_len % 32 != 0 { + if old_len < 32 { + // short data, get from root word + chunk.extend_from_slice(&root.word[..old_len]); + } else { + // long data, get from last word + let (slot, offset) = root.index_slot(old_len - 1); + let stored_word = Storage::get_word(slot); + chunk.extend_from_slice(&stored_word[..offset as usize + 1]); + } + len -= chunk.len(); // this will be added back as an entire word + + while chunk.len() < 32 { + if let Some(byte) = iter.next() { + chunk.push(byte); + } else { + break; + } + } + + // if total data length < 32, store short represenatation + if chunk.len() < 32 { + root.word[old_len..chunk.len()].copy_from_slice(&chunk[old_len..]); + unsafe { + root.write_len(chunk.len()); + } + return; + } + + // write the word we just filled in + unsafe { + Storage::set_word(slot, B256::from_slice(&chunk)); + } + chunk.clear(); + len += 32; + slot += U256::from(1); + } + + // write to storage, a word at a time + for byte in iter { + chunk.push(byte); + if chunk.len() == 32 { + unsafe { + Storage::set_word(slot, B256::from_slice(&chunk)); + } + chunk.clear(); + len += 32; + slot += U256::from(1); + } + } + + if !chunk.is_empty() { + unsafe { + Storage::set_word(slot, B256::right_padding_from(&chunk)); + } + len += chunk.len(); + } + unsafe { + root.write_len(len); } } } -// TODO: efficient bulk insertion impl<'a> Extend<&'a u8> for StorageBytes { fn extend>(&mut self, iter: T) { - for elem in iter { - self.push(*elem); + self.extend(iter.into_iter().cloned()); + } +} + +struct BytesRoot { + storage: T, + word: B256, +} + +impl> BytesRoot { + fn new(storage: T) -> Self { + let word = Storage::get_word(storage.borrow().root); + Self { storage, word } + } + + fn len(&self) -> usize { + // check if the data is short + let slot: &[u8] = self.word.as_ref(); + if slot[31] & 1 == 0 { + return (slot[31] / 2) as usize; } + + let word: U256 = self.word.into(); + let len = word / U256::from(2); + len.try_into().unwrap() + } + + /// Gets the byte at the given index, if it exists. + pub fn get(&self, index: impl TryInto) -> Option { + let index = index.try_into().ok()?; + if index >= self.len() { + return None; + } + unsafe { Some(self.get_unchecked(index)) } + } + + /// Gets the byte at the given index, even if beyond the collection. + /// + /// # Safety + /// + /// UB if index is out of bounds. + pub unsafe fn get_unchecked(&self, index: usize) -> u8 { + let (slot, offset) = self.index_slot(index); + unsafe { Storage::get_byte(slot, offset.into()) } + } + + /// Determines the slot and offset for the element at an index. + fn index_slot(&self, index: usize) -> (U256, u8) { + let storage = self.storage.borrow(); + let slot = match self.len() { + 32.. => storage.base() + U256::from(index / 32), + _ => storage.root, + }; + (slot, (index % 32) as u8) + } +} + +impl<'a> BytesRoot<&'a mut StorageBytes> { + fn new_mut(storage: &'a mut StorageBytes) -> Self { + let word = Storage::get_word(storage.root); + Self { storage, word } + } + + unsafe fn write_len(&mut self, len: usize) { + if len < 32 { + self.word[31] = len as u8 * 2; + } else { + self.word = U256::from(len * 2 + 1).into(); + } + Storage::set_word(self.storage.root, self.word); } } @@ -308,9 +434,7 @@ impl StorageString { /// Overwrites the underlying [`String`], erasing what was previously stored. pub fn set_str(&mut self, text: impl AsRef) { self.erase(); - for c in text.as_ref().chars() { - self.push(c); - } + self.0.extend(text.as_ref().bytes()); } } @@ -322,8 +446,7 @@ impl Erase for StorageString { impl Extend for StorageString { fn extend>(&mut self, iter: T) { - for c in iter { - self.push(c); - } + let s = iter.into_iter().collect::(); + self.0.extend(s.bytes()); } }