From 0f6ea4612af53b455ee8969761b3606561932234 Mon Sep 17 00:00:00 2001
From: Seonghyun Kim
Date: Thu, 25 Jul 2024 11:18:32 +0900
Subject: [PATCH] Fix memory error on Yarr

If we want to store WTF::String in a Vector or HashSet, we should use
another type of allocator.

Signed-off-by: Seonghyun Kim
---
 third_party/yarr/HashSet.h    |  41 ++++++
 third_party/yarr/Vector.h     | 205 +++++++++++++++++++++++++++++++++-
 third_party/yarr/YarrParser.h |   6 +-
 3 files changed, 247 insertions(+), 5 deletions(-)

diff --git a/third_party/yarr/HashSet.h b/third_party/yarr/HashSet.h
index 95d304de0..bcc2cd4fe 100644
--- a/third_party/yarr/HashSet.h
+++ b/third_party/yarr/HashSet.h
@@ -57,6 +57,47 @@ class HashSet : public std::unordered_set<Key, std::hash<Key>, std::equal_to<Key
     }
 };
 
+// HashSet counterpart built on Escargot::HashSet so that the allocator can be
+// chosen by the caller; adds the add/formUnion/contains/isEmpty helpers.
+template <typename Key, typename Allocator = GCUtil::gc_malloc_allocator<Key>>
+class GCHashSet : public Escargot::HashSet<Key, std::hash<Key>, std::equal_to<Key>, Allocator> {
+    using Base = Escargot::HashSet<Key, std::hash<Key>, std::equal_to<Key>, Allocator>;
+
+public:
+    // Result of add(); isNewEntry carries the `.second` flag of Base::insert().
+    struct AddResult {
+        bool isNewEntry;
+    };
+
+    AddResult add(const Key& k)
+    {
+        AddResult r;
+        r.isNewEntry = Base::insert(k).second;
+        return r;
+    }
+
+    // Adds every value obtained by iterating over other.
+    template <typename Other>
+    void formUnion(const Other& other)
+    {
+        for (const auto& value : other) {
+            add(value);
+        }
+    }
+
+    // Lookups are const-qualified so they can be used on const sets too.
+    bool contains(const Key& k) const
+    {
+        return Base::find(k) != Base::end();
+    }
+
+    bool isEmpty() const
+    {
+        return Base::empty();
+    }
+};
+
 } // namespace WTF
 
 using WTF::HashSet;
+using WTF::GCHashSet;
diff --git a/third_party/yarr/Vector.h b/third_party/yarr/Vector.h
index cd0230fde..4e96f82f0 100644
--- a/third_party/yarr/Vector.h
+++ b/third_party/yarr/Vector.h
@@ -30,9 +30,7 @@ class Vector {
 public:
     typedef typename std::vector<T>::iterator iterator;
     typedef typename std::vector<T>::const_iterator const_iterator;
-    std::vector<T> impl;
 
-public:
     Vector() {}
     Vector(const Vector& v)
     {
@@ -208,8 +206,211 @@ class Vector {
         std::fill(begin(), end(), val);
     }
 
+private:
+    std::vector<T> impl;
+};
+
+// Counterpart of Vector that keeps its elements in an Escargot::Vector while
+// offering the same WTF-style member functions.
+template <typename T, size_t N = 0>
+class GCVector {
+public:
+    using iterator = T*;
+    using const_iterator = const T*;
+
+    GCVector() {}
+
+    // Copies element by element. Forwarding to append(v) would pick the generic
+    // append(const U&) overload and try to convert the whole vector to one T.
+    GCVector(const GCVector& v)
+    {
+        impl.reserve(v.size());
+        for (size_t i = 0; i < v.size(); i++) {
+            impl.push_back(v[i]);
+        }
+    }
+
+    GCVector(const T* v, size_t len)
+    {
+        impl.reserve(len);
+        for (size_t i = 0; i < len; i++) {
+            impl.push_back(v[i]);
+        }
+    }
+
+    GCVector(std::initializer_list<T> list)
+    {
+        impl.reserve(list.size());
+        for (const auto& item : list) {
+            impl.push_back(item);
+        }
+    }
+
+    size_t size() const
+    {
+        return impl.size();
+    }
+
+    T& operator[](size_t i)
+    {
+        return impl[i];
+    }
+
+    const T& operator[](size_t i) const
+    {
+        return impl[i];
+    }
+
+    // Same access as operator[].
+    T& at(size_t i)
+    {
+        return impl[i];
+    }
+
+    T* data()
+    {
+        return impl.data();
+    }
+
+    iterator begin()
+    {
+        return impl.begin();
+    }
+
+    iterator end()
+    {
+        return impl.end();
+    }
+
+    const_iterator begin() const
+    {
+        return impl.begin();
+    }
+
+    const_iterator end() const
+    {
+        return impl.end();
+    }
+
+    T& last()
+    {
+        return impl.back();
+    }
+
+    bool isEmpty() const
+    {
+        return impl.empty();
+    }
+
+    template <typename U>
+    void append(const U& u)
+    {
+        impl.push_back(static_cast<T>(u));
+    }
+
+    void append(T&& u)
+    {
+        impl.push_back(std::move(u));
+    }
+
+    // Appends the elements of a plain Vector through its public accessors,
+    // because Vector's backing store is private.
+    template <typename U, size_t other>
+    void append(const Vector<U, other>& v)
+    {
+        for (size_t i = 0; i < v.size(); i++) {
+            impl.push_back(v[i]);
+        }
+    }
+
+    void insert(size_t i, const T& t)
+    {
+        impl.insert(impl.begin() + i, t);
+    }
+
+    void remove(size_t i)
+    {
+        impl.erase(impl.begin() + i);
+    }
+
+    void removeLast()
+    {
+        impl.pop_back();
+    }
+
+    void clear()
+    {
+        impl.clear();
+    }
+
+    // Forwards to impl.resize(s).
+    void grow(size_t s)
+    {
+        impl.resize(s);
+    }
+
+    void shrink(size_t newLength)
+    {
+        ASSERT(newLength <= impl.size());
+        // Use `>` instead of `!=` so that the loop ends even if the assertion
+        // is compiled out and newLength exceeds the current size.
+        while (impl.size() > newLength) {
+            impl.pop_back();
+        }
+    }
+
+    void shrinkToFit()
+    {
+        impl.shrink_to_fit();
+    }
+
+    size_t capacity() const
+    {
+        return impl.capacity();
+    }
+
+    void reserveInitialCapacity(size_t siz)
+    {
+        impl.reserve(siz);
+    }
+
+    // Simply clears the vector.
+    void deleteAllValues()
+    {
+        clear();
+    }
+
+    void reserve(size_t capacity)
+    {
+        impl.reserve(capacity);
+    }
+
+    // Removes the final element and returns it.
+    T takeLast()
+    {
+        T last(std::move(impl.back()));
+        impl.pop_back();
+        return last;
+    }
+
+    // Leaves the vector holding exactly newSize copies of val.
+    void fill(const T& val, size_t newSize)
+    {
+        if (size() > newSize) {
+            shrink(newSize);
+        } else if (size() < newSize) {
+            // Grow whenever the vector is too short, not only when newSize
+            // exceeds capacity(); otherwise the size would stay unchanged.
+            grow(newSize);
+        }
+        std::fill(begin(), end(), val);
+    }
+
+private:
+    Escargot::Vector<T, GCUtil::gc_malloc_allocator<T>> impl;
 };
 
 } // namespace WTF
 
 using WTF::Vector;
+using WTF::GCVector;
diff --git a/third_party/yarr/YarrParser.h b/third_party/yarr/YarrParser.h
index 66753d8bb..ffeb0d682 100644
--- a/third_party/yarr/YarrParser.h
+++ b/third_party/yarr/YarrParser.h
@@ -62,7 +62,7 @@ class Parser {
     };
 
     class NamedCaptureGroups {
-        typedef HashSet<String> GroupNameHashSet;
+        typedef GCHashSet<String> GroupNameHashSet;
 
     public:
         NamedCaptureGroups()
@@ -117,7 +117,7 @@ class Parser {
 
     private:
         GroupNameHashSet m_captureGroupNames;
-        Vector<GroupNameHashSet, 1> m_activeCaptureGroupNames;
+        GCVector<GroupNameHashSet, 1> m_activeCaptureGroupNames;
     };
 
     /*
@@ -2076,7 +2076,7 @@ class Parser {
     bool m_kIdentityEscapeSeen { false };
     Vector<ParenthesesType, 16> m_parenthesesStack;
     NamedCaptureGroups m_namedCaptureGroups;
-    HashSet<String> m_forwardReferenceNames;
+    GCHashSet<String> m_forwardReferenceNames;
 
     // Derived by empirical testing of compile time in PCRE and WREC.
     static constexpr unsigned MAX_PATTERN_SIZE = 1024 * 1024;