From f281050ddb3b9d658cff67a254eedc3b79de5c5d Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Tue, 29 Oct 2024 10:28:11 -0500 Subject: Reduce string size, adopt clang-style SSO. --- include/fud_allocator.hpp | 5 +- include/fud_result.hpp | 4 +- include/fud_string.hpp | 175 ++++++++++++++++++++++++++++++---------------- 3 files changed, 120 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/fud_allocator.hpp b/include/fud_allocator.hpp index 693111b..d4feccf 100644 --- a/include/fud_allocator.hpp +++ b/include/fud_allocator.hpp @@ -26,7 +26,7 @@ namespace fud { -class alignas(size_t) Allocator { +class alignas(std::max_align_t) Allocator { public: virtual ~Allocator() = default; @@ -37,7 +37,8 @@ class alignas(size_t) Allocator { virtual bool isEqual(const Allocator& rhs) const = 0; }; -constexpr bool operator==(const Allocator& lhs, const Allocator& rhs) { +constexpr bool operator==(const Allocator& lhs, const Allocator& rhs) +{ return &lhs == &rhs; } diff --git a/include/fud_result.hpp b/include/fud_result.hpp index 95f3e5c..497b007 100644 --- a/include/fud_result.hpp +++ b/include/fud_result.hpp @@ -18,8 +18,8 @@ #ifndef FUD_RESULT_HPP #define FUD_RESULT_HPP -#include #include +#include namespace fud { @@ -159,7 +159,7 @@ class [[nodiscard]] Result { std::variant m_value; }; -#define M_TakeOrReturn(HYGIENE_EXPRESSION) \ +#define M_TakeOrReturn(HYGIENE_EXPRESSION) \ ({ \ auto HYGIENE_RESULT{(HYGIENE_EXPRESSION)}; \ if (HYGIENE_RESULT.isError()) { \ diff --git a/include/fud_string.hpp b/include/fud_string.hpp index 55b1e86..0020c67 100644 --- a/include/fud_string.hpp +++ b/include/fud_string.hpp @@ -41,6 +41,8 @@ struct DrainResult { FudStatus status; }; +/* TODO: make SSO_BUF_LENGTH user configurable. */ + /** \brief The maximum length of a string using the small string optimization * buffer. */ constexpr size_t SSO_BUF_LENGTH = 15; @@ -49,6 +51,8 @@ constexpr size_t SSO_BUF_LENGTH = 15; * for the null terminator. */ constexpr size_t SSO_BUF_SIZE = SSO_BUF_LENGTH + 1; +static constexpr size_t SsoBufSize = 23; + class String; /** \brief A result containing a valid @String or the @FudStatus error @@ -130,6 +134,7 @@ class String { Array lengths{}; Array strPointers{}; size_t index = 0; + for (const auto* cStringItem : {cStrings...}) { const char* cString = nullptr; if constexpr (std::is_same_v) { @@ -142,11 +147,12 @@ class String { strPointers[index] = cString; auto lengthResult = cStringLength(cString); - if (lengthResult < 0 || lengthResult >= SSIZE_MAX) { + + if (lengthResult < 0 || lengthResult >= std::numeric_limits::max()) { return StringResult::error(FudStatus::ArgumentInvalid); } auto stringLength = static_cast(lengthResult); - if (SIZE_MAX - totalLength < stringLength) { + if (maxStringLength - totalLength < stringLength) { return StringResult::error(FudStatus::Failure); } totalLength += stringLength; @@ -154,38 +160,48 @@ class String { index++; } + fudAssert(totalLength < maxStringLength); + String output{}; - output.m_length = totalLength; output.m_allocator = allocator; - if (output.m_length >= output.m_capacity) { - output.m_capacity = output.m_length + 1; - /* Avoid using compiler expansions in headers */ - auto dataResult = output.allocator()->allocate(output.m_capacity); + utf8* data{nullptr}; + size_t capacity = totalLength + 1; + bool isLarge = capacity > SsoBufSize; + if (isLarge) { + output.m_repr.large.capacity = capacity & largeStringCapacitymask; + output.m_repr.large.length = totalLength; + auto dataResult = output.allocator()->allocate(output.m_repr.large.capacity); if (dataResult.isError()) { return StringResult::error(dataResult.getError()); } - output.m_data = static_cast(dataResult.getOkay()); + output.m_repr.large.data = static_cast(dataResult.getOkay()); + output.m_repr.large.isLarge = 1; + data = output.m_repr.large.data; + } else { + capacity = SsoBufSize; + static_assert(SsoBufSize < std::numeric_limits::max()); + output.m_repr.small.isLarge = 0; + output.m_repr.small.length = static_cast(totalLength) & smallStringLengthMask; + data = output.m_repr.small.buffer.data(); } - auto* data = output.dataMut(); + fudAssert(data != nullptr); + size_t cumulativeLength = 0; for (size_t idx = 0; idx < strPointers.size(); ++idx) { const auto* cString = strPointers[idx]; - auto copyStatus = copyMem( - data + cumulativeLength, - output.m_capacity - cumulativeLength, - cString, - lengths[idx]); + auto copyStatus = copyMem(data + cumulativeLength, capacity - cumulativeLength, cString, lengths[idx]); fudAssert(copyStatus == FudStatus::Success); cumulativeLength += lengths[idx]; } - auto terminateStatus = output.nullTerminate(); - fudAssert(terminateStatus == FudStatus::Success); + data[cumulativeLength] = '\0'; return StringResult::okay(std::move(output)); } + /** \brief Default constructs a small string of zero length using the global + * fud allocator. */ String() noexcept = default; String(const String& rhs) = delete; @@ -205,35 +221,40 @@ class String { FudStatus copy(const String& rhs); /** \brief The raw length of the string's data, excluding the null terminator. */ - [[nodiscard]] constexpr size_t length() const + [[nodiscard]] size_t length() const { - return m_length; + if (isLarge()) { + return m_repr.large.length; + } + return m_repr.small.length; } /** \brief Indicates if no characters are present in the string's data. */ - [[nodiscard]] constexpr bool empty() const + [[nodiscard]] bool empty() const { - return m_length == 0; + return length() == 0; } /** \brief The total size of the string's data, including the null terminator. */ - [[nodiscard]] constexpr size_t size() const + [[nodiscard]] size_t size() const { - return m_length + 1; + return length() + 1; } /** \brief The current capacity of the string, excluding the reserved slot * for the null terminator. */ - [[nodiscard]] constexpr size_t capacity() const + [[nodiscard]] size_t capacity() const { - fudAssert(m_capacity > 0); - return m_capacity - 1; + if (isLarge()) { + return m_repr.large.capacity - 1U; + } + return SsoBufSize - 1U; } /** \brief The underlying data, guaranteed to have c string representation. */ - [[nodiscard]] constexpr const utf8* data() const + [[nodiscard]] const utf8* data() const { - return isLarge() ? m_data : m_buffer.data(); + return isLarge() ? m_repr.large.data : m_repr.small.buffer.data(); } /** \brief The underlying data as an explicit c string. */ @@ -248,13 +269,13 @@ class String { [[nodiscard]] Option back(); - [[nodiscard]] constexpr size_t remainingLength() const + [[nodiscard]] size_t remainingLength() const { - if (m_length >= m_capacity) { + if (length() > capacity()) { return 0; } - return m_capacity - 1U - m_length; + return capacity() - length(); } [[nodiscard]] inline StringView asView() const @@ -295,11 +316,19 @@ class String { const utf8* end() const; private: - static constexpr uint8_t capacityMask = 0x07; + static constexpr size_t maxStringLength = (static_cast(1) << 63) - 1; + static constexpr size_t largeStringCapacitymask = (static_cast(1) << 63) - 1; + static constexpr uint8_t maxSmallStringLength = SsoBufSize; + static constexpr uint8_t smallStringLengthMask = 0x7F; [[nodiscard]] static bool allocatorValid(Allocator* allocator) { - return ((reinterpret_cast(allocator) & capacityMask) == 0); + return allocator != nullptr; + } + + Allocator* allocator() const + { + return m_allocator; } [[nodiscard]] bool nullTerminated() const; @@ -308,52 +337,78 @@ class String { FudStatus nullTerminate(); - Allocator* allocator() const - { - constexpr uintptr_t ALLOCATOR_MASK = ~static_cast(capacityMask); - const auto allocptr = reinterpret_cast(m_allocator); - return reinterpret_cast(allocptr & ALLOCATOR_MASK); - } - /** \brief The underlying data, guaranteed to have c string * representation. */ - [[nodiscard]] constexpr utf8* dataMut() + [[nodiscard]] utf8* dataMut() { - return isLarge() ? m_data : m_buffer.data(); + return isLarge() ? m_repr.large.data : m_repr.small.buffer.data(); } void cleanup(); FudStatus resize(size_t newCapacity); - void setLength(size_t newLength) - { - m_length = newLength; - } - /** \brief The allocator used to get storage for characters when the string * is large. */ Allocator* m_allocator{&globalFudAllocator}; - using BufType = Array; + using BufType = Array; union { - /** \brief The storage for string characters when using SSO. */ - BufType m_buffer{BufType::constFill(0)}; - /** \brief The storage for string characters when the string is - * large. */ - utf8* m_data; - }; + struct { + uint8_t isLarge : 1; + size_t capacity : 63; + size_t length; + utf8* data; + } large; + struct { + uint8_t isLarge : 1 = 0; + uint8_t length : 7 = 0; + BufType buffer{}; + } small{}; + } m_repr{}; + + /** \brief Whether or not the string must use its allocator for storage. */ + [[nodiscard]] bool isLarge() const + { + struct { + uint8_t isLarge : 1; + uint8_t length : 7; + } determinant; + copyMem<1>(determinant, m_repr); + return determinant.isLarge; + } - /** \brief The length of the string excluding the null terminator. */ - size_t m_length{0}; + [[nodiscard]] size_t smallLength() const + { + struct { + uint8_t isLarge : 1; + uint8_t length : 7; + } determinant; + copyMem<1>(determinant, m_repr); + return determinant.isLarge; + } - /** \brief The capacity of the string, including the null terminator. */ - size_t m_capacity{SSO_BUF_SIZE}; + void addToLength(size_t augend) + { + if (isLarge()) { + fudAssert(m_repr.large.length + augend < maxStringLength); + m_repr.large.length += augend; + } else { + fudAssert(m_repr.small.length + augend < maxSmallStringLength); + m_repr.small.length = static_cast((m_repr.small.length + augend)) & + smallStringLengthMask; + } + } - /** \brief Whether or not the string must use its allocator for storage. */ - [[nodiscard]] constexpr bool isLarge() const + void setLength(size_t newLength) { - return m_capacity > SSO_BUF_SIZE; + if (isLarge()) { + fudAssert(newLength < maxStringLength); + m_repr.large.length = newLength; + } else { + fudAssert(newLength < maxSmallStringLength); + m_repr.small.length = static_cast(newLength) & smallStringLengthMask; + } } }; -- cgit v1.2.3