diff options
author | Dominick Allen <djallen@librehumanitas.org> | 2024-10-29 10:28:11 -0500 |
---|---|---|
committer | Dominick Allen <djallen@librehumanitas.org> | 2024-10-29 10:28:11 -0500 |
commit | f281050ddb3b9d658cff67a254eedc3b79de5c5d (patch) | |
tree | 62c8673ca990a1df6169d08435924a69cc0b24b2 /include/fud_string.hpp | |
parent | 24cd7c8896b2091114e89ffda06b5c63eb2827c7 (diff) |
Reduce string size, adopt clang-style SSO.
Diffstat (limited to 'include/fud_string.hpp')
-rw-r--r-- | include/fud_string.hpp | 175 |
1 files changed, 115 insertions, 60 deletions
diff --git a/include/fud_string.hpp b/include/fud_string.hpp index 55b1e86..0020c67 100644 --- a/include/fud_string.hpp +++ b/include/fud_string.hpp @@ -41,6 +41,8 @@ struct DrainResult { FudStatus status; }; +/* TODO: make SSO_BUF_LENGTH user configurable. */ + /** \brief The maximum length of a string using the small string optimization * buffer. */ constexpr size_t SSO_BUF_LENGTH = 15; @@ -49,6 +51,8 @@ constexpr size_t SSO_BUF_LENGTH = 15; * for the null terminator. */ constexpr size_t SSO_BUF_SIZE = SSO_BUF_LENGTH + 1; +static constexpr size_t SsoBufSize = 23; + class String; /** \brief A result containing a valid @String or the @FudStatus error @@ -130,6 +134,7 @@ class String { Array<size_t, sizeof...(cStrings)> lengths{}; Array<const char*, sizeof...(cStrings)> strPointers{}; size_t index = 0; + for (const auto* cStringItem : {cStrings...}) { const char* cString = nullptr; if constexpr (std::is_same_v<decltype(cStringItem), const char*>) { @@ -142,11 +147,12 @@ class String { strPointers[index] = cString; auto lengthResult = cStringLength(cString); - if (lengthResult < 0 || lengthResult >= SSIZE_MAX) { + + if (lengthResult < 0 || lengthResult >= std::numeric_limits<ssize_t>::max()) { return StringResult::error(FudStatus::ArgumentInvalid); } auto stringLength = static_cast<size_t>(lengthResult); - if (SIZE_MAX - totalLength < stringLength) { + if (maxStringLength - totalLength < stringLength) { return StringResult::error(FudStatus::Failure); } totalLength += stringLength; @@ -154,38 +160,48 @@ class String { index++; } + fudAssert(totalLength < maxStringLength); + String output{}; - output.m_length = totalLength; output.m_allocator = allocator; - if (output.m_length >= output.m_capacity) { - output.m_capacity = output.m_length + 1; - /* Avoid using compiler expansions in headers */ - auto dataResult = output.allocator()->allocate(output.m_capacity); + utf8* data{nullptr}; + size_t capacity = totalLength + 1; + bool isLarge = capacity > SsoBufSize; + if (isLarge) { + output.m_repr.large.capacity = capacity & largeStringCapacitymask; + output.m_repr.large.length = totalLength; + auto dataResult = output.allocator()->allocate(output.m_repr.large.capacity); if (dataResult.isError()) { return StringResult::error(dataResult.getError()); } - output.m_data = static_cast<utf8*>(dataResult.getOkay()); + output.m_repr.large.data = static_cast<utf8*>(dataResult.getOkay()); + output.m_repr.large.isLarge = 1; + data = output.m_repr.large.data; + } else { + capacity = SsoBufSize; + static_assert(SsoBufSize < std::numeric_limits<int8_t>::max()); + output.m_repr.small.isLarge = 0; + output.m_repr.small.length = static_cast<uint8_t>(totalLength) & smallStringLengthMask; + data = output.m_repr.small.buffer.data(); } - auto* data = output.dataMut(); + fudAssert(data != nullptr); + size_t cumulativeLength = 0; for (size_t idx = 0; idx < strPointers.size(); ++idx) { const auto* cString = strPointers[idx]; - auto copyStatus = copyMem( - data + cumulativeLength, - output.m_capacity - cumulativeLength, - cString, - lengths[idx]); + auto copyStatus = copyMem(data + cumulativeLength, capacity - cumulativeLength, cString, lengths[idx]); fudAssert(copyStatus == FudStatus::Success); cumulativeLength += lengths[idx]; } - auto terminateStatus = output.nullTerminate(); - fudAssert(terminateStatus == FudStatus::Success); + data[cumulativeLength] = '\0'; return StringResult::okay(std::move(output)); } + /** \brief Default constructs a small string of zero length using the global + * fud allocator. */ String() noexcept = default; String(const String& rhs) = delete; @@ -205,35 +221,40 @@ class String { FudStatus copy(const String& rhs); /** \brief The raw length of the string's data, excluding the null terminator. */ - [[nodiscard]] constexpr size_t length() const + [[nodiscard]] size_t length() const { - return m_length; + if (isLarge()) { + return m_repr.large.length; + } + return m_repr.small.length; } /** \brief Indicates if no characters are present in the string's data. */ - [[nodiscard]] constexpr bool empty() const + [[nodiscard]] bool empty() const { - return m_length == 0; + return length() == 0; } /** \brief The total size of the string's data, including the null terminator. */ - [[nodiscard]] constexpr size_t size() const + [[nodiscard]] size_t size() const { - return m_length + 1; + return length() + 1; } /** \brief The current capacity of the string, excluding the reserved slot * for the null terminator. */ - [[nodiscard]] constexpr size_t capacity() const + [[nodiscard]] size_t capacity() const { - fudAssert(m_capacity > 0); - return m_capacity - 1; + if (isLarge()) { + return m_repr.large.capacity - 1U; + } + return SsoBufSize - 1U; } /** \brief The underlying data, guaranteed to have c string representation. */ - [[nodiscard]] constexpr const utf8* data() const + [[nodiscard]] const utf8* data() const { - return isLarge() ? m_data : m_buffer.data(); + return isLarge() ? m_repr.large.data : m_repr.small.buffer.data(); } /** \brief The underlying data as an explicit c string. */ @@ -248,13 +269,13 @@ class String { [[nodiscard]] Option<utf8> back(); - [[nodiscard]] constexpr size_t remainingLength() const + [[nodiscard]] size_t remainingLength() const { - if (m_length >= m_capacity) { + if (length() > capacity()) { return 0; } - return m_capacity - 1U - m_length; + return capacity() - length(); } [[nodiscard]] inline StringView asView() const @@ -295,11 +316,19 @@ class String { const utf8* end() const; private: - static constexpr uint8_t capacityMask = 0x07; + static constexpr size_t maxStringLength = (static_cast<size_t>(1) << 63) - 1; + static constexpr size_t largeStringCapacitymask = (static_cast<size_t>(1) << 63) - 1; + static constexpr uint8_t maxSmallStringLength = SsoBufSize; + static constexpr uint8_t smallStringLengthMask = 0x7F; [[nodiscard]] static bool allocatorValid(Allocator* allocator) { - return ((reinterpret_cast<uintptr_t>(allocator) & capacityMask) == 0); + return allocator != nullptr; + } + + Allocator* allocator() const + { + return m_allocator; } [[nodiscard]] bool nullTerminated() const; @@ -308,52 +337,78 @@ class String { FudStatus nullTerminate(); - Allocator* allocator() const - { - constexpr uintptr_t ALLOCATOR_MASK = ~static_cast<uintptr_t>(capacityMask); - const auto allocptr = reinterpret_cast<uintptr_t>(m_allocator); - return reinterpret_cast<Allocator*>(allocptr & ALLOCATOR_MASK); - } - /** \brief The underlying data, guaranteed to have c string * representation. */ - [[nodiscard]] constexpr utf8* dataMut() + [[nodiscard]] utf8* dataMut() { - return isLarge() ? m_data : m_buffer.data(); + return isLarge() ? m_repr.large.data : m_repr.small.buffer.data(); } void cleanup(); FudStatus resize(size_t newCapacity); - void setLength(size_t newLength) - { - m_length = newLength; - } - /** \brief The allocator used to get storage for characters when the string * is large. */ Allocator* m_allocator{&globalFudAllocator}; - using BufType = Array<utf8, SSO_BUF_SIZE>; + using BufType = Array<utf8, SsoBufSize>; union { - /** \brief The storage for string characters when using SSO. */ - BufType m_buffer{BufType::constFill(0)}; - /** \brief The storage for string characters when the string is - * large. */ - utf8* m_data; - }; + struct { + uint8_t isLarge : 1; + size_t capacity : 63; + size_t length; + utf8* data; + } large; + struct { + uint8_t isLarge : 1 = 0; + uint8_t length : 7 = 0; + BufType buffer{}; + } small{}; + } m_repr{}; + + /** \brief Whether or not the string must use its allocator for storage. */ + [[nodiscard]] bool isLarge() const + { + struct { + uint8_t isLarge : 1; + uint8_t length : 7; + } determinant; + copyMem<1>(determinant, m_repr); + return determinant.isLarge; + } - /** \brief The length of the string excluding the null terminator. */ - size_t m_length{0}; + [[nodiscard]] size_t smallLength() const + { + struct { + uint8_t isLarge : 1; + uint8_t length : 7; + } determinant; + copyMem<1>(determinant, m_repr); + return determinant.isLarge; + } - /** \brief The capacity of the string, including the null terminator. */ - size_t m_capacity{SSO_BUF_SIZE}; + void addToLength(size_t augend) + { + if (isLarge()) { + fudAssert(m_repr.large.length + augend < maxStringLength); + m_repr.large.length += augend; + } else { + fudAssert(m_repr.small.length + augend < maxSmallStringLength); + m_repr.small.length = static_cast<decltype(m_repr.small.length)>((m_repr.small.length + augend)) & + smallStringLengthMask; + } + } - /** \brief Whether or not the string must use its allocator for storage. */ - [[nodiscard]] constexpr bool isLarge() const + void setLength(size_t newLength) { - return m_capacity > SSO_BUF_SIZE; + if (isLarge()) { + fudAssert(newLength < maxStringLength); + m_repr.large.length = newLength; + } else { + fudAssert(newLength < maxSmallStringLength); + m_repr.small.length = static_cast<uint8_t>(newLength) & smallStringLengthMask; + } } }; |