/* * libfud * Copyright 2024 Dominick Allen * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef FUD_STRING_HPP #define FUD_STRING_HPP #include "fud_allocator.hpp" #include "fud_assert.hpp" #include "fud_c_string.hpp" #include "fud_option.hpp" #include "fud_result.hpp" #include "fud_status.hpp" #include "fud_string_view.hpp" #include "fud_utf8.hpp" #include #include static_assert(CHAR_BIT == 8); /** @file */ namespace fud { struct DrainResult { size_t bytesWritten; FudStatus status; }; /* TODO: make SSO_BUF_LENGTH user configurable. */ /** \brief The maximum length of a string using the small string optimization * buffer. */ constexpr size_t SSO_BUF_LENGTH = 15; /** \brief The size of the small string optimization buffer, to include space * for the null terminator. */ constexpr size_t SSO_BUF_SIZE = SSO_BUF_LENGTH + 1; static constexpr size_t SsoBufSize = 23; class String; /** \brief A result containing a valid @String or the @FudStatus error * encountered during its creation. */ using StringResult = Result; /** \brief A null terminated, growable, array of utf8 code points, with a custom * allocator. */ class String { public: /** \brief Create a string from a C String. * * \param [in] cString a pointer to a C string to populate the String. * * \returns String on success. * \returns NullPointer if cString is null. * \returns ArgumentInvalid if the length of cString is greater than or * equal to SSIZE_MAX. * \returns FudStatus::AllocFailure if the allocator fails. */ static StringResult makeFromCString(const char* cString); /** \brief Create a string from a C String, specifying the allocator. * * \param [in] cString a pointer to a C string to populate the String. * \param [in] allocator the allocator the string will use. * * \returns String on success. * \returns NullPointer if cString or allocator is null. * \returns ArgumentInvalid if the length of cString is greater than or * equal to SSIZE_MAX. * \returns FudStatus::AllocFailure if the allocator fails. */ static StringResult makeFromCString(const char* cString, Allocator* allocator); /** \brief Create a string from concatenating multiple C Strings. * * This function uses the default globalFudAllocator. * * \tparam CStrings a parameter pack of types convertible to const char*. * \param [in] cStrings pointers to C strings to concatenate into a single String. * * \returns String on success. * \returns NullPointer if cString or allocator is null. * \returns ArgumentInvalid if the length of cString is greater than or * equal to SSIZE_MAX. * \returns FudStatus::AllocFailure if the allocator fails. */ template static StringResult makeFromCStrings(CStrings... cStrings) { return makeFromCStringsAlloc(&globalFudAllocator, cStrings...); } /** \brief Create a string from concatenating multiple C Strings. * * \tparam CStrings a parameter pack of types convertible to const char*. * \param [in] cStrings pointers to C strings to concatenate into a single String. * \param [in] allocator the allocator the string will use. * * \returns String on success. * \returns NullPointer if cString or allocator is null. * \returns ArgumentInvalid if the length of cString is greater than or * equal to SSIZE_MAX. * \returns FudStatus::AllocFailure if the allocator fails. */ template static StringResult makeFromCStringsAlloc(Allocator* allocator, CStrings... cStrings) { if (allocator == nullptr) { return StringResult::error(FudStatus::NullPointer); } if (!String::allocatorValid(allocator)) { return StringResult::error(FudStatus::ArgumentInvalid); } size_t totalLength = 0; Array lengths{}; Array strPointers{}; size_t index = 0; for (const auto* cStringItem : {cStrings...}) { const char* cString = nullptr; if constexpr (std::is_same_v) { cString = cStringItem; } else if constexpr (std::is_same_v) { cString = reinterpret_cast(cStringItem); } else { static_assert(!std::is_same_v); } strPointers[index] = cString; auto lengthResult = cStringLength(cString); if (lengthResult < 0 || lengthResult >= std::numeric_limits::max()) { return StringResult::error(FudStatus::ArgumentInvalid); } auto stringLength = static_cast(lengthResult); if (maxStringLength - totalLength < stringLength) { return StringResult::error(FudStatus::Failure); } totalLength += stringLength; lengths[index] = stringLength; index++; } fudAssert(totalLength < maxStringLength); String output{}; output.m_allocator = allocator; utf8* data{nullptr}; size_t capacity = totalLength + 1; bool isLarge = capacity > SsoBufSize; if (isLarge) { output.m_repr.large.capacity = capacity & largeStringCapacitymask; output.m_repr.large.length = totalLength; auto dataResult = output.allocator()->allocate(output.m_repr.large.capacity); if (dataResult.isError()) { return StringResult::error(dataResult.getError()); } output.m_repr.large.data = static_cast(dataResult.getOkay()); output.m_repr.large.isLarge = 1; data = output.m_repr.large.data; } else { capacity = SsoBufSize; static_assert(SsoBufSize < std::numeric_limits::max()); output.m_repr.small.isLarge = 0; output.m_repr.small.length = static_cast(totalLength) & smallStringLengthMask; data = output.m_repr.small.buffer.data(); } fudAssert(data != nullptr); size_t cumulativeLength = 0; for (size_t idx = 0; idx < strPointers.size(); ++idx) { const auto* cString = strPointers[idx]; auto copyStatus = copyMem(data + cumulativeLength, capacity - cumulativeLength, cString, lengths[idx]); fudAssert(copyStatus == FudStatus::Success); cumulativeLength += lengths[idx]; } data[cumulativeLength] = '\0'; return StringResult::okay(std::move(output)); } /** \brief Default constructs a small string of zero length using the global * fud allocator. */ String() noexcept = default; String(const String& rhs) = delete; String(String&& rhs) noexcept; ~String(); String& operator=(const String& rhs) = delete; String& operator=(String&& rhs) noexcept; static StringResult from(const String& rhs); static StringResult from(StringView view, Allocator* allocator = &globalFudAllocator); FudStatus copy(const String& rhs); /** \brief The raw length of the string's data, excluding the null terminator. */ [[nodiscard]] size_t length() const { if (isLarge()) { return m_repr.large.length; } return m_repr.small.length; } /** \brief Indicates if no characters are present in the string's data. */ [[nodiscard]] bool empty() const { return length() == 0; } /** \brief The total size of the string's data, including the null terminator. */ [[nodiscard]] size_t size() const { return length() + 1; } /** \brief The current capacity of the string, excluding the reserved slot * for the null terminator. */ [[nodiscard]] size_t capacity() const { if (isLarge()) { return m_repr.large.capacity - 1U; } return SsoBufSize - 1U; } /** \brief The underlying data, guaranteed to have c string representation. */ [[nodiscard]] const utf8* data() const { return isLarge() ? m_repr.large.data : m_repr.small.buffer.data(); } /** \brief The underlying data as an explicit c string. */ [[nodiscard]] inline const char* c_str() const { return reinterpret_cast(data()); } [[nodiscard]] bool utf8Valid() const; FudStatus reserve(size_t newCapacity); [[nodiscard]] Option back(); [[nodiscard]] size_t remainingLength() const { if (length() > capacity()) { return 0; } return capacity() - length(); } [[nodiscard]] inline StringView asView() const { return StringView(*this); } FudStatus pushBack(char letter); FudStatus pushBack(utf8 letter); FudStatus pushBack(const FudUtf8& letter); Option pop(); FudStatus append(const char* source); FudStatus append(const String& source); FudStatus append(StringView source); DrainResult drain(const char* source); DrainResult drain(const String& source); DrainResult drain(StringView source); [[nodiscard]] StringResult catenate(const String& rhs) const; [[nodiscard]] StringResult catenate(const char* rhs) const; [[nodiscard]] bool compare(const String& rhs) const; FudStatus clear(); const utf8* begin() const; const utf8* end() const; private: static constexpr size_t maxStringLength = (static_cast(1) << 63) - 1; static constexpr size_t largeStringCapacitymask = (static_cast(1) << 63) - 1; static constexpr uint8_t maxSmallStringLength = SsoBufSize; static constexpr uint8_t smallStringLengthMask = 0x7F; [[nodiscard]] static bool allocatorValid(Allocator* allocator) { return allocator != nullptr; } Allocator* allocator() const { return m_allocator; } [[nodiscard]] bool nullTerminated() const; [[nodiscard]] bool valid() const; FudStatus nullTerminate(); /** \brief The underlying data, guaranteed to have c string * representation. */ [[nodiscard]] utf8* dataMut() { return isLarge() ? m_repr.large.data : m_repr.small.buffer.data(); } void cleanup(); FudStatus resize(size_t newCapacity); /** \brief The allocator used to get storage for characters when the string * is large. */ Allocator* m_allocator{&globalFudAllocator}; using BufType = Array; union { struct { uint8_t isLarge : 1; size_t capacity : 63; size_t length; utf8* data; } large; struct { uint8_t isLarge : 1 = 0; uint8_t length : 7 = 0; BufType buffer{}; } small{}; } m_repr{}; /** \brief Whether or not the string must use its allocator for storage. */ [[nodiscard]] bool isLarge() const { struct { uint8_t isLarge : 1; uint8_t length : 7; } determinant; copyMem<1>(determinant, m_repr); return determinant.isLarge; } [[nodiscard]] size_t smallLength() const { struct { uint8_t isLarge : 1; uint8_t length : 7; } determinant; copyMem<1>(determinant, m_repr); return determinant.isLarge; } void addToLength(size_t augend) { if (isLarge()) { fudAssert(m_repr.large.length + augend < maxStringLength); m_repr.large.length += augend; } else { fudAssert(m_repr.small.length + augend < maxSmallStringLength); m_repr.small.length = static_cast((m_repr.small.length + augend)) & smallStringLengthMask; } } void setLength(size_t newLength) { if (isLarge()) { fudAssert(newLength < maxStringLength); m_repr.large.length = newLength; } else { fudAssert(newLength < maxSmallStringLength); m_repr.small.length = static_cast(newLength) & smallStringLengthMask; } } }; } // namespace fud #endif