/* * libfud * Copyright 2024 Dominick Allen * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef FUD_STRING_HPP #define FUD_STRING_HPP #include "fud_allocator.hpp" #include "fud_assert.hpp" #include "fud_c_string.hpp" #include "fud_drain.hpp" #include "fud_option.hpp" #include "fud_result.hpp" #include "fud_status.hpp" #include "fud_string_view.hpp" #include "fud_utf8.hpp" #include #include static_assert(CHAR_BIT == 8); /** @file */ namespace fud { /** \brief Size constant for the small string representation. A string that is * not large reports a capacity() of SsoBufSize - 1. */ constexpr size_t SsoBufSize = 23; class String; /** \brief A result containing a valid @String or the @FudStatus error * encountered during its creation. */ using StringResult = Result; /** \brief A null terminated, growable, array of utf8 code points, with a custom * allocator. * * A string is either small or large (see isLarge()): a large string reads its * data pointer, length and capacity from the large representation, anything * else uses the inline small buffer. Copying is fallible, so it is only * offered through String::from and copy(), which report a status. * utf8Valid() reports whether the contents are valid UTF8. */ class String { public: /** \brief Create a string from a C String. * * \param [in] cString a pointer to a C string to populate the String. * * \returns String on success. * \returns NullPointer if cString is null. * \returns ArgumentInvalid if the length of cString is greater than or * equal to SSIZE_MAX. * \returns FudStatus::AllocFailure if the allocator fails. */ static StringResult makeFromCString(const char* cString); /** @copydoc String::makeFromCString(const char* cString) */ static StringResult makeFromCString(const char8_t* cString); /** \brief Create a string from a C String, specifying the allocator. * * \param [in] cString a pointer to a C string to populate the String. * \param [in] allocator the allocator the string will use. 
* * \returns String on success. * \returns NullPointer if cString or allocator is null. * \returns ArgumentInvalid if the length of cString is greater than or * equal to SSIZE_MAX. * \returns FudStatus::AllocFailure if the allocator fails. */ static StringResult makeFromCString(const char* cString, Allocator* allocator); /** @copydoc String::makeFromCString(const char* cString, Allocator* allocator) */ static StringResult makeFromCString(const char8_t* cString, Allocator* allocator); /** \brief Create a string by concatenating multiple C Strings. * * This function forwards its arguments to makeFromCStringsAlloc together * with the default globalFudAllocator. * * \tparam CStrings a parameter pack of C string pointer types. * \param [in] cStrings pointers to C strings to concatenate, in order, into a single String. * * \returns String on success. * \returns Any error produced by makeFromCStringsAlloc for the same C strings. */ template static StringResult makeFromCStrings(CStrings... cStrings) { return makeFromCStringsAlloc(&globalFudAllocator, cStrings...); } /** \brief Create a string by concatenating multiple C Strings, specifying * the allocator. * * The arguments are walked as a braced list, so every argument must have * the same pointer type. * * \tparam CStrings a parameter pack of C string pointer types. * \param [in] allocator the allocator the string will use. * \param [in] cStrings pointers to C strings to concatenate, in order, into a single String. * * \returns String on success. * \returns NullPointer if allocator is null. * \returns ArgumentInvalid if allocator has its lowest address bit set, or * if cStringLength reports a negative result or a length at or above the * numeric limit checked here for any cString. NOTE(review): a null cString * presumably ends up here rather than in NullPointer - confirm against * cStringLength. * \returns Failure if the combined length would exceed maxStringLength. * \returns The status reported by makeLarge if the large representation * cannot be set up (presumably FudStatus::AllocFailure - confirm). */ template static StringResult makeFromCStringsAlloc(Allocator* allocator, CStrings... 
cStrings) { if (allocator == nullptr) { return StringResult::error(FudStatus::NullPointer); } /* The allocator address is stored in m_allocator with its lowest bit reused as the large flag, so that bit must be clear. */ if (!String::allocatorValid(allocator)) { return StringResult::error(FudStatus::ArgumentInvalid); } /* First pass: record the pointer and length of every C string and accumulate the total length. */ size_t totalLength = 0; Array lengths{}; Array strPointers{}; size_t index = 0; for (const auto* cStringItem : {cStrings...}) { const char* cString = nullptr; if constexpr (std::is_same_v) { cString = cStringItem; } else if constexpr (std::is_same_v) { cString = reinterpret_cast(cStringItem); } else { static_assert(!std::is_same_v); } strPointers[index] = cString; auto lengthResult = cStringLength(cString); if (lengthResult < 0 || lengthResult >= std::numeric_limits::max()) { return StringResult::error(FudStatus::ArgumentInvalid); } auto stringLength = static_cast(lengthResult); /* Compared by subtraction so the running total is never pushed past maxStringLength. */ if (maxStringLength - totalLength < stringLength) { return StringResult::error(FudStatus::Failure); } totalLength += stringLength; lengths[index] = stringLength; index++; } /* NOTE(review): the check inside the loop lets totalLength reach exactly maxStringLength, which this assert then rejects - confirm whether the loop check should also reject equality. */ fudAssert(totalLength < maxStringLength); String output{}; output.m_allocator = reinterpret_cast(allocator); utf8* data{nullptr}; /* One extra byte for the null terminator written at the end. */ size_t outputCapacity = totalLength + 1; /* Contents that do not fit in SsoBufSize bytes, terminator included, take the large representation. */ bool isLarge = outputCapacity > SsoBufSize; if (isLarge) { auto status = output.makeLarge(outputCapacity, totalLength, data); if (status != FudStatus::Success) { return StringResult::error(status); } } else { output.makeSmall(outputCapacity, totalLength, data); } fudAssert(data != nullptr); /* Second pass: copy the C strings back to back, then terminate. */ size_t cumulativeLength = 0; for (size_t idx = 0; idx < strPointers.size(); ++idx) { const auto* cString = strPointers[idx]; auto copyStatus = copyMem( data + cumulativeLength, outputCapacity - cumulativeLength, cString, lengths[idx]); fudAssert(copyStatus == FudStatus::Success); cumulativeLength += lengths[idx]; } data[cumulativeLength] = '\0'; return StringResult::okay(std::move(output)); } /** \brief Default constructs a small string of zero length using the global * fud allocator. */ String() noexcept = default; /* The copy constructor is deleted because copying is fallible; use
* String::from or copy() instead, which report a status. */ String(const String& rhs) = delete; /** \brief Infallibly moves the string. */ String(String&& rhs) noexcept; /* Destructors need no documentation. */ ~String() noexcept; /* The copy assignment operator is deleted because copying is fallible; use copy() instead, which reports a status. */ String& operator=(const String& rhs) = delete; /** \brief Takes ownership of rhs, destroying the contents of this string in * the process. The allocator is taken from rhs. */ String& operator=(String&& rhs) noexcept; /** \brief Create a String by copying from an existing rhs, optionally * specifying a different allocator. If allocatorOption is NullOpt, the * allocator from rhs is used. */ static StringResult from(const String& rhs, Option allocatorOption = NullOpt); /** \brief Create a String by copying from a view, with the specified * allocator, which defaults to the global fud allocator. */ static StringResult from(StringView view, Allocator* allocator = &globalFudAllocator); /** \brief Copy the contents of rhs, without modifying rhs. */ FudStatus copy(const String& rhs); /** \brief The raw length of the string's data, excluding the null terminator. */ [[nodiscard]] size_t length() const { if (isLarge()) { return m_repr.large.length; } return m_repr.small.length; } /** \brief Indicates if no characters are present in the string's data. */ [[nodiscard]] bool empty() const { return length() == 0; } /** \brief The total size of the string's data, including the null terminator. */ [[nodiscard]] size_t size() const { return length() + 1; } /** \brief The current capacity of the string, excluding the reserved slot * for the null terminator. A string that is not large reports * SsoBufSize - 1. */ [[nodiscard]] size_t capacity() const { if (isLarge()) { return m_repr.large.capacity - 1U; } return SsoBufSize - 1U; } /** \brief Returns the remaining capacity for characters excluding the null * terminating byte, or 0 if length() exceeds capacity(). */ [[nodiscard]] size_t remainingCapacity() const { if (length() > capacity()) { return 0; } return capacity() - length(); } /** \brief The underlying data, guaranteed to have c string representation.
* For a large string this is the large data pointer, otherwise the inline * small buffer. */ [[nodiscard]] const utf8* data() const { return isLarge() ? m_repr.large.data : m_repr.small.buffer.data(); } /** \brief The underlying data as an explicit c string. */ [[nodiscard]] inline const char* c_str() const { return reinterpret_cast(data()); } /** \brief A StringView constructed from this string. */ [[nodiscard]] inline StringView asView() const { return StringView(*this); } /** \brief Indicates if the contents of the string form a valid sequence of * UTF8 code points. */ [[nodiscard]] bool utf8Valid() const; /** \brief Attempts to reserve newCapacity bytes of storage. */ FudStatus reserve(size_t newCapacity); /** \brief Returns the first character in the sequence if the length is * greater than zero. */ [[nodiscard]] Option front(); /** \brief Returns the last character in the sequence if the length is * greater than zero. */ [[nodiscard]] Option back(); /** \brief Append a character to the back of the string, growing it if necessary. */ FudStatus pushBack(char letter); /** @copydoc String::pushBack(char letter) */ FudStatus pushBack(utf8 letter); /** @copydoc String::pushBack(char letter) */ FudStatus pushBack(const Utf8& letter); /** \brief Pop and return a character from the back of the string if its * length is greater than zero. */ Option pop(); /** \brief Append a C string to the back of the string, growing it as necessary. */ FudStatus append(const char* source); /** \brief Append a String to the back of the string, growing it as necessary. */ FudStatus append(const String& source); /** \brief Append a StringView to the back of the string, growing it as necessary. */ FudStatus append(StringView source); /** \brief Create a new string with the contents of this string and rhs. */ [[nodiscard]] StringResult catenate(const String& rhs) const; /** @copydoc String::catenate(const String& rhs) const */ [[nodiscard]] StringResult catenate(const char* rhs) const; /** \brief Insert as much of source into the string as possible, returning * how many bytes and the status of the insertion.
*/ DrainResult drain(const char* source); /** @copydoc String::drain(const char* source) */ DrainResult drain(const String& source); /** @copydoc String::drain(const char* source) */ DrainResult drain(StringView source); /** \brief Compare this string against rhs. NOTE(review): presumably true * when both strings hold the same bytes - confirm against the implementation. */ [[nodiscard]] bool compare(const String& rhs) const; /** \brief NOTE(review): presumably resets the string to zero length - * confirm against the implementation. */ FudStatus clear(); /** \brief NOTE(review): presumably the start of data() - confirm against * the implementation. */ const utf8* begin() const; /** \brief NOTE(review): presumably one past the last byte before the null * terminator - confirm against the implementation. */ const utf8* end() const; private: /** \brief Upper bound used by makeFromCStringsAlloc for the combined * length of its inputs: the value (1 << 63) - 1. */ static constexpr size_t maxStringLength = (static_cast(1) << 63) - 1; /* NOTE(review): the next two constants are not referenced in this header; confirm their use in the implementation file. */ static constexpr uint8_t maxSmallStringLength = SsoBufSize; static constexpr uint8_t smallStringLengthMask = 0xFF; /** \brief Lowest bit of m_allocator, used as the flag that marks a large string. */ static constexpr auto isLargeMask = static_cast(0x01); /** \brief Every bit of m_allocator except the large flag, that is, the * allocator address. */ static constexpr auto allocatorMask = ~isLargeMask; /** \brief True when the lowest bit of the allocator's address is clear, * so the address can be stored alongside the large flag. */ [[nodiscard]] static bool allocatorValid(Allocator* allocator) { return (reinterpret_cast(allocator) & isLargeMask) == 0; } /** \brief The allocator address recovered from m_allocator with the large * flag masked off. Asserts that the result is not null. */ Allocator* allocator() const { auto* allocPtr = reinterpret_cast(m_allocator & allocatorMask); fudAssert(allocPtr != nullptr); return allocPtr; } [[nodiscard]] bool nullTerminated() const; [[nodiscard]] bool valid() const; FudStatus nullTerminate(); /** \brief Mutable access to the underlying data, guaranteed to have c string * representation. */ [[nodiscard]] utf8* dataMut() { return isLarge() ? m_repr.large.data : m_repr.small.buffer.data(); } void cleanup(); FudStatus resize(size_t newCapacity); FudStatus grow(); /** \brief The allocator used to get storage for characters when the string * is large. Stored as an integer so its lowest bit can carry the large * flag; read the pointer through allocator(). */ uintptr_t m_allocator{reinterpret_cast(&globalFudAllocator)}; using BufType = Array; /** \brief Storage for the two representations; isLarge() decides which * member is read. */ union { struct { size_t capacity; size_t length; utf8* data; } large; struct { uint8_t length = 0; BufType buffer{}; } small{}; } m_repr{}; /** \brief Whether or not the string must use its allocator for storage.
* This is the lowest bit of m_allocator. */ [[nodiscard]] bool isLarge() const { return (m_allocator & isLargeMask) != 0; } /** \brief Sets the large flag in m_allocator. */ void setLarge() { m_allocator |= isLargeMask; } /** \brief Clears the large flag in m_allocator, keeping the allocator address bits. */ void setSmall() { m_allocator &= allocatorMask; } void addToLength(size_t augend); void setLength(size_t newLength); /** \brief Set up the large representation for the given capacity and * length. makeFromCStringsAlloc expects outputData to be non-null after a * Success return. */ FudStatus makeLarge(size_t cap, size_t len, utf8*& outputData); /** \brief Set up the small representation for the given length. * makeFromCStringsAlloc expects outputData to be non-null afterwards. * NOTE(review): cap is taken by reference, presumably so it can be adjusted * to the small buffer size - confirm against the implementation. */ void makeSmall(size_t& cap, size_t len, utf8*& outputData); }; } // namespace fud #endif