From 6a27a2a4032e88fa9154ef0f0741edc584f7a701 Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Sun, 20 Oct 2024 10:48:19 -0500 Subject: Lots of work. --- .clang-format | 1 - CMakeLists.txt | 12 +- include/fud_algorithm.hpp | 162 ++++++ include/fud_array.hpp | 4 +- include/fud_c_string.hpp | 55 ++ include/fud_format.hpp | 362 +++++-------- include/fud_result.hpp | 35 +- include/fud_span.hpp | 75 ++- include/fud_status.hpp | 3 + include/fud_string.hpp | 1 + include/fud_string_view.hpp | 14 +- include/fud_utf8.hpp | 130 +++-- include/fud_vector.hpp | 64 +++ source/fud_assert.cpp | 8 +- source/fud_format.cpp | 14 + source/fud_string.cpp | 25 - source/fud_string_view.cpp | 38 +- source/fud_utf8.cpp | 174 +++---- source/fud_utf8_iterator.cpp | 4 +- test/CMakeLists.txt | 12 +- test/test_common.hpp | 11 +- test/test_format.cpp | 12 +- test/test_utf8.cpp | 1163 ++++++++++++++++++++++++++++++++++++++++++ 23 files changed, 1914 insertions(+), 465 deletions(-) create mode 100644 include/fud_algorithm.hpp create mode 100644 include/fud_c_string.hpp create mode 100644 include/fud_vector.hpp create mode 100644 source/fud_format.cpp create mode 100644 test/test_utf8.cpp diff --git a/.clang-format b/.clang-format index 87aff92..50883a6 100644 --- a/.clang-format +++ b/.clang-format @@ -30,7 +30,6 @@ AlignConsecutiveMacros: PadOperators: false AlignEscapedNewlines: Left AlignOperands: false -AlignTrailingComments: Never AllowAllArgumentsOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: false AllowShortBlocksOnASingleLine: Never diff --git a/CMakeLists.txt b/CMakeLists.txt index 7358151..f0e9aff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,14 +19,15 @@ add_library(fud SHARED source/libfud.cpp source/fud_allocator.cpp source/fud_assert.cpp - source/fud_memory.cpp + source/fud_format.cpp + source/fud_directory.cpp source/fud_c_file.cpp + source/fud_memory.cpp + source/fud_sqlite.cpp source/fud_string_view.cpp source/fud_string.cpp source/fud_utf8.cpp source/fud_utf8_iterator.cpp - source/fud_sqlite.cpp - source/fud_directory.cpp ) include(cmake/warnings.cmake) @@ -39,7 +40,7 @@ target_link_libraries(fud ${SQLite3_LIBRARIES}) set_target_properties( fud PROPERTIES - CXX_STANDARD 20 + CXX_STANDARD 23 C_STANDARD 23 CXX_EXTENSIONS OFF C_EXTENSIONS OFF @@ -89,9 +90,11 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/FudConfig.cmake set(FUD_HEADERS "include/libfud.hpp" "include/fud_allocator.hpp" + "include/fud_algorithm.hpp" "include/fud_array.hpp" "include/fud_assert.hpp" "include/fud_c_file.hpp" + "include/fud_c_string.hpp" "include/fud_directory.hpp" "include/fud_fud_type_traits.hpp" "include/fud_memory.hpp" @@ -105,6 +108,7 @@ set(FUD_HEADERS "include/fud_unique_array.hpp" "include/fud_utf8.hpp" "include/fud_utf8_iterator.hpp" + "include/fud_vector.hpp" ) set_target_properties(fud PROPERTIES PUBLIC_HEADER "${FUD_HEADERS}") diff --git a/include/fud_algorithm.hpp b/include/fud_algorithm.hpp new file mode 100644 index 0000000..e3d5d3b --- /dev/null +++ b/include/fud_algorithm.hpp @@ -0,0 +1,162 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FUD_ALGORITHM_HPP +#define FUD_ALGORITHM_HPP + +#include "fud_span.hpp" + +#include +#include +#include +#include + +namespace fud { + +template +class Iota { + public: + constexpr Iota() noexcept : m_value{}, m_increment{static_cast(1)}, m_limit{std::numeric_limits::max()} + { + } + + constexpr Iota(T value) noexcept : + m_value{value}, m_increment{static_cast(1)}, m_limit{std::numeric_limits::max()} + { + } + + constexpr Iota(T value, T increment) noexcept : + m_value{value}, m_increment{increment}, m_limit{std::numeric_limits::max()} + { + } + + constexpr Iota(T value, T increment, T limit) noexcept : m_value{value}, m_increment{increment}, m_limit{limit} + { + } + + constexpr std::optional operator()() noexcept + { + auto value = m_value; + if (m_increment > 0) { + if (m_limit - m_increment < m_value) { + return std::nullopt; + } + } else { + if (m_limit + m_increment + 1 >= m_value) { + return std::nullopt; + } + } + m_value += m_increment; + return value; + } + + void set(T value) { + m_value = value; + } + + private: + T m_value; + const T m_increment; + const T m_limit; +}; + +template +Span forEach(Span input, Func&& mapFunc) +{ + for (auto& element : input) { + element = std::forward(mapFunc)(element); + } + + return input; +} + +template +Span mapTo(Span input, Span output, Func&& mapFunc) +{ + for (auto idx = 0; idx < input.size(); ++idx) { + output[idx] = std::forward(mapFunc)(input[idx]); + } + + return input; +} + +template +auto map(Span input, Func&& mapFunc, Builder&& builder) -> decltype(std::forward(builder)()) +{ + Output output{std::forward(builder)()}; + for (auto idx = 0; idx < input.size() && idx < output.size(); ++idx) { + output[idx] = std::forward(mapFunc)(input[idx]); + } + + return input; +} + +template +auto generate(Builder&& builder, Generator&& generator) -> decltype(std::forward(builder)()) +{ + using Output = decltype(std::forward(builder)()); + Output output{std::forward(builder)()}; + for (auto idx = 0; idx < output.size(); ++idx) { + output[idx] = std::forward(generator)(); + } + + return output; +} + +template +bool allOf(Span input, Func&& predicate) +{ + bool result = input.size() > 0; + for (size_t idx = 0; result && idx < input.size(); ++idx) { + result = result && std::forward(predicate)(input[idx]); + } + return result; +} + +template +bool allOf(Generator&& generator, Func&& predicate) +{ + bool result = true; + while (auto val = std::forward(generator)()) { + result = result && std::forward(predicate)(*val); + } + return result; +} + +template +bool anyOf(Span input, Func&& predicate) +{ + bool result = !(input.size() > 0); + for (size_t idx = 0; result && idx < input.size(); ++idx) { + result = result || std::forward(predicate)(input[idx]); + } + return result; +} + +template +bool anyOf(Generator&& generator, Func&& predicate) +{ + bool result = false; + while (auto val = std::forward(generator)()) { + result = result || std::forward(predicate)(*val); + } + return result; +} + +} // namespace fud + +#endif diff --git a/include/fud_array.hpp b/include/fud_array.hpp index 4e2c702..807621a 100644 --- a/include/fud_array.hpp +++ b/include/fud_array.hpp @@ -18,9 +18,9 @@ #ifndef FUD_ARRAY_HPP #define FUD_ARRAY_HPP -#include "fud_memory.hpp" +#include -#include +#include "fud_memory.hpp" namespace fud { diff --git a/include/fud_c_string.hpp b/include/fud_c_string.hpp new file mode 100644 index 0000000..44e0dc8 --- /dev/null +++ b/include/fud_c_string.hpp @@ -0,0 +1,55 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FUD_C_STRING_HPP +#define FUD_C_STRING_HPP + +#include +#include +#include + + +namespace fud { + +constexpr ssize_t cStringLength(const char* str, size_t maxLength) +{ + if (str == nullptr || maxLength > (SSIZE_MAX - 1)) { + return -1; + } + + ssize_t size = 0; + + while (str[size] != 0 && static_cast(size) < maxLength) { + size++; + } + + if (str[size] != 0 && static_cast(size) == maxLength) { + return static_cast(maxLength) + 1; + } + + return size; +} + +constexpr ssize_t cStringLength(const char* str) +{ + constexpr auto maxLength = SSIZE_MAX - 1; + return cStringLength(str, maxLength); +} + +} // namespace fud + +#endif diff --git a/include/fud_format.hpp b/include/fud_format.hpp index 8985faf..ea32bd8 100644 --- a/include/fud_format.hpp +++ b/include/fud_format.hpp @@ -18,275 +18,179 @@ #ifndef FUD_FORMAT_HPP #define FUD_FORMAT_HPP -#include "fud_assert.hpp" +// #include "fud_assert.hpp" #include "fud_result.hpp" -#include "fud_span.hpp" #include "fud_status.hpp" #include "fud_string_view.hpp" +#include "fud_utf8.hpp" -#include // for std::format_string #include #include +#include namespace fud { -template -using CharSpan = Span; - -template -using FormatLiteral = std::format_string; +struct FormatAlign +{ + enum class Value : uint8_t + { + Left, + Right, + Center + }; + + constexpr static std::optional from(utf8 letter) + { + FormatAlign formatAlign; + switch (letter) { + case '<': + formatAlign.value = Value::Left; + break; + case '>': + formatAlign.value = Value::Right; + break; + case '^': + formatAlign.value = Value::Center; + break; + default: + return std::nullopt; + } -template -Result format(CharSpan buffer, FormatLiteral formatLiteral, Args&&... args); + return formatAlign; + } -enum class FormatAlign : uint8_t { - Left, - Right, - Center + Value value; }; struct FormatFill { FormatAlign align; - char fill; + utf8 fill; + + constexpr static Result, FudStatus> parse(StringView formatView) { + // "{:A, FudStatus>; + if (formatView.length() < 3) { + return RetType::okay(std::nullopt); + } + + const auto* data = formatView.data(); + if (data[0] != 'A') { + return RetType::okay(std::nullopt); + } + + auto align = FormatAlign::from(data[1]); + if (!align.has_value()) { + return FudStatus::FormatInvalid; + } + + auto fill = data[2]; + if (!Ascii::valid(fill)) { + return FudStatus::Utf8Invalid; + } + + return RetType::okay(FormatFill{*align, fill}); + } }; -enum class FormatSign : uint8_t { +enum class FormatSign : uint8_t +{ Plus, Minus, Space }; -struct FormatSpec { - std::optional fill; - std::optional formatSign; - uint32_t minWidth; +enum class FormatStringType : uint8_t +{ + String, + Escaped, }; -namespace detail { - -template -Result formatHelper( - CharSpan buffer, - size_t formattedSize, - StringView formatView, - Arg&& arg, - Args&&... args); - -template -Result formatHelper( - CharSpan buffer, - size_t formattedSize, - StringView formatView, - Arg&& arg); - -template -Result formatHelper( - CharSpan buffer, - size_t formattedSize, - StringView formatView); - -} // namespace detail - -template -Result format(CharSpan buffer, FormatLiteral formatLiteral, Args&&... args) +enum class FormatIntegerType : uint8_t { - static_assert(Size > 0); - - if (buffer.data() == nullptr) { - return FudStatus::NullPointer; - } - - StringView formatView{formatLiteral.get()}; - - if (formatView.length() == 0 || formatView.data()[0] == '\0') { - return 0U; - } - - size_t argCount = sizeof...(args); - static_cast(argCount); - - size_t formattedSize = 0; + BinaryLower, + BinaryUpper, + Character, + Decimal, + Octal, + HexLower, + HexUpper, +}; - return detail::formatHelper(buffer, formattedSize, formatView, std::forward(args)...); -} +enum class FormatCharacterType : uint8_t +{ + BinaryLower, + BinaryUpper, + Character, + Decimal, + Octal, + HexLower, + HexUpper, +}; -namespace detail { +enum class FormatBoolType : uint8_t +{ + BinaryLower, + BinaryUpper, + Character, + Decimal, + Octal, + HexLower, + HexUpper, +}; -#define FUDETAIL_ADVANCE_FORMAT(FORMAT_VIEW, ADVANCE_BY) \ - fudAssert(ADVANCE_BY <= FORMAT_VIEW.m_length); \ - FORMAT_VIEW.m_length -= ADVANCE_BY; \ - FORMAT_VIEW.m_data += ADVANCE_BY; \ +enum class FormatFloatingType : uint8_t +{ + FloatHexLower, + FloatHexUpper, + ScientificLower, + ScientificUpper, + Fixed, + GeneralLower, + GeneralUpper +}; -constexpr bool findBracket(size_t& copyLength, StringView formatView) +enum class FormatPointerType : uint8_t { - while (copyLength < formatView.m_length) { - if (formatView.m_data[copyLength] == '{') { - return true; - } - copyLength++; - } + HexLower, + HexUpper +}; - return false; -} +using FormatType = std::variant< // break + std::monostate, + FormatStringType, + FormatIntegerType, + FormatCharacterType, + FormatBoolType, + FormatFloatingType, + FormatPointerType>; -template -size_t copyRemaining( - CharSpan buffer, - size_t formattedSize, - StringView& formatView, - size_t copyLength) -{ - fudAssert(copyLength <= formatView.length()); - if (copyLength + formattedSize > Size) { - copyLength = Size - formattedSize; - } - auto copyResult = copyMem( - buffer.data() + formattedSize, - Size - formattedSize, - formatView.m_data, - copyLength); - fudAssert(copyResult == FudStatus::Success); - FUDETAIL_ADVANCE_FORMAT(formatView, copyLength); - return formattedSize + copyLength; -} - -template -Result handleSpec( - CharSpan buffer, - size_t formattedSize, - StringView& formatView, - Arg&& arg, - bool& consumed) -{ - fudAssert(formattedSize < Size); - fudAssert(formatView.length() > 1); - - if (formatView.m_data[1] == '{') { - consumed = false; - buffer[formattedSize] = '{'; - FUDETAIL_ADVANCE_FORMAT(formatView, 2); - return formattedSize + 1; - } +struct FormatSpec; +using FormatSpecResult = Result; - static_cast(arg); - buffer[formattedSize] = 'X'; - formattedSize += 1; - size_t index = 0; - for (; index < formatView.m_length; ++index) { - if (formatView.m_data[index] == '}') { - break; - } - } - FUDETAIL_ADVANCE_FORMAT(formatView, index + 1); - return formattedSize; -} - -template -Result formatHelper( - CharSpan buffer, - size_t formattedSize, - StringView formatView, - Arg&& arg, - Args&&... args) -{ - while (formattedSize < Size) { - size_t copyLength = 0; - auto found = findBracket(copyLength, formatView); - formattedSize = copyRemaining(buffer, formattedSize, formatView, copyLength); - fudAssert(formattedSize <= Size); - if (!found || formattedSize == Size) { - return formattedSize; - } +struct FormatSpec { + size_t width; + size_t precision; + FormatFill fill; + FormatSign formatSign; - bool consumed = false; - auto specResult = handleSpec(buffer, formattedSize, formatView, std::forward(arg), consumed); - formattedSize = M_TakeOrReturn(specResult); - fudAssert(formattedSize <= Size); - if (formattedSize == Size) { - return formattedSize; - } + FormatType formatType; - if (consumed) { - return formatHelper(buffer, formattedSize, formatView, std::forward(args)...); - } - } + bool hasWidth; + bool takesWidth; - return formattedSize; -} + bool hasPrecision; + bool takesPrecision; -template -Result formatHelper( - CharSpan buffer, - size_t formattedSize, - StringView formatView, - Arg&& arg) -{ - while (formattedSize < Size) { - size_t copyLength = 0; - auto found = findBracket(copyLength, formatView); - formattedSize = copyRemaining(buffer, formattedSize, formatView, copyLength); - fudAssert(formattedSize <= Size); - if (!found || formattedSize == Size) { - return formattedSize; - } + bool hasFill; - bool consumed = false; - auto specResult = handleSpec(buffer, formattedSize, formatView, std::forward(arg), consumed); - formattedSize = M_TakeOrReturn(specResult); - if (consumed) { - return formatHelper(buffer, formattedSize, formatView); - } - } - return formattedSize; -} - -template -Result formatHelper( - CharSpan buffer, - size_t formattedSize, - StringView formatView) -{ - size_t index = 0; - while (formattedSize < Size && formatView.m_length > 0) { - while (index < formatView.m_length && formattedSize + index < Size) { - if (formatView.m_data[index] == '{') { - break; - } - index++; - } - bool isBracket{false}; - if (index + 1 < formatView.m_length && formattedSize + index + 1 < Size) { - if (formatView.m_data[index] == '{') { - isBracket = true; - index++; - } - } - auto copyResult = copyMem( - buffer.data() + formattedSize, - Size - formattedSize, - formatView.m_data, - index); - formattedSize += index; - formatView.m_length -= index; - formatView.m_data += index; - if (isBracket) { - index = 0; - if (formatView.m_length > 0) { - formatView.m_length--; - formatView.m_data++; - } - if (formattedSize < Size) { - buffer.data()[formattedSize] = 'X'; - formattedSize++; - } - } - } - return formattedSize; -} + bool hasFormatSign; + + bool alternateForm; -#undef FUDETAIL_ADVANCE_FORMAT + bool leadingZero; -} // namespace detail + static Result make(StringView& formatView, size_t specIndex); +}; } // namespace fud diff --git a/include/fud_result.hpp b/include/fud_result.hpp index 4bfb819..877c49c 100644 --- a/include/fud_result.hpp +++ b/include/fud_result.hpp @@ -19,6 +19,7 @@ #define FUD_RESULT_HPP #include +#include namespace fud { @@ -28,62 +29,62 @@ class [[nodiscard]] Result { public: using ResultType = Result; - Result(const T& value) : m_value{value} + constexpr Result(const T& value) : m_value{value} { } - Result(const E& value) : m_value{value} + constexpr Result(const E& value) : m_value{value} { } - Result(T&& value) : m_value{std::move(value)} + constexpr Result(T&& value) : m_value{std::move(value)} { } - Result(E&& value) : m_value{std::move(value)} + constexpr Result(E&& value) : m_value{std::move(value)} { } - static ResultType okay(const T& okay) + static constexpr ResultType okay(const T& okay) { return ResultType{okay}; } - static ResultType okay(T&& okay) + static constexpr ResultType okay(T&& okay) { return ResultType{std::move(okay)}; } - static ResultType error(const E& error) + static constexpr ResultType error(const E& error) { return ResultType{error}; } - static ResultType error(E&& error) + static constexpr ResultType error(E&& error) { return ResultType{std::move(error)}; } template - static ResultType okay(const Result& okayRes) + static constexpr ResultType okay(const Result& okayRes) { return ResultType{okayRes.getOkay()}; } template - static ResultType okay(Result&& okayRes) + static constexpr ResultType okay(Result&& okayRes) { return ResultType{okayRes.takeOkay()}; } template - static ResultType error(const Result& errorRes) + static constexpr ResultType error(const Result& errorRes) { return ResultType{errorRes.getError()}; } template - static ResultType error(Result&& errorRes) + static constexpr ResultType error(Result&& errorRes) { return ResultType{errorRes.takeError()}; } @@ -98,28 +99,28 @@ class [[nodiscard]] Result { return (m_value.index() == 1); } - [[nodiscard]] const T& getOkay() const& + [[nodiscard]] constexpr const T& getOkay() const& { return std::get(m_value); } - [[nodiscard]] const E& getError() const& + [[nodiscard]] constexpr const E& getError() const& { return std::get(m_value); } - [[nodiscard]] T&& takeOkay() + [[nodiscard]] constexpr T&& takeOkay() { return std::move(std::get(m_value)); } - [[nodiscard]] E&& takeError() + [[nodiscard]] constexpr E&& takeError() { return std::move(std::get(m_value)); } private: - Result() : m_value() + constexpr Result() : m_value() { } diff --git a/include/fud_span.hpp b/include/fud_span.hpp index cc693f8..5b8497e 100644 --- a/include/fud_span.hpp +++ b/include/fud_span.hpp @@ -18,41 +18,71 @@ #ifndef FUD_SPAN_HPP #define FUD_SPAN_HPP -#include - #include "fud_array.hpp" +#include "fud_result.hpp" +#include "fud_status.hpp" + +#include +#include namespace fud { -template +template struct Span { static_assert(Size > 0); using ValueType = T; - static Span make(Array& array) { - Span output{}; - output.m_data = array.data(); + static Span make(Array& array) + { + Span output{array.data(), Size}; return output; } + static Result make(Array& array, size_t size) + { + if (size > Size) { + return FudStatus::ArgumentInvalid; + } + return Span{array.data(), Size}; + } + template - static Span make(const Array& array) { + static Span make(const Array& array) + { static_assert(std::convertible_to); - Span output{}; - output.m_data = array.data(); - return output; + return Span{array.data(), Size}; } template - static Span make(Array& array) { + static Result make(const Array& array, size_t size) + { static_assert(std::convertible_to); - Span output{}; - output.m_data = array.data(); - return output; + if (size > Size) { + return FudStatus::ArgumentInvalid; + } + return Span{array.data(), Size}; + } + + template + static Span make(Array& array) + { + static_assert(std::convertible_to); + return Span{array.data(), array.size()}; + } + + template + static Result make(Array& array, size_t size) + { + static_assert(std::convertible_to); + if (size > Size) { + return FudStatus::ArgumentInvalid; + } + return Span{array.data(), array.size()}; } template - static Span makeCStringBuffer(Array& array) { + static Span makeCStringBuffer(Array& array) + { static_assert(ArraySize > Size); Span output{}; output.m_data = array.data(); @@ -60,10 +90,15 @@ struct Span { } T* m_data; + const size_t m_size; [[nodiscard]] constexpr size_t size() const { - return Size; + if constexpr (Size < SIZE_MAX) { + return Size; + } else { + return m_size; + } } constexpr T& front() @@ -78,12 +113,12 @@ struct Span { constexpr T& back() { - return m_data[Size - 1]; + return m_data[size() - 1]; } constexpr const T& back() const { - return m_data[Size - 1]; + return m_data[size() - 1]; } constexpr T* data() noexcept @@ -108,12 +143,12 @@ struct Span { constexpr T* end() noexcept { - return m_data + Size; + return m_data + size(); } constexpr const T* end() const noexcept { - return m_data + Size; + return m_data + size(); } constexpr T& operator[](size_t index) diff --git a/include/fud_status.hpp b/include/fud_status.hpp index bda646b..91048ac 100644 --- a/include/fud_status.hpp +++ b/include/fud_status.hpp @@ -40,6 +40,7 @@ enum class [[nodiscard]] FudStatus RangeError, VariantInvalid, BadArrayLength, + FormatInvalid, NotImplemented, NotSupported }; @@ -83,6 +84,8 @@ constexpr const char* FudStatusToString(FudStatus status) return "VariantInvalid"; case FudStatus::BadArrayLength: return "BadArrayLength"; + case FudStatus::FormatInvalid: + return "FormatInvalid"; case FudStatus::NotImplemented: return "NotImplemented"; case FudStatus::NotSupported: diff --git a/include/fud_string.hpp b/include/fud_string.hpp index ba05450..60a328f 100644 --- a/include/fud_string.hpp +++ b/include/fud_string.hpp @@ -23,6 +23,7 @@ #include "fud_result.hpp" #include "fud_status.hpp" #include "fud_string_view.hpp" +#include "fud_c_string.hpp" #include "fud_utf8.hpp" #include diff --git a/include/fud_string_view.hpp b/include/fud_string_view.hpp index 7b4925e..8a47ae5 100644 --- a/include/fud_string_view.hpp +++ b/include/fud_string_view.hpp @@ -71,6 +71,14 @@ struct StringView { Result trimWhitespace(); + [[nodiscard]] bool advance(); + + void advanceUnsafe(); + + [[nodiscard]] bool advance(size_t size); + + void advanceUnsafe(size_t size); + FudStatus toUint8(uint8_t& number, uint8_t specifiedRadix, size_t& strLen) const; FudStatus toUint16(uint16_t& number, uint8_t specifiedRadix, size_t& strLen) const; @@ -96,12 +104,6 @@ struct StringView { const utf8* m_data{nullptr}; }; -FudStatus skipWhitespace(StringView& view, size_t& skipIndex); - -ssize_t cStringLength(const char* str); - -ssize_t cStringLength(const char* str, size_t maxLength); - } // namespace fud #endif diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp index 539e0f4..3b1a6b7 100644 --- a/include/fud_utf8.hpp +++ b/include/fud_utf8.hpp @@ -19,8 +19,8 @@ #define FUD_UTF8_HPP #include "fud_array.hpp" -#include "fud_status.hpp" #include "fud_unique_array.hpp" +#include "fud_c_string.hpp" #include #include @@ -28,6 +28,8 @@ namespace fud { + + using utf8 = unsigned char; class String; @@ -110,6 +112,18 @@ struct Utf82Byte { constexpr Utf82Byte(utf8 first, utf8 second) noexcept : characters{{first, second}} { } + + __attribute__((nonnull)) + constexpr Utf82Byte(const char* letterStr) noexcept : characters{} + { + auto length = cStringLength(letterStr, 2); + if (length < 2) { + return; + } + characters[0] = static_cast(letterStr[0]); + characters[1] = static_cast(letterStr[1]); + } + Array characters; static constexpr size_t size() noexcept { @@ -145,6 +159,18 @@ struct Utf83Byte { { } + __attribute__((nonnull)) + constexpr Utf83Byte(const char* letterStr) noexcept : characters{} + { + auto length = cStringLength(letterStr, 3); + if (length < 3) { + return; + } + characters[0] = static_cast(letterStr[0]); + characters[1] = static_cast(letterStr[1]); + characters[2] = static_cast(letterStr[2]); + } + Array characters; static constexpr size_t size() noexcept @@ -187,6 +213,19 @@ struct Utf84Byte { { } + __attribute__((nonnull)) + constexpr Utf84Byte(const char* letterStr) noexcept : characters{} + { + auto length = cStringLength(letterStr, 4); + if (length < 4) { + return; + } + characters[0] = static_cast(letterStr[0]); + characters[1] = static_cast(letterStr[1]); + characters[2] = static_cast(letterStr[2]); + characters[3] = static_cast(letterStr[3]); + } + Array characters; static constexpr size_t size() noexcept @@ -250,11 +289,12 @@ struct FudUtf8 { Utf8Variant m_variant{Utf8Variant{Ascii{}}}; static constexpr Ascii invalidAsciiCode{Ascii{0xFF}}; - static FudUtf8 fromString(const String& fudString, size_t index) noexcept; - static FudUtf8 fromStringView(StringView view, size_t index) noexcept; - // static FudUtf8 fromStringView(const StringView& view, size_t index) noexcept; - static constexpr FudUtf8 makeUtf8(const Array& data) + static FudUtf8 from(const String& fudString, size_t index) noexcept; + + static FudUtf8 from(StringView view, size_t index) noexcept; + + static constexpr FudUtf8 make(const Array& data) { FudUtf8 unicode{}; if (Ascii::valid(data[0])) { @@ -271,7 +311,12 @@ struct FudUtf8 { return unicode; } - static constexpr FudUtf8 makeUtf8(const Ascii& utf8Char) + static constexpr FudUtf8 make(utf8 utf8Char) + { + return make(Ascii{utf8Char}); + } + + static constexpr FudUtf8 make(Ascii utf8Char) { FudUtf8 unicode{{Utf8Variant{Ascii{}}}}; if (utf8Char.valid()) { @@ -282,6 +327,15 @@ struct FudUtf8 { return unicode; } + static constexpr FudUtf8 make(Utf8Variant utf8Variant) { + FudUtf8 unicode{}; + unicode.m_variant = utf8Variant; + if (!std::visit([](auto arg) { return arg.valid(); }, utf8Variant)) { + unicode.m_variant = invalidAsciiCode; + } + return unicode; + } + static constexpr FudUtf8 invalidAscii() { FudUtf8 character{}; @@ -460,89 +514,93 @@ struct FudUtf8 { }; /** \brief Checks if a character is ascii. */ -bool char_is_ascii(char character); +[[nodiscard]] bool charIsAscii(char character); -FudStatus utf8_is_ascii(FudUtf8& character, bool& isAscii); +[[nodiscard]] bool utf8IsAscii(FudUtf8 character); /** \brief Checks if a character is alphanumeric. */ -bool char_is_alphanumeric(char character); +[[nodiscard]] bool charIsAlphanumeric(char character); /** \brief Checks if a character is alphanumeric. */ -FudStatus utf8_is_alphanumeric(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsAlphanumeric(FudUtf8 character); /** \brief Checks if a character is alphabetic. */ -bool char_is_alpha(char character); +[[nodiscard]] bool charIsAlpha(char character); /** \brief Checks if a character is alphabetic. */ -FudStatus utf8_is_alpha(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsAlpha(FudUtf8 character); /** \brief Checks if a character is lowercase. */ -bool char_is_lowercase(char character); +[[nodiscard]] bool charIsLowercase(char character); /** \brief Checks if a character is lowercase. */ -FudStatus utf8_is_lowercase(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsLowercase(FudUtf8 character); /** \brief Checks if a character is an uppercase character. */ -bool char_is_uppercase(char character); +[[nodiscard]] bool charIsUppercase(char character); /** \brief Checks if a character is uppercase. */ -FudStatus utf8_is_uppercase(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsUppercase(FudUtf8 character); /** \brief Checks if a character is a digit. */ -bool char_is_digit(char character); +[[nodiscard]] bool charIsDigit(char character); /** \brief Checks if a character is a digit. */ -FudStatus utf8_is_digit(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsDigit(FudUtf8 character); /** \brief Checks if a character is a hexadecimal character. */ -bool char_is_hex_digit(char character); +[[nodiscard]] bool charIsHexDigit(char character); /** \brief Checks if a character is a hexadecimal digit. */ -FudStatus utf8_is_hex_digit(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsHexDigit(FudUtf8 character); /** \brief Checks if a character is a control character. */ -bool char_is_control(char character); +[[nodiscard]] bool charIsControl(char character); /** \brief Checks if a character is a control character. */ -FudStatus utf8_is_control(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsControl(FudUtf8 character); /** \brief Checks if a character is a graphical character. */ -bool char_is_graphical(char character); +[[nodiscard]] bool charIsGraphical(char character); /** \brief Checks if a character is a graphical character. */ -FudStatus utf8_is_graphical(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsGraphical(FudUtf8 character); /** \brief Checks if a character is a space character. */ -bool char_is_space(char character); +[[nodiscard]] bool charIsSpace(char character); /** \brief Checks if a character is a space character. */ -FudStatus utf8_is_space(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsSpace(FudUtf8 character); /** \brief Checks if a character is a blank character. */ -bool char_is_blank(char character); +[[nodiscard]] bool charIsBlank(char character); /** \brief Checks if a character is a blank character. */ -FudStatus utf8_is_blank(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsBlank(FudUtf8 character); /** \brief Checks if a character is a printable character. */ -bool char_is_printable(char character); +[[nodiscard]] bool charIsPrintable(char character); /** \brief Checks if a character is a printable character. */ -FudStatus utf8_is_printable(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsPrintable(FudUtf8 character); /** \brief Checks if a character is a punctuation character. */ -bool char_is_punctuation(char character); +[[nodiscard]] bool charIsPunctuation(char character); /** \brief Checks if a character is a punctuation character. */ -FudStatus utf8_is_punctuation(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsPunctuation(FudUtf8 character); -uint8_t char_to_lower(uint8_t character); +/** \brief Converts character to lowercase if valid. */ +uint8_t charToLower(uint8_t character); -FudUtf8* utf8_to_lower(FudUtf8* character); +/** \brief Converts character to lowercase if valid. */ +FudUtf8 utf8ToLower(FudUtf8 character); -uint8_t char_to_upper(uint8_t character); +/** \brief Converts character to uppercase if valid. */ +uint8_t charToUpper(uint8_t character); -FudUtf8* utf8_to_upper(FudUtf8* character); +/** \brief Converts character to uppercase if valid. */ +FudUtf8 utf8ToUpper(FudUtf8 character); } // namespace fud diff --git a/include/fud_vector.hpp b/include/fud_vector.hpp new file mode 100644 index 0000000..56e1659 --- /dev/null +++ b/include/fud_vector.hpp @@ -0,0 +1,64 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FUD_VECTOR_HPP +#define FUD_VECTOR_HPP + +#include "fud_allocator.hpp" +#include "fud_result.hpp" +#include "fud_status.hpp" + +#include + +namespace fud { + +template +class Vector { + public: + static Result, FudStatus> from(const Vector& rhs); + + static Vector move(Vector&& rhs); + + FudStatus copy(const Vector& rhs); + + FudStatus take(Vector&& rhs); + + [[nodiscard]] size_t size() const { + return m_length; + } + + [[nodiscard]] size_t capacity() const { + return m_capacity; + } + + FudStatus reserve(); + + FudStatus resize(); + + FudStatus clear(); + + // FudResult at(); + + private: + Allocator* m_allocator{&globalFudAllocator}; + size_t m_length{0}; + size_t m_capacity{0}; +}; + +} // namespace fud + +#endif diff --git a/source/fud_assert.cpp b/source/fud_assert.cpp index 98f17d0..3df6734 100644 --- a/source/fud_assert.cpp +++ b/source/fud_assert.cpp @@ -1,10 +1,11 @@ #include "fud_assert.hpp" -#include "fud_array.hpp" +// #include "fud_array.hpp" #include #include -#include +#include +// #include namespace fud { @@ -20,15 +21,18 @@ constexpr auto MAX_LINE_CHARS = BITS_PER_OCTAL * sizeof(decltype(std::source_loc fputs(file_name, stderr); } + /* constexpr std::size_t assertMsgSize = MAX_LINE_CHARS + 3; Array buffer{}; static_cast(std::format_to_n(buffer.data(), buffer.size() - 1U, ":{}:", sourceLocation.line())); buffer[buffer.size() - 1] = '\0'; fputs(buffer.data(), stderr); + */ fputs(sourceLocation.function_name(), stderr); fputs(": ", stderr); fputs(assertion, stderr); + fputc('\n', stderr); std::terminate(); } diff --git a/source/fud_format.cpp b/source/fud_format.cpp new file mode 100644 index 0000000..ab1bb4f --- /dev/null +++ b/source/fud_format.cpp @@ -0,0 +1,14 @@ +// #include "fud_format.hpp" + +namespace fud { + +/* +Result FormatSpec::make(StringView view, size_t& length) +{ + static_cast(view); + static_cast(length); + return FudStatus::NotImplemented; +} +*/ + +} // namespace fud diff --git a/source/fud_string.cpp b/source/fud_string.cpp index d354fe7..b714dfc 100644 --- a/source/fud_string.cpp +++ b/source/fud_string.cpp @@ -23,31 +23,6 @@ namespace fud { -ssize_t cStringLength(const char* str) -{ - constexpr auto maxLength = SSIZE_MAX - 1; - return cStringLength(str, maxLength); -} - -ssize_t cStringLength(const char* str, size_t maxLength) -{ - if (str == nullptr || maxLength > (SSIZE_MAX - 1)) { - return -1; - } - - ssize_t size = 0; - - while (str[size] != 0 && static_cast(size) < maxLength) { - size++; - } - - if (str[size] != 0 && static_cast(size) == maxLength) { - return static_cast(maxLength) + 1; - } - - return size; -} - StringResult String::makeFromCString(const char* cString) { return makeFromCString(cString, &globalFudAllocator); diff --git a/source/fud_string_view.cpp b/source/fud_string_view.cpp index 23a4671..fdb63b3 100644 --- a/source/fud_string_view.cpp +++ b/source/fud_string_view.cpp @@ -61,7 +61,7 @@ Result StringView::skipWhitespace() return RetType::error(FudStatus::NullPointer); } size_t index = 0; - while (m_length > 0 && char_is_space(static_cast(m_data[0]))) { + while (m_length > 0 && charIsSpace(static_cast(m_data[0]))) { m_data++; m_length--; index++; @@ -78,7 +78,7 @@ Result StringView::trimWhitespace() } size_t count = 0; - while (m_length > 0 && char_is_space(static_cast(m_data[m_length - 1]))) { + while (m_length > 0 && charIsSpace(static_cast(m_data[m_length - 1]))) { m_length--; count++; } @@ -86,6 +86,40 @@ Result StringView::trimWhitespace() return RetType::okay(count); } +bool StringView::advance() +{ + if (m_length < 1) { + return false; + } + m_length--; + m_data++; + return true; +} + +void StringView::advanceUnsafe() +{ + fudAssert(m_length > 0); + m_length--; + m_data++; +} + +bool StringView::advance(size_t size) +{ + if (size > m_length) { + return false; + } + m_length -= size; + m_data += size; + return true; +} + +void StringView::advanceUnsafe(size_t size) +{ + fudAssert(size <= m_length); + m_length -= size; + m_data += size; +} + #if 0 FudStatus fud_string_truncate(ExtBasicString* source, ssize_t newLength) diff --git a/source/fud_utf8.cpp b/source/fud_utf8.cpp index ee8137a..4d617da 100644 --- a/source/fud_utf8.cpp +++ b/source/fud_utf8.cpp @@ -19,26 +19,19 @@ #include "fud_string.hpp" -#include // IWYU pragma: keep - this is for placement new overloads. - namespace fud { -FudUtf8 FudUtf8::fromString(const String& fudString, size_t index) noexcept +FudUtf8 FudUtf8::from(const String& fudString, size_t index) noexcept { if (!fudString.valid()) { return invalidAscii(); } - return fromStringView(StringView{fudString}, index); + return from(StringView{fudString}, index); } -// FudUtf8 FudUtf8::fromStringView(const StringView& view, size_t index) noexcept -// { -// return fromStringView(StringView{view}, index); -// } - -FudUtf8 FudUtf8::fromStringView(StringView view, size_t index) noexcept +FudUtf8 FudUtf8::from(StringView view, size_t index) noexcept { auto viewLocal{view}; auto len = viewLocal.length(); @@ -76,126 +69,113 @@ FudUtf8 FudUtf8::fromStringView(StringView view, size_t index) noexcept return invalidAscii(); } -bool char_is_ascii(char character) +bool charIsAscii(char character) { return static_cast(character & ~ASCII_MASK) == 0; } -FudStatus utf8_is_ascii(FudUtf8* character, bool* isAscii) +bool utf8IsAscii(FudUtf8 character) { - if (anyAreNull(character, isAscii)) { - return FudStatus::NullPointer; - } - - *isAscii = character->getType() == Utf8Type::Ascii && character->valid(); - - return FudStatus::Success; + return character.getType() == Utf8Type::Ascii && character.valid(); } namespace impl { -/* Assumes that predicate is not a null pointer! */ template -FudStatus isAsciiPredicate(FudUtf8* character, bool* pred, Predicate&& predicate) +bool isAsciiPredicate(FudUtf8 character, Predicate&& predicate) { - if (anyAreNull(character, pred)) { - return FudStatus::NullPointer; - } - - auto maybeAscii = character->getAscii(); + auto maybeAscii = character.getAscii(); if (!maybeAscii.has_value()) { - return FudStatus::ArgumentInvalid; + return false; } auto asciiChar = *maybeAscii; - *pred = std::forward(predicate)(asciiChar.asChar()); - - return FudStatus::Success; + return std::forward(predicate)(asciiChar.asChar()); } } // namespace impl -bool char_is_alphanumeric(char character) +bool charIsAlphanumeric(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } - if (char_is_alpha(character)) { + if (charIsAlpha(character)) { return true; } - return char_is_digit(character); + return charIsDigit(character); } -FudStatus utf8_is_alphanumeric(FudUtf8* character, bool* pred) +bool utf8IsAlphanumeric(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_alphanumeric); + return impl::isAsciiPredicate(character, charIsAlphanumeric); } -bool char_is_alpha(char character) +bool charIsAlpha(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } - if (char_is_uppercase(character)) { + if (charIsUppercase(character)) { return true; } - return char_is_lowercase(character); + return charIsLowercase(character); } -FudStatus utf8_is_alpha(FudUtf8* character, bool* pred) +bool utf8IsAlpha(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_alpha); + return impl::isAsciiPredicate(character, charIsAlpha); } -bool char_is_lowercase(char character) +bool charIsLowercase(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return 'a' <= character && character <= 'z'; } -FudStatus utf8_is_lowercase(FudUtf8* character, bool* pred) +bool utf8IsLowercase(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_lowercase); + return impl::isAsciiPredicate(character, charIsLowercase); } -bool char_is_uppercase(char character) +bool charIsUppercase(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return 'A' <= character && character <= 'Z'; } -FudStatus utf8_is_uppercase(FudUtf8* character, bool* pred) +bool utf8IsUppercase(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_uppercase); + return impl::isAsciiPredicate(character, charIsUppercase); } -bool char_is_digit(char character) +bool charIsDigit(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return '0' <= character && character <= '9'; } -FudStatus utf8_is_digit(FudUtf8* character, bool* pred) +bool utf8IsDigit(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_digit); + return impl::isAsciiPredicate(character, charIsDigit); } -bool char_is_hex_digit(char character) +bool charIsHexDigit(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } @@ -203,14 +183,14 @@ bool char_is_hex_digit(char character) ('A' <= character && character <= 'F'); } -FudStatus utf8_is_hex_digit(FudUtf8* character, bool* pred) +bool utf8IsHexDigit(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_hex_digit); + return impl::isAsciiPredicate(character, charIsHexDigit); } -bool char_is_control(char character) +bool charIsControl(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } @@ -219,70 +199,70 @@ bool char_is_control(char character) return ((static_cast(character) <= maxControlChar)) || character == deleteChar; } -FudStatus utf8_is_control(FudUtf8* character, bool* pred) +bool utf8IsControl(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_control); + return impl::isAsciiPredicate(character, charIsControl); } -bool char_is_graphical(char character) +bool charIsGraphical(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } - return char_is_alphanumeric(character) || char_is_punctuation(character); + return charIsAlphanumeric(character) || charIsPunctuation(character); } -FudStatus utf8_is_graphical(FudUtf8* character, bool* pred) +bool utf8IsGraphical(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_graphical); + return impl::isAsciiPredicate(character, charIsGraphical); } -bool char_is_space(char character) +bool charIsSpace(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return character == ' ' || character == '\t' || character == '\n' || character == '\r' || character == '\v'; } -FudStatus utf8_is_space(FudUtf8* character, bool* pred) +bool utf8IsSpace(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_space); + return impl::isAsciiPredicate(character, charIsSpace); } -bool char_is_blank(char character) +bool charIsBlank(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return character == ' ' || character == '\t'; } -FudStatus utf8_is_blank(FudUtf8* character, bool* pred) +bool utf8IsBlank(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_blank); + return impl::isAsciiPredicate(character, charIsBlank); } -bool char_is_printable(char character) +bool charIsPrintable(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return (character >= ' ' && character <= '~'); } -FudStatus utf8_is_printable(FudUtf8* character, bool* pred) +bool utf8IsPrintable(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_printable); + return impl::isAsciiPredicate(character, charIsPrintable); } -bool char_is_punctuation(char character) +bool charIsPunctuation(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } @@ -290,14 +270,14 @@ bool char_is_punctuation(char character) (character >= '[' && character <= '`') || (character >= '{' && character <= '~'); } -FudStatus utf8_is_punctuation(FudUtf8* character, bool* pred) +bool utf8IsPunctuation(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_punctuation); + return impl::isAsciiPredicate(character, charIsPunctuation); } -uint8_t char_to_lower(uint8_t character) +uint8_t charToLower(uint8_t character) { - if (char_is_uppercase(static_cast(character))) { + if (charIsUppercase(static_cast(character))) { constexpr uint8_t lowerA = 'a'; constexpr uint8_t upperA = 'A'; return static_cast(character - upperA) + lowerA; @@ -305,22 +285,18 @@ uint8_t char_to_lower(uint8_t character) return character; } -FudUtf8* utf8_to_lower(FudUtf8* character) +FudUtf8 utf8ToLower(FudUtf8 character) { - if (character == nullptr) { - return character; - } - - static_cast(character->transformAscii([](Ascii& ascii) { - ascii = Ascii{char_to_lower(static_cast(ascii.asChar()))}; + static_cast(character.transformAscii([](Ascii& ascii) { + ascii = Ascii{charToLower(static_cast(ascii.asChar()))}; })); return character; } -uint8_t char_to_upper(uint8_t character) +uint8_t charToUpper(uint8_t character) { - if (char_is_lowercase(static_cast(character))) { + if (charIsLowercase(static_cast(character))) { constexpr uint8_t lowerA = 'a'; constexpr uint8_t upperA = 'A'; return static_cast(character - lowerA) + upperA; @@ -328,14 +304,10 @@ uint8_t char_to_upper(uint8_t character) return character; } -FudUtf8* utf8_to_upper(FudUtf8* character) +FudUtf8 utf8ToUpper(FudUtf8 character) { - if (character == nullptr) { - return character; - } - - static_cast(character->transformAscii([](Ascii& ascii) { - ascii = Ascii{char_to_upper(static_cast(ascii.asChar()))}; + static_cast(character.transformAscii([](Ascii& ascii) { + ascii = Ascii{charToUpper(static_cast(ascii.asChar()))}; })); return character; diff --git a/source/fud_utf8_iterator.cpp b/source/fud_utf8_iterator.cpp index 2557dc0..a815c64 100644 --- a/source/fud_utf8_iterator.cpp +++ b/source/fud_utf8_iterator.cpp @@ -25,7 +25,7 @@ std::optional Utf8Iterator::peek() const return std::nullopt; } - auto character = FudUtf8::fromStringView(m_view, m_index); + auto character = FudUtf8::from(m_view, m_index); if (!character.valid()) { return std::nullopt; @@ -41,7 +41,7 @@ std::optional Utf8Iterator::next() return std::nullopt; } - auto character = FudUtf8::fromStringView(m_view, m_index); + auto character = FudUtf8::from(m_view, m_index); if (!character.valid()) { m_index = m_view.length(); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0860c0d..c4d957b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -59,23 +59,17 @@ endfunction() fud_add_test(test_fud SOURCES test_fud.cpp) fud_add_test(test_allocator SOURCES test_allocator.cpp) fud_add_test(test_assert SOURCES test_assert.cpp) +# fud_add_test(test_c_file SOURCES test_c_file.cpp) fud_add_test(test_directory SOURCES test_directory.cpp) fud_add_test(test_format SOURCES test_format.cpp) fud_add_test(test_result SOURCES test_result.cpp) fud_add_test(test_span SOURCES test_span.cpp) fud_add_test(test_sqlite SOURCES test_sqlite.cpp) fud_add_test(test_string SOURCES test_string.cpp) +fud_add_test(test_utf8 SOURCES test_utf8.cpp) + # fud_add_test(test_deserialize_number SOURCES test_deserialize_number.cpp) # fud_add_test(test_ext_algorithm SOURCES test_algorithm.cpp) # fud_add_test(test_ext_array SOURCES # test_ext_array.cpp # test_ext_unique_array.cpp) -# fud_add_test(test_ext_utf8 SOURCES -# test_ext_utf8.cpp) -# fud_add_test(test_ext_string SOURCES -# test_ext_string.cpp -# test_ext_string_cxx.cpp) -# fud_add_test(test_ext_string_format SOURCES -# test_ext_string_format.cpp) - -# fud_add_test(test_c_file SOURCES test_c_file.cpp) diff --git a/test/test_common.hpp b/test/test_common.hpp index 05f86db..0ca8eb4 100644 --- a/test/test_common.hpp +++ b/test/test_common.hpp @@ -34,7 +34,16 @@ static_assert(sizeof(THREE_BYTE) == 3 + 1); static_assert(sizeof(FOUR_BYTE) == 4 + 1); #define CHQUOTE "why waste time learning, when ignorance is instantaneous?" -#define CHARACTER_SET "abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ" + +#define LOWERCASE_CHARS "abcdefghijklmnopqrstuvwxyz" +#define UPPERCASE_CHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +#define DECIMAL_CHARS "0123456789" +#define ALPHA_CHARS LOWERCASE_CHARS UPPERCASE_CHARS +#define ALPHA_NUMERIC_CHARS ALPHA_CHARS DECIMAL_CHARS +#define PUNCTUATION_CHARS "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" +#define GRAPHICAL_CHARS ALPHA_NUMERIC_CHARS PUNCTUATION_CHARS +#define CHARACTER_SET LOWERCASE_CHARS " " UPPERCASE_CHARS + // NOLINTEND(cppcoreguidelines-macro-usage) constexpr size_t charSetSize = sizeof(CHARACTER_SET) - 1; diff --git a/test/test_format.cpp b/test/test_format.cpp index a373fec..319ed22 100644 --- a/test/test_format.cpp +++ b/test/test_format.cpp @@ -15,21 +15,17 @@ * limitations under the License. */ -#include "fud_array.hpp" -#include "fud_format.hpp" -#include "fud_span.hpp" +// #include "fud_array.hpp" +// #include "fud_format.hpp" +// #include "fud_span.hpp" #include "gtest/gtest.h" namespace fud { -TEST(FormatTest, BasicTest) +TEST(FormatTest, FormatSpecTest) { - auto buffer{Array::constFill('\0')}; - auto span = Span::makeCStringBuffer(buffer); - auto formatResult = format(span, "Hello, {}! {}", "world", 42); - printf("%s\n", buffer.data()); } } // namespace fud diff --git a/test/test_utf8.cpp b/test/test_utf8.cpp new file mode 100644 index 0000000..8f1d655 --- /dev/null +++ b/test/test_utf8.cpp @@ -0,0 +1,1163 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fud_algorithm.hpp" +#include "fud_allocator.hpp" +#include "fud_array.hpp" +#include "fud_string.hpp" +#include "fud_utf8.hpp" +#include "fud_utf8_iterator.hpp" +#include "fud_vector.hpp" +#include "test_common.hpp" +// #include "fud_format.hpp" +// #include "fud_span.hpp" + +#include "gtest/gtest.h" + +namespace fud { + +constexpr size_t validAsciiSize = INT8_MAX + 1; +constexpr size_t invalidAsciiSize = UINT8_MAX + 1 - validAsciiSize; + +constexpr size_t numControlChars = 33; +constexpr char printableCharOffset = 0x20; + +constexpr auto invalidAscii = FudUtf8::invalidAsciiCode.character(); + +auto generateInvalidAsciiChars() +{ + Iota iota{}; + return generate([]() { return Array{}; }, [&]() { return iota().value(); }); +} + +TEST(Utf8Test, Utf8Creation) +{ + const Array threeByte = {THREE_BYTE}; + + FudUtf8 utf8Point{FudUtf8::make(threeByte)}; + ASSERT_NE(utf8Point.data(), nullptr); + ASSERT_EQ(utf8Point.size(), 3); + ASSERT_NE(utf8Point.hash(), -1); + + const Array asciiLetter = {'A'}; + utf8Point = FudUtf8::make(asciiLetter); + ASSERT_NE(utf8Point.data(), nullptr); + ASSERT_EQ(utf8Point.size(), 1); + + const Array twoByte = {TWO_BYTE}; + utf8Point = FudUtf8::make(twoByte); + ASSERT_NE(utf8Point.data(), nullptr); + ASSERT_EQ(utf8Point.size(), 2); + ASSERT_NE(utf8Point.hash(), -1); + + Array fourByte = { + static_cast(FOUR_BYTE[0]), + static_cast(FOUR_BYTE[1]), + static_cast(FOUR_BYTE[2]), + static_cast(FOUR_BYTE[3])}; + utf8Point = FudUtf8::make(fourByte); + ASSERT_NE(utf8Point.data(), nullptr); + ASSERT_EQ(utf8Point.size(), 4); + ASSERT_NE(utf8Point.hash(), -1); + + const Array invalidBytes = {0xFF, 0xFF, 0xFF, 0xFF}; + utf8Point = FudUtf8::make(invalidBytes); + ASSERT_EQ(utf8Point.data(), nullptr); + ASSERT_EQ(utf8Point.size(), 0); + ASSERT_EQ(utf8Point.hash(), -1); +} + +TEST(Utf8Test, Utf8MultiByte) +{ + Array data{MULTI_BYTE_LITERAL}; + constexpr size_t bufSize = data.size(); + EXPECT_EQ(data[bufSize - 1], '\0'); + + class FixedAllocator final : public Allocator { + private: + Array m_memory{}; + size_t m_allocated{0}; + + public: + virtual ~FixedAllocator() override final = default; + + virtual Result allocate(size_t bytes, size_t alignment) override final + { + static_cast(alignment); + if (bytes > m_memory.size() - m_allocated) { + return FudStatus::AllocFailure; + } + auto* data = m_memory.data() + m_allocated; + m_allocated += bytes; + return data; + } + + virtual FudStatus deallocate(void* pointer, size_t bytes) override final + { + static_cast(pointer); + static_cast(bytes); + return FudStatus::Success; + } + + virtual bool isEqual(const Allocator& rhs) const override final + { + return &rhs == this; + } + }; + FixedAllocator fixedAllocator; + + auto stringBufferRes{String::makeFromCString(MULTI_BYTE_LITERAL, &fixedAllocator)}; + + ASSERT_TRUE(stringBufferRes.isOkay()); + auto stringBuffer{stringBufferRes.takeOkay()}; + EXPECT_EQ(stringBuffer.size(), bufSize); + EXPECT_EQ(stringBuffer.size(), sizeof(data)); + EXPECT_EQ(stringBuffer.length(), bufSize - 1); + EXPECT_TRUE(stringBuffer.nullTerminated()); + EXPECT_TRUE(stringBuffer.valid()); + ASSERT_TRUE(stringBuffer.utf8Valid()); + + Utf8Iterator utf8Iter{stringBuffer}; + auto characterOpt = utf8Iter.next(); + ASSERT_TRUE(characterOpt.has_value()); + + // MULTI_BYTE_LITERAL "test今日素敵はですねƩ®😀z" + const Array multiByteCharacters{ + FudUtf8::make(Utf8Variant{Ascii{'t'}}), + FudUtf8::make(Utf8Variant{Ascii{'e'}}), + FudUtf8::make(Utf8Variant{Ascii{'s'}}), + FudUtf8::make(Utf8Variant{Ascii{'t'}}), + FudUtf8::from(StringView{sizeof("今"), "今"}, 0), + FudUtf8::from(StringView{sizeof("日"), "日"}, 0), + FudUtf8::from(StringView{sizeof("素"), "素"}, 0), + FudUtf8::from(StringView{sizeof("敵"), "敵"}, 0), + FudUtf8::from(StringView{sizeof("は"), "は"}, 0), + FudUtf8::from(StringView{sizeof("で"), "で"}, 0), + FudUtf8::from(StringView{sizeof("す"), "す"}, 0), + FudUtf8::from(StringView{sizeof("ね"), "ね"}, 0), + FudUtf8::from(StringView{sizeof("Ʃ"), "Ʃ"}, 0), + FudUtf8::from(StringView{sizeof("®"), "®"}, 0), + FudUtf8::from(StringView{sizeof("😀"), "😀"}, 0), + FudUtf8::make(Utf8Variant{Ascii{'z'}}), + }; + + size_t idx = 0; + while (characterOpt.has_value()) { + auto character = *characterOpt; + if (character != FudUtf8{Utf8Variant{Ascii{'\0'}}}) { + EXPECT_TRUE(character.size() >= 1); + ASSERT_LT(idx, multiByteCharacters.size()); + EXPECT_EQ(character.size(), multiByteCharacters[idx].size()); + EXPECT_EQ(character, multiByteCharacters[idx]); + EXPECT_TRUE(multiByteCharacters[idx].valid()); + if (character != multiByteCharacters[idx]) { + printf("idx = %zu, %.*s\n", idx, static_cast(character.size()), character.data()); + } + idx++; + } + characterOpt = utf8Iter.next(); + } + utf8Iter.reset(); + ASSERT_TRUE(utf8Iter.next().has_value()); + + FudUtf8 invalid = FudUtf8::invalidAscii(); + ASSERT_FALSE(invalid.valid()); + ASSERT_EQ(invalid.size(), 0); + ASSERT_EQ(invalid.data(), nullptr); + ASSERT_EQ(invalid.hash(), -1); +} + +TEST(Utf8Test, Utf8IsAscii) +{ + ASSERT_FALSE(charIsAscii(invalidAscii)); + + Iota charIota{0, 1, validAsciiSize}; + + ASSERT_TRUE(allOf( + [&]() -> std::optional { + auto value = charIota(); + return value ? std::optional(static_cast(*value)) : std::nullopt; + }, + charIsAscii)); + + Iota invalidCharIota{validAsciiSize, 1, invalidAsciiSize}; + + ASSERT_FALSE(anyOf( + [&]() -> std::optional { + auto value = invalidCharIota(); + return value ? std::optional(static_cast(*value)) : std::nullopt; + }, + charIsAscii)); + + FudUtf8 unicode{FudUtf8::invalidAscii()}; + ASSERT_FALSE(utf8IsAscii(unicode)); + + charIota.set(0); + ASSERT_TRUE(allOf( + [&]() -> std::optional { + auto value = charIota(); + return value ? std::optional(FudUtf8::make(static_cast(*value))) : std::nullopt; + }, + utf8IsAscii)); + + invalidCharIota.set(invalidAsciiSize); + ASSERT_FALSE(anyOf( + [&]() -> std::optional { + auto value = invalidCharIota(); + return value ? std::optional(FudUtf8::make(static_cast(*value))) : std::nullopt; + }, + utf8IsAscii)); +} + +TEST(Utf8Test, Utf8IsAlphaNumeric) +{ + constexpr size_t numAlphaNumericChars = 26 * 2 + 10; + Array alphaNumericCharLiteral{ALPHA_NUMERIC_CHARS}; + Array alphaNumericChars{}; + copyMem(alphaNumericChars, alphaNumericCharLiteral); +#if 0 + ASSERT_TRUE(allOf(alphaNumericChars, charIsAlphanumeric)); + + auto alphaNumericSetResult{StaticSet::makeFromArray(alphaNumericChars)}; + ASSERT_TRUE(alphaNumericSetResult.isOkay()); + auto alphaNumericSet{std::move(alphaNumericSetResult.getOkay())}; + + constexpr size_t numNonAlphaNumericChars = validAsciiSize - numAlphaNumericChars; + FixedVector nonAlphaNumericChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!alphaNumericSet.isKey(idx)) { + ASSERT_TRUE(nonAlphaNumericChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonAlphaNumericChars, charIsAlphanumeric)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, charIsAlphanumeric)); + + ASSERT_TRUE(allOf( + map(alphaNumericChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alphanumeric)); + ASSERT_FALSE(anyOf( + map(nonAlphaNumericChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alphanumeric)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alphanumeric)); + + ASSERT_TRUE(allOf( + map(alphaNumericChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alphanumeric)); + ASSERT_FALSE(anyOf( + map(nonAlphaNumericChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alphanumeric)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alphanumeric)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_alphanumeric(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_alphanumeric(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_alphanumeric(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_alphanumeric(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(alphaNumericChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_alphanumeric(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonAlphaNumericChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_alphanumeric(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonAlphaNumericChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_alphanumeric(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +#endif +} + +#if 0 +TEST(Utf8Test, Utf8IsAlpha) +{ + constexpr size_t numAlphaChars = sizeof(ALPHA_CHARS) - 1; + Array alphaCharLiteral{ALPHA_CHARS}; + Array alphaChars{}; + copyMem(alphaChars, alphaCharLiteral); + + ASSERT_TRUE(allOf(alphaChars, ext_lib_char_is_alpha)); + + auto alphaSetResult{StaticSet::makeFromArray(alphaChars)}; + ASSERT_TRUE(alphaSetResult.isOkay()); + auto alphaSet{std::move(alphaSetResult.getOkay())}; + + constexpr size_t numNonAlphaChars = validAsciiSize - numAlphaChars; + FixedVector nonAlphaChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!alphaSet.isKey(idx)) { + ASSERT_TRUE(nonAlphaChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonAlphaChars, ext_lib_char_is_alpha)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_alpha)); + + ASSERT_TRUE(allOf( + map(alphaChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alpha)); + ASSERT_FALSE(anyOf( + map(nonAlphaChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alpha)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alpha)); + + ASSERT_TRUE(allOf( + map(alphaChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alpha)); + ASSERT_FALSE(anyOf( + map(nonAlphaChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alpha)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_alpha)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_alpha(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_alpha(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_alpha(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_alpha(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(alphaChars, [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_alpha(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonAlphaChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_alpha(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonAlphaChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_alpha(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} + +TEST(Utf8Test, Utf8IsLower) +{ + constexpr size_t numLowerChars = 26; + Array lowerCharLiteral{LOWERCASE_CHARS}; + Array lowerChars{}; + copyMem(lowerChars, lowerCharLiteral); + + ASSERT_TRUE(allOf(lowerChars, ext_lib_char_is_lowercase)); + + auto lowerSetResult{StaticSet::makeFromArray(lowerChars)}; + ASSERT_TRUE(lowerSetResult.isOkay()); + auto lowerSet{std::move(lowerSetResult.getOkay())}; + + constexpr size_t numNonLowerChars = validAsciiSize - numLowerChars; + FixedVector nonLowerChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!lowerSet.isKey(idx)) { + ASSERT_TRUE(nonLowerChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonLowerChars, ext_lib_char_is_lowercase)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_lowercase)); + + ASSERT_TRUE(allOf( + map(lowerChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_lowercase)); + ASSERT_FALSE(anyOf( + map(nonLowerChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_lowercase)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_lowercase)); + + ASSERT_TRUE(allOf( + map(lowerChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_lowercase)); + ASSERT_FALSE(anyOf( + map(nonLowerChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_lowercase)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_lowercase)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_lowercase(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_lowercase(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_lowercase(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_lowercase(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(lowerChars, [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_lowercase(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonLowerChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_lowercase(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonLowerChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_lowercase(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} + +TEST(Utf8Test, Utf8IsUpper) +{ + constexpr size_t numUpperChars = 26; + Array upperCharLiteral{UPPERCASE_CHARS}; + Array upperChars{}; + copyMem(upperChars, upperCharLiteral); + + ASSERT_TRUE(allOf(upperChars, ext_lib_char_is_uppercase)); + + auto upperSetResult{StaticSet::makeFromArray(upperChars)}; + ASSERT_TRUE(upperSetResult.isOkay()); + auto upperSet{std::move(upperSetResult.getOkay())}; + + constexpr size_t numNonUpperChars = validAsciiSize - numUpperChars; + FixedVector nonUpperChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!upperSet.isKey(idx)) { + ASSERT_TRUE(nonUpperChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonUpperChars, ext_lib_char_is_uppercase)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_uppercase)); + + ASSERT_TRUE(allOf( + map(upperChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_uppercase)); + ASSERT_FALSE(anyOf( + map(nonUpperChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_uppercase)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_uppercase)); + + ASSERT_TRUE(allOf( + map(upperChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_uppercase)); + ASSERT_FALSE(anyOf( + map(nonUpperChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_uppercase)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_uppercase)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_uppercase(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_uppercase(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_uppercase(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_uppercase(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(upperChars, [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_uppercase(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonUpperChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_uppercase(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonUpperChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_uppercase(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} + +TEST(Utf8Test, Utf8IsDigit) +{ + constexpr size_t numDigitChars = 10; + Array digitCharLiteral{"0123456789"}; + Array digitChars{}; + copyMem(digitChars, digitCharLiteral); + + ASSERT_TRUE(allOf(digitChars, ext_lib_char_is_digit)); + + auto digitSetResult{StaticSet::makeFromArray(digitChars)}; + ASSERT_TRUE(digitSetResult.isOkay()); + auto digitSet{std::move(digitSetResult.getOkay())}; + + constexpr size_t numNonDigitChars = validAsciiSize - numDigitChars; + FixedVector nonDigitChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!digitSet.isKey(idx)) { + ASSERT_TRUE(nonDigitChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonDigitChars, ext_lib_char_is_digit)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_digit)); + + ASSERT_TRUE(allOf( + map(digitChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_digit)); + ASSERT_FALSE(anyOf( + map(nonDigitChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_digit)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_digit)); + + ASSERT_TRUE(allOf( + map(digitChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_digit)); + ASSERT_FALSE(anyOf( + map(nonDigitChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_digit)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_digit)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_digit(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_digit(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_digit(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_digit(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(digitChars, [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_digit(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonDigitChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_digit(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonDigitChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_digit(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} + +TEST(Utf8Test, Utf8IsHexDigit) +{ + constexpr size_t numHexDigitChars = 6 * 2 + 10; + Array hexDigitCharLiteral{"abcdefABCDEF0123456789"}; + Array hexDigitChars{}; + copyMem(hexDigitChars, hexDigitCharLiteral); + + ASSERT_TRUE(allOf(hexDigitChars, ext_lib_char_is_hex_digit)); + + auto hexDigitSetResult{StaticSet::makeFromArray(hexDigitChars)}; + ASSERT_TRUE(hexDigitSetResult.isOkay()); + auto hexDigitSet{std::move(hexDigitSetResult.getOkay())}; + + constexpr size_t numNonHexDigitChars = validAsciiSize - numHexDigitChars; + FixedVector nonHexDigitChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!hexDigitSet.isKey(idx)) { + ASSERT_TRUE(nonHexDigitChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonHexDigitChars, ext_lib_char_is_hex_digit)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_hex_digit)); + + ASSERT_TRUE(allOf( + map(hexDigitChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_hex_digit)); + ASSERT_FALSE(anyOf( + map(nonHexDigitChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_hex_digit)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_hex_digit)); + + ASSERT_TRUE(allOf( + map(hexDigitChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_hex_digit)); + ASSERT_FALSE(anyOf( + map(nonHexDigitChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_hex_digit)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_hex_digit)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_hex_digit(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_hex_digit(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_hex_digit(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_hex_digit(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(hexDigitChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_hex_digit(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonHexDigitChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_hex_digit(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonHexDigitChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_hex_digit(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} + +TEST(Utf8Test, Utf8IsControl) +{ + auto controlChars = generateIndexArray([](int idx) { return static_cast(idx); }); + constexpr const char deleteChar = 0x7F; + controlChars.back() = deleteChar; + + ASSERT_TRUE(allOf(controlChars, ext_lib_char_is_control)); + + constexpr size_t numNonControlChars = 256 - numControlChars; + auto nonControlChars = generateIndexArray([](int idx) { + return static_cast(idx + printableCharOffset); + }); + ASSERT_FALSE(anyOf(nonControlChars, ext_lib_char_is_control)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_control)); + + ASSERT_TRUE(allOf( + map(controlChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_control)); + ASSERT_FALSE(anyOf( + map(nonControlChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_control)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_control)); + + ASSERT_TRUE(allOf( + map(controlChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_control)); + ASSERT_FALSE(anyOf( + map(nonControlChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_control)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_control)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_control(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_control(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_control(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_control(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(controlChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_control(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonControlChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_control(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonControlChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_control(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} + +TEST(Utf8Test, Utf8IsGraphical) +{ + constexpr size_t numGraphicalChars = sizeof(GRAPHICAL_CHARS) - 1; + Array graphicalCharLiteral{GRAPHICAL_CHARS}; + Array graphicalChars{}; + copyMem(graphicalChars, graphicalCharLiteral); + + ASSERT_TRUE(allOf(graphicalChars, ext_lib_char_is_graphical)); + + auto graphicalSetResult{StaticSet::makeFromArray(graphicalChars)}; + ASSERT_TRUE(graphicalSetResult.isOkay()); + auto graphicalSet{std::move(graphicalSetResult.getOkay())}; + + constexpr size_t numNonGraphicalChars = validAsciiSize - numGraphicalChars; + FixedVector nonGraphicalChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!graphicalSet.isKey(idx)) { + ASSERT_TRUE(nonGraphicalChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonGraphicalChars, ext_lib_char_is_graphical)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_graphical)); + + ASSERT_TRUE(allOf( + map(graphicalChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_graphical)); + ASSERT_FALSE(anyOf( + map(nonGraphicalChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_graphical)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_graphical)); + + ASSERT_TRUE(allOf( + map(graphicalChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_graphical)); + ASSERT_FALSE(anyOf( + map(nonGraphicalChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_graphical)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_graphical)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_graphical(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_graphical(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_graphical(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_graphical(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(graphicalChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_graphical(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonGraphicalChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_graphical(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonGraphicalChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_graphical(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} + +TEST(Utf8Test, Utf8IsSpace) +{ + constexpr size_t numSpaceChars = sizeof(" \t\v\r\n") - 1; + Array spaceCharLiteral{" \t\v\r\n"}; + Array spaceChars{}; + copyMem(spaceChars, spaceCharLiteral); + + ASSERT_TRUE(allOf(spaceChars, ext_lib_char_is_space)); + + auto spaceSetResult{StaticSet::makeFromArray(spaceChars)}; + ASSERT_TRUE(spaceSetResult.isOkay()); + auto spaceSet{std::move(spaceSetResult.getOkay())}; + + constexpr size_t numNonSpaceChars = validAsciiSize - numSpaceChars; + FixedVector nonSpaceChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!spaceSet.isKey(idx)) { + ASSERT_TRUE(nonSpaceChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonSpaceChars, ext_lib_char_is_space)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_space)); + + ASSERT_TRUE(allOf( + map(spaceChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_space)); + ASSERT_FALSE(anyOf( + map(nonSpaceChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_space)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_space)); + + ASSERT_TRUE(allOf( + map(spaceChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_space)); + ASSERT_FALSE(anyOf( + map(nonSpaceChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_space)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_space)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_space(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_space(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_space(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_space(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(spaceChars, [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_space(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonSpaceChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_space(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonSpaceChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_space(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} + +TEST(Utf8Test, Utf8IsBlank) +{ + constexpr size_t numBlankChars = sizeof(" \t") - 1; + Array blankCharLiteral{" \t"}; + Array blankChars{}; + copyMem(blankChars, blankCharLiteral); + + ASSERT_TRUE(allOf(blankChars, ext_lib_char_is_blank)); + + auto blankSetResult{StaticSet::makeFromArray(blankChars)}; + ASSERT_TRUE(blankSetResult.isOkay()); + auto blankSet{std::move(blankSetResult.getOkay())}; + + constexpr size_t numNonBlankChars = validAsciiSize - numBlankChars; + FixedVector nonBlankChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!blankSet.isKey(idx)) { + ASSERT_TRUE(nonBlankChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonBlankChars, ext_lib_char_is_blank)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_blank)); + + ASSERT_TRUE(allOf( + map(blankChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_blank)); + ASSERT_FALSE(anyOf( + map(nonBlankChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_blank)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_blank)); + + ASSERT_TRUE(allOf( + map(blankChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_blank)); + ASSERT_FALSE(anyOf( + map(nonBlankChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_blank)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_blank)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_blank(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_blank(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_blank(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_blank(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(blankChars, [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_blank(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonBlankChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_blank(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonBlankChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_blank(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} + +TEST(Utf8Test, Utf8IsPrintable) +{ + constexpr size_t numPrintableChars = validAsciiSize - numControlChars; + auto printableChars = generateIndexArray([](int idx) { + return static_cast(idx + printableCharOffset); + }); + + ASSERT_TRUE(allOf(printableChars, ext_lib_char_is_printable)); + + auto printableSetResult{StaticSet::makeFromArray(printableChars)}; + ASSERT_TRUE(printableSetResult.isOkay()); + auto printableSet{std::move(printableSetResult.getOkay())}; + + constexpr size_t numNonPrintableChars = validAsciiSize - numPrintableChars; + FixedVector nonPrintableChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!printableSet.isKey(idx)) { + ASSERT_TRUE(nonPrintableChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonPrintableChars, ext_lib_char_is_printable)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_printable)); + + ASSERT_TRUE(allOf( + map(printableChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_printable)); + ASSERT_FALSE(anyOf( + map(nonPrintableChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_printable)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_printable)); + + ASSERT_TRUE(allOf( + map(printableChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_printable)); + ASSERT_FALSE(anyOf( + map(nonPrintableChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_printable)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_printable)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_printable(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_printable(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_printable(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_printable(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(printableChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_printable(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonPrintableChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_printable(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonPrintableChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_printable(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} + +TEST(Utf8Test, Utf8IsPunctuation) +{ + constexpr size_t numPunctuationChars = sizeof(PUNCTUATION_CHARS) - 1; + Array punctuationCharLiteral{PUNCTUATION_CHARS}; + Array punctuationChars{}; + copyMem(punctuationChars, punctuationCharLiteral); + + ASSERT_TRUE(allOf(punctuationChars, ext_lib_char_is_punctuation)); + + auto punctuationSetResult{StaticSet::makeFromArray(punctuationChars)}; + ASSERT_TRUE(punctuationSetResult.isOkay()); + auto punctuationSet{std::move(punctuationSetResult.getOkay())}; + + constexpr size_t numNonPunctuationChars = validAsciiSize - numPunctuationChars; + FixedVector nonPunctuationChars{}; + for (char idx = 0; idx < INT8_MAX; ++idx) { + if (!punctuationSet.isKey(idx)) { + ASSERT_TRUE(nonPunctuationChars.pushBack(idx)); + } + } + ASSERT_FALSE(anyOf(nonPunctuationChars, ext_lib_char_is_punctuation)); + + auto invalidAsciiChars = generateInvalidAsciiChars(); + ASSERT_FALSE(anyOf(invalidAsciiChars, ext_lib_char_is_punctuation)); + + ASSERT_TRUE(allOf( + map(punctuationChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_punctuation)); + ASSERT_FALSE(anyOf( + map(nonPunctuationChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_punctuation)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_punctuation)); + + ASSERT_TRUE(allOf( + map(punctuationChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_punctuation)); + ASSERT_FALSE(anyOf( + map(nonPunctuationChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_punctuation)); + ASSERT_FALSE(anyOf( + map(invalidAsciiChars, [](auto input) { return ExtUtf8Char4{static_cast(input)}; }), + ext_lib_char4_is_punctuation)); + + Utf8 utf8{invalidAscii}; + bool isAscii = false; + ASSERT_EQ(ext_lib_utf8_is_punctuation(nullptr, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_punctuation(&utf8, nullptr), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_punctuation(nullptr, &isAscii), ExtNullPointer); + ASSERT_EQ(ext_lib_utf8_is_punctuation(&utf8, &isAscii), ExtInvalidInput); + ASSERT_FALSE(isAscii); + + ASSERT_TRUE(allOf( + map(punctuationChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_punctuation(&utf8Letter, &predicate); + return isPredicateStatus == ExtSuccess && predicate; + })); + ASSERT_FALSE(anyOf( + map(nonPunctuationChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_punctuation(&utf8Letter, &predicate); + return isPredicateStatus != ExtSuccess || predicate; + })); + ASSERT_FALSE(allOf( + map(nonPunctuationChars, + [](char letter) { return FudUtf8::makeUtf8(ExtUtf8Char4{static_cast(letter)}); }), + [](auto& utf8Letter) { + bool predicate = false; + auto isPredicateStatus = ext_lib_utf8_is_punctuation(&utf8Letter, &predicate); + return isPredicateStatus == ExtInvalidInput && !predicate; + })); +} +#endif + +} // namespace fud -- cgit v1.2.3