From 6a27a2a4032e88fa9154ef0f0741edc584f7a701 Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Sun, 20 Oct 2024 10:48:19 -0500 Subject: Lots of work. --- include/fud_utf8.hpp | 130 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 94 insertions(+), 36 deletions(-) (limited to 'include/fud_utf8.hpp') diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp index 539e0f4..3b1a6b7 100644 --- a/include/fud_utf8.hpp +++ b/include/fud_utf8.hpp @@ -19,8 +19,8 @@ #define FUD_UTF8_HPP #include "fud_array.hpp" -#include "fud_status.hpp" #include "fud_unique_array.hpp" +#include "fud_c_string.hpp" #include #include @@ -28,6 +28,8 @@ namespace fud { + + using utf8 = unsigned char; class String; @@ -110,6 +112,18 @@ struct Utf82Byte { constexpr Utf82Byte(utf8 first, utf8 second) noexcept : characters{{first, second}} { } + + __attribute__((nonnull)) + constexpr Utf82Byte(const char* letterStr) noexcept : characters{} + { + auto length = cStringLength(letterStr, 2); + if (length < 2) { + return; + } + characters[0] = static_cast(letterStr[0]); + characters[1] = static_cast(letterStr[1]); + } + Array characters; static constexpr size_t size() noexcept { @@ -145,6 +159,18 @@ struct Utf83Byte { { } + __attribute__((nonnull)) + constexpr Utf83Byte(const char* letterStr) noexcept : characters{} + { + auto length = cStringLength(letterStr, 3); + if (length < 3) { + return; + } + characters[0] = static_cast(letterStr[0]); + characters[1] = static_cast(letterStr[1]); + characters[2] = static_cast(letterStr[2]); + } + Array characters; static constexpr size_t size() noexcept @@ -187,6 +213,19 @@ struct Utf84Byte { { } + __attribute__((nonnull)) + constexpr Utf84Byte(const char* letterStr) noexcept : characters{} + { + auto length = cStringLength(letterStr, 4); + if (length < 4) { + return; + } + characters[0] = static_cast(letterStr[0]); + characters[1] = static_cast(letterStr[1]); + characters[2] = static_cast(letterStr[2]); + characters[3] = static_cast(letterStr[3]); + } + Array characters; static constexpr size_t size() noexcept @@ -250,11 +289,12 @@ struct FudUtf8 { Utf8Variant m_variant{Utf8Variant{Ascii{}}}; static constexpr Ascii invalidAsciiCode{Ascii{0xFF}}; - static FudUtf8 fromString(const String& fudString, size_t index) noexcept; - static FudUtf8 fromStringView(StringView view, size_t index) noexcept; - // static FudUtf8 fromStringView(const StringView& view, size_t index) noexcept; - static constexpr FudUtf8 makeUtf8(const Array& data) + static FudUtf8 from(const String& fudString, size_t index) noexcept; + + static FudUtf8 from(StringView view, size_t index) noexcept; + + static constexpr FudUtf8 make(const Array& data) { FudUtf8 unicode{}; if (Ascii::valid(data[0])) { @@ -271,7 +311,12 @@ struct FudUtf8 { return unicode; } - static constexpr FudUtf8 makeUtf8(const Ascii& utf8Char) + static constexpr FudUtf8 make(utf8 utf8Char) + { + return make(Ascii{utf8Char}); + } + + static constexpr FudUtf8 make(Ascii utf8Char) { FudUtf8 unicode{{Utf8Variant{Ascii{}}}}; if (utf8Char.valid()) { @@ -282,6 +327,15 @@ struct FudUtf8 { return unicode; } + static constexpr FudUtf8 make(Utf8Variant utf8Variant) { + FudUtf8 unicode{}; + unicode.m_variant = utf8Variant; + if (!std::visit([](auto arg) { return arg.valid(); }, utf8Variant)) { + unicode.m_variant = invalidAsciiCode; + } + return unicode; + } + static constexpr FudUtf8 invalidAscii() { FudUtf8 character{}; @@ -460,89 +514,93 @@ struct FudUtf8 { }; /** \brief Checks if a character is ascii. */ -bool char_is_ascii(char character); +[[nodiscard]] bool charIsAscii(char character); -FudStatus utf8_is_ascii(FudUtf8& character, bool& isAscii); +[[nodiscard]] bool utf8IsAscii(FudUtf8 character); /** \brief Checks if a character is alphanumeric. */ -bool char_is_alphanumeric(char character); +[[nodiscard]] bool charIsAlphanumeric(char character); /** \brief Checks if a character is alphanumeric. */ -FudStatus utf8_is_alphanumeric(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsAlphanumeric(FudUtf8 character); /** \brief Checks if a character is alphabetic. */ -bool char_is_alpha(char character); +[[nodiscard]] bool charIsAlpha(char character); /** \brief Checks if a character is alphabetic. */ -FudStatus utf8_is_alpha(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsAlpha(FudUtf8 character); /** \brief Checks if a character is lowercase. */ -bool char_is_lowercase(char character); +[[nodiscard]] bool charIsLowercase(char character); /** \brief Checks if a character is lowercase. */ -FudStatus utf8_is_lowercase(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsLowercase(FudUtf8 character); /** \brief Checks if a character is an uppercase character. */ -bool char_is_uppercase(char character); +[[nodiscard]] bool charIsUppercase(char character); /** \brief Checks if a character is uppercase. */ -FudStatus utf8_is_uppercase(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsUppercase(FudUtf8 character); /** \brief Checks if a character is a digit. */ -bool char_is_digit(char character); +[[nodiscard]] bool charIsDigit(char character); /** \brief Checks if a character is a digit. */ -FudStatus utf8_is_digit(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsDigit(FudUtf8 character); /** \brief Checks if a character is a hexadecimal character. */ -bool char_is_hex_digit(char character); +[[nodiscard]] bool charIsHexDigit(char character); /** \brief Checks if a character is a hexadecimal digit. */ -FudStatus utf8_is_hex_digit(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsHexDigit(FudUtf8 character); /** \brief Checks if a character is a control character. */ -bool char_is_control(char character); +[[nodiscard]] bool charIsControl(char character); /** \brief Checks if a character is a control character. */ -FudStatus utf8_is_control(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsControl(FudUtf8 character); /** \brief Checks if a character is a graphical character. */ -bool char_is_graphical(char character); +[[nodiscard]] bool charIsGraphical(char character); /** \brief Checks if a character is a graphical character. */ -FudStatus utf8_is_graphical(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsGraphical(FudUtf8 character); /** \brief Checks if a character is a space character. */ -bool char_is_space(char character); +[[nodiscard]] bool charIsSpace(char character); /** \brief Checks if a character is a space character. */ -FudStatus utf8_is_space(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsSpace(FudUtf8 character); /** \brief Checks if a character is a blank character. */ -bool char_is_blank(char character); +[[nodiscard]] bool charIsBlank(char character); /** \brief Checks if a character is a blank character. */ -FudStatus utf8_is_blank(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsBlank(FudUtf8 character); /** \brief Checks if a character is a printable character. */ -bool char_is_printable(char character); +[[nodiscard]] bool charIsPrintable(char character); /** \brief Checks if a character is a printable character. */ -FudStatus utf8_is_printable(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsPrintable(FudUtf8 character); /** \brief Checks if a character is a punctuation character. */ -bool char_is_punctuation(char character); +[[nodiscard]] bool charIsPunctuation(char character); /** \brief Checks if a character is a punctuation character. */ -FudStatus utf8_is_punctuation(FudUtf8* character, bool* pred); +[[nodiscard]] bool utf8IsPunctuation(FudUtf8 character); -uint8_t char_to_lower(uint8_t character); +/** \brief Converts character to lowercase if valid. */ +uint8_t charToLower(uint8_t character); -FudUtf8* utf8_to_lower(FudUtf8* character); +/** \brief Converts character to lowercase if valid. */ +FudUtf8 utf8ToLower(FudUtf8 character); -uint8_t char_to_upper(uint8_t character); +/** \brief Converts character to uppercase if valid. */ +uint8_t charToUpper(uint8_t character); -FudUtf8* utf8_to_upper(FudUtf8* character); +/** \brief Converts character to uppercase if valid. */ +FudUtf8 utf8ToUpper(FudUtf8 character); } // namespace fud -- cgit v1.2.3