From 0b860bb5dd6d2007db605291d239a6a9d41f57d1 Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Mon, 23 Sep 2024 07:36:16 -0500 Subject: Installable library. --- include/fud_utf8.hpp | 86 +++++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 44 deletions(-) (limited to 'include/fud_utf8.hpp') diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp index da1a5fe..99766d4 100644 --- a/include/fud_utf8.hpp +++ b/include/fud_utf8.hpp @@ -31,43 +31,44 @@ namespace fud { using utf8 = unsigned char; -struct StringView; +class String; +class StringView; -constexpr uint8_t ASCII_MASK = 0x7F; +constexpr utf8 ASCII_MASK = 0x7F; -constexpr uint8_t UTF8_MB_PATTERN_MASK = 0xC0; -constexpr uint8_t UTF8_MB_PATTERN = 0x80; -constexpr uint8_t UTF8_MB_MASK = static_cast(~UTF8_MB_PATTERN_MASK); +constexpr utf8 UTF8_MB_PATTERN_MASK = 0xC0; +constexpr utf8 UTF8_MB_PATTERN = 0x80; +constexpr utf8 UTF8_MB_MASK = static_cast(~UTF8_MB_PATTERN_MASK); -constexpr uint8_t UTF8_2B_PATTERN_MASK = 0xE0; -constexpr uint8_t UTF8_2B_PATTERN = 0xC0; -constexpr uint8_t UTF8_2B_MASK = static_cast(~UTF8_2B_PATTERN_MASK); +constexpr utf8 UTF8_2B_PATTERN_MASK = 0xE0; +constexpr utf8 UTF8_2B_PATTERN = 0xC0; +constexpr utf8 UTF8_2B_MASK = static_cast(~UTF8_2B_PATTERN_MASK); -constexpr uint8_t UTF8_3B_PATTERN_MASK = 0xF0; -constexpr uint8_t UTF8_3B_PATTERN = 0xE0; -constexpr uint8_t UTF8_3B_MASK = static_cast(~UTF8_3B_PATTERN_MASK); +constexpr utf8 UTF8_3B_PATTERN_MASK = 0xF0; +constexpr utf8 UTF8_3B_PATTERN = 0xE0; +constexpr utf8 UTF8_3B_MASK = static_cast(~UTF8_3B_PATTERN_MASK); -constexpr uint8_t UTF8_4B_PATTERN_MASK = 0xF8; -constexpr uint8_t UTF8_4B_PATTERN = 0xF0; -constexpr uint8_t UTF8_4B_MASK = static_cast(~UTF8_4B_PATTERN_MASK); +constexpr utf8 UTF8_4B_PATTERN_MASK = 0xF8; +constexpr utf8 UTF8_4B_PATTERN = 0xF0; +constexpr utf8 UTF8_4B_MASK = static_cast(~UTF8_4B_PATTERN_MASK); namespace privateImpl { -constexpr bool validUtf8MB(uint8_t code) noexcept +constexpr bool validUtf8MB(utf8 code) noexcept { return (code & UTF8_MB_PATTERN_MASK) == UTF8_MB_PATTERN; } } // namespace privateImpl struct Ascii { - Array characters; + Array characters; constexpr Ascii() noexcept = default; - constexpr explicit Ascii(uint8_t chr) noexcept : characters{{chr}} + constexpr explicit Ascii(utf8 chr) noexcept : characters{{chr}} { } - [[nodiscard]] constexpr uint8_t character() const noexcept + [[nodiscard]] constexpr utf8 character() const noexcept { return characters[0]; } @@ -87,9 +88,9 @@ struct Ascii { return valid(characters[0]); } - static constexpr bool valid(uint8_t character) noexcept + static constexpr bool valid(utf8 character) noexcept { - return static_cast(character & ~ASCII_MASK) == 0; + return static_cast(character & ~ASCII_MASK) == 0; } auto operator<=>(const Ascii& other) const noexcept = default; @@ -107,10 +108,10 @@ static_assert(std::is_standard_layout_v); */ struct Utf82Byte { - constexpr Utf82Byte(uint8_t first, uint8_t second) noexcept : characters{{first, second}} + constexpr Utf82Byte(utf8 first, utf8 second) noexcept : characters{{first, second}} { } - Array characters; + Array characters; static constexpr size_t size() noexcept { return 2; @@ -121,18 +122,18 @@ struct Utf82Byte { return valid(first(), second()); } - static constexpr bool valid(uint8_t first, uint8_t second) noexcept + static constexpr bool valid(utf8 first, utf8 second) noexcept { using privateImpl::validUtf8MB; return ((first & UTF8_2B_PATTERN_MASK) == UTF8_2B_PATTERN) && validUtf8MB(second); } - [[nodiscard]] constexpr uint8_t first() const noexcept + [[nodiscard]] constexpr utf8 first() const noexcept { return characters[0]; } - [[nodiscard]] constexpr uint8_t second() const noexcept + [[nodiscard]] constexpr utf8 second() const noexcept { return characters[1]; } @@ -141,11 +142,11 @@ struct Utf82Byte { }; struct Utf83Byte { - constexpr Utf83Byte(uint8_t first, uint8_t second, uint8_t third) noexcept : characters{{first, second, third}} + constexpr Utf83Byte(utf8 first, utf8 second, utf8 third) noexcept : characters{{first, second, third}} { } - Array characters; + Array characters; static constexpr size_t size() noexcept { @@ -157,23 +158,23 @@ struct Utf83Byte { return valid(first(), second(), third()); } - static constexpr bool valid(uint8_t first, uint8_t second, uint8_t third) noexcept + static constexpr bool valid(utf8 first, utf8 second, utf8 third) noexcept { using privateImpl::validUtf8MB; return ((first & UTF8_3B_PATTERN_MASK) == UTF8_3B_PATTERN) && validUtf8MB(second) && validUtf8MB(third); } - [[nodiscard]] constexpr uint8_t first() const noexcept + [[nodiscard]] constexpr utf8 first() const noexcept { return characters[0]; } - [[nodiscard]] constexpr uint8_t second() const noexcept + [[nodiscard]] constexpr utf8 second() const noexcept { return characters[1]; } - [[nodiscard]] constexpr uint8_t third() const noexcept + [[nodiscard]] constexpr utf8 third() const noexcept { return characters[2]; } @@ -182,12 +183,12 @@ struct Utf83Byte { }; struct Utf84Byte { - constexpr Utf84Byte(uint8_t first, uint8_t second, uint8_t third, uint8_t fourth) noexcept : + constexpr Utf84Byte(utf8 first, utf8 second, utf8 third, utf8 fourth) noexcept : characters{{first, second, third, fourth}} { } - Array characters; + Array characters; static constexpr size_t size() noexcept { @@ -199,7 +200,7 @@ struct Utf84Byte { return valid(first(), second(), third(), fourth()); } - static constexpr bool valid(uint8_t first, uint8_t second, uint8_t third, uint8_t fourth) noexcept + static constexpr bool valid(utf8 first, utf8 second, utf8 third, utf8 fourth) noexcept { using privateImpl::validUtf8MB; if ((first & UTF8_4B_PATTERN_MASK) != UTF8_4B_PATTERN) { @@ -208,22 +209,22 @@ struct Utf84Byte { return validUtf8MB(second) && validUtf8MB(third) && validUtf8MB(fourth); } - [[nodiscard]] constexpr uint8_t first() const noexcept + [[nodiscard]] constexpr utf8 first() const noexcept { return characters[0]; } - [[nodiscard]] constexpr uint8_t second() const noexcept + [[nodiscard]] constexpr utf8 second() const noexcept { return characters[1]; } - [[nodiscard]] constexpr uint8_t third() const noexcept + [[nodiscard]] constexpr utf8 third() const noexcept { return characters[2]; } - [[nodiscard]] constexpr uint8_t fourth() const noexcept + [[nodiscard]] constexpr utf8 fourth() const noexcept { return characters[3]; } @@ -246,9 +247,6 @@ static_assert(Utf8TypeSet.m_values[1] == static_cast(Utf8Type::Utf82Byt static_assert(Utf8TypeSet.m_values[2] == static_cast(Utf8Type::Utf83Byte)); static_assert(Utf8TypeSet.m_values[3] == static_cast(Utf8Type::Utf84Byte)); -class String; -class StringView; - struct FudUtf8 { Utf8Variant m_variant{Utf8Variant{Ascii{}}}; @@ -287,9 +285,9 @@ struct FudUtf8 { static constexpr FudUtf8 invalidAscii() { - FudUtf8 utf8{}; - utf8.m_variant = Ascii{invalidAsciiCode}; - return utf8; + FudUtf8 character{}; + character.m_variant = Ascii{invalidAsciiCode}; + return character; } [[nodiscard]] constexpr Utf8Type getType() const -- cgit v1.2.3