summary | refs | log | tree | commit | diff
path: root/include/fud_utf8.hpp
diff options
context:
space:
mode:
author Dominick Allen <djallen@librehumanitas.org> 2024-09-23 07:36:16 -0500
committer Dominick Allen <djallen@librehumanitas.org> 2024-09-23 07:36:16 -0500
commit 0b860bb5dd6d2007db605291d239a6a9d41f57d1 (patch)
tree fab140e03a3665236503d1405de9d33ba58ccc4a /include/fud_utf8.hpp
parent 7da829d48f9059c83ab9cada2c850621e8bbd3f3 (diff)
Installable library.
Diffstat (limited to 'include/fud_utf8.hpp')
-rw-r--r-- include/fud_utf8.hpp 86
1 files changed, 42 insertions, 44 deletions
diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp
index da1a5fe..99766d4 100644
--- a/include/fud_utf8.hpp
+++ b/include/fud_utf8.hpp
@@ -31,43 +31,44 @@ namespace fud {
using utf8 = unsigned char;
-struct StringView;
+class String;
+class StringView;
-constexpr uint8_t ASCII_MASK = 0x7F;
+constexpr utf8 ASCII_MASK = 0x7F;
-constexpr uint8_t UTF8_MB_PATTERN_MASK = 0xC0;
-constexpr uint8_t UTF8_MB_PATTERN = 0x80;
-constexpr uint8_t UTF8_MB_MASK = static_cast<uint8_t>(~UTF8_MB_PATTERN_MASK);
+constexpr utf8 UTF8_MB_PATTERN_MASK = 0xC0;
+constexpr utf8 UTF8_MB_PATTERN = 0x80;
+constexpr utf8 UTF8_MB_MASK = static_cast<utf8>(~UTF8_MB_PATTERN_MASK);
-constexpr uint8_t UTF8_2B_PATTERN_MASK = 0xE0;
-constexpr uint8_t UTF8_2B_PATTERN = 0xC0;
-constexpr uint8_t UTF8_2B_MASK = static_cast<uint8_t>(~UTF8_2B_PATTERN_MASK);
+constexpr utf8 UTF8_2B_PATTERN_MASK = 0xE0;
+constexpr utf8 UTF8_2B_PATTERN = 0xC0;
+constexpr utf8 UTF8_2B_MASK = static_cast<utf8>(~UTF8_2B_PATTERN_MASK);
-constexpr uint8_t UTF8_3B_PATTERN_MASK = 0xF0;
-constexpr uint8_t UTF8_3B_PATTERN = 0xE0;
-constexpr uint8_t UTF8_3B_MASK = static_cast<uint8_t>(~UTF8_3B_PATTERN_MASK);
+constexpr utf8 UTF8_3B_PATTERN_MASK = 0xF0;
+constexpr utf8 UTF8_3B_PATTERN = 0xE0;
+constexpr utf8 UTF8_3B_MASK = static_cast<utf8>(~UTF8_3B_PATTERN_MASK);
-constexpr uint8_t UTF8_4B_PATTERN_MASK = 0xF8;
-constexpr uint8_t UTF8_4B_PATTERN = 0xF0;
-constexpr uint8_t UTF8_4B_MASK = static_cast<uint8_t>(~UTF8_4B_PATTERN_MASK);
+constexpr utf8 UTF8_4B_PATTERN_MASK = 0xF8;
+constexpr utf8 UTF8_4B_PATTERN = 0xF0;
+constexpr utf8 UTF8_4B_MASK = static_cast<utf8>(~UTF8_4B_PATTERN_MASK);
namespace privateImpl {
-constexpr bool validUtf8MB(uint8_t code) noexcept
+constexpr bool validUtf8MB(utf8 code) noexcept
{
return (code & UTF8_MB_PATTERN_MASK) == UTF8_MB_PATTERN;
}
} // namespace privateImpl
struct Ascii {
- Array<uint8_t, 1> characters;
+ Array<utf8, 1> characters;
constexpr Ascii() noexcept = default;
- constexpr explicit Ascii(uint8_t chr) noexcept : characters{{chr}}
+ constexpr explicit Ascii(utf8 chr) noexcept : characters{{chr}}
{
}
- [[nodiscard]] constexpr uint8_t character() const noexcept
+ [[nodiscard]] constexpr utf8 character() const noexcept
{
return characters[0];
}
@@ -87,9 +88,9 @@ struct Ascii {
return valid(characters[0]);
}
- static constexpr bool valid(uint8_t character) noexcept
+ static constexpr bool valid(utf8 character) noexcept
{
- return static_cast<uint8_t>(character & ~ASCII_MASK) == 0;
+ return static_cast<utf8>(character & ~ASCII_MASK) == 0;
}
auto operator<=>(const Ascii& other) const noexcept = default;
@@ -107,10 +108,10 @@ static_assert(std::is_standard_layout_v<Ascii>);
*/
struct Utf82Byte {
- constexpr Utf82Byte(uint8_t first, uint8_t second) noexcept : characters{{first, second}}
+ constexpr Utf82Byte(utf8 first, utf8 second) noexcept : characters{{first, second}}
{
}
- Array<uint8_t, 2> characters;
+ Array<utf8, 2> characters;
static constexpr size_t size() noexcept
{
return 2;
@@ -121,18 +122,18 @@ struct Utf82Byte {
return valid(first(), second());
}
- static constexpr bool valid(uint8_t first, uint8_t second) noexcept
+ static constexpr bool valid(utf8 first, utf8 second) noexcept
{
using privateImpl::validUtf8MB;
return ((first & UTF8_2B_PATTERN_MASK) == UTF8_2B_PATTERN) && validUtf8MB(second);
}
- [[nodiscard]] constexpr uint8_t first() const noexcept
+ [[nodiscard]] constexpr utf8 first() const noexcept
{
return characters[0];
}
- [[nodiscard]] constexpr uint8_t second() const noexcept
+ [[nodiscard]] constexpr utf8 second() const noexcept
{
return characters[1];
}
@@ -141,11 +142,11 @@ struct Utf82Byte {
};
struct Utf83Byte {
- constexpr Utf83Byte(uint8_t first, uint8_t second, uint8_t third) noexcept : characters{{first, second, third}}
+ constexpr Utf83Byte(utf8 first, utf8 second, utf8 third) noexcept : characters{{first, second, third}}
{
}
- Array<uint8_t, 3> characters;
+ Array<utf8, 3> characters;
static constexpr size_t size() noexcept
{
@@ -157,23 +158,23 @@ struct Utf83Byte {
return valid(first(), second(), third());
}
- static constexpr bool valid(uint8_t first, uint8_t second, uint8_t third) noexcept
+ static constexpr bool valid(utf8 first, utf8 second, utf8 third) noexcept
{
using privateImpl::validUtf8MB;
return ((first & UTF8_3B_PATTERN_MASK) == UTF8_3B_PATTERN) && validUtf8MB(second) && validUtf8MB(third);
}
- [[nodiscard]] constexpr uint8_t first() const noexcept
+ [[nodiscard]] constexpr utf8 first() const noexcept
{
return characters[0];
}
- [[nodiscard]] constexpr uint8_t second() const noexcept
+ [[nodiscard]] constexpr utf8 second() const noexcept
{
return characters[1];
}
- [[nodiscard]] constexpr uint8_t third() const noexcept
+ [[nodiscard]] constexpr utf8 third() const noexcept
{
return characters[2];
}
@@ -182,12 +183,12 @@ struct Utf83Byte {
};
struct Utf84Byte {
- constexpr Utf84Byte(uint8_t first, uint8_t second, uint8_t third, uint8_t fourth) noexcept :
+ constexpr Utf84Byte(utf8 first, utf8 second, utf8 third, utf8 fourth) noexcept :
characters{{first, second, third, fourth}}
{
}
- Array<uint8_t, 4> characters;
+ Array<utf8, 4> characters;
static constexpr size_t size() noexcept
{
@@ -199,7 +200,7 @@ struct Utf84Byte {
return valid(first(), second(), third(), fourth());
}
- static constexpr bool valid(uint8_t first, uint8_t second, uint8_t third, uint8_t fourth) noexcept
+ static constexpr bool valid(utf8 first, utf8 second, utf8 third, utf8 fourth) noexcept
{
using privateImpl::validUtf8MB;
if ((first & UTF8_4B_PATTERN_MASK) != UTF8_4B_PATTERN) {
@@ -208,22 +209,22 @@ struct Utf84Byte {
return validUtf8MB(second) && validUtf8MB(third) && validUtf8MB(fourth);
}
- [[nodiscard]] constexpr uint8_t first() const noexcept
+ [[nodiscard]] constexpr utf8 first() const noexcept
{
return characters[0];
}
- [[nodiscard]] constexpr uint8_t second() const noexcept
+ [[nodiscard]] constexpr utf8 second() const noexcept
{
return characters[1];
}
- [[nodiscard]] constexpr uint8_t third() const noexcept
+ [[nodiscard]] constexpr utf8 third() const noexcept
{
return characters[2];
}
- [[nodiscard]] constexpr uint8_t fourth() const noexcept
+ [[nodiscard]] constexpr utf8 fourth() const noexcept
{
return characters[3];
}
@@ -246,9 +247,6 @@ static_assert(Utf8TypeSet.m_values[1] == static_cast<uint8_t>(Utf8Type::Utf82Byt
static_assert(Utf8TypeSet.m_values[2] == static_cast<uint8_t>(Utf8Type::Utf83Byte));
static_assert(Utf8TypeSet.m_values[3] == static_cast<uint8_t>(Utf8Type::Utf84Byte));
-class String;
-class StringView;
-
struct FudUtf8 {
Utf8Variant m_variant{Utf8Variant{Ascii{}}};
@@ -287,9 +285,9 @@ struct FudUtf8 {
static constexpr FudUtf8 invalidAscii()
{
- FudUtf8 utf8{};
- utf8.m_variant = Ascii{invalidAsciiCode};
- return utf8;
+ FudUtf8 character{};
+ character.m_variant = Ascii{invalidAsciiCode};
+ return character;
}
[[nodiscard]] constexpr Utf8Type getType() const