diff options
author | Dominick Allen <djallen@librehumanitas.org> | 2024-10-20 10:48:19 -0500 |
---|---|---|
committer | Dominick Allen <djallen@librehumanitas.org> | 2024-10-20 10:48:19 -0500 |
commit | 6a27a2a4032e88fa9154ef0f0741edc584f7a701 (patch) | |
tree | 92ca58cbcdd2c1d11b7d69deb0d4925d0f979a3f /source/fud_utf8.cpp | |
parent | e94db4695e236b42ae1be44b2605075161d5144f (diff) |
Lots of work.
Diffstat (limited to 'source/fud_utf8.cpp')
-rw-r--r-- | source/fud_utf8.cpp | 174 |
1 files changed, 73 insertions, 101 deletions
diff --git a/source/fud_utf8.cpp b/source/fud_utf8.cpp index ee8137a..4d617da 100644 --- a/source/fud_utf8.cpp +++ b/source/fud_utf8.cpp @@ -19,26 +19,19 @@ #include "fud_string.hpp" -#include <new> // IWYU pragma: keep - this is for placement new overloads. - namespace fud { -FudUtf8 FudUtf8::fromString(const String& fudString, size_t index) noexcept +FudUtf8 FudUtf8::from(const String& fudString, size_t index) noexcept { if (!fudString.valid()) { return invalidAscii(); } - return fromStringView(StringView{fudString}, index); + return from(StringView{fudString}, index); } -// FudUtf8 FudUtf8::fromStringView(const StringView& view, size_t index) noexcept -// { -// return fromStringView(StringView{view}, index); -// } - -FudUtf8 FudUtf8::fromStringView(StringView view, size_t index) noexcept +FudUtf8 FudUtf8::from(StringView view, size_t index) noexcept { auto viewLocal{view}; auto len = viewLocal.length(); @@ -76,126 +69,113 @@ FudUtf8 FudUtf8::fromStringView(StringView view, size_t index) noexcept return invalidAscii(); } -bool char_is_ascii(char character) +bool charIsAscii(char character) { return static_cast<uint8_t>(character & ~ASCII_MASK) == 0; } -FudStatus utf8_is_ascii(FudUtf8* character, bool* isAscii) +bool utf8IsAscii(FudUtf8 character) { - if (anyAreNull(character, isAscii)) { - return FudStatus::NullPointer; - } - - *isAscii = character->getType() == Utf8Type::Ascii && character->valid(); - - return FudStatus::Success; + return character.getType() == Utf8Type::Ascii && character.valid(); } namespace impl { -/* Assumes that predicate is not a null pointer! */ template <typename Predicate> -FudStatus isAsciiPredicate(FudUtf8* character, bool* pred, Predicate&& predicate) +bool isAsciiPredicate(FudUtf8 character, Predicate&& predicate) { - if (anyAreNull(character, pred)) { - return FudStatus::NullPointer; - } - - auto maybeAscii = character->getAscii(); + auto maybeAscii = character.getAscii(); if (!maybeAscii.has_value()) { - return FudStatus::ArgumentInvalid; + return false; } auto asciiChar = *maybeAscii; - *pred = std::forward<Predicate>(predicate)(asciiChar.asChar()); - - return FudStatus::Success; + return std::forward<Predicate>(predicate)(asciiChar.asChar()); } } // namespace impl -bool char_is_alphanumeric(char character) +bool charIsAlphanumeric(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } - if (char_is_alpha(character)) { + if (charIsAlpha(character)) { return true; } - return char_is_digit(character); + return charIsDigit(character); } -FudStatus utf8_is_alphanumeric(FudUtf8* character, bool* pred) +bool utf8IsAlphanumeric(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_alphanumeric); + return impl::isAsciiPredicate(character, charIsAlphanumeric); } -bool char_is_alpha(char character) +bool charIsAlpha(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } - if (char_is_uppercase(character)) { + if (charIsUppercase(character)) { return true; } - return char_is_lowercase(character); + return charIsLowercase(character); } -FudStatus utf8_is_alpha(FudUtf8* character, bool* pred) +bool utf8IsAlpha(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_alpha); + return impl::isAsciiPredicate(character, charIsAlpha); } -bool char_is_lowercase(char character) +bool charIsLowercase(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return 'a' <= character && character <= 'z'; } -FudStatus utf8_is_lowercase(FudUtf8* character, bool* pred) +bool utf8IsLowercase(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_lowercase); + return impl::isAsciiPredicate(character, charIsLowercase); } -bool char_is_uppercase(char character) +bool charIsUppercase(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return 'A' <= character && character <= 'Z'; } -FudStatus utf8_is_uppercase(FudUtf8* character, bool* pred) +bool utf8IsUppercase(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_uppercase); + return impl::isAsciiPredicate(character, charIsUppercase); } -bool char_is_digit(char character) +bool charIsDigit(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return '0' <= character && character <= '9'; } -FudStatus utf8_is_digit(FudUtf8* character, bool* pred) +bool utf8IsDigit(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_digit); + return impl::isAsciiPredicate(character, charIsDigit); } -bool char_is_hex_digit(char character) +bool charIsHexDigit(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } @@ -203,14 +183,14 @@ bool char_is_hex_digit(char character) ('A' <= character && character <= 'F'); } -FudStatus utf8_is_hex_digit(FudUtf8* character, bool* pred) +bool utf8IsHexDigit(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_hex_digit); + return impl::isAsciiPredicate(character, charIsHexDigit); } -bool char_is_control(char character) +bool charIsControl(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } @@ -219,70 +199,70 @@ bool char_is_control(char character) return ((static_cast<uint8_t>(character) <= maxControlChar)) || character == deleteChar; } -FudStatus utf8_is_control(FudUtf8* character, bool* pred) +bool utf8IsControl(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_control); + return impl::isAsciiPredicate(character, charIsControl); } -bool char_is_graphical(char character) +bool charIsGraphical(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } - return char_is_alphanumeric(character) || char_is_punctuation(character); + return charIsAlphanumeric(character) || charIsPunctuation(character); } -FudStatus utf8_is_graphical(FudUtf8* character, bool* pred) +bool utf8IsGraphical(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_graphical); + return impl::isAsciiPredicate(character, charIsGraphical); } -bool char_is_space(char character) +bool charIsSpace(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return character == ' ' || character == '\t' || character == '\n' || character == '\r' || character == '\v'; } -FudStatus utf8_is_space(FudUtf8* character, bool* pred) +bool utf8IsSpace(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_space); + return impl::isAsciiPredicate(character, charIsSpace); } -bool char_is_blank(char character) +bool charIsBlank(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return character == ' ' || character == '\t'; } -FudStatus utf8_is_blank(FudUtf8* character, bool* pred) +bool utf8IsBlank(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_blank); + return impl::isAsciiPredicate(character, charIsBlank); } -bool char_is_printable(char character) +bool charIsPrintable(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } return (character >= ' ' && character <= '~'); } -FudStatus utf8_is_printable(FudUtf8* character, bool* pred) +bool utf8IsPrintable(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_printable); + return impl::isAsciiPredicate(character, charIsPrintable); } -bool char_is_punctuation(char character) +bool charIsPunctuation(char character) { - if (!char_is_ascii(character)) { + if (!charIsAscii(character)) { return false; } @@ -290,14 +270,14 @@ bool char_is_punctuation(char character) (character >= '[' && character <= '`') || (character >= '{' && character <= '~'); } -FudStatus utf8_is_punctuation(FudUtf8* character, bool* pred) +bool utf8IsPunctuation(FudUtf8 character) { - return impl::isAsciiPredicate(character, pred, char_is_punctuation); + return impl::isAsciiPredicate(character, charIsPunctuation); } -uint8_t char_to_lower(uint8_t character) +uint8_t charToLower(uint8_t character) { - if (char_is_uppercase(static_cast<char>(character))) { + if (charIsUppercase(static_cast<char>(character))) { constexpr uint8_t lowerA = 'a'; constexpr uint8_t upperA = 'A'; return static_cast<uint8_t>(character - upperA) + lowerA; @@ -305,22 +285,18 @@ uint8_t char_to_lower(uint8_t character) return character; } -FudUtf8* utf8_to_lower(FudUtf8* character) +FudUtf8 utf8ToLower(FudUtf8 character) { - if (character == nullptr) { - return character; - } - - static_cast<void>(character->transformAscii([](Ascii& ascii) { - ascii = Ascii{char_to_lower(static_cast<uint8_t>(ascii.asChar()))}; + static_cast<void>(character.transformAscii([](Ascii& ascii) { + ascii = Ascii{charToLower(static_cast<uint8_t>(ascii.asChar()))}; })); return character; } -uint8_t char_to_upper(uint8_t character) +uint8_t charToUpper(uint8_t character) { - if (char_is_lowercase(static_cast<char>(character))) { + if (charIsLowercase(static_cast<char>(character))) { constexpr uint8_t lowerA = 'a'; constexpr uint8_t upperA = 'A'; return static_cast<uint8_t>(character - lowerA) + upperA; @@ -328,14 +304,10 @@ uint8_t char_to_upper(uint8_t character) return character; } -FudUtf8* utf8_to_upper(FudUtf8* character) +FudUtf8 utf8ToUpper(FudUtf8 character) { - if (character == nullptr) { - return character; - } - - static_cast<void>(character->transformAscii([](Ascii& ascii) { - ascii = Ascii{char_to_upper(static_cast<uint8_t>(ascii.asChar()))}; + static_cast<void>(character.transformAscii([](Ascii& ascii) { + ascii = Ascii{charToUpper(static_cast<uint8_t>(ascii.asChar()))}; })); return character; |