From 5cc7cbc3704ec255eb5d0ac53b2cc0fcb1221d63 Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Wed, 23 Oct 2024 13:21:10 -0500 Subject: String conversion and parsing format spec. --- source/fud_utf8.cpp | 207 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 136 insertions(+), 71 deletions(-) (limited to 'source/fud_utf8.cpp') diff --git a/source/fud_utf8.cpp b/source/fud_utf8.cpp index 4d617da..bffb5c1 100644 --- a/source/fud_utf8.cpp +++ b/source/fud_utf8.cpp @@ -27,7 +27,6 @@ FudUtf8 FudUtf8::from(const String& fudString, size_t index) noexcept return invalidAscii(); } - return from(StringView{fudString}, index); } @@ -69,20 +68,26 @@ FudUtf8 FudUtf8::from(StringView view, size_t index) noexcept return invalidAscii(); } -bool charIsAscii(char character) +namespace classify { + +bool isAscii(char character) +{ + return isAscii(static_cast(character)); +} + +bool isAscii(utf8 character) { - return static_cast(character & ~ASCII_MASK) == 0; + return (character & ~ASCII_MASK) == 0; } -bool utf8IsAscii(FudUtf8 character) +bool isAscii(FudUtf8 character) { return character.getType() == Utf8Type::Ascii && character.valid(); } namespace impl { -template -bool isAsciiPredicate(FudUtf8 character, Predicate&& predicate) +bool isAsciiPredicate(FudUtf8 character, bool (*predicate)(char)) { auto maybeAscii = character.getAscii(); if (!maybeAscii.has_value()) { @@ -90,92 +95,122 @@ bool isAsciiPredicate(FudUtf8 character, Predicate&& predicate) } auto asciiChar = *maybeAscii; - return std::forward(predicate)(asciiChar.asChar()); + return predicate(asciiChar.asChar()); } } // namespace impl -bool charIsAlphanumeric(char character) +bool isAlphanumeric(char character) { - if (!charIsAscii(character)) { + return isAlphanumeric(static_cast(character)); +} + +bool isAlphanumeric(utf8 character) +{ + if (!isAscii(character)) { return false; } - if (charIsAlpha(character)) { + if (isAlpha(character)) { return true; } - return charIsDigit(character); + return isDigit(character); +} + +bool isAlphanumeric(FudUtf8 character) +{ + return impl::isAsciiPredicate(character, isAlphanumeric); } -bool utf8IsAlphanumeric(FudUtf8 character) +bool isAlpha(char character) { - return impl::isAsciiPredicate(character, charIsAlphanumeric); + return isAlpha(static_cast(character)); } -bool charIsAlpha(char character) +bool isAlpha(utf8 character) { - if (!charIsAscii(character)) { + if (!isAscii(character)) { return false; } - if (charIsUppercase(character)) { + if (isUppercase(character)) { return true; } - return charIsLowercase(character); + return isLowercase(character); } -bool utf8IsAlpha(FudUtf8 character) +bool isAlpha(FudUtf8 character) { - return impl::isAsciiPredicate(character, charIsAlpha); + return impl::isAsciiPredicate(character, isAlpha); } -bool charIsLowercase(char character) +bool isLowercase(char character) { - if (!charIsAscii(character)) { + return isLowercase(static_cast(character)); +} + +bool isLowercase(utf8 character) +{ + if (!isAscii(character)) { return false; } return 'a' <= character && character <= 'z'; } -bool utf8IsLowercase(FudUtf8 character) +bool isLowercase(FudUtf8 character) +{ + return impl::isAsciiPredicate(character, isLowercase); +} + +bool isUppercase(char character) { - return impl::isAsciiPredicate(character, charIsLowercase); + return isUppercase(static_cast(character)); } -bool charIsUppercase(char character) +bool isUppercase(utf8 character) { - if (!charIsAscii(character)) { + if (!isAscii(character)) { return false; } return 'A' <= character && character <= 'Z'; } -bool utf8IsUppercase(FudUtf8 character) +bool isUppercase(FudUtf8 character) { - return impl::isAsciiPredicate(character, charIsUppercase); + return impl::isAsciiPredicate(character, isUppercase); } -bool charIsDigit(char character) +bool isDigit(char character) { - if (!charIsAscii(character)) { + return isDigit(static_cast(character)); +} + +bool isDigit(utf8 character) +{ + if (!isAscii(character)) { return false; } return '0' <= character && character <= '9'; } -bool utf8IsDigit(FudUtf8 character) +bool isDigit(FudUtf8 character) +{ + return impl::isAsciiPredicate(character, isDigit); +} + +bool isHexDigit(char character) { - return impl::isAsciiPredicate(character, charIsDigit); + return isHexDigit(static_cast(character)); } -bool charIsHexDigit(char character) +bool isHexDigit(utf8 character) { - if (!charIsAscii(character)) { + if (!isAscii(character)) { return false; } @@ -183,86 +218,116 @@ bool charIsHexDigit(char character) ('A' <= character && character <= 'F'); } -bool utf8IsHexDigit(FudUtf8 character) +bool isHexDigit(FudUtf8 character) +{ + return impl::isAsciiPredicate(character, isHexDigit); +} + +bool isControl(char character) { - return impl::isAsciiPredicate(character, charIsHexDigit); + return isControl(static_cast(character)); } -bool charIsControl(char character) +bool isControl(utf8 character) { - if (!charIsAscii(character)) { + if (!isAscii(character)) { return false; } constexpr char maxControlChar = 0x1F; constexpr const char deleteChar = 0x7F; - return ((static_cast(character) <= maxControlChar)) || character == deleteChar; + return ((static_cast(character) <= maxControlChar)) || character == deleteChar; } -bool utf8IsControl(FudUtf8 character) +bool isControl(FudUtf8 character) { - return impl::isAsciiPredicate(character, charIsControl); + return impl::isAsciiPredicate(character, isControl); } -bool charIsGraphical(char character) +bool isGraphical(char character) { - if (!charIsAscii(character)) { + return isGraphical(static_cast(character)); +} + +bool isGraphical(utf8 character) +{ + if (!isAscii(character)) { return false; } - return charIsAlphanumeric(character) || charIsPunctuation(character); + return isAlphanumeric(character) || isPunctuation(character); +} + +bool isGraphical(FudUtf8 character) +{ + return impl::isAsciiPredicate(character, isGraphical); } -bool utf8IsGraphical(FudUtf8 character) +bool isSpace(char character) { - return impl::isAsciiPredicate(character, charIsGraphical); + return isSpace(static_cast(character)); } -bool charIsSpace(char character) +bool isSpace(utf8 character) { - if (!charIsAscii(character)) { + if (!isAscii(character)) { return false; } return character == ' ' || character == '\t' || character == '\n' || character == '\r' || character == '\v'; } -bool utf8IsSpace(FudUtf8 character) +bool isSpace(FudUtf8 character) { - return impl::isAsciiPredicate(character, charIsSpace); + return impl::isAsciiPredicate(character, isSpace); } -bool charIsBlank(char character) +bool isBlank(char character) { - if (!charIsAscii(character)) { + return isBlank(static_cast(character)); +} + +bool isBlank(utf8 character) +{ + if (!isAscii(character)) { return false; } return character == ' ' || character == '\t'; } -bool utf8IsBlank(FudUtf8 character) +bool isBlank(FudUtf8 character) +{ + return impl::isAsciiPredicate(character, isBlank); +} + +bool isPrintable(char character) { - return impl::isAsciiPredicate(character, charIsBlank); + return isPrintable(static_cast(character)); } -bool charIsPrintable(char character) +bool isPrintable(utf8 character) { - if (!charIsAscii(character)) { + if (!isAscii(character)) { return false; } return (character >= ' ' && character <= '~'); } -bool utf8IsPrintable(FudUtf8 character) +bool isPrintable(FudUtf8 character) { - return impl::isAsciiPredicate(character, charIsPrintable); + return impl::isAsciiPredicate(character, isPrintable); } -bool charIsPunctuation(char character) +bool isPunctuation(char character) { - if (!charIsAscii(character)) { + return isPunctuation(static_cast(character)); +} + +bool isPunctuation(utf8 character) +{ + if (!isAscii(character)) { return false; } @@ -270,45 +335,45 @@ bool charIsPunctuation(char character) (character >= '[' && character <= '`') || (character >= '{' && character <= '~'); } -bool utf8IsPunctuation(FudUtf8 character) +bool isPunctuation(FudUtf8 character) { - return impl::isAsciiPredicate(character, charIsPunctuation); + return impl::isAsciiPredicate(character, isPunctuation); } +} // namespace classify + uint8_t charToLower(uint8_t character) { - if (charIsUppercase(static_cast(character))) { + if (classify::isUppercase(static_cast(character))) { constexpr uint8_t lowerA = 'a'; constexpr uint8_t upperA = 'A'; - return static_cast(character - upperA) + lowerA; + return static_cast(character - upperA) + lowerA; } return character; } FudUtf8 utf8ToLower(FudUtf8 character) { - static_cast(character.transformAscii([](Ascii& ascii) { - ascii = Ascii{charToLower(static_cast(ascii.asChar()))}; - })); + static_cast( + character.transformAscii([](Ascii& ascii) { ascii = Ascii{charToLower(static_cast(ascii.asChar()))}; })); return character; } uint8_t charToUpper(uint8_t character) { - if (charIsLowercase(static_cast(character))) { + if (classify::isLowercase(static_cast(character))) { constexpr uint8_t lowerA = 'a'; constexpr uint8_t upperA = 'A'; - return static_cast(character - lowerA) + upperA; + return static_cast(character - lowerA) + upperA; } return character; } FudUtf8 utf8ToUpper(FudUtf8 character) { - static_cast(character.transformAscii([](Ascii& ascii) { - ascii = Ascii{charToUpper(static_cast(ascii.asChar()))}; - })); + static_cast( + character.transformAscii([](Ascii& ascii) { ascii = Ascii{charToUpper(static_cast(ascii.asChar()))}; })); return character; } -- cgit v1.2.3