summaryrefslogtreecommitdiff
path: root/source/fud_utf8.cpp
diff options
context:
space:
mode:
authorDominick Allen <djallen@librehumanitas.org>2024-10-23 13:21:10 -0500
committerDominick Allen <djallen@librehumanitas.org>2024-10-23 13:21:10 -0500
commit5cc7cbc3704ec255eb5d0ac53b2cc0fcb1221d63 (patch)
tree169d4d2d8dffe014851712e31a55036deb0c7c0c /source/fud_utf8.cpp
parentb2dbcb55e2832c373fecb4033a3ed77e5dbc77aa (diff)
String conversion and parsing format spec.
Diffstat (limited to 'source/fud_utf8.cpp')
-rw-r--r--source/fud_utf8.cpp207
1 files changed, 136 insertions, 71 deletions
diff --git a/source/fud_utf8.cpp b/source/fud_utf8.cpp
index 4d617da..bffb5c1 100644
--- a/source/fud_utf8.cpp
+++ b/source/fud_utf8.cpp
@@ -27,7 +27,6 @@ FudUtf8 FudUtf8::from(const String& fudString, size_t index) noexcept
return invalidAscii();
}
-
return from(StringView{fudString}, index);
}
@@ -69,20 +68,26 @@ FudUtf8 FudUtf8::from(StringView view, size_t index) noexcept
return invalidAscii();
}
-bool charIsAscii(char character)
+namespace classify {
+
+bool isAscii(char character)
+{
+ return isAscii(static_cast<utf8>(character));
+}
+
+bool isAscii(utf8 character)
{
- return static_cast<uint8_t>(character & ~ASCII_MASK) == 0;
+ return (character & ~ASCII_MASK) == 0;
}
-bool utf8IsAscii(FudUtf8 character)
+bool isAscii(FudUtf8 character)
{
return character.getType() == Utf8Type::Ascii && character.valid();
}
namespace impl {
-template <typename Predicate>
-bool isAsciiPredicate(FudUtf8 character, Predicate&& predicate)
+bool isAsciiPredicate(FudUtf8 character, bool (*predicate)(char))
{
auto maybeAscii = character.getAscii();
if (!maybeAscii.has_value()) {
@@ -90,92 +95,122 @@ bool isAsciiPredicate(FudUtf8 character, Predicate&& predicate)
}
auto asciiChar = *maybeAscii;
- return std::forward<Predicate>(predicate)(asciiChar.asChar());
+ return predicate(asciiChar.asChar());
}
} // namespace impl
-bool charIsAlphanumeric(char character)
+bool isAlphanumeric(char character)
{
- if (!charIsAscii(character)) {
+ return isAlphanumeric(static_cast<utf8>(character));
+}
+
+bool isAlphanumeric(utf8 character)
+{
+ if (!isAscii(character)) {
return false;
}
- if (charIsAlpha(character)) {
+ if (isAlpha(character)) {
return true;
}
- return charIsDigit(character);
+ return isDigit(character);
+}
+
+bool isAlphanumeric(FudUtf8 character)
+{
+ return impl::isAsciiPredicate(character, isAlphanumeric);
}
-bool utf8IsAlphanumeric(FudUtf8 character)
+bool isAlpha(char character)
{
- return impl::isAsciiPredicate(character, charIsAlphanumeric);
+ return isAlpha(static_cast<utf8>(character));
}
-bool charIsAlpha(char character)
+bool isAlpha(utf8 character)
{
- if (!charIsAscii(character)) {
+ if (!isAscii(character)) {
return false;
}
- if (charIsUppercase(character)) {
+ if (isUppercase(character)) {
return true;
}
- return charIsLowercase(character);
+ return isLowercase(character);
}
-bool utf8IsAlpha(FudUtf8 character)
+bool isAlpha(FudUtf8 character)
{
- return impl::isAsciiPredicate(character, charIsAlpha);
+ return impl::isAsciiPredicate(character, isAlpha);
}
-bool charIsLowercase(char character)
+bool isLowercase(char character)
{
- if (!charIsAscii(character)) {
+ return isLowercase(static_cast<utf8>(character));
+}
+
+bool isLowercase(utf8 character)
+{
+ if (!isAscii(character)) {
return false;
}
return 'a' <= character && character <= 'z';
}
-bool utf8IsLowercase(FudUtf8 character)
+bool isLowercase(FudUtf8 character)
+{
+ return impl::isAsciiPredicate(character, isLowercase);
+}
+
+bool isUppercase(char character)
{
- return impl::isAsciiPredicate(character, charIsLowercase);
+ return isUppercase(static_cast<utf8>(character));
}
-bool charIsUppercase(char character)
+bool isUppercase(utf8 character)
{
- if (!charIsAscii(character)) {
+ if (!isAscii(character)) {
return false;
}
return 'A' <= character && character <= 'Z';
}
-bool utf8IsUppercase(FudUtf8 character)
+bool isUppercase(FudUtf8 character)
{
- return impl::isAsciiPredicate(character, charIsUppercase);
+ return impl::isAsciiPredicate(character, isUppercase);
}
-bool charIsDigit(char character)
+bool isDigit(char character)
{
- if (!charIsAscii(character)) {
+ return isDigit(static_cast<utf8>(character));
+}
+
+bool isDigit(utf8 character)
+{
+ if (!isAscii(character)) {
return false;
}
return '0' <= character && character <= '9';
}
-bool utf8IsDigit(FudUtf8 character)
+bool isDigit(FudUtf8 character)
+{
+ return impl::isAsciiPredicate(character, isDigit);
+}
+
+bool isHexDigit(char character)
{
- return impl::isAsciiPredicate(character, charIsDigit);
+ return isHexDigit(static_cast<utf8>(character));
}
-bool charIsHexDigit(char character)
+bool isHexDigit(utf8 character)
{
- if (!charIsAscii(character)) {
+ if (!isAscii(character)) {
return false;
}
@@ -183,86 +218,116 @@ bool charIsHexDigit(char character)
('A' <= character && character <= 'F');
}
-bool utf8IsHexDigit(FudUtf8 character)
+bool isHexDigit(FudUtf8 character)
+{
+ return impl::isAsciiPredicate(character, isHexDigit);
+}
+
+bool isControl(char character)
{
- return impl::isAsciiPredicate(character, charIsHexDigit);
+ return isControl(static_cast<utf8>(character));
}
-bool charIsControl(char character)
+bool isControl(utf8 character)
{
- if (!charIsAscii(character)) {
+ if (!isAscii(character)) {
return false;
}
constexpr char maxControlChar = 0x1F;
constexpr const char deleteChar = 0x7F;
- return ((static_cast<uint8_t>(character) <= maxControlChar)) || character == deleteChar;
+ return ((static_cast<utf8>(character) <= maxControlChar)) || character == deleteChar;
}
-bool utf8IsControl(FudUtf8 character)
+bool isControl(FudUtf8 character)
{
- return impl::isAsciiPredicate(character, charIsControl);
+ return impl::isAsciiPredicate(character, isControl);
}
-bool charIsGraphical(char character)
+bool isGraphical(char character)
{
- if (!charIsAscii(character)) {
+ return isGraphical(static_cast<utf8>(character));
+}
+
+bool isGraphical(utf8 character)
+{
+ if (!isAscii(character)) {
return false;
}
- return charIsAlphanumeric(character) || charIsPunctuation(character);
+ return isAlphanumeric(character) || isPunctuation(character);
+}
+
+bool isGraphical(FudUtf8 character)
+{
+ return impl::isAsciiPredicate(character, isGraphical);
}
-bool utf8IsGraphical(FudUtf8 character)
+bool isSpace(char character)
{
- return impl::isAsciiPredicate(character, charIsGraphical);
+ return isSpace(static_cast<utf8>(character));
}
-bool charIsSpace(char character)
+bool isSpace(utf8 character)
{
- if (!charIsAscii(character)) {
+ if (!isAscii(character)) {
return false;
}
return character == ' ' || character == '\t' || character == '\n' || character == '\r' || character == '\v';
}
-bool utf8IsSpace(FudUtf8 character)
+bool isSpace(FudUtf8 character)
{
- return impl::isAsciiPredicate(character, charIsSpace);
+ return impl::isAsciiPredicate(character, isSpace);
}
-bool charIsBlank(char character)
+bool isBlank(char character)
{
- if (!charIsAscii(character)) {
+ return isBlank(static_cast<utf8>(character));
+}
+
+bool isBlank(utf8 character)
+{
+ if (!isAscii(character)) {
return false;
}
return character == ' ' || character == '\t';
}
-bool utf8IsBlank(FudUtf8 character)
+bool isBlank(FudUtf8 character)
+{
+ return impl::isAsciiPredicate(character, isBlank);
+}
+
+bool isPrintable(char character)
{
- return impl::isAsciiPredicate(character, charIsBlank);
+ return isPrintable(static_cast<utf8>(character));
}
-bool charIsPrintable(char character)
+bool isPrintable(utf8 character)
{
- if (!charIsAscii(character)) {
+ if (!isAscii(character)) {
return false;
}
return (character >= ' ' && character <= '~');
}
-bool utf8IsPrintable(FudUtf8 character)
+bool isPrintable(FudUtf8 character)
{
- return impl::isAsciiPredicate(character, charIsPrintable);
+ return impl::isAsciiPredicate(character, isPrintable);
}
-bool charIsPunctuation(char character)
+bool isPunctuation(char character)
{
- if (!charIsAscii(character)) {
+ return isPunctuation(static_cast<utf8>(character));
+}
+
+bool isPunctuation(utf8 character)
+{
+ if (!isAscii(character)) {
return false;
}
@@ -270,45 +335,45 @@ bool charIsPunctuation(char character)
(character >= '[' && character <= '`') || (character >= '{' && character <= '~');
}
-bool utf8IsPunctuation(FudUtf8 character)
+bool isPunctuation(FudUtf8 character)
{
- return impl::isAsciiPredicate(character, charIsPunctuation);
+ return impl::isAsciiPredicate(character, isPunctuation);
}
+} // namespace classify
+
uint8_t charToLower(uint8_t character)
{
- if (charIsUppercase(static_cast<char>(character))) {
+ if (classify::isUppercase(static_cast<char>(character))) {
constexpr uint8_t lowerA = 'a';
constexpr uint8_t upperA = 'A';
- return static_cast<uint8_t>(character - upperA) + lowerA;
+ return static_cast<utf8>(character - upperA) + lowerA;
}
return character;
}
FudUtf8 utf8ToLower(FudUtf8 character)
{
- static_cast<void>(character.transformAscii([](Ascii& ascii) {
- ascii = Ascii{charToLower(static_cast<uint8_t>(ascii.asChar()))};
- }));
+ static_cast<void>(
+ character.transformAscii([](Ascii& ascii) { ascii = Ascii{charToLower(static_cast<utf8>(ascii.asChar()))}; }));
return character;
}
uint8_t charToUpper(uint8_t character)
{
- if (charIsLowercase(static_cast<char>(character))) {
+ if (classify::isLowercase(static_cast<char>(character))) {
constexpr uint8_t lowerA = 'a';
constexpr uint8_t upperA = 'A';
- return static_cast<uint8_t>(character - lowerA) + upperA;
+ return static_cast<utf8>(character - lowerA) + upperA;
}
return character;
}
FudUtf8 utf8ToUpper(FudUtf8 character)
{
- static_cast<void>(character.transformAscii([](Ascii& ascii) {
- ascii = Ascii{charToUpper(static_cast<uint8_t>(ascii.asChar()))};
- }));
+ static_cast<void>(
+ character.transformAscii([](Ascii& ascii) { ascii = Ascii{charToUpper(static_cast<utf8>(ascii.asChar()))}; }));
return character;
}