From 5cc7cbc3704ec255eb5d0ac53b2cc0fcb1221d63 Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Wed, 23 Oct 2024 13:21:10 -0500 Subject: String conversion and parsing format spec. --- include/fud_utf8.hpp | 116 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 37 deletions(-) (limited to 'include/fud_utf8.hpp') diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp index 3b1a6b7..50e50aa 100644 --- a/include/fud_utf8.hpp +++ b/include/fud_utf8.hpp @@ -19,8 +19,8 @@ #define FUD_UTF8_HPP #include "fud_array.hpp" -#include "fud_unique_array.hpp" #include "fud_c_string.hpp" +#include "fud_unique_array.hpp" #include #include @@ -28,8 +28,6 @@ namespace fud { - - using utf8 = unsigned char; class String; @@ -113,8 +111,7 @@ struct Utf82Byte { { } - __attribute__((nonnull)) - constexpr Utf82Byte(const char* letterStr) noexcept : characters{} + __attribute__((nonnull)) constexpr Utf82Byte(const char* letterStr) noexcept : characters{} { auto length = cStringLength(letterStr, 2); if (length < 2) { @@ -159,8 +156,7 @@ struct Utf83Byte { { } - __attribute__((nonnull)) - constexpr Utf83Byte(const char* letterStr) noexcept : characters{} + __attribute__((nonnull)) constexpr Utf83Byte(const char* letterStr) noexcept : characters{} { auto length = cStringLength(letterStr, 3); if (length < 3) { @@ -213,8 +209,7 @@ struct Utf84Byte { { } - __attribute__((nonnull)) - constexpr Utf84Byte(const char* letterStr) noexcept : characters{} + __attribute__((nonnull)) constexpr Utf84Byte(const char* letterStr) noexcept : characters{} { auto length = cStringLength(letterStr, 4); if (length < 4) { @@ -327,7 +322,8 @@ struct FudUtf8 { return unicode; } - static constexpr FudUtf8 make(Utf8Variant utf8Variant) { + static constexpr FudUtf8 make(Utf8Variant utf8Variant) + { FudUtf8 unicode{}; unicode.m_variant = utf8Variant; if (!std::visit([](auto arg) { return arg.valid(); }, utf8Variant)) { @@ -513,82 +509,128 @@ struct FudUtf8 { } }; +namespace classify { + +using CharPredicate = bool (*)(char); +using Utf8Predicate = bool (*)(utf8); +using FudUtf8Predicate = bool (*)(FudUtf8); + /** \brief Checks if a character is ascii. */ -[[nodiscard]] bool charIsAscii(char character); +[[nodiscard]] bool isAscii(char character); -[[nodiscard]] bool utf8IsAscii(FudUtf8 character); +[[nodiscard]] bool isAscii(utf8 character); + +[[nodiscard]] bool isAscii(FudUtf8 character); /** \brief Checks if a character is alphanumeric. */ -[[nodiscard]] bool charIsAlphanumeric(char character); +[[nodiscard]] bool isAlphanumeric(char character); /** \brief Checks if a character is alphanumeric. */ -[[nodiscard]] bool utf8IsAlphanumeric(FudUtf8 character); +[[nodiscard]] bool isAlphanumeric(utf8 character); + +/** \brief Checks if a character is alphanumeric. */ +[[nodiscard]] bool isAlphanumeric(FudUtf8 character); + +/** \brief Checks if a character is alphabetic. */ +[[nodiscard]] bool isAlpha(char character); /** \brief Checks if a character is alphabetic. */ -[[nodiscard]] bool charIsAlpha(char character); +[[nodiscard]] bool isAlpha(utf8 character); /** \brief Checks if a character is alphabetic. */ -[[nodiscard]] bool utf8IsAlpha(FudUtf8 character); +[[nodiscard]] bool isAlpha(FudUtf8 character); + +/** \brief Checks if a character is lowercase. */ +[[nodiscard]] bool isLowercase(char character); /** \brief Checks if a character is lowercase. */ -[[nodiscard]] bool charIsLowercase(char character); +[[nodiscard]] bool isLowercase(utf8 character); /** \brief Checks if a character is lowercase. */ -[[nodiscard]] bool utf8IsLowercase(FudUtf8 character); +[[nodiscard]] bool isLowercase(FudUtf8 character); + +/** \brief Checks if a character is uppercase. */ +[[nodiscard]] bool isUppercase(char character); -/** \brief Checks if a character is an uppercase character. */ -[[nodiscard]] bool charIsUppercase(char character); +/** \brief Checks if a character is uppercase. */ +[[nodiscard]] bool isUppercase(utf8 character); /** \brief Checks if a character is uppercase. */ -[[nodiscard]] bool utf8IsUppercase(FudUtf8 character); +[[nodiscard]] bool isUppercase(FudUtf8 character); /** \brief Checks if a character is a digit. */ -[[nodiscard]] bool charIsDigit(char character); +[[nodiscard]] bool isDigit(char character); /** \brief Checks if a character is a digit. */ -[[nodiscard]] bool utf8IsDigit(FudUtf8 character); +[[nodiscard]] bool isDigit(utf8 character); + +/** \brief Checks if a character is a digit. */ +[[nodiscard]] bool isDigit(FudUtf8 character); /** \brief Checks if a character is a hexadecimal character. */ -[[nodiscard]] bool charIsHexDigit(char character); +[[nodiscard]] bool isHexDigit(char character); + +/** \brief Checks if a character is a hexadecimal character. */ +[[nodiscard]] bool isHexDigit(utf8 character); /** \brief Checks if a character is a hexadecimal digit. */ -[[nodiscard]] bool utf8IsHexDigit(FudUtf8 character); +[[nodiscard]] bool isHexDigit(FudUtf8 character); /** \brief Checks if a character is a control character. */ -[[nodiscard]] bool charIsControl(char character); +[[nodiscard]] bool isControl(char character); /** \brief Checks if a character is a control character. */ -[[nodiscard]] bool utf8IsControl(FudUtf8 character); +[[nodiscard]] bool isControl(utf8 character); + +/** \brief Checks if a character is a control character. */ +[[nodiscard]] bool isControl(FudUtf8 character); + +/** \brief Checks if a character is a graphical character. */ +[[nodiscard]] bool isGraphical(char character); /** \brief Checks if a character is a graphical character. */ -[[nodiscard]] bool charIsGraphical(char character); +[[nodiscard]] bool isGraphical(utf8 character); /** \brief Checks if a character is a graphical character. */ -[[nodiscard]] bool utf8IsGraphical(FudUtf8 character); +[[nodiscard]] bool isGraphical(FudUtf8 character); /** \brief Checks if a character is a space character. */ -[[nodiscard]] bool charIsSpace(char character); +[[nodiscard]] bool isSpace(char character); /** \brief Checks if a character is a space character. */ -[[nodiscard]] bool utf8IsSpace(FudUtf8 character); +[[nodiscard]] bool isSpace(utf8 character); + +/** \brief Checks if a character is a space character. */ +[[nodiscard]] bool isSpace(FudUtf8 character); + +/** \brief Checks if a character is a blank character. */ +[[nodiscard]] bool isBlank(char character); /** \brief Checks if a character is a blank character. */ -[[nodiscard]] bool charIsBlank(char character); +[[nodiscard]] bool isBlank(utf8 character); /** \brief Checks if a character is a blank character. */ -[[nodiscard]] bool utf8IsBlank(FudUtf8 character); +[[nodiscard]] bool isBlank(FudUtf8 character); + +/** \brief Checks if a character is a printable character. */ +[[nodiscard]] bool isPrintable(char character); /** \brief Checks if a character is a printable character. */ -[[nodiscard]] bool charIsPrintable(char character); +[[nodiscard]] bool isPrintable(utf8 character); /** \brief Checks if a character is a printable character. */ -[[nodiscard]] bool utf8IsPrintable(FudUtf8 character); +[[nodiscard]] bool isPrintable(FudUtf8 character); + +/** \brief Checks if a character is a punctuation character. */ +[[nodiscard]] bool isPunctuation(char character); /** \brief Checks if a character is a punctuation character. */ -[[nodiscard]] bool charIsPunctuation(char character); +[[nodiscard]] bool isPunctuation(utf8 character); /** \brief Checks if a character is a punctuation character. */ -[[nodiscard]] bool utf8IsPunctuation(FudUtf8 character); +[[nodiscard]] bool isPunctuation(FudUtf8 character); + +} // namespace classify /** \brief Converts character to lowercase if valid. */ uint8_t charToLower(uint8_t character); -- cgit v1.2.3