summary | refs | log | tree | commit | diff
path: root/include/fud_utf8.hpp
diff options
context:
space:
mode:
author: Dominick Allen <djallen@librehumanitas.org> 2024-10-23 13:21:10 -0500
committer: Dominick Allen <djallen@librehumanitas.org> 2024-10-23 13:21:10 -0500
commit: 5cc7cbc3704ec255eb5d0ac53b2cc0fcb1221d63 (patch)
tree: 169d4d2d8dffe014851712e31a55036deb0c7c0c /include/fud_utf8.hpp
parent: b2dbcb55e2832c373fecb4033a3ed77e5dbc77aa (diff)
String conversion and parsing format spec.
Diffstat (limited to 'include/fud_utf8.hpp')
-rw-r--r-- include/fud_utf8.hpp 116
1 files changed, 79 insertions, 37 deletions
diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp
index 3b1a6b7..50e50aa 100644
--- a/include/fud_utf8.hpp
+++ b/include/fud_utf8.hpp
@@ -19,8 +19,8 @@
#define FUD_UTF8_HPP
#include "fud_array.hpp"
-#include "fud_unique_array.hpp"
#include "fud_c_string.hpp"
+#include "fud_unique_array.hpp"
#include <cstdint>
#include <optional>
@@ -28,8 +28,6 @@
namespace fud {
-
-
using utf8 = unsigned char;
class String;
@@ -113,8 +111,7 @@ struct Utf82Byte {
{
}
- __attribute__((nonnull))
- constexpr Utf82Byte(const char* letterStr) noexcept : characters{}
+ __attribute__((nonnull)) constexpr Utf82Byte(const char* letterStr) noexcept : characters{}
{
auto length = cStringLength(letterStr, 2);
if (length < 2) {
@@ -159,8 +156,7 @@ struct Utf83Byte {
{
}
- __attribute__((nonnull))
- constexpr Utf83Byte(const char* letterStr) noexcept : characters{}
+ __attribute__((nonnull)) constexpr Utf83Byte(const char* letterStr) noexcept : characters{}
{
auto length = cStringLength(letterStr, 3);
if (length < 3) {
@@ -213,8 +209,7 @@ struct Utf84Byte {
{
}
- __attribute__((nonnull))
- constexpr Utf84Byte(const char* letterStr) noexcept : characters{}
+ __attribute__((nonnull)) constexpr Utf84Byte(const char* letterStr) noexcept : characters{}
{
auto length = cStringLength(letterStr, 4);
if (length < 4) {
@@ -327,7 +322,8 @@ struct FudUtf8 {
return unicode;
}
- static constexpr FudUtf8 make(Utf8Variant utf8Variant) {
+ static constexpr FudUtf8 make(Utf8Variant utf8Variant)
+ {
FudUtf8 unicode{};
unicode.m_variant = utf8Variant;
if (!std::visit([](auto arg) { return arg.valid(); }, utf8Variant)) {
@@ -513,82 +509,128 @@ struct FudUtf8 {
}
};
+namespace classify {
+
+using CharPredicate = bool (*)(char);
+using Utf8Predicate = bool (*)(utf8);
+using FudUtf8Predicate = bool (*)(FudUtf8);
+
/** \brief Checks if a character is ascii. */
-[[nodiscard]] bool charIsAscii(char character);
+[[nodiscard]] bool isAscii(char character);
-[[nodiscard]] bool utf8IsAscii(FudUtf8 character);
+[[nodiscard]] bool isAscii(utf8 character);
+
+[[nodiscard]] bool isAscii(FudUtf8 character);
/** \brief Checks if a character is alphanumeric. */
-[[nodiscard]] bool charIsAlphanumeric(char character);
+[[nodiscard]] bool isAlphanumeric(char character);
/** \brief Checks if a character is alphanumeric. */
-[[nodiscard]] bool utf8IsAlphanumeric(FudUtf8 character);
+[[nodiscard]] bool isAlphanumeric(utf8 character);
+
+/** \brief Checks if a character is alphanumeric. */
+[[nodiscard]] bool isAlphanumeric(FudUtf8 character);
+
+/** \brief Checks if a character is alphabetic. */
+[[nodiscard]] bool isAlpha(char character);
/** \brief Checks if a character is alphabetic. */
-[[nodiscard]] bool charIsAlpha(char character);
+[[nodiscard]] bool isAlpha(utf8 character);
/** \brief Checks if a character is alphabetic. */
-[[nodiscard]] bool utf8IsAlpha(FudUtf8 character);
+[[nodiscard]] bool isAlpha(FudUtf8 character);
+
+/** \brief Checks if a character is lowercase. */
+[[nodiscard]] bool isLowercase(char character);
/** \brief Checks if a character is lowercase. */
-[[nodiscard]] bool charIsLowercase(char character);
+[[nodiscard]] bool isLowercase(utf8 character);
/** \brief Checks if a character is lowercase. */
-[[nodiscard]] bool utf8IsLowercase(FudUtf8 character);
+[[nodiscard]] bool isLowercase(FudUtf8 character);
+
+/** \brief Checks if a character is uppercase. */
+[[nodiscard]] bool isUppercase(char character);
-/** \brief Checks if a character is an uppercase character. */
-[[nodiscard]] bool charIsUppercase(char character);
+/** \brief Checks if a character is uppercase. */
+[[nodiscard]] bool isUppercase(utf8 character);
/** \brief Checks if a character is uppercase. */
-[[nodiscard]] bool utf8IsUppercase(FudUtf8 character);
+[[nodiscard]] bool isUppercase(FudUtf8 character);
/** \brief Checks if a character is a digit. */
-[[nodiscard]] bool charIsDigit(char character);
+[[nodiscard]] bool isDigit(char character);
/** \brief Checks if a character is a digit. */
-[[nodiscard]] bool utf8IsDigit(FudUtf8 character);
+[[nodiscard]] bool isDigit(utf8 character);
+
+/** \brief Checks if a character is a digit. */
+[[nodiscard]] bool isDigit(FudUtf8 character);
/** \brief Checks if a character is a hexadecimal character. */
-[[nodiscard]] bool charIsHexDigit(char character);
+[[nodiscard]] bool isHexDigit(char character);
+
+/** \brief Checks if a character is a hexadecimal character. */
+[[nodiscard]] bool isHexDigit(utf8 character);
/** \brief Checks if a character is a hexadecimal digit. */
-[[nodiscard]] bool utf8IsHexDigit(FudUtf8 character);
+[[nodiscard]] bool isHexDigit(FudUtf8 character);
/** \brief Checks if a character is a control character. */
-[[nodiscard]] bool charIsControl(char character);
+[[nodiscard]] bool isControl(char character);
/** \brief Checks if a character is a control character. */
-[[nodiscard]] bool utf8IsControl(FudUtf8 character);
+[[nodiscard]] bool isControl(utf8 character);
+
+/** \brief Checks if a character is a control character. */
+[[nodiscard]] bool isControl(FudUtf8 character);
+
+/** \brief Checks if a character is a graphical character. */
+[[nodiscard]] bool isGraphical(char character);
/** \brief Checks if a character is a graphical character. */
-[[nodiscard]] bool charIsGraphical(char character);
+[[nodiscard]] bool isGraphical(utf8 character);
/** \brief Checks if a character is a graphical character. */
-[[nodiscard]] bool utf8IsGraphical(FudUtf8 character);
+[[nodiscard]] bool isGraphical(FudUtf8 character);
/** \brief Checks if a character is a space character. */
-[[nodiscard]] bool charIsSpace(char character);
+[[nodiscard]] bool isSpace(char character);
/** \brief Checks if a character is a space character. */
-[[nodiscard]] bool utf8IsSpace(FudUtf8 character);
+[[nodiscard]] bool isSpace(utf8 character);
+
+/** \brief Checks if a character is a space character. */
+[[nodiscard]] bool isSpace(FudUtf8 character);
+
+/** \brief Checks if a character is a blank character. */
+[[nodiscard]] bool isBlank(char character);
/** \brief Checks if a character is a blank character. */
-[[nodiscard]] bool charIsBlank(char character);
+[[nodiscard]] bool isBlank(utf8 character);
/** \brief Checks if a character is a blank character. */
-[[nodiscard]] bool utf8IsBlank(FudUtf8 character);
+[[nodiscard]] bool isBlank(FudUtf8 character);
+
+/** \brief Checks if a character is a printable character. */
+[[nodiscard]] bool isPrintable(char character);
/** \brief Checks if a character is a printable character. */
-[[nodiscard]] bool charIsPrintable(char character);
+[[nodiscard]] bool isPrintable(utf8 character);
/** \brief Checks if a character is a printable character. */
-[[nodiscard]] bool utf8IsPrintable(FudUtf8 character);
+[[nodiscard]] bool isPrintable(FudUtf8 character);
+
+/** \brief Checks if a character is a punctuation character. */
+[[nodiscard]] bool isPunctuation(char character);
/** \brief Checks if a character is a punctuation character. */
-[[nodiscard]] bool charIsPunctuation(char character);
+[[nodiscard]] bool isPunctuation(utf8 character);
/** \brief Checks if a character is a punctuation character. */
-[[nodiscard]] bool utf8IsPunctuation(FudUtf8 character);
+[[nodiscard]] bool isPunctuation(FudUtf8 character);
+
+} // namespace classify
/** \brief Converts character to lowercase if valid. */
uint8_t charToLower(uint8_t character);