summary | refs | log | tree | commit | diff
path: root/include/fud_utf8.hpp
diff options
context:
space:
mode:
author: Dominick Allen <djallen@librehumanitas.org> 2024-10-20 10:48:19 -0500
committer: Dominick Allen <djallen@librehumanitas.org> 2024-10-20 10:48:19 -0500
commit: 6a27a2a4032e88fa9154ef0f0741edc584f7a701 (patch)
tree: 92ca58cbcdd2c1d11b7d69deb0d4925d0f979a3f /include/fud_utf8.hpp
parent: e94db4695e236b42ae1be44b2605075161d5144f (diff)
Lots of work.
Diffstat (limited to 'include/fud_utf8.hpp')
-rw-r--r-- include/fud_utf8.hpp 130
1 files changed, 94 insertions, 36 deletions
diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp
index 539e0f4..3b1a6b7 100644
--- a/include/fud_utf8.hpp
+++ b/include/fud_utf8.hpp
@@ -19,8 +19,8 @@
#define FUD_UTF8_HPP
#include "fud_array.hpp"
-#include "fud_status.hpp"
#include "fud_unique_array.hpp"
+#include "fud_c_string.hpp"
#include <cstdint>
#include <optional>
@@ -28,6 +28,8 @@
namespace fud {
+
+
using utf8 = unsigned char;
class String;
@@ -110,6 +112,18 @@ struct Utf82Byte {
constexpr Utf82Byte(utf8 first, utf8 second) noexcept : characters{{first, second}}
{
}
+
+    /**
+     * \brief Builds a 2-byte sequence from the leading bytes of a C string.
+     *
+     * \param letterStr C string to read from; the constructor is declared
+     *        nonnull, so a null pointer is not an accepted argument.
+     *
+     * characters is value-initialized first and is only overwritten when
+     * cStringLength(letterStr, 2) reports at least 2; otherwise it keeps
+     * that initial state.
+     * NOTE(review): cStringLength is presumably a bounded length helper from
+     * fud_c_string.hpp - confirm its return type and error convention.
+     */
+    __attribute__((nonnull))
+    constexpr Utf82Byte(const char* letterStr) noexcept : characters{}
+    {
+        const auto available = cStringLength(letterStr, 2);
+        if (available >= 2) {
+            characters[0] = static_cast<utf8>(letterStr[0]);
+            characters[1] = static_cast<utf8>(letterStr[1]);
+        }
+    }
+
Array<utf8, 2> characters;
static constexpr size_t size() noexcept
{
@@ -145,6 +159,18 @@ struct Utf83Byte {
{
}
+    /**
+     * \brief Builds a 3-byte sequence from the leading bytes of a C string.
+     *
+     * \param letterStr C string to read from; the constructor is declared
+     *        nonnull, so a null pointer is not an accepted argument.
+     *
+     * characters is value-initialized first and is only overwritten when
+     * cStringLength(letterStr, 3) reports at least 3; otherwise it keeps
+     * that initial state.
+     * NOTE(review): cStringLength is presumably a bounded length helper from
+     * fud_c_string.hpp - confirm its return type and error convention.
+     */
+    __attribute__((nonnull))
+    constexpr Utf83Byte(const char* letterStr) noexcept : characters{}
+    {
+        const auto available = cStringLength(letterStr, 3);
+        if (available >= 3) {
+            // Copy the three code units in order, reinterpreting each char as utf8.
+            for (size_t idx = 0; idx < 3; ++idx) {
+                characters[idx] = static_cast<utf8>(letterStr[idx]);
+            }
+        }
+    }
+
Array<utf8, 3> characters;
static constexpr size_t size() noexcept
@@ -187,6 +213,19 @@ struct Utf84Byte {
{
}
+    /**
+     * \brief Builds a 4-byte sequence from the leading bytes of a C string.
+     *
+     * \param letterStr C string to read from; the constructor is declared
+     *        nonnull, so a null pointer is not an accepted argument.
+     *
+     * characters is value-initialized first and is only overwritten when
+     * cStringLength(letterStr, 4) reports at least 4; otherwise it keeps
+     * that initial state.
+     * NOTE(review): cStringLength is presumably a bounded length helper from
+     * fud_c_string.hpp - confirm its return type and error convention.
+     */
+    __attribute__((nonnull))
+    constexpr Utf84Byte(const char* letterStr) noexcept : characters{}
+    {
+        const auto available = cStringLength(letterStr, 4);
+        if (available >= 4) {
+            // Copy the four code units in order, reinterpreting each char as utf8.
+            for (size_t idx = 0; idx < 4; ++idx) {
+                characters[idx] = static_cast<utf8>(letterStr[idx]);
+            }
+        }
+    }
+
Array<utf8, 4> characters;
static constexpr size_t size() noexcept
@@ -250,11 +289,12 @@ struct FudUtf8 {
Utf8Variant m_variant{Utf8Variant{Ascii{}}};
static constexpr Ascii invalidAsciiCode{Ascii{0xFF}};
- static FudUtf8 fromString(const String& fudString, size_t index) noexcept;
- static FudUtf8 fromStringView(StringView view, size_t index) noexcept;
- // static FudUtf8 fromStringView(const StringView& view, size_t index) noexcept;
- static constexpr FudUtf8 makeUtf8(const Array<utf8, 4>& data)
+ static FudUtf8 from(const String& fudString, size_t index) noexcept;
+
+ static FudUtf8 from(StringView view, size_t index) noexcept;
+
+ static constexpr FudUtf8 make(const Array<utf8, 4>& data)
{
FudUtf8 unicode{};
if (Ascii::valid(data[0])) {
@@ -271,7 +311,12 @@ struct FudUtf8 {
return unicode;
}
- static constexpr FudUtf8 makeUtf8(const Ascii& utf8Char)
+    /**
+     * \brief Builds a FudUtf8 from a single raw code unit.
+     *
+     * The code unit is wrapped in an Ascii value and handed to the
+     * make overload that takes an Ascii.
+     */
+    static constexpr FudUtf8 make(utf8 utf8Char)
+    {
+        const Ascii asciiChar{utf8Char};
+        return make(asciiChar);
+    }
+
+ static constexpr FudUtf8 make(Ascii utf8Char)
{
FudUtf8 unicode{{Utf8Variant{Ascii{}}}};
if (utf8Char.valid()) {
@@ -282,6 +327,15 @@ struct FudUtf8 {
return unicode;
}
+    /**
+     * \brief Builds a FudUtf8 from an already-constructed variant.
+     *
+     * The held alternative is asked for valid(). When it reports valid the
+     * result stores utf8Variant; otherwise the result stores
+     * invalidAsciiCode.
+     */
+    static constexpr FudUtf8 make(Utf8Variant utf8Variant)
+    {
+        const auto variantIsValid = std::visit([](auto alternative) { return alternative.valid(); }, utf8Variant);
+        FudUtf8 unicode{};
+        if (variantIsValid) {
+            unicode.m_variant = utf8Variant;
+        } else {
+            unicode.m_variant = invalidAsciiCode;
+        }
+        return unicode;
+    }
+
static constexpr FudUtf8 invalidAscii()
{
FudUtf8 character{};
@@ -460,89 +514,93 @@ struct FudUtf8 {
};
/** \brief Checks if a character is ascii. */
-bool char_is_ascii(char character);
+[[nodiscard]] bool charIsAscii(char character);
-FudStatus utf8_is_ascii(FudUtf8& character, bool& isAscii);
+[[nodiscard]] bool utf8IsAscii(FudUtf8 character); /**< \brief Checks if a FudUtf8 character is ascii. */
/** \brief Checks if a character is alphanumeric. */
-bool char_is_alphanumeric(char character);
+[[nodiscard]] bool charIsAlphanumeric(char character);
/** \brief Checks if a character is alphanumeric. */
-FudStatus utf8_is_alphanumeric(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsAlphanumeric(FudUtf8 character);
/** \brief Checks if a character is alphabetic. */
-bool char_is_alpha(char character);
+[[nodiscard]] bool charIsAlpha(char character);
/** \brief Checks if a character is alphabetic. */
-FudStatus utf8_is_alpha(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsAlpha(FudUtf8 character);
/** \brief Checks if a character is lowercase. */
-bool char_is_lowercase(char character);
+[[nodiscard]] bool charIsLowercase(char character);
/** \brief Checks if a character is lowercase. */
-FudStatus utf8_is_lowercase(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsLowercase(FudUtf8 character);
/** \brief Checks if a character is an uppercase character. */
-bool char_is_uppercase(char character);
+[[nodiscard]] bool charIsUppercase(char character);
/** \brief Checks if a character is uppercase. */
-FudStatus utf8_is_uppercase(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsUppercase(FudUtf8 character);
/** \brief Checks if a character is a digit. */
-bool char_is_digit(char character);
+[[nodiscard]] bool charIsDigit(char character);
/** \brief Checks if a character is a digit. */
-FudStatus utf8_is_digit(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsDigit(FudUtf8 character);
/** \brief Checks if a character is a hexadecimal character. */
-bool char_is_hex_digit(char character);
+[[nodiscard]] bool charIsHexDigit(char character);
/** \brief Checks if a character is a hexadecimal digit. */
-FudStatus utf8_is_hex_digit(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsHexDigit(FudUtf8 character);
/** \brief Checks if a character is a control character. */
-bool char_is_control(char character);
+[[nodiscard]] bool charIsControl(char character);
/** \brief Checks if a character is a control character. */
-FudStatus utf8_is_control(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsControl(FudUtf8 character);
/** \brief Checks if a character is a graphical character. */
-bool char_is_graphical(char character);
+[[nodiscard]] bool charIsGraphical(char character);
/** \brief Checks if a character is a graphical character. */
-FudStatus utf8_is_graphical(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsGraphical(FudUtf8 character);
/** \brief Checks if a character is a space character. */
-bool char_is_space(char character);
+[[nodiscard]] bool charIsSpace(char character);
/** \brief Checks if a character is a space character. */
-FudStatus utf8_is_space(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsSpace(FudUtf8 character);
/** \brief Checks if a character is a blank character. */
-bool char_is_blank(char character);
+[[nodiscard]] bool charIsBlank(char character);
/** \brief Checks if a character is a blank character. */
-FudStatus utf8_is_blank(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsBlank(FudUtf8 character);
/** \brief Checks if a character is a printable character. */
-bool char_is_printable(char character);
+[[nodiscard]] bool charIsPrintable(char character);
/** \brief Checks if a character is a printable character. */
-FudStatus utf8_is_printable(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsPrintable(FudUtf8 character);
/** \brief Checks if a character is a punctuation character. */
-bool char_is_punctuation(char character);
+[[nodiscard]] bool charIsPunctuation(char character);
/** \brief Checks if a character is a punctuation character. */
-FudStatus utf8_is_punctuation(FudUtf8* character, bool* pred);
+[[nodiscard]] bool utf8IsPunctuation(FudUtf8 character);
-uint8_t char_to_lower(uint8_t character);
+/** \brief Converts character to lowercase if valid. */
+uint8_t charToLower(uint8_t character);
-FudUtf8* utf8_to_lower(FudUtf8* character);
+/** \brief Converts character to lowercase if valid. */
+FudUtf8 utf8ToLower(FudUtf8 character);
-uint8_t char_to_upper(uint8_t character);
+/** \brief Converts character to uppercase if valid. */
+uint8_t charToUpper(uint8_t character);
-FudUtf8* utf8_to_upper(FudUtf8* character);
+/** \brief Converts character to uppercase if valid. */
+FudUtf8 utf8ToUpper(FudUtf8 character);
} // namespace fud