summary refs log tree commit diff
path: root/source/utf8.cpp
diff options
context:
space:
mode:
author Dominick Allen <djallen@librehumanitas.org> 2024-09-22 12:41:28 -0500
committer Dominick Allen <djallen@librehumanitas.org> 2024-09-22 12:41:28 -0500
commit 7da829d48f9059c83ab9cada2c850621e8bbd3f3 (patch)
tree 314e7a5b645e910d4997e3bee980bd2024f3087d /source/utf8.cpp
parent bf81e34921e3e30b05313efbcf5c9fa839cb7c05 (diff)
Basics of library.
Diffstat (limited to 'source/utf8.cpp')
-rw-r--r-- source/utf8.cpp 343
1 files changed, 0 insertions, 343 deletions
diff --git a/source/utf8.cpp b/source/utf8.cpp
deleted file mode 100644
index c94ac1f..0000000
--- a/source/utf8.cpp
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- * libfud
- * Copyright 2024 Dominick Allen
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "utf8.hpp"
-
-#include "string.hpp"
-
-#include <new> // IWYU pragma: keep - this is for placement new overloads.
-
-namespace fud {
-
-/**
- * Builds an ExtUtf8 from `fudString`, starting at position `index`.
- *
- * Returns invalidAscii() when the string reports itself as not valid;
- * otherwise the work is delegated to fromStringView on a view of the string.
- */
-ExtUtf8 ExtUtf8::fromString(const String& fudString, size_t index) noexcept
-{
-    if (fudString.valid()) {
-        return fromStringView(StringView{fudString}, index);
-    }
-
-    return invalidAscii();
-}
-
-/**
- * Lvalue overload: copies `view` and forwards the copy, together with `index`,
- * to the StringView&& overload, whose result is returned unchanged.
- */
-ExtUtf8 ExtUtf8::fromStringView(const StringView& view, size_t index) noexcept
-{
- // StringView{view} is a temporary, so this call selects the StringView&&
- // overload instead of recursing into this one.
- return fromStringView(StringView{view}, index);
-}
-
-/**
- * Decodes the character that starts at position `index` of `view`.
- *
- * Candidates are tried in order of increasing width: Ascii, Utf82Byte,
- * Utf83Byte, Utf84Byte. The first candidate that reports itself valid is
- * returned. A wider candidate is only built when all of its elements lie
- * inside the view.
- *
- * @param view  Source of the data; a null data pointer yields invalidAscii().
- * @param index Position of the first element; when it is not less than
- *              view.length(), invalidAscii() is returned.
- * @return The decoded character, or invalidAscii() when no candidate is valid.
- */
-ExtUtf8 ExtUtf8::fromStringView(StringView&& view, size_t index) noexcept
-{
-    const auto len = view.length();
-    const auto* data = view.data();
-    if (data == nullptr) {
-        return invalidAscii();
-    }
-
-    // data[index] is read below, so the start position must lie inside the view.
-    if (index >= len) {
-        return invalidAscii();
-    }
-
-    ExtUtf8 localChar{Ascii{data[index]}};
-    if (localChar.valid()) {
-        return localChar;
-    }
-
-    if (index + 1 < len) {
-        localChar.m_variant = Utf82Byte{data[index], data[index + 1]};
-        if (localChar.valid()) {
-            return localChar;
-        }
-    }
-
-    if (index + 2 < len) {
-        localChar.m_variant = Utf83Byte{data[index], data[index + 1], data[index + 2]};
-        if (localChar.valid()) {
-            return localChar;
-        }
-    }
-
-    if (index + 3 < len) {
-        localChar.m_variant = Utf84Byte{data[index], data[index + 1], data[index + 2], data[index + 3]};
-        if (localChar.valid()) {
-            return localChar;
-        }
-    }
-
-    return invalidAscii();
-}
-
-/** Returns true when `character` has no bits set outside ASCII_MASK. */
-bool ext_lib_char_is_ascii(char character)
-{
-    const auto bitsOutsideMask = static_cast<uint8_t>(character & ~ASCII_MASK);
-    return bitsOutsideMask == 0;
-}
-
-/**
- * Writes to `*isAscii` whether `character` has type ExtUtf8Type::Ascii and is valid.
- *
- * @return FudStatus::NullPointer when either pointer is null, otherwise
- *         FudStatus::Success.
- */
-FudStatus ext_lib_utf8_is_ascii(ExtUtf8* character, bool* isAscii)
-{
-    if (anyAreNull(character, isAscii)) {
-        return FudStatus::NullPointer;
-    }
-
-    // valid() is only consulted when the type check has already passed.
-    const bool typedAsAscii = character->getType() == ExtUtf8Type::Ascii;
-    *isAscii = typedAsAscii && character->valid();
-
-    return FudStatus::Success;
-}
-
-namespace impl {
-
-/**
- * Shared body of the ext_lib_utf8_is_* functions.
- *
- * Extracts the ASCII value held by `character`, applies `predicate` to it and
- * stores the outcome in `*pred`.
- *
- * Assumes that predicate is not a null pointer!
- *
- * @return FudStatus::NullPointer when `character` or `pred` is null,
- *         FudStatus::InvalidInput when `character` yields no ASCII value
- *         (`*pred` is left untouched), FudStatus::Success otherwise.
- */
-template <typename Predicate>
-inline FudStatus isAsciiPredicate(ExtUtf8* character, bool* pred, Predicate&& predicate)
-{
-    if (anyAreNull(character, pred)) {
-        return FudStatus::NullPointer;
-    }
-
-    auto asciiOption = character->getAscii();
-    if (asciiOption.has_value()) {
-        auto asciiValue = *asciiOption;
-        *pred = std::forward<Predicate>(predicate)(asciiValue.asChar());
-        return FudStatus::Success;
-    }
-
-    return FudStatus::InvalidInput;
-}
-
-} // namespace impl
-
-/** Returns true when `character` is ASCII and is either alphabetic or a decimal digit. */
-bool ext_lib_char_is_alphanumeric(char character)
-{
-    return ext_lib_char_is_ascii(character) &&
-           (ext_lib_char_is_alpha(character) || ext_lib_char_is_digit(character));
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_alphanumeric.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_alphanumeric(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_alphanumeric);
-}
-
-/** Returns true when `character` is ASCII and is an uppercase or lowercase letter. */
-bool ext_lib_char_is_alpha(char character)
-{
-    return ext_lib_char_is_ascii(character) &&
-           (ext_lib_char_is_uppercase(character) || ext_lib_char_is_lowercase(character));
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_alpha.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_alpha(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_alpha);
-}
-
-/** Returns true when `character` is ASCII and lies in the range 'a' through 'z'. */
-bool ext_lib_char_is_lowercase(char character)
-{
-    return ext_lib_char_is_ascii(character) && character >= 'a' && character <= 'z';
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_lowercase.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_lowercase(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_lowercase);
-}
-
-/** Returns true when `character` is ASCII and lies in the range 'A' through 'Z'. */
-bool ext_lib_char_is_uppercase(char character)
-{
-    return ext_lib_char_is_ascii(character) && character >= 'A' && character <= 'Z';
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_uppercase.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_uppercase(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_uppercase);
-}
-
-/** Returns true when `character` is ASCII and lies in the range '0' through '9'. */
-bool ext_lib_char_is_digit(char character)
-{
-    return ext_lib_char_is_ascii(character) && character >= '0' && character <= '9';
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_digit.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_digit(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_digit);
-}
-
-/**
- * Returns true when `character` is ASCII and is one of '0'-'9', 'a'-'f' or
- * 'A'-'F'.
- */
-bool ext_lib_char_is_hex_digit(char character)
-{
-    if (!ext_lib_char_is_ascii(character)) {
-        return false;
-    }
-
-    const bool isDecimal = character >= '0' && character <= '9';
-    const bool isLowerHex = character >= 'a' && character <= 'f';
-    const bool isUpperHex = character >= 'A' && character <= 'F';
-    return isDecimal || isLowerHex || isUpperHex;
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_hex_digit.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_hex_digit(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_hex_digit);
-}
-
-/**
- * Returns true when `character` is ASCII and is either at most 0x1F or equal
- * to 0x7F.
- */
-bool ext_lib_char_is_control(char character)
-{
-    constexpr char lastLowControl = 0x1F;
-    constexpr char deleteChar = 0x7F;
-
-    if (!ext_lib_char_is_ascii(character)) {
-        return false;
-    }
-
-    const bool inLowRange = static_cast<uint8_t>(character) <= lastLowControl;
-    return inLowRange || character == deleteChar;
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_control.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_control(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_control);
-}
-
-/** Returns true when `character` is ASCII and is alphanumeric or punctuation. */
-bool ext_lib_char_is_graphical(char character)
-{
-    if (!ext_lib_char_is_ascii(character)) {
-        return false;
-    }
-
-    if (ext_lib_char_is_alphanumeric(character)) {
-        return true;
-    }
-
-    return ext_lib_char_is_punctuation(character);
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_graphical.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_graphical(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_graphical);
-}
-
-/**
- * Returns true when `character` is an ASCII whitespace character: space,
- * horizontal tab, line feed, vertical tab, form feed or carriage return.
- *
- * Form feed ('\f') is part of the set so that it agrees with the whitespace
- * class of the C locale's isspace.
- */
-bool ext_lib_char_is_space(char character)
-{
-    if (!ext_lib_char_is_ascii(character)) {
-        return false;
-    }
-
-    switch (character) {
-    case ' ':
-    case '\t':
-    case '\n':
-    case '\v':
-    case '\f':
-    case '\r':
-        return true;
-    default:
-        return false;
-    }
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_space.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_space(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_space);
-}
-
-/** Returns true when `character` is ASCII and is a space or a horizontal tab. */
-bool ext_lib_char_is_blank(char character)
-{
-    if (!ext_lib_char_is_ascii(character)) {
-        return false;
-    }
-
-    switch (character) {
-    case ' ':
-    case '\t':
-        return true;
-    default:
-        return false;
-    }
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_blank.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_blank(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_blank);
-}
-
-/** Returns true when `character` is ASCII and lies in the range ' ' through '~'. */
-bool ext_lib_char_is_printable(char character)
-{
-    return ext_lib_char_is_ascii(character) && ' ' <= character && character <= '~';
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_printable.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_printable(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_printable);
-}
-
-/**
- * Returns true when `character` is ASCII and falls in one of the four ranges
- * '!'-'/', ':'-'@', '['-'`' or '{'-'~'.
- */
-bool ext_lib_char_is_punctuation(char character)
-{
-    if (!ext_lib_char_is_ascii(character)) {
-        return false;
-    }
-
-    const bool beforeDigits = '!' <= character && character <= '/';
-    const bool beforeUpper = ':' <= character && character <= '@';
-    const bool beforeLower = '[' <= character && character <= '`';
-    const bool afterLower = '{' <= character && character <= '~';
-    return beforeDigits || beforeUpper || beforeLower || afterLower;
-}
-
-/**
- * Reports through `pred` whether the ASCII value held by `character` satisfies
- * ext_lib_char_is_punctuation.
- *
- * Delegates to impl::isAsciiPredicate and returns its status unchanged.
- */
-FudStatus ext_lib_utf8_is_punctuation(ExtUtf8* character, bool* pred)
-{
- return impl::isAsciiPredicate(character, pred, ext_lib_char_is_punctuation);
-}
-
-/**
- * Maps an uppercase ASCII letter to its lowercase counterpart; every other
- * value is returned unchanged.
- */
-uint8_t ext_lib_char_to_lower(uint8_t character)
-{
-    constexpr uint8_t lowerA = 'a';
-    constexpr uint8_t upperA = 'A';
-
-    if (!ext_lib_char_is_uppercase(static_cast<char>(character))) {
-        return character;
-    }
-
-    // Distance from 'A' is re-applied starting at 'a'.
-    const auto letterIndex = static_cast<uint8_t>(character - upperA);
-    return static_cast<uint8_t>(letterIndex + lowerA);
-}
-
-/**
- * Lowercases `character` in place through ExtUtf8::transformAscii and returns
- * the same pointer; a null pointer is returned as-is.
- *
- * The result of transformAscii is deliberately discarded.
- * NOTE(review): presumably transformAscii leaves non-ASCII characters
- * untouched - confirm against its definition.
- */
-ExtUtf8* ext_lib_utf8_to_lower(ExtUtf8* character)
-{
-    if (character != nullptr) {
-        static_cast<void>(character->transformAscii([](Ascii& value) {
-            value = Ascii{ext_lib_char_to_lower(static_cast<uint8_t>(value.asChar()))};
-        }));
-    }
-
-    return character;
-}
-
-/**
- * Maps a lowercase ASCII letter to its uppercase counterpart; every other
- * value is returned unchanged.
- */
-uint8_t ext_lib_char_to_upper(uint8_t character)
-{
-    constexpr uint8_t lowerA = 'a';
-    constexpr uint8_t upperA = 'A';
-
-    if (!ext_lib_char_is_lowercase(static_cast<char>(character))) {
-        return character;
-    }
-
-    // Distance from 'a' is re-applied starting at 'A'.
-    const auto letterIndex = static_cast<uint8_t>(character - lowerA);
-    return static_cast<uint8_t>(letterIndex + upperA);
-}
-
-/**
- * Uppercases `character` in place through ExtUtf8::transformAscii and returns
- * the same pointer; a null pointer is returned as-is.
- *
- * The result of transformAscii is deliberately discarded.
- * NOTE(review): presumably transformAscii leaves non-ASCII characters
- * untouched - confirm against its definition.
- */
-ExtUtf8* ext_lib_utf8_to_upper(ExtUtf8* character)
-{
-    if (character != nullptr) {
-        static_cast<void>(character->transformAscii([](Ascii& value) {
-            value = Ascii{ext_lib_char_to_upper(static_cast<uint8_t>(value.asChar()))};
-        }));
-    }
-
-    return character;
-}
-
-} // namespace fud