diff options
Diffstat (limited to 'source/utf8.cpp')
-rw-r--r-- | source/utf8.cpp | 343 |
1 files changed, 0 insertions, 343 deletions
diff --git a/source/utf8.cpp b/source/utf8.cpp deleted file mode 100644 index c94ac1f..0000000 --- a/source/utf8.cpp +++ /dev/null @@ -1,343 +0,0 @@ -/* - * libfud - * Copyright 2024 Dominick Allen - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "utf8.hpp" - -#include "string.hpp" - -#include <new> // IWYU pragma: keep - this is for placement new overloads. - -namespace fud { - -ExtUtf8 ExtUtf8::fromString(const String& fudString, size_t index) noexcept -{ - if (!fudString.valid()) { - return invalidAscii(); - } - - - return fromStringView(StringView{fudString}, index); -} - -ExtUtf8 ExtUtf8::fromStringView(const StringView& view, size_t index) noexcept -{ - return fromStringView(StringView{view}, index); -} - -ExtUtf8 ExtUtf8::fromStringView(StringView&& view, size_t index) noexcept -{ - auto len = view.length(); - const auto* data = view.data(); - if (data == nullptr) { - return invalidAscii(); - } - - ExtUtf8 localChar{Ascii{data[index]}}; - if (localChar.valid()) { - return localChar; - } - - if (index + 1 < len) { - localChar.m_variant = Utf82Byte{data[index], data[index + 1]}; - } - if (localChar.valid()) { - return localChar; - } - - if (index + 2 < len) { - localChar.m_variant = Utf83Byte{data[index], data[index + 1], data[index + 2]}; - } - if (localChar.valid()) { - return localChar; - } - - if (index + 3 < len) { - localChar.m_variant = Utf84Byte{data[index], data[index + 1], data[index + 2], data[index + 3]}; - } - if (localChar.valid()) { - return localChar; - } - - return invalidAscii(); -} - -bool ext_lib_char_is_ascii(char character) -{ - return static_cast<uint8_t>(character & ~ASCII_MASK) == 0; -} - -FudStatus ext_lib_utf8_is_ascii(ExtUtf8* character, bool* isAscii) -{ - if (anyAreNull(character, isAscii)) { - return FudStatus::NullPointer; - } - - *isAscii = character->getType() == ExtUtf8Type::Ascii && character->valid(); - - return FudStatus::Success; -} - -namespace impl { - -/* Assumes that predicate is not a null pointer! */ -template <typename Predicate> -inline FudStatus isAsciiPredicate(ExtUtf8* character, bool* pred, Predicate&& predicate) -{ - if (anyAreNull(character, pred)) { - return FudStatus::NullPointer; - } - - auto maybeAscii = character->getAscii(); - if (!maybeAscii.has_value()) { - return FudStatus::InvalidInput; - } - - auto asciiChar = *maybeAscii; - *pred = std::forward<Predicate>(predicate)(asciiChar.asChar()); - - return FudStatus::Success; -} - -} // namespace impl - -bool ext_lib_char_is_alphanumeric(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - if (ext_lib_char_is_alpha(character)) { - return true; - } - - return ext_lib_char_is_digit(character); -} - -FudStatus ext_lib_utf8_is_alphanumeric(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_alphanumeric); -} - -bool ext_lib_char_is_alpha(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - if (ext_lib_char_is_uppercase(character)) { - return true; - } - - return ext_lib_char_is_lowercase(character); -} - -FudStatus ext_lib_utf8_is_alpha(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_alpha); -} - -bool ext_lib_char_is_lowercase(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return 'a' <= character && character <= 'z'; -} - -FudStatus ext_lib_utf8_is_lowercase(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_lowercase); -} - -bool ext_lib_char_is_uppercase(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return 'A' <= character && character <= 'Z'; -} - -FudStatus ext_lib_utf8_is_uppercase(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_uppercase); -} - -bool ext_lib_char_is_digit(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return '0' <= character && character <= '9'; -} - -FudStatus ext_lib_utf8_is_digit(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_digit); -} - -bool ext_lib_char_is_hex_digit(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return ('0' <= character && character <= '9') || ('a' <= character && character <= 'f') || - ('A' <= character && character <= 'F'); -} - -FudStatus ext_lib_utf8_is_hex_digit(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_hex_digit); -} - -bool ext_lib_char_is_control(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - constexpr char maxControlChar = 0x1F; - constexpr const char deleteChar = 0x7F; - return ((static_cast<uint8_t>(character) <= maxControlChar)) || character == deleteChar; -} - -FudStatus ext_lib_utf8_is_control(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_control); -} - -bool ext_lib_char_is_graphical(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return ext_lib_char_is_alphanumeric(character) || ext_lib_char_is_punctuation(character); -} - -FudStatus ext_lib_utf8_is_graphical(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_graphical); -} - -bool ext_lib_char_is_space(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return character == ' ' || character == '\t' || character == '\n' || character == '\r' || character == '\v'; -} - -FudStatus ext_lib_utf8_is_space(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_space); -} - -bool ext_lib_char_is_blank(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return character == ' ' || character == '\t'; -} - -FudStatus ext_lib_utf8_is_blank(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_blank); -} - -bool ext_lib_char_is_printable(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return (character >= ' ' && character <= '~'); -} - -FudStatus ext_lib_utf8_is_printable(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_printable); -} - -bool ext_lib_char_is_punctuation(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return (character >= '!' && character <= '/') || (character >= ':' && character <= '@') || - (character >= '[' && character <= '`') || (character >= '{' && character <= '~'); -} - -FudStatus ext_lib_utf8_is_punctuation(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_punctuation); -} - -uint8_t ext_lib_char_to_lower(uint8_t character) -{ - if (ext_lib_char_is_uppercase(static_cast<char>(character))) { - constexpr uint8_t lowerA = 'a'; - constexpr uint8_t upperA = 'A'; - return static_cast<uint8_t>(character - upperA) + lowerA; - } - return character; -} - -ExtUtf8* ext_lib_utf8_to_lower(ExtUtf8* character) -{ - if (character == nullptr) { - return character; - } - - static_cast<void>(character->transformAscii([](Ascii& ascii) { - ascii = Ascii{ext_lib_char_to_lower(static_cast<uint8_t>(ascii.asChar()))}; - })); - - return character; -} - -uint8_t ext_lib_char_to_upper(uint8_t character) -{ - if (ext_lib_char_is_lowercase(static_cast<char>(character))) { - constexpr uint8_t lowerA = 'a'; - constexpr uint8_t upperA = 'A'; - return static_cast<uint8_t>(character - lowerA) + upperA; - } - return character; -} - -ExtUtf8* ext_lib_utf8_to_upper(ExtUtf8* character) -{ - if (character == nullptr) { - return character; - } - - static_cast<void>(character->transformAscii([](Ascii& ascii) { - ascii = Ascii{ext_lib_char_to_upper(static_cast<uint8_t>(ascii.asChar()))}; - })); - - return character; -} - -} // namespace ext_lib |