/*
 * libfud
 * Copyright 2024 Dominick Allen
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "utf8.hpp"

#include "string.hpp"

#include <new> // IWYU pragma: keep - this is for placement new overloads.

namespace fud {

/**
 * Decode the character starting at byte offset `index` of `fudString`.
 *
 * Returns invalidAscii() when the string reports itself as not valid;
 * otherwise defers to the StringView overload.
 */
ExtUtf8 ExtUtf8::fromString(const String& fudString, size_t index) noexcept
{
    if (!fudString.valid()) {
        return invalidAscii();
    }

    return fromStringView(StringView{fudString}, index);
}

/**
 * Lvalue overload: copies `view` into a temporary so that the rvalue
 * overload below performs the actual decoding.
 */
ExtUtf8 ExtUtf8::fromStringView(const StringView& view, size_t index) noexcept
{
    return fromStringView(StringView{view}, index);
}

/**
 * Decode the character starting at byte offset `index` of `view`.
 *
 * Candidates are tried from shortest to longest (ASCII, then 2-, 3- and
 * 4-byte sequences); the first one that reports valid() is returned.
 *
 * Returns invalidAscii() when the view has no data, when `index` is not
 * inside the view, or when no candidate is valid.
 */
ExtUtf8 ExtUtf8::fromStringView(StringView&& view, size_t index) noexcept
{
    const auto len = view.length();
    const auto* data = view.data();
    // Reject an out-of-range index up front: data[index] must never be read
    // past the end of the view.
    if (data == nullptr || index >= len) {
        return invalidAscii();
    }

    // Number of bytes available starting at index; at least 1 here. Comparing
    // against this instead of computing index + k avoids unsigned wrap-around.
    const auto remaining = len - index;

    ExtUtf8 localChar{Ascii{data[index]}};
    if (localChar.valid()) {
        return localChar;
    }

    if (remaining > 1) {
        localChar.m_variant = Utf82Byte{data[index], data[index + 1]};
        if (localChar.valid()) {
            return localChar;
        }
    }

    if (remaining > 2) {
        localChar.m_variant = Utf83Byte{data[index], data[index + 1], data[index + 2]};
        if (localChar.valid()) {
            return localChar;
        }
    }

    if (remaining > 3) {
        localChar.m_variant = Utf84Byte{data[index], data[index + 1], data[index + 2], data[index + 3]};
        if (localChar.valid()) {
            return localChar;
        }
    }

    return invalidAscii();
}

/** True when `character` has no bits set outside of ASCII_MASK. */
bool ext_lib_char_is_ascii(char character)
{
    return static_cast<uint8_t>(character & ~ASCII_MASK) == 0;
}

/**
 * Writes to `*isAscii` whether `character` has type ExtUtf8Type::Ascii and
 * is valid.
 *
 * Returns FudStatus::NullPointer (without writing) when anyAreNull flags
 * either argument, FudStatus::Success otherwise.
 */
FudStatus ext_lib_utf8_is_ascii(ExtUtf8* character, bool* isAscii)
{
    if (anyAreNull(character, isAscii)) {
        return FudStatus::NullPointer;
    }

    *isAscii = character->getType() == ExtUtf8Type::Ascii && character->valid();

    return FudStatus::Success;
}

namespace impl {

/**
 * Shared implementation of the ext_lib_utf8_is_* wrappers: evaluates
 * `predicate` on the ASCII value held by `character` and stores the result
 * in `*pred`.
 *
 * Returns FudStatus::NullPointer when anyAreNull flags `character` or `pred`,
 * FudStatus::InvalidInput when getAscii() yields no value, and
 * FudStatus::Success otherwise. `*pred` is only written on success.
 *
 * Assumes that predicate is not a null pointer!
 */
template <typename Predicate>
inline FudStatus isAsciiPredicate(ExtUtf8* character, bool* pred, Predicate&& predicate)
{
    if (anyAreNull(character, pred)) {
        return FudStatus::NullPointer;
    }

    auto maybeAscii = character->getAscii();
    if (!maybeAscii.has_value()) {
        return FudStatus::InvalidInput;
    }

    auto asciiChar = *maybeAscii;
    *pred = std::forward<Predicate>(predicate)(asciiChar.asChar());

    return FudStatus::Success;
}

} // namespace impl

/** True when `character` is an ASCII letter or an ASCII decimal digit. */
bool ext_lib_char_is_alphanumeric(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    if (ext_lib_char_is_alpha(character)) {
        return true;
    }

    return ext_lib_char_is_digit(character);
}

/** ExtUtf8 wrapper around ext_lib_char_is_alphanumeric; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_alphanumeric(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_alphanumeric);
}

/** True when `character` is an ASCII uppercase or lowercase letter. */
bool ext_lib_char_is_alpha(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    if (ext_lib_char_is_uppercase(character)) {
        return true;
    }

    return ext_lib_char_is_lowercase(character);
}

/** ExtUtf8 wrapper around ext_lib_char_is_alpha; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_alpha(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_alpha);
}

/** True when `character` lies in the range 'a'..'z'. */
bool ext_lib_char_is_lowercase(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    return 'a' <= character && character <= 'z';
}

/** ExtUtf8 wrapper around ext_lib_char_is_lowercase; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_lowercase(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_lowercase);
}

/** True when `character` lies in the range 'A'..'Z'. */
bool ext_lib_char_is_uppercase(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    return 'A' <= character && character <= 'Z';
}

/** ExtUtf8 wrapper around ext_lib_char_is_uppercase; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_uppercase(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_uppercase);
}

/** True when `character` lies in the range '0'..'9'. */
bool ext_lib_char_is_digit(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    return '0' <= character && character <= '9';
}

/** ExtUtf8 wrapper around ext_lib_char_is_digit; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_digit(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_digit);
}

/** True when `character` is one of '0'..'9', 'a'..'f' or 'A'..'F'. */
bool ext_lib_char_is_hex_digit(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    return ('0' <= character && character <= '9') || ('a' <= character && character <= 'f') ||
           ('A' <= character && character <= 'F');
}

/** ExtUtf8 wrapper around ext_lib_char_is_hex_digit; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_hex_digit(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_hex_digit);
}

/** True when `character` is at most 0x1F or equals 0x7F (DEL). */
bool ext_lib_char_is_control(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    constexpr char maxControlChar = 0x1F;
    constexpr const char deleteChar = 0x7F;
    return ((static_cast<uint8_t>(character) <= maxControlChar)) || character == deleteChar;
}

/** ExtUtf8 wrapper around ext_lib_char_is_control; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_control(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_control);
}

/** True when `character` is alphanumeric or punctuation (so a space is not graphical). */
bool ext_lib_char_is_graphical(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    return ext_lib_char_is_alphanumeric(character) || ext_lib_char_is_punctuation(character);
}

/** ExtUtf8 wrapper around ext_lib_char_is_graphical; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_graphical(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_graphical);
}

/**
 * True when `character` is ' ', '\t', '\n', '\r' or '\v'.
 *
 * NOTE(review): form feed ('\f'), which C's isspace() accepts, is not in this
 * set - confirm whether the omission is intentional.
 */
bool ext_lib_char_is_space(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    return character == ' ' || character == '\t' || character == '\n' || character == '\r' || character == '\v';
}

/** ExtUtf8 wrapper around ext_lib_char_is_space; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_space(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_space);
}

/** True when `character` is ' ' or '\t'. */
bool ext_lib_char_is_blank(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    return character == ' ' || character == '\t';
}

/** ExtUtf8 wrapper around ext_lib_char_is_blank; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_blank(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_blank);
}

/** True when `character` lies in the range ' '..'~'. */
bool ext_lib_char_is_printable(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    return (character >= ' ' && character <= '~');
}

/** ExtUtf8 wrapper around ext_lib_char_is_printable; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_printable(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_printable);
}

/** True when `character` lies in one of the ranges '!'..'/', ':'..'@', '['..'`' or '{'..'~'. */
bool ext_lib_char_is_punctuation(char character)
{
    if (!ext_lib_char_is_ascii(character)) {
        return false;
    }

    return (character >= '!' && character <= '/') || (character >= ':' && character <= '@') ||
           (character >= '[' && character <= '`') || (character >= '{' && character <= '~');
}

/** ExtUtf8 wrapper around ext_lib_char_is_punctuation; see impl::isAsciiPredicate for the status codes. */
FudStatus ext_lib_utf8_is_punctuation(ExtUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, ext_lib_char_is_punctuation);
}

/** Maps 'A'..'Z' to 'a'..'z'; every other value is returned unchanged. */
uint8_t ext_lib_char_to_lower(uint8_t character)
{
    if (ext_lib_char_is_uppercase(static_cast<char>(character))) {
        constexpr uint8_t lowerA = 'a';
        constexpr uint8_t upperA = 'A';
        return static_cast<uint8_t>(character - upperA) + lowerA;
    }
    return character;
}

/**
 * Lower-cases `character` in place through ExtUtf8::transformAscii and
 * returns the same pointer. A null pointer is returned as-is.
 */
ExtUtf8* ext_lib_utf8_to_lower(ExtUtf8* character)
{
    if (character == nullptr) {
        return character;
    }

    // The result of transformAscii is deliberately discarded.
    // NOTE(review): presumably it leaves non-ASCII values untouched - confirm
    // against ExtUtf8::transformAscii.
    static_cast<void>(character->transformAscii(
        [](Ascii& ascii) { ascii = Ascii{ext_lib_char_to_lower(static_cast<uint8_t>(ascii.asChar()))}; }));

    return character;
}

/** Maps 'a'..'z' to 'A'..'Z'; every other value is returned unchanged. */
uint8_t ext_lib_char_to_upper(uint8_t character)
{
    if (ext_lib_char_is_lowercase(static_cast<char>(character))) {
        constexpr uint8_t lowerA = 'a';
        constexpr uint8_t upperA = 'A';
        return static_cast<uint8_t>(character - lowerA) + upperA;
    }
    return character;
}

/**
 * Upper-cases `character` in place through ExtUtf8::transformAscii and
 * returns the same pointer. A null pointer is returned as-is.
 */
ExtUtf8* ext_lib_utf8_to_upper(ExtUtf8* character)
{
    if (character == nullptr) {
        return character;
    }

    // The result of transformAscii is deliberately discarded.
    // NOTE(review): presumably it leaves non-ASCII values untouched - confirm
    // against ExtUtf8::transformAscii.
    static_cast<void>(character->transformAscii(
        [](Ascii& ascii) { ascii = Ascii{ext_lib_char_to_upper(static_cast<uint8_t>(ascii.asChar()))}; }));

    return character;
}

} // namespace fud