/*
 * libfud
 * Copyright 2024 Dominick Allen
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fud_utf8.hpp"

#include "fud_string.hpp"

#include <new> // IWYU pragma: keep - this is for placement new overloads.

namespace fud {

/**
 * \brief Decode the character starting at byte offset `index` of `fudString`.
 *
 * \return invalidAscii() when `fudString.valid()` is false; otherwise the
 * result of fromStringView() on a view of the string.
 */
FudUtf8 FudUtf8::fromString(const String& fudString, size_t index) noexcept
{
    if (!fudString.valid()) {
        return invalidAscii();
    }

    return fromStringView(StringView{fudString}, index);
}

/**
 * \brief Lvalue overload: copies the view and defers to the rvalue overload.
 */
FudUtf8 FudUtf8::fromStringView(const StringView& view, size_t index) noexcept
{
    return fromStringView(StringView{view}, index);
}

/**
 * \brief Decode the character starting at byte offset `index` of `view`.
 *
 * Candidates are tried from shortest to longest (1, 2, 3, then 4 bytes); the
 * first one for which valid() is true is returned. A candidate is only built
 * when all of its bytes lie inside the view.
 *
 * \return The decoded character, or invalidAscii() when the view has no data,
 * `index` is not inside the view, or no candidate is valid.
 */
FudUtf8 FudUtf8::fromStringView(StringView&& view, size_t index) noexcept
{
    auto len = view.length();
    const auto* data = view.data();
    // The first byte is read unconditionally below, so `index` itself must be
    // inside the view. This also guarantees `index + N` cannot wrap around in
    // the length checks that follow.
    if (data == nullptr || index >= len) {
        return invalidAscii();
    }

    FudUtf8 localChar{Ascii{data[index]}};
    if (localChar.valid()) {
        return localChar;
    }

    if (index + 1 < len) {
        localChar.m_variant = Utf82Byte{data[index], data[index + 1]};
    }
    if (localChar.valid()) {
        return localChar;
    }

    if (index + 2 < len) {
        localChar.m_variant = Utf83Byte{data[index], data[index + 1], data[index + 2]};
    }
    if (localChar.valid()) {
        return localChar;
    }

    if (index + 3 < len) {
        localChar.m_variant = Utf84Byte{data[index], data[index + 1], data[index + 2], data[index + 3]};
    }
    if (localChar.valid()) {
        return localChar;
    }

    return invalidAscii();
}

/**
 * \brief True when no bit outside ASCII_MASK is set in `character`.
 */
bool char_is_ascii(char character)
{
    return static_cast<uint8_t>(character & ~ASCII_MASK) == 0;
}

/**
 * \brief Report whether `character` is a valid ASCII character.
 *
 * \param[in] character Character to inspect.
 * \param[out] isAscii Set to true only when the character's type is
 * Utf8Type::Ascii and valid() holds.
 *
 * \return FudStatus::NullPointer if either pointer is null, otherwise
 * FudStatus::Success.
 */
FudStatus utf8_is_ascii(FudUtf8* character, bool* isAscii)
{
    if (anyAreNull(character, isAscii)) {
        return FudStatus::NullPointer;
    }

    *isAscii = character->getType() == Utf8Type::Ascii && character->valid();

    return FudStatus::Success;
}

namespace impl {

/**
 * \brief Shared body of the utf8_is_* predicates.
 *
 * Extracts the ASCII value of `character` and stores `predicate(value)` in
 * `*pred`.
 *
 * Assumes that predicate is not a null pointer!
 *
 * \return FudStatus::NullPointer if `character` or `pred` is null,
 * FudStatus::InvalidInput if getAscii() yields no value (in which case
 * `*pred` is left untouched), otherwise FudStatus::Success.
 */
template <typename Predicate>
inline FudStatus isAsciiPredicate(FudUtf8* character, bool* pred, Predicate&& predicate)
{
    if (anyAreNull(character, pred)) {
        return FudStatus::NullPointer;
    }

    auto maybeAscii = character->getAscii();
    if (!maybeAscii.has_value()) {
        return FudStatus::InvalidInput;
    }

    auto asciiChar = *maybeAscii;
    *pred = std::forward<Predicate>(predicate)(asciiChar.asChar());

    return FudStatus::Success;
}

} // namespace impl

/** \brief True for ASCII letters and decimal digits. */
bool char_is_alphanumeric(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    if (char_is_alpha(character)) {
        return true;
    }

    return char_is_digit(character);
}

/** \brief FudUtf8 wrapper around char_is_alphanumeric; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_alphanumeric(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_alphanumeric);
}

/** \brief True for ASCII letters 'A'-'Z' and 'a'-'z'. */
bool char_is_alpha(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    if (char_is_uppercase(character)) {
        return true;
    }

    return char_is_lowercase(character);
}

/** \brief FudUtf8 wrapper around char_is_alpha; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_alpha(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_alpha);
}

/** \brief True for 'a'-'z'. */
bool char_is_lowercase(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    return 'a' <= character && character <= 'z';
}

/** \brief FudUtf8 wrapper around char_is_lowercase; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_lowercase(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_lowercase);
}

/** \brief True for 'A'-'Z'. */
bool char_is_uppercase(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    return 'A' <= character && character <= 'Z';
}

/** \brief FudUtf8 wrapper around char_is_uppercase; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_uppercase(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_uppercase);
}

/** \brief True for '0'-'9'. */
bool char_is_digit(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    return '0' <= character && character <= '9';
}

/** \brief FudUtf8 wrapper around char_is_digit; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_digit(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_digit);
}

/** \brief True for '0'-'9', 'a'-'f' and 'A'-'F'. */
bool char_is_hex_digit(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    return ('0' <= character && character <= '9') || ('a' <= character && character <= 'f') ||
           ('A' <= character && character <= 'F');
}

/** \brief FudUtf8 wrapper around char_is_hex_digit; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_hex_digit(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_hex_digit);
}

/** \brief True for ASCII characters with value <= 0x1F, and for 0x7F. */
bool char_is_control(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    constexpr char maxControlChar = 0x1F;
    constexpr const char deleteChar = 0x7F;
    return ((static_cast<uint8_t>(character) <= maxControlChar)) || character == deleteChar;
}

/** \brief FudUtf8 wrapper around char_is_control; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_control(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_control);
}

/** \brief True for characters that are alphanumeric or punctuation. */
bool char_is_graphical(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    return char_is_alphanumeric(character) || char_is_punctuation(character);
}

/** \brief FudUtf8 wrapper around char_is_graphical; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_graphical(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_graphical);
}

/**
 * \brief True for space, '\t', '\n', '\r' and '\v'.
 *
 * NOTE(review): form feed ('\f') is not treated as whitespace here, unlike
 * std::isspace in the "C" locale - confirm whether that is intentional.
 */
bool char_is_space(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    return character == ' ' || character == '\t' || character == '\n' || character == '\r' || character == '\v';
}

/** \brief FudUtf8 wrapper around char_is_space; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_space(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_space);
}

/** \brief True for space and '\t'. */
bool char_is_blank(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    return character == ' ' || character == '\t';
}

/** \brief FudUtf8 wrapper around char_is_blank; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_blank(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_blank);
}

/** \brief True for characters in the range ' ' through '~' inclusive. */
bool char_is_printable(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    return (character >= ' ' && character <= '~');
}

/** \brief FudUtf8 wrapper around char_is_printable; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_printable(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_printable);
}

/** \brief True for the ranges '!'-'/', ':'-'@', '['-'`' and '{'-'~'. */
bool char_is_punctuation(char character)
{
    if (!char_is_ascii(character)) {
        return false;
    }

    return (character >= '!' && character <= '/') || (character >= ':' && character <= '@') ||
           (character >= '[' && character <= '`') || (character >= '{' && character <= '~');
}

/** \brief FudUtf8 wrapper around char_is_punctuation; see impl::isAsciiPredicate for the status codes. */
FudStatus utf8_is_punctuation(FudUtf8* character, bool* pred)
{
    return impl::isAsciiPredicate(character, pred, char_is_punctuation);
}

/**
 * \brief Map 'A'-'Z' to 'a'-'z'; every other value is returned unchanged.
 */
uint8_t char_to_lower(uint8_t character)
{
    if (char_is_uppercase(static_cast<char>(character))) {
        constexpr uint8_t lowerA = 'a';
        constexpr uint8_t upperA = 'A';
        return static_cast<uint8_t>(character - upperA) + lowerA;
    }
    return character;
}

/**
 * \brief Lower-case `character` in place via transformAscii().
 *
 * \return The pointer that was passed in (including nullptr, which is
 * returned without being dereferenced).
 */
FudUtf8* utf8_to_lower(FudUtf8* character)
{
    if (character == nullptr) {
        return character;
    }

    // The result of transformAscii() is deliberately discarded.
    // NOTE(review): presumably it reports whether the character held an ASCII
    // value - confirm against its declaration.
    static_cast<void>(character->transformAscii([](Ascii& ascii) {
        ascii = Ascii{char_to_lower(static_cast<uint8_t>(ascii.asChar()))};
    }));

    return character;
}

/**
 * \brief Map 'a'-'z' to 'A'-'Z'; every other value is returned unchanged.
 */
uint8_t char_to_upper(uint8_t character)
{
    if (char_is_lowercase(static_cast<char>(character))) {
        constexpr uint8_t lowerA = 'a';
        constexpr uint8_t upperA = 'A';
        return static_cast<uint8_t>(character - lowerA) + upperA;
    }
    return character;
}

/**
 * \brief Upper-case `character` in place via transformAscii().
 *
 * \return The pointer that was passed in (including nullptr, which is
 * returned without being dereferenced).
 */
FudUtf8* utf8_to_upper(FudUtf8* character)
{
    if (character == nullptr) {
        return character;
    }

    // The result of transformAscii() is deliberately discarded.
    // NOTE(review): presumably it reports whether the character held an ASCII
    // value - confirm against its declaration.
    static_cast<void>(character->transformAscii([](Ascii& ascii) {
        ascii = Ascii{char_to_upper(static_cast<uint8_t>(ascii.asChar()))};
    }));

    return character;
}

} // namespace fud