/*
 * libfud
 * Copyright 2024 Dominick Allen
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fud_utf8.hpp"

#include "fud_string.hpp"

namespace fud {

/**
 * Build a FudUtf8 from the bytes of `fudString` starting at byte `index`.
 *
 * Returns invalidAscii() when `fudString` reports itself as not valid;
 * otherwise defers to the StringView overload.
 */
FudUtf8 FudUtf8::from(const String& fudString, size_t index) noexcept
{
    if (!fudString.valid()) {
        return invalidAscii();
    }

    return from(StringView{fudString}, index);
}

/**
 * Build a FudUtf8 from the bytes of `view` starting at byte `index`.
 *
 * Candidate encodings are tried from shortest to longest (1, 2, 3, then 4
 * bytes); the first candidate whose valid() check passes is returned. A longer
 * candidate is only attempted when the view holds enough bytes for it.
 *
 * Returns invalidAscii() when the view's data pointer is null, when `index`
 * is not inside the view, or when no candidate is valid.
 */
FudUtf8 FudUtf8::from(StringView view, size_t index) noexcept
{
    const auto len = view.length();
    const auto* vData = view.data();

    // Reject a null buffer and any index at or past the end: reading
    // vData[index] in those cases would be an out-of-bounds access.
    if (vData == nullptr || index >= len) {
        return invalidAscii();
    }

    // Number of bytes available from `index` to the end of the view. Because
    // index < len this is at least 1 and the subtraction cannot wrap, unlike
    // an `index + k < len` comparison.
    const auto remaining = len - index;

    FudUtf8 localChar{Ascii{vData[index]}};
    if (localChar.valid()) {
        return localChar;
    }

    if (remaining > 1) {
        localChar.m_variant = Utf82Byte{vData[index], vData[index + 1]};
        if (localChar.valid()) {
            return localChar;
        }
    }

    if (remaining > 2) {
        localChar.m_variant = Utf83Byte{vData[index], vData[index + 1], vData[index + 2]};
        if (localChar.valid()) {
            return localChar;
        }
    }

    if (remaining > 3) {
        localChar.m_variant = Utf84Byte{vData[index], vData[index + 1], vData[index + 2], vData[index + 3]};
        if (localChar.valid()) {
            return localChar;
        }
    }

    return invalidAscii();
}

namespace classify {

// Each classifier below comes in three overloads:
//  - char:    forwards to the utf8 overload.
//  - utf8:    the actual test on a single byte; always false for bytes that
//             fail isAscii().
//  - FudUtf8: applies the char overload to the contained ASCII character via
//             impl::isAsciiPredicate, and is false when there is none.

/** char overload of isAscii; forwards to the utf8 overload. */
bool isAscii(char character)
{
    return isAscii(static_cast<utf8>(character));
}

/** True when `character` has no bits set outside ASCII_MASK. */
bool isAscii(utf8 character)
{
    return (character & ~ASCII_MASK) == 0;
}

/** True when `character` has type Utf8Type::Ascii and is valid. */
bool isAscii(FudUtf8 character)
{
    return character.getType() == Utf8Type::Ascii && character.valid();
}

namespace impl {

/**
 * Apply `predicate` to the ASCII character held by `character`.
 *
 * Returns false when getAscii() yields no value; otherwise returns the result
 * of `predicate` on the contained character.
 */
bool isAsciiPredicate(FudUtf8 character, bool (*predicate)(char))
{
    auto maybeAscii = character.getAscii();
    if (!maybeAscii.has_value()) {
        return false;
    }
    auto asciiChar = *maybeAscii;
    return predicate(asciiChar.asChar());
}

} // namespace impl

/** char overload of isAlphanumeric; forwards to the utf8 overload. */
bool isAlphanumeric(char character)
{
    return isAlphanumeric(static_cast<utf8>(character));
}

/** True for ASCII bytes that satisfy isAlpha() or isDigit(). */
bool isAlphanumeric(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    if (isAlpha(character)) {
        return true;
    }

    return isDigit(character);
}

/** FudUtf8 overload of isAlphanumeric. */
bool isAlphanumeric(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isAlphanumeric);
}

/** char overload of isAlpha; forwards to the utf8 overload. */
bool isAlpha(char character)
{
    return isAlpha(static_cast<utf8>(character));
}

/** True for ASCII bytes that satisfy isUppercase() or isLowercase(). */
bool isAlpha(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    if (isUppercase(character)) {
        return true;
    }

    return isLowercase(character);
}

/** FudUtf8 overload of isAlpha. */
bool isAlpha(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isAlpha);
}

/** char overload of isLowercase; forwards to the utf8 overload. */
bool isLowercase(char character)
{
    return isLowercase(static_cast<utf8>(character));
}

/** True for ASCII bytes in the range 'a'..'z'. */
bool isLowercase(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    return 'a' <= character && character <= 'z';
}

/** FudUtf8 overload of isLowercase. */
bool isLowercase(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isLowercase);
}

/** char overload of isUppercase; forwards to the utf8 overload. */
bool isUppercase(char character)
{
    return isUppercase(static_cast<utf8>(character));
}

/** True for ASCII bytes in the range 'A'..'Z'. */
bool isUppercase(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    return 'A' <= character && character <= 'Z';
}

/** FudUtf8 overload of isUppercase. */
bool isUppercase(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isUppercase);
}

/** char overload of isDigit; forwards to the utf8 overload. */
bool isDigit(char character)
{
    return isDigit(static_cast<utf8>(character));
}

/** True for ASCII bytes in the range '0'..'9'. */
bool isDigit(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    return '0' <= character && character <= '9';
}

/** FudUtf8 overload of isDigit. */
bool isDigit(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isDigit);
}

/** char overload of isHexDigit; forwards to the utf8 overload. */
bool isHexDigit(char character)
{
    return isHexDigit(static_cast<utf8>(character));
}

/** True for ASCII bytes in '0'..'9', 'a'..'f' or 'A'..'F'. */
bool isHexDigit(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    return ('0' <= character && character <= '9') || ('a' <= character && character <= 'f') ||
           ('A' <= character && character <= 'F');
}

/** FudUtf8 overload of isHexDigit. */
bool isHexDigit(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isHexDigit);
}

/** char overload of isControl; forwards to the utf8 overload. */
bool isControl(char character)
{
    return isControl(static_cast<utf8>(character));
}

/** True for ASCII bytes that are at most 0x1F, or equal to 0x7F. */
bool isControl(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    constexpr char maxControlChar = 0x1F;
    constexpr const char deleteChar = 0x7F;
    // The isAscii() guard above has already rejected bytes with bits outside
    // ASCII_MASK before this narrowing cast to char.
    return ((static_cast<char>(character) <= maxControlChar)) || character == deleteChar;
}

/** FudUtf8 overload of isControl. */
bool isControl(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isControl);
}

/** char overload of isGraphical; forwards to the utf8 overload. */
bool isGraphical(char character)
{
    return isGraphical(static_cast<utf8>(character));
}

/** True for ASCII bytes that satisfy isAlphanumeric() or isPunctuation(). */
bool isGraphical(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    return isAlphanumeric(character) || isPunctuation(character);
}

/** FudUtf8 overload of isGraphical. */
bool isGraphical(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isGraphical);
}

/** char overload of isSpace; forwards to the utf8 overload. */
bool isSpace(char character)
{
    return isSpace(static_cast<utf8>(character));
}

/**
 * True for ' ', '\t', '\n', '\r' and '\v'.
 *
 * NOTE(review): form feed ('\f') is not in this set, whereas std::isspace in
 * the "C" locale accepts it - confirm whether the omission is intentional.
 */
bool isSpace(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    return character == ' ' || character == '\t' || character == '\n' || character == '\r' || character == '\v';
}

/** FudUtf8 overload of isSpace. */
bool isSpace(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isSpace);
}

/** char overload of isBlank; forwards to the utf8 overload. */
bool isBlank(char character)
{
    return isBlank(static_cast<utf8>(character));
}

/** True for ' ' and '\t'. */
bool isBlank(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    return character == ' ' || character == '\t';
}

/** FudUtf8 overload of isBlank. */
bool isBlank(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isBlank);
}

/** char overload of isPrintable; forwards to the utf8 overload. */
bool isPrintable(char character)
{
    return isPrintable(static_cast<utf8>(character));
}

/** True for ASCII bytes in the range ' '..'~'. */
bool isPrintable(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    return (character >= ' ' && character <= '~');
}

/** FudUtf8 overload of isPrintable. */
bool isPrintable(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isPrintable);
}

/** char overload of isPunctuation; forwards to the utf8 overload. */
bool isPunctuation(char character)
{
    return isPunctuation(static_cast<utf8>(character));
}

/** True for ASCII bytes in '!'..'/', ':'..'@', '['..'`' or '{'..'~'. */
bool isPunctuation(utf8 character)
{
    if (!isAscii(character)) {
        return false;
    }

    return (character >= '!' && character <= '/') || (character >= ':' && character <= '@') ||
           (character >= '[' && character <= '`') || (character >= '{' && character <= '~');
}

/** FudUtf8 overload of isPunctuation. */
bool isPunctuation(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, isPunctuation);
}

} // namespace classify

/**
 * Map an uppercase ASCII letter to its lowercase counterpart.
 *
 * Any byte for which classify::isUppercase() is false is returned unchanged.
 */
uint8_t charToLower(uint8_t character)
{
    if (classify::isUppercase(static_cast<char>(character))) {
        constexpr uint8_t lowerA = 'a';
        constexpr uint8_t upperA = 'A';
        // Offset within the alphabet, rebased onto 'a'.
        return static_cast<uint8_t>(character - upperA + lowerA);
    }
    return character;
}

/**
 * Return a copy of `character` with charToLower() applied to its ASCII value
 * through transformAscii().
 *
 * The result of transformAscii() is explicitly discarded.
 * NOTE(review): presumably transformAscii() leaves non-ASCII characters
 * untouched - confirm against its declaration.
 */
FudUtf8 utf8ToLower(FudUtf8 character)
{
    static_cast<void>(character.transformAscii(
        [](Ascii& ascii) { ascii = Ascii{charToLower(static_cast<uint8_t>(ascii.asChar()))}; }));
    return character;
}

/**
 * Map a lowercase ASCII letter to its uppercase counterpart.
 *
 * Any byte for which classify::isLowercase() is false is returned unchanged.
 */
uint8_t charToUpper(uint8_t character)
{
    if (classify::isLowercase(static_cast<char>(character))) {
        constexpr uint8_t lowerA = 'a';
        constexpr uint8_t upperA = 'A';
        // Offset within the alphabet, rebased onto 'A'.
        return static_cast<uint8_t>(character - lowerA + upperA);
    }
    return character;
}

/**
 * Return a copy of `character` with charToUpper() applied to its ASCII value
 * through transformAscii().
 *
 * The result of transformAscii() is explicitly discarded.
 * NOTE(review): presumably transformAscii() leaves non-ASCII characters
 * untouched - confirm against its declaration.
 */
FudUtf8 utf8ToUpper(FudUtf8 character)
{
    static_cast<void>(character.transformAscii(
        [](Ascii& ascii) { ascii = Ascii{charToUpper(static_cast<uint8_t>(ascii.asChar()))}; }));
    return character;
}

} // namespace fud