/*
 * libfud
 * Copyright 2024 Dominick Allen
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fud_utf8.hpp"

#include "fud_string.hpp"

namespace fud {

/**
 * \brief Decode the UTF-8 character that starts at byte `index` of `fudString`.
 *
 * Returns `invalidAscii()` when the string reports itself as not valid;
 * otherwise defers to the StringView overload.
 */
FudUtf8 FudUtf8::from(const String& fudString, size_t index) noexcept
{
    if (!fudString.valid()) {
        return invalidAscii();
    }

    return from(StringView{fudString}, index);
}

/**
 * \brief Decode the UTF-8 character that starts at byte `index` of `view`.
 *
 * Candidate encodings are tried from shortest to longest (1, 2, 3, then 4
 * bytes); the first one that reports itself valid is returned. A longer
 * candidate is only built when enough bytes remain in the view.
 *
 * \param view  Bytes to decode from.
 * \param index Offset of the first byte of the character.
 * \return The decoded character, or `invalidAscii()` when the view has no
 *         data, `index` is past the end, or no candidate is valid.
 */
FudUtf8 FudUtf8::from(StringView view, size_t index) noexcept
{
    auto viewLocal{view};
    const auto len = viewLocal.length();
    const auto* vData = viewLocal.data();
    if (vData == nullptr) {
        return invalidAscii();
    }

    // Reject out-of-range offsets before touching vData[index]; this also
    // covers the empty view.
    if (index >= len) {
        return invalidAscii();
    }

    // Number of bytes available starting at `index` (always >= 1 here).
    // Comparing against this instead of `index + k < len` cannot wrap around.
    const auto remaining = len - index;

    FudUtf8 localChar{Ascii{vData[index]}};
    if (localChar.valid()) {
        return localChar;
    }

    if (remaining > 1) {
        localChar.m_variant = Utf82Byte{vData[index], vData[index + 1]};
    }
    if (localChar.valid()) {
        return localChar;
    }

    if (remaining > 2) {
        localChar.m_variant = Utf83Byte{vData[index], vData[index + 1], vData[index + 2]};
    }
    if (localChar.valid()) {
        return localChar;
    }

    if (remaining > 3) {
        localChar.m_variant = Utf84Byte{vData[index], vData[index + 1], vData[index + 2], vData[index + 3]};
    }
    if (localChar.valid()) {
        return localChar;
    }

    return invalidAscii();
}

/** \brief True when no bit outside ASCII_MASK is set in `character`. */
bool charIsAscii(char character)
{
    return static_cast<uint8_t>(character & ~ASCII_MASK) == 0;
}

/** \brief True when `character` is of type Ascii and reports itself valid. */
bool utf8IsAscii(FudUtf8 character)
{
    return character.getType() == Utf8Type::Ascii && character.valid();
}

namespace impl {

/**
 * \brief Apply a `char` predicate to a FudUtf8 character.
 *
 * \return false when `character.getAscii()` holds no value; otherwise the
 *         result of calling `predicate` on the ASCII character.
 */
template <typename Predicate>
bool isAsciiPredicate(FudUtf8 character, Predicate&& predicate)
{
    auto maybeAscii = character.getAscii();
    if (!maybeAscii.has_value()) {
        return false;
    }

    auto asciiChar = *maybeAscii;
    return std::forward<Predicate>(predicate)(asciiChar.asChar());
}

} // namespace impl

/** \brief True for ASCII letters and decimal digits. */
bool charIsAlphanumeric(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    if (charIsAlpha(character)) {
        return true;
    }

    return charIsDigit(character);
}

/** \brief FudUtf8 counterpart of charIsAlphanumeric. */
bool utf8IsAlphanumeric(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsAlphanumeric);
}

/** \brief True for ASCII letters 'A'-'Z' and 'a'-'z'. */
bool charIsAlpha(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    if (charIsUppercase(character)) {
        return true;
    }

    return charIsLowercase(character);
}

/** \brief FudUtf8 counterpart of charIsAlpha. */
bool utf8IsAlpha(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsAlpha);
}

/** \brief True for 'a'-'z'. */
bool charIsLowercase(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    return 'a' <= character && character <= 'z';
}

/** \brief FudUtf8 counterpart of charIsLowercase. */
bool utf8IsLowercase(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsLowercase);
}

/** \brief True for 'A'-'Z'. */
bool charIsUppercase(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    return 'A' <= character && character <= 'Z';
}

/** \brief FudUtf8 counterpart of charIsUppercase. */
bool utf8IsUppercase(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsUppercase);
}

/** \brief True for '0'-'9'. */
bool charIsDigit(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    return '0' <= character && character <= '9';
}

/** \brief FudUtf8 counterpart of charIsDigit. */
bool utf8IsDigit(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsDigit);
}

/** \brief True for '0'-'9', 'a'-'f' and 'A'-'F'. */
bool charIsHexDigit(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    return ('0' <= character && character <= '9') || ('a' <= character && character <= 'f') ||
           ('A' <= character && character <= 'F');
}

/** \brief FudUtf8 counterpart of charIsHexDigit. */
bool utf8IsHexDigit(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsHexDigit);
}

/** \brief True for the control codes 0x00-0x1F and for DEL (0x7F). */
bool charIsControl(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    constexpr char maxControlChar = 0x1F;
    constexpr char deleteChar = 0x7F;
    return (static_cast<uint8_t>(character) <= maxControlChar) || character == deleteChar;
}

/** \brief FudUtf8 counterpart of charIsControl. */
bool utf8IsControl(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsControl);
}

/** \brief True for characters that are alphanumeric or punctuation. */
bool charIsGraphical(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    return charIsAlphanumeric(character) || charIsPunctuation(character);
}

/** \brief FudUtf8 counterpart of charIsGraphical. */
bool utf8IsGraphical(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsGraphical);
}

/**
 * \brief True for ASCII whitespace: space, '\t', '\n', '\v', '\f' and '\r'.
 *
 * This is the same set std::isspace accepts in the "C" locale.
 */
bool charIsSpace(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    return character == ' ' || character == '\t' || character == '\n' || character == '\v' || character == '\f' ||
           character == '\r';
}

/** \brief FudUtf8 counterpart of charIsSpace. */
bool utf8IsSpace(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsSpace);
}

/** \brief True for space and horizontal tab. */
bool charIsBlank(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    return character == ' ' || character == '\t';
}

/** \brief FudUtf8 counterpart of charIsBlank. */
bool utf8IsBlank(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsBlank);
}

/** \brief True for characters in the range ' ' through '~' inclusive. */
bool charIsPrintable(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    return (character >= ' ' && character <= '~');
}

/** \brief FudUtf8 counterpart of charIsPrintable. */
bool utf8IsPrintable(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsPrintable);
}

/** \brief True for the ranges '!'-'/', ':'-'@', '['-'`' and '{'-'~'. */
bool charIsPunctuation(char character)
{
    if (!charIsAscii(character)) {
        return false;
    }

    return (character >= '!' && character <= '/') || (character >= ':' && character <= '@') ||
           (character >= '[' && character <= '`') || (character >= '{' && character <= '~');
}

/** \brief FudUtf8 counterpart of charIsPunctuation. */
bool utf8IsPunctuation(FudUtf8 character)
{
    return impl::isAsciiPredicate(character, charIsPunctuation);
}

/** \brief Map 'A'-'Z' to 'a'-'z'; every other byte is returned unchanged. */
uint8_t charToLower(uint8_t character)
{
    if (charIsUppercase(static_cast<char>(character))) {
        constexpr uint8_t lowerA = 'a';
        constexpr uint8_t upperA = 'A';
        // Cast the whole expression: the arithmetic is carried out in int.
        return static_cast<uint8_t>(character - upperA + lowerA);
    }
    return character;
}

/**
 * \brief Return `character` after passing charToLower through transformAscii.
 *
 * The result of transformAscii is deliberately discarded.
 */
FudUtf8 utf8ToLower(FudUtf8 character)
{
    static_cast<void>(character.transformAscii(
        [](Ascii& ascii) { ascii = Ascii{charToLower(static_cast<uint8_t>(ascii.asChar()))}; }));
    return character;
}

/** \brief Map 'a'-'z' to 'A'-'Z'; every other byte is returned unchanged. */
uint8_t charToUpper(uint8_t character)
{
    if (charIsLowercase(static_cast<char>(character))) {
        constexpr uint8_t lowerA = 'a';
        constexpr uint8_t upperA = 'A';
        // Cast the whole expression: the arithmetic is carried out in int.
        return static_cast<uint8_t>(character - lowerA + upperA);
    }
    return character;
}

/**
 * \brief Return `character` after passing charToUpper through transformAscii.
 *
 * The result of transformAscii is deliberately discarded.
 */
FudUtf8 utf8ToUpper(FudUtf8 character)
{
    static_cast<void>(character.transformAscii(
        [](Ascii& ascii) { ascii = Ascii{charToUpper(static_cast<uint8_t>(ascii.asChar()))}; }));
    return character;
}

} // namespace fud