From 5cc7cbc3704ec255eb5d0ac53b2cc0fcb1221d63 Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Wed, 23 Oct 2024 13:21:10 -0500 Subject: String conversion and parsing format spec. --- include/fud_format.hpp | 131 ++++++++---- include/fud_option.hpp | 45 ++-- include/fud_string_convert.hpp | 456 +++++++++++++++++++++++++++++++++++++++++ include/fud_string_view.hpp | 41 ++-- include/fud_utf8.hpp | 116 +++++++---- 5 files changed, 674 insertions(+), 115 deletions(-) create mode 100644 include/fud_string_convert.hpp (limited to 'include') diff --git a/include/fud_format.hpp b/include/fud_format.hpp index ea32bd8..2576eca 100644 --- a/include/fud_format.hpp +++ b/include/fud_format.hpp @@ -22,24 +22,30 @@ #include "fud_result.hpp" #include "fud_status.hpp" #include "fud_string_view.hpp" +#include "fud_option.hpp" #include "fud_utf8.hpp" #include -#include #include +#include namespace fud { struct FormatAlign { - enum class Value : uint8_t + enum class Value : utf8 { - Left, - Right, - Center + Left = '<', + Right = '>', + Center = '^', + Default = std::numeric_limits::max() }; - constexpr static std::optional from(utf8 letter) + constexpr static FormatAlign makeDefault() noexcept { + return {Value::Default}; + } + + constexpr static Option from(utf8 letter) { FormatAlign formatAlign; switch (letter) { @@ -53,12 +59,16 @@ struct FormatAlign formatAlign.value = Value::Center; break; default: - return std::nullopt; + return NullOpt; } return formatAlign; } + Value operator()() const { + return value; + } + Value value; }; @@ -66,37 +76,50 @@ struct FormatFill { FormatAlign align; utf8 fill; - constexpr static Result, FudStatus> parse(StringView formatView) { - // "{:A, FudStatus>; - if (formatView.length() < 3) { - return RetType::okay(std::nullopt); + constexpr static FormatFill make() noexcept { + return FormatFill {FormatAlign::makeDefault(), ' '}; + } + + constexpr static Result, FudStatus> make(StringView formatView, size_t& length) noexcept { + using RetType = 
Result, FudStatus>; + if (formatView.length() < 1) { + return RetType::okay(NullOpt); } const auto* data = formatView.data(); - if (data[0] != 'A') { - return RetType::okay(std::nullopt); + auto align1 = FormatAlign::from(data[0]); + decltype(align1) align2 = NullOpt; + if (formatView.length() > 1) { + align2 = FormatAlign::from(data[1]); } - auto align = FormatAlign::from(data[1]); - if (!align.has_value()) { - return FudStatus::FormatInvalid; + if (align2.hasValue()) { + length = 2; + auto fill = data[0]; + if (not Ascii::valid(fill)) { + return FudStatus::Utf8Invalid; + } + if (fill == '{' || fill == '}') { + return FudStatus::FormatInvalid; + } + return RetType::okay(FormatFill{std::move(align2).value(), data[0]}); } - auto fill = data[2]; - if (!Ascii::valid(fill)) { - return FudStatus::Utf8Invalid; + if (align1.hasValue()) { + length = 1; + return RetType::okay(FormatFill{std::move(align1).value(), ' '}); } - return RetType::okay(FormatFill{*align, fill}); + return RetType::okay(NullOpt); } }; enum class FormatSign : uint8_t { - Plus, - Minus, - Space + Plus = '+', + Minus = '-', + Space = ' ', + Default = std::numeric_limits::max() }; enum class FormatStringType : uint8_t @@ -155,6 +178,7 @@ enum class FormatPointerType : uint8_t HexUpper }; +/* using FormatType = std::variant< // break std::monostate, FormatStringType, @@ -163,33 +187,64 @@ using FormatType = std::variant< // break FormatBoolType, FormatFloatingType, FormatPointerType>; +*/ + +enum class FormatType : utf8 { + Unspecified = '\0', + String = 's', + Escaped = '?', + BinaryLower = 'b', + BinaryUpper = 'B', + Character = 'c', + Decimal = 'd', + Octal = 'o', + HexLower = 'x', + HexUpper = 'X', + FloatHexLower = 'a', + FloatHexUpper = 'A', + ScientificLower = 'e', + ScientificUpper = 'E', + FixedLower = 'f', + FixedUpper = 'F', + GeneralLower = 'g', + GeneralUpper = 'G', +}; struct FormatSpec; using FormatSpecResult = Result; struct FormatSpec { - size_t width; - size_t precision; - FormatFill 
fill; - FormatSign formatSign; + static constexpr uint32_t widthUnspecified = std::numeric_limits::max(); + static constexpr uint32_t precisionUnspecified = std::numeric_limits::max(); + static constexpr uint16_t positionUnspecified = std::numeric_limits::max(); + static constexpr utf8 openBracket = '{'; + static constexpr utf8 closeBracket = '}'; + static constexpr utf8 formatTypeUnspecified = std::numeric_limits::max(); + static constexpr utf8 localeChar = 'L'; + + uint32_t width{widthUnspecified}; + + uint32_t precision{precisionUnspecified}; + + uint16_t position{positionUnspecified}; + + FormatFill fill{FormatFill::make()}; - FormatType formatType; + FormatSign formatSign{FormatSign::Default}; - bool hasWidth; - bool takesWidth; + FormatType formatType{}; - bool hasPrecision; - bool takesPrecision; + bool takesWidth{false}; - bool hasFill; + bool takesPrecision{false}; - bool hasFormatSign; + bool alternateForm{false}; - bool alternateForm; + bool leadingZero{false}; - bool leadingZero; + bool hasLocale{false}; - static Result make(StringView& formatView, size_t specIndex); + static Result make(StringView formatView, size_t& specLength); }; } // namespace fud diff --git a/include/fud_option.hpp b/include/fud_option.hpp index ca3954f..931ef82 100644 --- a/include/fud_option.hpp +++ b/include/fud_option.hpp @@ -103,16 +103,16 @@ class Option { } } - constexpr Option(const Option& rhs) noexcept : m_engaged(rhs.m_engaged), m_data(rhs.m_data) + constexpr Option(const Option& rhs) noexcept : m_data(rhs.m_data), m_engaged(rhs.m_engaged) { } - constexpr Option(Option&& rhs) noexcept : m_engaged(rhs.m_engaged), m_data(std::move(rhs.m_data)) + constexpr Option(Option&& rhs) noexcept : m_data(std::move(rhs.m_data)), m_engaged(rhs.m_engaged) { rhs.cleanup(); } - ~Option() noexcept + constexpr ~Option() noexcept { destroy(); } @@ -142,6 +142,11 @@ class Option { return m_engaged; } + [[nodiscard]] bool isNone() const + { + return !m_engaged; + } + operator bool() const { 
return hasValue(); } @@ -170,7 +175,23 @@ class Option { { fudAssert(m_engaged); static_assert(!IsRef); - return *reinterpret_cast(m_data.data()); + return std::move(*reinterpret_cast(m_data.data())); + } + + [[nodiscard]] constexpr const ValueType& valueOr(const ValueType& alternative) const& + { + if (m_engaged) { + return value(); + } + return alternative; + } + + [[nodiscard]] constexpr ValueType&& valueOr(ValueType&& alternative) const&& + { + if (m_engaged) { + return value(); + } + return std::move(alternative); } template @@ -203,27 +224,11 @@ class Option { m_data.clear(); } - // alignas(maxAlign) Array priv_m_data; - alignas(alignof(T)) option_detail::DataArray m_data{}; bool m_engaged; }; -namespace test { - -void testOption() -{ - Option intOpt; - static_cast(intOpt); - Option intRefNull; - static_cast(intRefNull); - int value; - Option intRefValue{value}; -} - -} // namespace test - } // namespace fud #endif diff --git a/include/fud_string_convert.hpp b/include/fud_string_convert.hpp new file mode 100644 index 0000000..597c6a9 --- /dev/null +++ b/include/fud_string_convert.hpp @@ -0,0 +1,456 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FUD_STRING_CONVERT_HPP +#define FUD_STRING_CONVERT_HPP + +#include "fud_algorithm.hpp" +#include "fud_option.hpp" +#include "fud_result.hpp" +#include "fud_status.hpp" +#include "fud_string.hpp" +#include "fud_string_view.hpp" + +#include +#include + +namespace fud { + +enum class Radix : uint8_t +{ + Binary = 2, + Octal = 8, + Decimal = 10, + Hexadecimal = 16, +}; + +constexpr uint8_t RadixMax = 36; + +template +struct ConvertValue { + size_t nextIndex; + T value; +}; + +template +using StringConvertResult = Result, FudStatus>; + +template +StringConvertResult fromString(StringView inputView, Option specifiedRadixOption = NullOpt); + +template +StringConvertResult fromString(StringView inputView, Radix specifiedRadixOption); + +template +StringConvertResult fromString(const String& inputView, Option specifiedRadixOption = NullOpt); + +namespace impl { +constexpr Array AsciiLookup{ + {-1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + -2, -2, -2, -2, -2, -2, -2, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, -2, -2, -2, -2, -2, -2, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3}}; + +// NOLINTBEGIN(readability-magic-numbers) 
+static_assert(AsciiLookup[static_cast('0')] == 0); +static_assert(AsciiLookup[static_cast('1')] == 1); +static_assert(AsciiLookup[static_cast('2')] == 2); +static_assert(AsciiLookup[static_cast('3')] == 3); +static_assert(AsciiLookup[static_cast('4')] == 4); +static_assert(AsciiLookup[static_cast('5')] == 5); +static_assert(AsciiLookup[static_cast('6')] == 6); +static_assert(AsciiLookup[static_cast('7')] == 7); +static_assert(AsciiLookup[static_cast('8')] == 8); +static_assert(AsciiLookup[static_cast('9')] == 9); +static_assert(AsciiLookup[static_cast('a')] == 10); +static_assert(AsciiLookup[static_cast('b')] == 11); +static_assert(AsciiLookup[static_cast('c')] == 12); +static_assert(AsciiLookup[static_cast('d')] == 13); +static_assert(AsciiLookup[static_cast('e')] == 14); +static_assert(AsciiLookup[static_cast('f')] == 15); +static_assert(AsciiLookup[static_cast('A')] == 10); +static_assert(AsciiLookup[static_cast('B')] == 11); +static_assert(AsciiLookup[static_cast('C')] == 12); +static_assert(AsciiLookup[static_cast('D')] == 13); +static_assert(AsciiLookup[static_cast('E')] == 14); +static_assert(AsciiLookup[static_cast('F')] == 15); +static_assert(AsciiLookup[127] == -2); +static_assert(AsciiLookup[128] == -3); +// NOLINTEND(readability-magic-numbers) + +FudStatus checkPlusSigned(StringView& view, size_t& skipIndex); + +Result determineRadix(StringView input, size_t& index); + +Result checkNegative(StringView& view, size_t& skipIndex); + +Result getRadix(StringView& view, size_t& skipIndex, Option specifiedRadixOption); + +template +StringConvertResult unsignedFromString(StringView nextView, size_t skipIndex, Option specifiedRadixOption) +{ + static_assert(std::is_unsigned_v && std::is_integral_v); + auto status = checkPlusSigned(nextView, skipIndex); + if (status != FudStatus::Success) { + return FudStatus::ArgumentInvalid; + } + + auto radixResult = impl::getRadix(nextView, skipIndex, specifiedRadixOption); + if (radixResult.isError()) { + return 
radixResult.takeError(); + } + auto radix = radixResult.takeOkay(); + + T num = 0; + size_t digitIndex = 0; + + while (digitIndex < nextView.length()) { + auto digitResult = impl::AsciiLookup[nextView.data()[digitIndex]]; + if (digitResult >= radix || digitResult < 0) { + break; + } + + auto digit = static_cast(digitResult); + if (std::numeric_limits::max() / radix < num) { + return FudStatus::RangeError; + } + num *= radix; + if (std::numeric_limits::max() - digit < num) { + return FudStatus::RangeError; + } + num += digit; + digitIndex++; + } + if (digitIndex < 1) { + return FudStatus::ArgumentInvalid; + } + + return ConvertValue{skipIndex + digitIndex, num}; +} + +template +FudStatus signedPositiveFromString(StringView view, uint8_t radix, size_t& digitIndex, T& num) +{ + digitIndex = 0; + while (digitIndex < view.length()) { + int8_t digitResult = impl::AsciiLookup[view.data()[digitIndex]]; + if (digitResult >= radix || digitResult < 0) { + break; + } + + auto digit = static_cast(digitResult); + if (std::numeric_limits::max() / radix < num) { + return FudStatus::RangeError; + } + num = static_cast(num * radix); + if (std::numeric_limits::max() - digit < num) { + return FudStatus::RangeError; + } + num = static_cast(num + digit); + digitIndex++; + } + + return FudStatus::Success; +} + +template +FudStatus signedNegativeFromString(StringView view, uint8_t radix, size_t& digitIndex, T& num) +{ + digitIndex = 0; + while (digitIndex < view.length()) { + int8_t digitResult = impl::AsciiLookup[view.data()[digitIndex]]; + if (digitResult >= radix || digitResult < 0) { + break; + } + + auto digit = static_cast(digitResult); + if ((std::numeric_limits::min() / radix > num)) { + return FudStatus::RangeError; + } + num = static_cast(num * radix); + if (std::numeric_limits::min() + digit > num) { + return FudStatus::RangeError; + } + num = static_cast(num - digit); + digitIndex++; + } + + return FudStatus::Success; +} + +template +StringConvertResult 
signedFromString(StringView nextView, size_t skipIndex, Option specifiedRadixOption) +{ + static_assert(std::is_signed_v && std::is_integral_v); + auto status = impl::checkPlusSigned(nextView, skipIndex); + if (status != FudStatus::Success) { + return FudStatus::ArgumentInvalid; + } + + auto radixResult = impl::getRadix(nextView, skipIndex, specifiedRadixOption); + if (radixResult.isError()) { + return radixResult.takeError(); + } + auto radix = radixResult.takeOkay(); + + T num = 0; + size_t digitIndex = 0; + + auto isNegativeResult = checkNegative(nextView, skipIndex); + if (isNegativeResult.isError()) { + return isNegativeResult.takeError(); + } + const auto isNegative = isNegativeResult.takeOkay(); + + if (isNegative) { + status = signedNegativeFromString(nextView, radix, digitIndex, num); + } else { + status = signedPositiveFromString(nextView, radix, digitIndex, num); + } + + if (status != FudStatus::Success) { + return status; + } + + if (digitIndex < 1) { + return FudStatus::ArgumentInvalid; + } + + return ConvertValue{skipIndex + digitIndex, num}; +} + +template +bool isNanOrInf(T& num, StringView& view, T& sign, size_t& digitIndex) +{ + if (view.length() >= 3) { + Array letters{{view.data()[0], view.data()[1], view.data()[2]}}; + forEach(letters.span(), charToLower); + if (letters[0] == 'i' && letters[1] == 'n' && letters[2] == 'f') { + num = sign * std::numeric_limits::infinity(); + digitIndex = 3; + return true; + } + if (letters[0] == 'n' && letters[1] == 'a' && letters[2] == 'n') { + num = std::numeric_limits::quiet_NaN(); + digitIndex = 3; + return true; + } + } + return false; +} + +template +FudStatus getWhole( + const StringView view, + size_t& digitIndex, + T& num, + T sign, + uint8_t radix, + bool& foundDecimal, + bool& foundExponent) +{ + while (digitIndex < view.length()) { + auto nextChar = view.data()[digitIndex]; + if (nextChar == '.') { + foundDecimal = true; + digitIndex++; + break; + } + + if (radix == static_cast(Radix::Decimal) && 
(nextChar == 'e' || nextChar == 'E')) { + foundExponent = true; + digitIndex++; + break; + } + + auto digitResult = impl::AsciiLookup[nextChar]; + if (digitResult >= radix) { + return FudStatus::ArgumentInvalid; + } + if (digitResult < 0) { + break; + } + auto digit = static_cast(digitResult) * sign; + num *= static_cast(radix); + + num += digit; + digitIndex++; + } + return FudStatus::Success; +} + +template +FudStatus getExponent(StringView view, size_t& digitIndex, T& num, uint8_t radix) +{ + StringView tempView{view.length() - digitIndex, view.data() + digitIndex}; + size_t exponentLength{}; + auto convertResult = signedFromString(tempView, exponentLength, static_cast(Radix::Decimal)); + if (convertResult.isError()) { + return convertResult.takeError(); + } + auto convertValue = convertResult.takeOkay(); + digitIndex += convertValue.nextIndex + exponentLength; + num = num * std::pow(static_cast(radix), static_cast(convertValue.value)); + return FudStatus::Success; +} + +template +FudStatus getFraction(const StringView view, size_t& digitIndex, T& num, T sign, uint8_t radix, bool& foundExponent) +{ + auto radixDiv = static_cast(1) / static_cast(radix); + while (digitIndex < view.length()) { + auto nextChar = view.data()[digitIndex]; + if (radix == static_cast(Radix::Decimal) && (nextChar == 'e' || nextChar == 'E')) { + foundExponent = true; + digitIndex++; + break; + } + + auto digitResult = impl::AsciiLookup[nextChar]; + if (digitResult >= radix) { + return FudStatus::ArgumentInvalid; + } + if (digitResult < 0) { + break; + } + auto digit = static_cast(digitResult) * sign; + num += digit * radixDiv; + radixDiv /= static_cast(radix); + digitIndex++; + } + return FudStatus::Success; +} + +template +StringConvertResult floatFromString(StringView nextView, size_t skipIndex, Option specifiedRadixOption) +{ + static_assert(std::is_floating_point_v); + if (nextView.length() < 1) { + return FudStatus::ArgumentInvalid; + } + + auto isNegativeResult = 
checkNegative(nextView, skipIndex); + if (isNegativeResult.isError()) { + return isNegativeResult.takeError(); + } + const auto isNegative = isNegativeResult.takeOkay(); + + if (!isNegative) { + auto status = checkPlusSigned(nextView, skipIndex); + if (status != FudStatus::Success) { + return FudStatus::ArgumentInvalid; + } + } + T sign = isNegative ? -1.0 : 1.0; + + T num = 0; + size_t digitIndex = 0; + + auto retSuccess = [&]() { return ConvertValue{skipIndex + digitIndex, num}; }; + + if (impl::isNanOrInf(num, nextView, sign, digitIndex)) { + return retSuccess(); + } + + auto radixResult = impl::getRadix(nextView, skipIndex, specifiedRadixOption); + if (radixResult.isError()) { + return radixResult.takeError(); + } + auto radix = radixResult.takeOkay(); + + bool foundDecimal = false; + bool foundExponent = false; + auto status = getWhole(nextView, digitIndex, num, sign, radix, foundDecimal, foundExponent); + + if (status == FudStatus::Success && foundExponent) { + status = getExponent(nextView, digitIndex, num, radix); + } + + if (status != FudStatus::Success) { + return status; + } + + if (!foundDecimal) { + if (digitIndex < 1) { + return FudStatus::ArgumentInvalid; + } + + return retSuccess(); + } + + status = getFraction(nextView, digitIndex, num, sign, radix, foundExponent); + + if (status == FudStatus::Success && foundExponent) { /* only parse an exponent when the fraction itself parsed cleanly */ + status = getExponent(nextView, digitIndex, num, radix); + } + if (status != FudStatus::Success) { /* surface getFraction/getExponent failures instead of silently dropping them (same pattern as the getWhole path above) */ + return status; + } + + if (digitIndex < 1) { + return FudStatus::ArgumentInvalid; + } + + if (std::isinf(num) || std::isnan(num)) // isnan is dubious here - likely unreachable + { + return FudStatus::RangeError; + } + + return retSuccess(); +} + +} // namespace impl + +template +StringConvertResult fromString(StringView inputView, Radix specifiedRadixOption) +{ + return fromString(inputView, static_cast(specifiedRadixOption)); +} + +template +StringConvertResult fromString(StringView inputView, Option specifiedRadixOption) +{ + if (inputView.data() == nullptr) { 
+ return FudStatus::NullPointer; + } + + StringView nextView{inputView}; + auto skipResult = nextView.skipWhitespace(); + if (skipResult.isError()) { + return skipResult.takeError(); + } + size_t skipIndex = skipResult.takeOkay(); + + if constexpr (std::is_unsigned_v && std::is_integral_v) { + return impl::unsignedFromString(nextView, skipIndex, specifiedRadixOption); + } else if constexpr (std::is_signed_v && std::is_integral_v) { + return impl::signedFromString(nextView, skipIndex, specifiedRadixOption); + } else if constexpr (std::is_floating_point_v) { + return impl::floatFromString(nextView, skipIndex, specifiedRadixOption); + } else { + return FudStatus::NotImplemented; + } +} + +} // namespace fud + +#endif diff --git a/include/fud_string_view.hpp b/include/fud_string_view.hpp index 8a47ae5..0852645 100644 --- a/include/fud_string_view.hpp +++ b/include/fud_string_view.hpp @@ -20,6 +20,8 @@ #include "fud_status.hpp" #include "fud_utf8.hpp" +#include "fud_config.hpp" +#include "fud_assert.hpp" #include @@ -63,6 +65,25 @@ struct StringView { return m_data; } + constexpr const utf8& operator[](size_t index) const + { + if constexpr (fudBoundsChecking) { + fudAssert(m_data != nullptr); + fudAssert(index < m_length); + } + return m_data[index]; + } + + constexpr const utf8* begin() const noexcept + { + return m_data; + } + + constexpr const utf8* end() const noexcept + { + return m_data + m_length; + } + [[nodiscard]] bool nullTerminated() const; [[nodiscard]] bool utf8Valid() const; @@ -79,26 +100,6 @@ struct StringView { void advanceUnsafe(size_t size); - FudStatus toUint8(uint8_t& number, uint8_t specifiedRadix, size_t& strLen) const; - - FudStatus toUint16(uint16_t& number, uint8_t specifiedRadix, size_t& strLen) const; - - FudStatus toUint32(uint32_t& number, uint8_t specifiedRadix, size_t& strLen) const; - - FudStatus toUint64(uint64_t& number, uint8_t specifiedRadix, size_t& strLen) const; - - FudStatus toInt8(int8_t& number, uint8_t specifiedRadix, 
size_t& strLen) const; - - FudStatus toInt16(int16_t& number, uint8_t specifiedRadix, size_t& strLen) const; - - FudStatus toInt32(int32_t& number, uint8_t specifiedRadix, size_t& strLen) const; - - FudStatus toInt64(int64_t& number, uint8_t specifiedRadix, size_t& strLen) const; - - FudStatus toFloat(float& number, size_t& strLen) const; - - FudStatus toDouble(double& number, size_t& strLen) const; - size_t m_length{0}; const utf8* m_data{nullptr}; diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp index 3b1a6b7..50e50aa 100644 --- a/include/fud_utf8.hpp +++ b/include/fud_utf8.hpp @@ -19,8 +19,8 @@ #define FUD_UTF8_HPP #include "fud_array.hpp" -#include "fud_unique_array.hpp" #include "fud_c_string.hpp" +#include "fud_unique_array.hpp" #include #include @@ -28,8 +28,6 @@ namespace fud { - - using utf8 = unsigned char; class String; @@ -113,8 +111,7 @@ struct Utf82Byte { { } - __attribute__((nonnull)) - constexpr Utf82Byte(const char* letterStr) noexcept : characters{} + __attribute__((nonnull)) constexpr Utf82Byte(const char* letterStr) noexcept : characters{} { auto length = cStringLength(letterStr, 2); if (length < 2) { @@ -159,8 +156,7 @@ struct Utf83Byte { { } - __attribute__((nonnull)) - constexpr Utf83Byte(const char* letterStr) noexcept : characters{} + __attribute__((nonnull)) constexpr Utf83Byte(const char* letterStr) noexcept : characters{} { auto length = cStringLength(letterStr, 3); if (length < 3) { @@ -213,8 +209,7 @@ struct Utf84Byte { { } - __attribute__((nonnull)) - constexpr Utf84Byte(const char* letterStr) noexcept : characters{} + __attribute__((nonnull)) constexpr Utf84Byte(const char* letterStr) noexcept : characters{} { auto length = cStringLength(letterStr, 4); if (length < 4) { @@ -327,7 +322,8 @@ struct FudUtf8 { return unicode; } - static constexpr FudUtf8 make(Utf8Variant utf8Variant) { + static constexpr FudUtf8 make(Utf8Variant utf8Variant) + { FudUtf8 unicode{}; unicode.m_variant = utf8Variant; if (!std::visit([](auto arg) { 
return arg.valid(); }, utf8Variant)) { @@ -513,82 +509,128 @@ struct FudUtf8 { } }; +namespace classify { + +using CharPredicate = bool (*)(char); +using Utf8Predicate = bool (*)(utf8); +using FudUtf8Predicate = bool (*)(FudUtf8); + /** \brief Checks if a character is ascii. */ -[[nodiscard]] bool charIsAscii(char character); +[[nodiscard]] bool isAscii(char character); -[[nodiscard]] bool utf8IsAscii(FudUtf8 character); +[[nodiscard]] bool isAscii(utf8 character); + +[[nodiscard]] bool isAscii(FudUtf8 character); /** \brief Checks if a character is alphanumeric. */ -[[nodiscard]] bool charIsAlphanumeric(char character); +[[nodiscard]] bool isAlphanumeric(char character); /** \brief Checks if a character is alphanumeric. */ -[[nodiscard]] bool utf8IsAlphanumeric(FudUtf8 character); +[[nodiscard]] bool isAlphanumeric(utf8 character); + +/** \brief Checks if a character is alphanumeric. */ +[[nodiscard]] bool isAlphanumeric(FudUtf8 character); + +/** \brief Checks if a character is alphabetic. */ +[[nodiscard]] bool isAlpha(char character); /** \brief Checks if a character is alphabetic. */ -[[nodiscard]] bool charIsAlpha(char character); +[[nodiscard]] bool isAlpha(utf8 character); /** \brief Checks if a character is alphabetic. */ -[[nodiscard]] bool utf8IsAlpha(FudUtf8 character); +[[nodiscard]] bool isAlpha(FudUtf8 character); + +/** \brief Checks if a character is lowercase. */ +[[nodiscard]] bool isLowercase(char character); /** \brief Checks if a character is lowercase. */ -[[nodiscard]] bool charIsLowercase(char character); +[[nodiscard]] bool isLowercase(utf8 character); /** \brief Checks if a character is lowercase. */ -[[nodiscard]] bool utf8IsLowercase(FudUtf8 character); +[[nodiscard]] bool isLowercase(FudUtf8 character); + +/** \brief Checks if a character is uppercase. */ +[[nodiscard]] bool isUppercase(char character); -/** \brief Checks if a character is an uppercase character. 
*/ -[[nodiscard]] bool charIsUppercase(char character); +/** \brief Checks if a character is uppercase. */ +[[nodiscard]] bool isUppercase(utf8 character); /** \brief Checks if a character is uppercase. */ -[[nodiscard]] bool utf8IsUppercase(FudUtf8 character); +[[nodiscard]] bool isUppercase(FudUtf8 character); /** \brief Checks if a character is a digit. */ -[[nodiscard]] bool charIsDigit(char character); +[[nodiscard]] bool isDigit(char character); /** \brief Checks if a character is a digit. */ -[[nodiscard]] bool utf8IsDigit(FudUtf8 character); +[[nodiscard]] bool isDigit(utf8 character); + +/** \brief Checks if a character is a digit. */ +[[nodiscard]] bool isDigit(FudUtf8 character); /** \brief Checks if a character is a hexadecimal character. */ -[[nodiscard]] bool charIsHexDigit(char character); +[[nodiscard]] bool isHexDigit(char character); + +/** \brief Checks if a character is a hexadecimal character. */ +[[nodiscard]] bool isHexDigit(utf8 character); /** \brief Checks if a character is a hexadecimal digit. */ -[[nodiscard]] bool utf8IsHexDigit(FudUtf8 character); +[[nodiscard]] bool isHexDigit(FudUtf8 character); /** \brief Checks if a character is a control character. */ -[[nodiscard]] bool charIsControl(char character); +[[nodiscard]] bool isControl(char character); /** \brief Checks if a character is a control character. */ -[[nodiscard]] bool utf8IsControl(FudUtf8 character); +[[nodiscard]] bool isControl(utf8 character); + +/** \brief Checks if a character is a control character. */ +[[nodiscard]] bool isControl(FudUtf8 character); + +/** \brief Checks if a character is a graphical character. */ +[[nodiscard]] bool isGraphical(char character); /** \brief Checks if a character is a graphical character. */ -[[nodiscard]] bool charIsGraphical(char character); +[[nodiscard]] bool isGraphical(utf8 character); /** \brief Checks if a character is a graphical character. 
*/ -[[nodiscard]] bool utf8IsGraphical(FudUtf8 character); +[[nodiscard]] bool isGraphical(FudUtf8 character); /** \brief Checks if a character is a space character. */ -[[nodiscard]] bool charIsSpace(char character); +[[nodiscard]] bool isSpace(char character); /** \brief Checks if a character is a space character. */ -[[nodiscard]] bool utf8IsSpace(FudUtf8 character); +[[nodiscard]] bool isSpace(utf8 character); + +/** \brief Checks if a character is a space character. */ +[[nodiscard]] bool isSpace(FudUtf8 character); + +/** \brief Checks if a character is a blank character. */ +[[nodiscard]] bool isBlank(char character); /** \brief Checks if a character is a blank character. */ -[[nodiscard]] bool charIsBlank(char character); +[[nodiscard]] bool isBlank(utf8 character); /** \brief Checks if a character is a blank character. */ -[[nodiscard]] bool utf8IsBlank(FudUtf8 character); +[[nodiscard]] bool isBlank(FudUtf8 character); + +/** \brief Checks if a character is a printable character. */ +[[nodiscard]] bool isPrintable(char character); /** \brief Checks if a character is a printable character. */ -[[nodiscard]] bool charIsPrintable(char character); +[[nodiscard]] bool isPrintable(utf8 character); /** \brief Checks if a character is a printable character. */ -[[nodiscard]] bool utf8IsPrintable(FudUtf8 character); +[[nodiscard]] bool isPrintable(FudUtf8 character); + +/** \brief Checks if a character is a punctuation character. */ +[[nodiscard]] bool isPunctuation(char character); /** \brief Checks if a character is a punctuation character. */ -[[nodiscard]] bool charIsPunctuation(char character); +[[nodiscard]] bool isPunctuation(utf8 character); /** \brief Checks if a character is a punctuation character. */ -[[nodiscard]] bool utf8IsPunctuation(FudUtf8 character); +[[nodiscard]] bool isPunctuation(FudUtf8 character); + +} // namespace classify /** \brief Converts character to lowercase if valid. */ uint8_t charToLower(uint8_t character); -- cgit v1.2.3