diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/array.hpp | 113 | ||||
-rw-r--r-- | include/c_file.hpp | 103 | ||||
-rw-r--r-- | include/fud_type_traits.hpp | 80 | ||||
-rw-r--r-- | include/libfud.hpp | 30 | ||||
-rw-r--r-- | include/memory.hpp | 140 | ||||
-rw-r--r-- | include/result.hpp | 83 | ||||
-rw-r--r-- | include/status.hpp | 106 | ||||
-rw-r--r-- | include/string.hpp | 153 | ||||
-rw-r--r-- | include/unique_array.hpp | 68 | ||||
-rw-r--r-- | include/utf8.hpp | 557 | ||||
-rw-r--r-- | include/utf8_iterator.hpp | 39 |
11 files changed, 1472 insertions, 0 deletions
diff --git a/include/array.hpp b/include/array.hpp new file mode 100644 index 0000000..9de6c0a --- /dev/null +++ b/include/array.hpp @@ -0,0 +1,113 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef EXT_ARRAY_HPP +#define EXT_ARRAY_HPP + +#include "memory.hpp" + +#include <cstdlib> + +namespace fud { + +template <typename T, size_t Size> +struct Array { + static_assert(Size > 0); + using ValueType = T; + + T m_data[Size]; // NOLINT(cppcoreguidelines-avoid-c-arrays) + + constexpr static Array constFill(T value) + { + Array arr{}; + setMemory(arr, value); + return arr; + } + + [[nodiscard]] constexpr size_t size() const + { + return Size; + } + + constexpr T& front() + { + return m_data[0]; + } + + constexpr const T& front() const + { + return m_data[0]; + } + + constexpr T& back() + { + return m_data[Size - 1]; + } + + constexpr const T& back() const + { + return m_data[Size - 1]; + } + + constexpr T* data() noexcept + { + return m_data; + } + + constexpr const T* data() const noexcept + { + return m_data; + } + + constexpr T* begin() noexcept + { + return m_data; + } + + constexpr const T* begin() const noexcept + { + return m_data; + } + + constexpr T* end() noexcept + { + return m_data + Size; + } + + constexpr const T* end() const noexcept + { + return m_data + Size; + } + + constexpr T& operator[](size_t index) + { + return m_data[index]; + } + + constexpr const T& operator[](size_t index) 
const + { + return m_data[index]; + } + + constexpr bool operator==(const Array<T, Size>&) const noexcept = default; + + constexpr auto operator<=>(const Array<T, Size>& other) const noexcept = default; +}; + +} // namespace ext_lib + +#endif diff --git a/include/c_file.hpp b/include/c_file.hpp new file mode 100644 index 0000000..0f43e08 --- /dev/null +++ b/include/c_file.hpp @@ -0,0 +1,103 @@ +/* + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FUD_C_FILE_HPP +#define FUD_C_FILE_HPP + +#include "result.hpp" + +#include <cstdint> +#include <string> + +namespace fud { + +enum class CFileMode : uint8_t +{ + ReadOnly, + ReadWrite, + WriteTruncate, + ReadWriteTruncate, + WriteAppend, + ReadWriteAppend, +}; + +constexpr const char* CBinaryFileModeFromFlags(CFileMode mode) +{ + switch (mode) { + case CFileMode::ReadOnly: + return "rb"; + case CFileMode::ReadWrite: + return "r+b"; + case CFileMode::WriteTruncate: + return "wb"; + case CFileMode::ReadWriteTruncate: + return "w+b"; + case CFileMode::WriteAppend: + return "ab"; + case CFileMode::ReadWriteAppend: + return "a+b"; + default: + return ""; + } +} + +constexpr const char* CTextFileModeFromFlags(CFileMode mode) +{ + switch (mode) { + case CFileMode::ReadOnly: + return "r"; + case CFileMode::ReadWrite: + return "r+"; + case CFileMode::WriteTruncate: + return "w"; + case CFileMode::ReadWriteTruncate: + return "w+"; + case CFileMode::WriteAppend: + return "a"; + 
#ifndef FUD_TYPE_TRAITS_HPP
#define FUD_TYPE_TRAITS_HPP

#include <cstddef>
#include <type_traits>

namespace fud {

/**
 * \brief Reports whether a fixed-size container holds at least two equal elements.
 *
 * Every element is compared against all elements before it using operator==,
 * so the check is quadratic in Size and needs nothing but equality on T.
 *
 * \param arr Container indexable with operator[] over [0, Size).
 * \return true as soon as one equal pair is found, false otherwise.
 */
template <template <class, size_t> class Container, typename T, size_t Size>
constexpr bool hasDuplicates(Container<T, Size> const& arr)
{
    for (size_t iIdx = 1; iIdx < Size; iIdx++) {
        for (size_t jIdx = 0; jIdx < iIdx; jIdx++) {
            if (arr[iIdx] == arr[jIdx]) {
                return true;
            }
        }
    }
    return false;
}

// Only declared here; callers of hasDuplicates<T, Vs...>() must make the
// definition of Array visible before instantiating it.
template <typename T, size_t Size>
struct Array;

/**
 * \brief Reports whether the value pack Vs contains a repeated value.
 *
 * The values are materialised in a fud::Array and checked with the container
 * overload above.
 */
template <typename T, T... Vs>
constexpr bool hasDuplicates()
{
    static_assert(sizeof...(Vs) > 0);
    constexpr Array<T, sizeof...(Vs)> arr{{Vs...}};
    return hasDuplicates(arr);
}

/**
 * \brief Same as hasDuplicates<T, Vs...>() but with a caller-chosen container
 * template used to hold the values.
 */
template <template <class, size_t> class Container, typename T, T... Vs>
constexpr bool hasDuplicates()
{
    static_assert(sizeof...(Vs) > 0);
    constexpr Container<T, sizeof...(Vs)> arr{{Vs...}};
    return hasDuplicates(arr);
}

/**
 * \brief std::true_type when no type in the list appears twice,
 * std::false_type otherwise. The empty list counts as unique.
 */
template <typename...>
inline constexpr auto isUnique = std::true_type{};

// The head must differ from every type in the tail, and the tail must itself
// be free of repeats.
template <typename T, typename... Rest>
inline constexpr auto
    isUnique<T, Rest...> = std::bool_constant<(!std::is_same_v<T, Rest> && ...) && isUnique<Rest...>>{};

/**
 * \brief Reports whether no element of a fixed-size container equals \p value.
 *
 * The container is only read, so it is taken by const reference; this lets
 * const and constexpr containers be checked as well as mutable ones.
 *
 * \param data Container indexable with operator[] over [0, N).
 * \param value Value to look for.
 * \return false if any element compares equal to \p value, true otherwise.
 */
template <template <class, size_t> class Container, class T, size_t N>
constexpr bool allDifferentFrom(const Container<T, N>& data, const T& value)
{
    // N is the element count by construction, matching hasDuplicates above.
    for (size_t idx = 0; idx < N; ++idx) {
        if (data[idx] == value) {
            return false;
        }
    }

    return true;
}

} // namespace fud

#endif
#include <cstddef>

namespace fud {

/**
 * \brief Pointer to a malloc-style allocator.
 *
 * The function receives the number of bytes requested and returns the
 * allocation, or null on failure. The return type has to be void* for that
 * contract to be expressible; a void-returning function cannot report
 * anything back to the caller.
 */
using FudAllocOne = void* (*)(size_t);

/**
 * \brief Pointer to a calloc-style allocator.
 *
 * The function receives two size arguments and returns the allocation, or
 * null on failure.
 * NOTE(review): argument order is presumably (count, element size) as for
 * calloc - confirm against the implementation that consumes this alias.
 */
using FudAllocMany = void* (*)(size_t, size_t);

} // namespace fud
namespace fud {

namespace privateImpl {

/**
 * \brief Compares two byte ranges of equal length.
 *
 * \param lhs First range, at least \p count bytes long.
 * \param rhs Second range, at least \p count bytes long.
 * \param count Number of bytes to compare.
 * \return 0 when all bytes match, otherwise the difference lhs[i] - rhs[i]
 * at the first mismatching index.
 */
inline int compareBytes(const uint8_t* lhs, const uint8_t* rhs, size_t count) noexcept
{
    for (size_t idx = 0; idx < count; ++idx) {
        const int difference = lhs[idx] - rhs[idx];
        if (difference != 0) {
            return difference;
        }
    }
    return 0;
}

} // namespace privateImpl

/**
 * \brief Assigns \p value to every element of \p container.
 */
template <template <class, size_t> class Container, typename T, size_t Size>
constexpr void setMemory(Container<T, Size>& container, const T& value)
{
    for (auto& elt : container) {
        elt = value;
    }
}

/**
 * \brief Copies the first \p Count bytes of \p source over the first
 * \p Count bytes of \p destination.
 *
 * Both types must be standard layout and at least \p Count bytes large;
 * violations are rejected at compile time.
 */
template <size_t Count, typename T, typename U>
void copyMem(T& destination, const U& source)
{
    static_assert(Count <= sizeof(U));
    static_assert(Count <= sizeof(T));
    static_assert(std::is_standard_layout_v<T>);
    static_assert(std::is_standard_layout_v<U>);

    // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)
    auto* out = reinterpret_cast<uint8_t*>(&destination);
    const auto* in = reinterpret_cast<const uint8_t*>(&source);
    // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
    for (size_t idx = 0; idx < Count; ++idx) {
        out[idx] = in[idx];
    }
}

/**
 * \brief Copies all sizeof(U) bytes of \p source into \p destination, which
 * must be at least as large.
 */
template <typename T, typename U>
void copyMem(T& destination, const U& source)
{
    static_assert(sizeof(U) <= sizeof(T));

    copyMem<sizeof(U)>(destination, source);
}

/**
 * \brief Compares the first \p Count bytes of two objects.
 *
 * \return 0 when the bytes match, otherwise the difference between the first
 * pair of differing bytes (left minus right).
 */
template <size_t Count, typename T, typename U>
int compareMem(const T& lhs, const U& rhs)
{
    static_assert(Count <= sizeof(T));
    static_assert(Count <= sizeof(U));

    // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)
    return privateImpl::compareBytes(
        reinterpret_cast<const uint8_t*>(&lhs),
        reinterpret_cast<const uint8_t*>(&rhs),
        Count);
    // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
}

/**
 * \brief Overload of compareMem<Count> selected for non-const lvalues and
 * temporaries on the right-hand side.
 *
 * \p rhs is a named parameter and therefore an lvalue inside this function,
 * so its address is taken directly. Forwarding it first would produce an
 * rvalue whose address cannot be taken, which made this overload fail to
 * compile for exactly the temporaries it is chosen for.
 */
template <size_t Count, typename T, typename U>
int compareMem(const T& lhs, U&& rhs)
{
    static_assert(Count <= sizeof(T));
    static_assert(Count <= sizeof(U));

    // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)
    return privateImpl::compareBytes(
        reinterpret_cast<const uint8_t*>(&lhs),
        reinterpret_cast<const uint8_t*>(&rhs),
        Count);
    // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast)
}

/**
 * \brief Compares two objects of identical size byte by byte.
 */
template <typename T, typename U>
int compareMem(const T& lhs, const U& rhs)
{
    static_assert(sizeof(U) == sizeof(T));

    return compareMem<sizeof(U)>(lhs, rhs);
}

/**
 * \brief Same-size comparison for non-const lvalues and temporaries on the
 * right-hand side. Nothing is moved from; the bytes are only read.
 */
template <typename T, typename U>
int compareMem(const T& lhs, U&& rhs)
{
    static_assert(sizeof(U) == sizeof(T));

    return compareMem<sizeof(U)>(lhs, rhs);
}

} // namespace fud
// The guard carries the library prefix so it cannot clash with an unrelated
// project's result.hpp included into the same translation unit.
#ifndef FUD_RESULT_HPP
#define FUD_RESULT_HPP

#include <utility>
#include <variant>

namespace fud {

/**
 * \brief Holds either a success value of type T or an error value of type E.
 *
 * Instances are created through the okay() and error() factories. T and E
 * must be distinct types: the constructors and the accessors select the
 * alternative by type.
 */
template <typename T, typename E>
class Result {
  public:
    using ResultType = Result<T, E>;

    /** \brief Builds a successful result holding a copy of \p okay. */
    static ResultType okay(const T& okay)
    {
        return ResultType{okay};
    }

    /** \brief Builds a successful result by moving from \p okay. */
    static ResultType okay(T&& okay)
    {
        return ResultType{std::move(okay)};
    }

    /** \brief Builds a failed result holding a copy of \p error. */
    static ResultType error(const E& error)
    {
        return ResultType{error};
    }

    /** \brief Builds a failed result by moving from \p error. */
    static ResultType error(E&& error)
    {
        return ResultType{std::move(error)};
    }

    /** \brief True when the success alternative is stored. */
    [[nodiscard]] constexpr bool isOkay() const
    {
        return (m_value.index() == 0);
    }

    /** \brief True when the error alternative is stored. */
    [[nodiscard]] constexpr bool isError() const
    {
        return (m_value.index() == 1);
    }

    /**
     * \brief Returns a copy of the success value.
     *
     * Const-qualified so that results held by const reference can be read.
     * std::get throws std::bad_variant_access when the error alternative is
     * stored, so check isOkay() first.
     */
    [[nodiscard]] T getOkay() const
    {
        return std::get<T>(m_value);
    }

    /**
     * \brief Returns a copy of the error value.
     *
     * std::get throws std::bad_variant_access when the success alternative is
     * stored, so check isError() first.
     */
    [[nodiscard]] E getError() const
    {
        return std::get<E>(m_value);
    }

  private:
    explicit Result() : m_value() {}
    explicit Result(const T& value) : m_value(value) {}
    explicit Result(const E& value) : m_value(value) {}

    explicit Result(T&& value) : m_value(std::move(value)) {}
    explicit Result(E&& value) : m_value(std::move(value)) {}

    // Index 0 is the success alternative, index 1 the error alternative.
    std::variant<T, E> m_value;
};

} // namespace fud

#endif
// Prefixed guard: a bare STATUS_HPP is likely to collide with other headers.
#ifndef FUD_STATUS_HPP
#define FUD_STATUS_HPP

namespace fud {

/**
 * \brief Status codes returned by libfud operations.
 *
 * The enumeration is marked nodiscard, so ignoring a returned status draws a
 * compiler warning.
 */
enum class [[nodiscard]] FudStatus
{
    Success = 0,
    NullPointer,
    StringInvalid,
    OperationInvalid,
    AllocFailure,
    InvalidInput,
    Utf8Invalid,
    Failure,
    NotFound,
    Aliased,
    Empty,
    Partial,
    Full,
    RangeError,
    VariantInvalid,
    NotImplemented,
    NotSupported
};

/**
 * \brief Maps a status code to a static, null-terminated name.
 *
 * \param status Code to describe.
 * \return The enumerator name prefixed with "Ext", or "Unknown" for a value
 * that is not one of the enumerators. The returned pointer refers to a
 * string literal and must not be freed.
 */
constexpr const char* ExtStatusToString(FudStatus status) noexcept
{
    switch (status) {
    case FudStatus::Success:
        return "ExtSuccess";
    case FudStatus::NullPointer:
        return "ExtNullPointer";
    case FudStatus::StringInvalid:
        return "ExtStringInvalid";
    case FudStatus::OperationInvalid:
        return "ExtOperationInvalid";
    case FudStatus::AllocFailure:
        return "ExtAllocFailure";
    case FudStatus::InvalidInput:
        return "ExtInvalidInput";
    case FudStatus::Utf8Invalid:
        return "ExtUtf8Invalid";
    case FudStatus::Failure:
        return "ExtFailure";
    case FudStatus::NotFound:
        return "ExtNotFound";
    case FudStatus::Aliased:
        return "ExtAliased";
    case FudStatus::Empty:
        return "ExtEmpty";
    case FudStatus::Partial:
        return "ExtPartial";
    case FudStatus::Full:
        return "ExtFull";
    case FudStatus::RangeError:
        return "ExtRangeError";
    case FudStatus::VariantInvalid:
        return "ExtVariantInvalid";
    case FudStatus::NotImplemented:
        return "ExtNotImplemented";
    case FudStatus::NotSupported:
        return "ExtNotSupported";
    default:
        return "Unknown";
    }
}

/** \brief With no pointers to inspect, none of them is null. */
constexpr bool anyAreNull() noexcept
{
    return false;
}

/** \brief True when \p pointer is null. */
template <typename T>
constexpr bool anyAreNull(const T* pointer) noexcept
{
    return pointer == nullptr;
}

/**
 * \brief True when at least one argument compares equal to nullptr.
 *
 * Arguments are checked left to right and evaluation stops at the first null
 * one. Any type comparable with nullptr is accepted, including a literal
 * nullptr.
 */
template <typename T, typename... Ts>
constexpr bool anyAreNull(T pointer, Ts... pointers)
{
    // The fold over an empty pack yields false, so a single argument works too.
    return (pointer == nullptr) || (... || (pointers == nullptr));
}

} // namespace fud

#endif
pointers) +{ + if (pointer == nullptr) + { + return true; + } + return anyAreNull(pointers...); +} + +} // namespace ext_lib + +#endif diff --git a/include/string.hpp b/include/string.hpp new file mode 100644 index 0000000..89aa94e --- /dev/null +++ b/include/string.hpp @@ -0,0 +1,153 @@ +/* + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FUD_STRING_HPP +#define FUD_STRING_HPP + +#include "utf8.hpp" + +#include <climits> +#include <cstddef> + +static_assert(CHAR_BIT == 8); + +namespace fud { + +class String { + public: + [[nodiscard]] constexpr size_t length() const + { + return m_length; + } + + [[nodiscard]] constexpr size_t size() const + { + return m_length + 1; + } + + [[nodiscard]] constexpr size_t capacity() const + { + return m_capacity; + } + + [[nodiscard]] constexpr utf8* data() const + { + return m_data; + } + + [[nodiscard]] bool nullTerminated() const; + + [[nodiscard]] bool valid() const; + + [[nodiscard]] bool utf8Valid() const; + + [[nodiscard]] FudStatus nullTerminate() const; + + [[nodiscard]] constexpr size_t remainingLength() const + { + if (m_length >= m_capacity) { + return 0; + } + + return m_capacity - 1U - m_length; + } + + [[nodiscard]] FudStatus pushBack(char letter); + + [[nodiscard]] FudStatus pushBack(utf8 letter); + + [[nodiscard]] FudStatus pushBack(const ExtUtf8& letter); + + std::optional<utf8> pop(); + + [[nodiscard]] FudStatus catenate(StringView source); + + private: + 
utf8* m_data; + size_t m_length; + size_t m_capacity; +}; + +class StringView { + public: + constexpr StringView() noexcept : m_length(0), m_data{nullptr} + { + } + + constexpr StringView(size_t strLen, const utf8* strData) : m_length(0), m_data{strData} + { + } + + StringView(size_t strLen, const char* strData) : + m_length(0), // line break + m_data{reinterpret_cast<const utf8*>(strData)} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + { + } + + explicit constexpr StringView(const StringView& view) noexcept = default; + + explicit constexpr StringView(const String& fudString) noexcept : StringView(fudString.length(), fudString.data()) + { + } + + [[nodiscard]] constexpr size_t length() const + { + return m_length; + } + + [[nodiscard]] constexpr const utf8* data() const + { + return m_data; + } + + [[nodiscard]] bool nullTerminated() const; + + [[nodiscard]] bool utf8Valid() const; + + Result<size_t, FudStatus> skipWhitespace(); + + Result<size_t, FudStatus> trimWhitespace(); + + FudStatus toUint8(uint8_t& number, uint8_t specifiedRadix, size_t& strLen) const; + + FudStatus toUint16(uint16_t& number, uint8_t specifiedRadix, size_t& strLen) const; + + FudStatus toUint32(uint32_t& number, uint8_t specifiedRadix, size_t& strLen) const; + + FudStatus toUint64(uint64_t& number, uint8_t specifiedRadix, size_t& strLen) const; + + FudStatus toInt8(int8_t& number, uint8_t specifiedRadix, size_t& strLen) const; + + FudStatus toInt16(int16_t& number, uint8_t specifiedRadix, size_t& strLen) const; + + FudStatus toInt32(int32_t& number, uint8_t specifiedRadix, size_t& strLen) const; + + FudStatus toInt64(int64_t& number, uint8_t specifiedRadix, size_t& strLen) const; + + FudStatus toFloat(float& number, size_t& strLen) const; + + FudStatus toDouble(double& number, size_t& strLen) const; + + private: + size_t m_length; + const utf8* m_data; +}; + +FudStatus skipWhitespace(StringView& view, size_t& skipIndex); + +} // namespace fud + +#endif diff --git 
a/include/unique_array.hpp b/include/unique_array.hpp new file mode 100644 index 0000000..a7e0731 --- /dev/null +++ b/include/unique_array.hpp @@ -0,0 +1,68 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FUD_UNIQUE_ARRAY_HPP +#define FUD_UNIQUE_ARRAY_HPP + +#include "array.hpp" +#include "fud_type_traits.hpp" + +#include <cstdlib> +#include <utility> + +namespace fud { + +template <class T, T... Vs> +class UniqueArray { // NOLINT(cppcoreguidelines-special-member-functions) + public: + // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) + const Array<T, sizeof...(Vs)> m_values{}; + // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) + + ~UniqueArray() = default; + + constexpr UniqueArray() : m_values{{Vs...}} + { + static_assert(sizeof...(Vs) < 2 || !hasDuplicates<T, Vs...>()); + } + + constexpr UniqueArray(const UniqueArray& rhs) : m_values(rhs.m_values) + { + static_assert(sizeof...(Vs) < 2 || !hasDuplicates<T, Vs...>()); + } + + constexpr UniqueArray(UniqueArray&& rhs) noexcept : m_values(std::move(rhs).m_values) + { + static_assert(sizeof...(Vs) < 2 || !hasDuplicates<T, Vs...>()); + } + + [[nodiscard]] constexpr size_t size() const + { + return sizeof...(Vs); + } +}; + +template <typename T=size_t, T... 
Is> +constexpr UniqueArray<T, Is...> makeStaticIndexSet(std::integer_sequence<T, Is...> /*unused*/) +{ + static_assert(sizeof...(Is) > 0); + return UniqueArray<T, Is...>{}; +} + +} // namespace fud + +#endif diff --git a/include/utf8.hpp b/include/utf8.hpp new file mode 100644 index 0000000..c66d93c --- /dev/null +++ b/include/utf8.hpp @@ -0,0 +1,557 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FUD_UTF8_HPP +#define FUD_UTF8_HPP + +#include "array.hpp" +#include "memory.hpp" +#include "status.hpp" +#include "unique_array.hpp" + +/* +#include "ext_hash.hpp" +#include "ext_set.hpp" +*/ + +#include <cstdint> +#include <optional> +#include <type_traits> + +namespace fud { + +using utf8 = unsigned char; + +struct StringView; + +constexpr uint8_t ASCII_MASK = 0x7F; + +constexpr uint8_t UTF8_MB_PATTERN_MASK = 0xC0; +constexpr uint8_t UTF8_MB_PATTERN = 0x80; +constexpr uint8_t UTF8_MB_MASK = static_cast<uint8_t>(~UTF8_MB_PATTERN_MASK); + +constexpr uint8_t UTF8_2B_PATTERN_MASK = 0xE0; +constexpr uint8_t UTF8_2B_PATTERN = 0xC0; +constexpr uint8_t UTF8_2B_MASK = static_cast<uint8_t>(~UTF8_2B_PATTERN_MASK); + +constexpr uint8_t UTF8_3B_PATTERN_MASK = 0xF0; +constexpr uint8_t UTF8_3B_PATTERN = 0xE0; +constexpr uint8_t UTF8_3B_MASK = static_cast<uint8_t>(~UTF8_3B_PATTERN_MASK); + +constexpr uint8_t UTF8_4B_PATTERN_MASK = 0xF8; +constexpr uint8_t UTF8_4B_PATTERN = 0xF0; +constexpr uint8_t UTF8_4B_MASK 
= static_cast<uint8_t>(~UTF8_4B_PATTERN_MASK); + +namespace privateImpl { +constexpr bool validUtf8MB(uint8_t code) noexcept +{ + return (code & UTF8_MB_PATTERN_MASK) == UTF8_MB_PATTERN; +} +} // namespace privateImpl + +struct Ascii { + Array<uint8_t, 1> characters; + + constexpr Ascii() noexcept = default; + + constexpr explicit Ascii(uint8_t chr) noexcept : characters{{chr}} + { + } + + [[nodiscard]] constexpr uint8_t character() const noexcept + { + return characters[0]; + } + + [[nodiscard]] constexpr char asChar() const noexcept + { + return static_cast<char>(characters[0]); + } + + static constexpr size_t size() noexcept + { + return 1; + } + + [[nodiscard]] constexpr bool valid() const noexcept + { + return valid(characters[0]); + } + + static constexpr bool valid(uint8_t character) noexcept + { + return static_cast<uint8_t>(character & ~ASCII_MASK) == 0; + } + + auto operator<=>(const Ascii& other) const noexcept = default; +}; + +static_assert(std::is_trivial_v<Ascii>); +static_assert(std::is_standard_layout_v<Ascii>); + +/* +| B | E | Byte 1 | Byte 2 | Byte 3 | Byte 4 +| U+0000 | U+007F | 0xxxxxxx | | | +| U+0080 | U+07FF | 110xxxxx | 10xxxxxx | | +| U+0800 | U+FFFF | 1110xxxx | 10xxxxxx | 10xxxxxx | +| U+10000 | U+10FFFF | 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx +*/ + +struct Utf82Byte { + constexpr Utf82Byte(uint8_t first, uint8_t second) noexcept : characters{{first, second}} + { + } + Array<uint8_t, 2> characters; + static constexpr size_t size() noexcept + { + return 2; + } + + [[nodiscard]] constexpr bool valid() const noexcept + { + return valid(first(), second()); + } + + static constexpr bool valid(uint8_t first, uint8_t second) noexcept + { + using privateImpl::validUtf8MB; + return ((first & UTF8_2B_PATTERN_MASK) == UTF8_2B_PATTERN) && validUtf8MB(second); + } + + [[nodiscard]] constexpr uint8_t first() const noexcept + { + return characters[0]; + } + + [[nodiscard]] constexpr uint8_t second() const noexcept + { + return characters[1]; + } + 
+ auto operator<=>(const Utf82Byte& other) const noexcept = default; +}; + +struct Utf83Byte { + constexpr Utf83Byte(uint8_t first, uint8_t second, uint8_t third) noexcept : characters{{first, second, third}} + { + } + + Array<uint8_t, 3> characters; + + static constexpr size_t size() noexcept + { + return 3; + } + + [[nodiscard]] constexpr bool valid() const noexcept + { + return valid(first(), second(), third()); + } + + static constexpr bool valid(uint8_t first, uint8_t second, uint8_t third) noexcept + { + using privateImpl::validUtf8MB; + return ((first & UTF8_3B_PATTERN_MASK) == UTF8_3B_PATTERN) && validUtf8MB(second) && validUtf8MB(third); + } + + [[nodiscard]] constexpr uint8_t first() const noexcept + { + return characters[0]; + } + + [[nodiscard]] constexpr uint8_t second() const noexcept + { + return characters[1]; + } + + [[nodiscard]] constexpr uint8_t third() const noexcept + { + return characters[2]; + } + + auto operator<=>(const Utf83Byte& other) const noexcept = default; +}; + +struct Utf84Byte { + constexpr Utf84Byte(uint8_t first, uint8_t second, uint8_t third, uint8_t fourth) noexcept : + characters{{first, second, third, fourth}} + { + } + + Array<uint8_t, 4> characters; + + static constexpr size_t size() noexcept + { + return 4; + } + + [[nodiscard]] constexpr bool valid() const noexcept + { + return valid(first(), second(), third(), fourth()); + } + + static constexpr bool valid(uint8_t first, uint8_t second, uint8_t third, uint8_t fourth) noexcept + { + using privateImpl::validUtf8MB; + if ((first & UTF8_4B_PATTERN_MASK) != UTF8_4B_PATTERN) { + return false; + } + return validUtf8MB(second) && validUtf8MB(third) && validUtf8MB(fourth); + } + + [[nodiscard]] constexpr uint8_t first() const noexcept + { + return characters[0]; + } + + [[nodiscard]] constexpr uint8_t second() const noexcept + { + return characters[1]; + } + + [[nodiscard]] constexpr uint8_t third() const noexcept + { + return characters[2]; + } + + [[nodiscard]] constexpr 
uint8_t fourth() const noexcept + { + return characters[3]; + } + + auto operator<=>(const Utf84Byte& other) const noexcept = default; +}; + +/** \brief Variant over the character structs Ascii, Utf82Byte, Utf83Byte and Utf84Byte. */ using Utf8Variant = std::variant<Ascii, Utf82Byte, Utf83Byte, Utf84Byte>; + +/** \brief The values 0 to 3; the static_asserts below check them against the ExtUtf8Type enumerators. */ constexpr auto ExtUtf8TypeSet{UniqueArray<size_t, 0, 1, 2, 3>{}}; +/** \brief Names the alternatives of Utf8Variant in declaration order; ExtUtf8::getType() casts the variant index to this enum. */ enum class ExtUtf8Type : uint8_t +{ + Ascii, + Utf82Byte, + Utf83Byte, + Utf84Byte, +}; +static_assert(ExtUtf8TypeSet.m_values[0] == static_cast<uint8_t>(ExtUtf8Type::Ascii)); +static_assert(ExtUtf8TypeSet.m_values[1] == static_cast<uint8_t>(ExtUtf8Type::Utf82Byte)); +static_assert(ExtUtf8TypeSet.m_values[2] == static_cast<uint8_t>(ExtUtf8Type::Utf83Byte)); +static_assert(ExtUtf8TypeSet.m_values[3] == static_cast<uint8_t>(ExtUtf8Type::Utf84Byte)); + +class String; +class StringView; + +/** \brief One UTF-8 character stored as a Utf8Variant. */ struct ExtUtf8 { + /** Active character; a default-constructed ExtUtf8 holds Ascii{}. */ Utf8Variant m_variant{Utf8Variant{Ascii{}}}; + + /** Ascii value 0xFF that the makeUtf8 overloads and invalidAscii() store to mark an invalid character. */ static constexpr Ascii invalidAsciiCode{Ascii{0xFF}}; + /* fromString and fromStringView are only declared here. NOTE(review): presumably they decode the character found at index - confirm against the definitions. */ static ExtUtf8 fromString(const String& fudString, size_t index) noexcept; + static ExtUtf8 fromStringView(StringView&& fudView, size_t index) noexcept; + static ExtUtf8 fromStringView(const StringView& fudView, size_t index) noexcept; + + /** \brief Builds an ExtUtf8 from data by trying Ascii, Utf82Byte, Utf83Byte and Utf84Byte validation in that order on the first one, two, three and four elements; stores invalidAsciiCode when none validates. */ static constexpr ExtUtf8 makeUtf8(Array<utf8, 4>& data) + { + ExtUtf8 unicode{}; + if (Ascii::valid(data[0])) { + unicode.m_variant = Ascii{data[0]}; + } else if (Utf82Byte::valid(data[0], data[1])) { + unicode.m_variant = Utf82Byte{data[0], data[1]}; + } else if (Utf83Byte::valid(data[0], data[1], data[2])) { + unicode.m_variant = Utf83Byte{data[0], data[1], data[2]}; + } else if (Utf84Byte::valid(data[0], data[1], data[2], data[3])) { + unicode.m_variant = Utf84Byte{data[0], data[1], data[2], data[3]}; + } else { + unicode.m_variant = invalidAsciiCode; + } + return unicode; + } + + /** \brief Wraps utf8Char when utf8Char.valid() holds; otherwise stores invalidAsciiCode. */ static constexpr ExtUtf8 makeUtf8(const Ascii& utf8Char) + { + ExtUtf8 unicode{{Utf8Variant{Ascii{}}}}; + if (utf8Char.valid()) { + unicode.m_variant = utf8Char; + } else { + unicode.m_variant = invalidAsciiCode; + } + return unicode; + } + + /** \brief Returns an ExtUtf8 holding invalidAsciiCode. */ static constexpr ExtUtf8 
invalidAscii() + { + ExtUtf8 utf8{}; + utf8.m_variant = Ascii{invalidAsciiCode}; + return utf8; + } + + /** \brief Returns the index of the active variant alternative converted to ExtUtf8Type. */ [[nodiscard]] constexpr ExtUtf8Type getType() const + { + return static_cast<ExtUtf8Type>(m_variant.index()); + } + + /** \brief Returns true when the Ascii alternative is active. */ [[nodiscard]] constexpr bool isAscii() const + { + return getType() == ExtUtf8Type::Ascii; + } + + /** \brief Returns the valid() result of the active alternative; false when the index matches no ExtUtf8Type case. */ [[nodiscard]] constexpr bool valid() const noexcept + { + switch (m_variant.index()) { + case static_cast<size_t>(ExtUtf8Type::Ascii): + return std::get<Ascii>(m_variant).valid(); + case static_cast<size_t>(ExtUtf8Type::Utf82Byte): + return std::get<Utf82Byte>(m_variant).valid(); + case static_cast<size_t>(ExtUtf8Type::Utf83Byte): + return std::get<Utf83Byte>(m_variant).valid(); + case static_cast<size_t>(ExtUtf8Type::Utf84Byte): + return std::get<Utf84Byte>(m_variant).valid(); + default: // unlikely + return false; + } + } + + /** \brief Returns 0 when valid() is false, otherwise the static size() of the active alternative's type. */ [[nodiscard]] constexpr size_t size() const noexcept + { + if (!valid()) { + return 0; + } + switch (m_variant.index()) { + case static_cast<size_t>(ExtUtf8Type::Ascii): + return Ascii::size(); + case static_cast<size_t>(ExtUtf8Type::Utf82Byte): + return Utf82Byte::size(); + case static_cast<size_t>(ExtUtf8Type::Utf83Byte): + return Utf83Byte::size(); + case static_cast<size_t>(ExtUtf8Type::Utf84Byte): + return Utf84Byte::size(); + default: // unlikely + return 0; + } + } + + /** \brief Returns nullptr when valid() is false, otherwise characters.data() of the active alternative. */ [[nodiscard]] constexpr const uint8_t* data() const noexcept + { + if (!valid()) { + return nullptr; + } + + switch (m_variant.index()) { + case static_cast<size_t>(ExtUtf8Type::Ascii): + return std::get<Ascii>(m_variant).characters.data(); + case static_cast<size_t>(ExtUtf8Type::Utf82Byte): + return std::get<Utf82Byte>(m_variant).characters.data(); + case static_cast<size_t>(ExtUtf8Type::Utf83Byte): + return std::get<Utf83Byte>(m_variant).characters.data(); + case static_cast<size_t>(ExtUtf8Type::Utf84Byte): + return std::get<Utf84Byte>(m_variant).characters.data(); + default: // unlikely + return nullptr; + } + } + + /** \brief Calls transform on the stored Ascii when the Ascii alternative is active. \return true if transform was called, false otherwise. */ template <typename Func> + 
[[nodiscard]] bool transformAscii(Func&& transform) + { + if (isAscii()) { + std::forward<Func>(transform)(std::get<Ascii>(m_variant)); + return true; + } + return false; + } + + /** \brief Returns -1 when valid() is false; otherwise combines the bytes of the active alternative into one int64_t with characters[0] in the most significant position. */ [[nodiscard]] constexpr int64_t hash() const noexcept + { + using fud::ExtUtf8Type; + using fud::Utf82Byte; + using fud::Utf83Byte; + using fud::Utf84Byte; + + if (!valid()) { + return -1; + } + + constexpr uint8_t OneByteShift = 8; + constexpr uint8_t TwoByteShift = 2 * OneByteShift; + constexpr uint8_t ThreeByteShift = 3 * OneByteShift; + + switch (static_cast<ExtUtf8Type>(m_variant.index())) { + case ExtUtf8Type::Ascii: + return std::get<Ascii>(m_variant).characters[0]; + case ExtUtf8Type::Utf82Byte: + return static_cast<int64_t>(std::get<Utf82Byte>(m_variant).characters[0]) << OneByteShift | + static_cast<int64_t>(std::get<Utf82Byte>(m_variant).characters[1]); + case ExtUtf8Type::Utf83Byte: + return static_cast<int64_t>(std::get<Utf83Byte>(m_variant).characters[0]) << TwoByteShift | + static_cast<int64_t>(std::get<Utf83Byte>(m_variant).characters[1]) << OneByteShift | + static_cast<int64_t>(std::get<Utf83Byte>(m_variant).characters[2]); + case ExtUtf8Type::Utf84Byte: + return static_cast<int64_t>(std::get<Utf84Byte>(m_variant).characters[0]) << ThreeByteShift | + static_cast<int64_t>(std::get<Utf84Byte>(m_variant).characters[1]) << TwoByteShift | + static_cast<int64_t>(std::get<Utf84Byte>(m_variant).characters[2]) << OneByteShift | + static_cast<int64_t>(std::get<Utf84Byte>(m_variant).characters[3]); + default: // unlikely + return -1; + } + } + + /** \brief Defaulted member-wise equality on m_variant. */ constexpr bool operator==(const ExtUtf8& other) const noexcept = default; + + /** \brief Orders two characters. When both hold the same alternative, that alternative's own three-way comparison decides; otherwise the order is Ascii, then Utf82Byte, then Utf83Byte, then Utf84Byte. */ constexpr auto operator<=>(const ExtUtf8& other) const noexcept + { + /* True when both operands hold alternative T. */ auto hasSameAlternative = []<typename T>(const ExtUtf8& lhs, const ExtUtf8& rhs) noexcept { + return std::holds_alternative<T>(lhs.m_variant) && std::holds_alternative<T>(rhs.m_variant); + }; + + /* Compares the T alternatives of both operands; below it is only called after hasSameAlternative succeeded for the same T. */ auto getSameAlternative = []<typename T>(const ExtUtf8& lhs, const ExtUtf8& rhs) noexcept { + return 
std::get<T>(lhs.m_variant).operator<=>(std::get<T>(rhs.m_variant)); + }; + + if (hasSameAlternative.template operator()<Ascii>(*this, other)) { + return getSameAlternative.template operator()<Ascii>(*this, other); + } + + if (hasSameAlternative.template operator()<Utf82Byte>(*this, other)) { + return getSameAlternative.template operator()<Utf82Byte>(*this, other); + } + + if (hasSameAlternative.template operator()<Utf83Byte>(*this, other)) { + return getSameAlternative.template operator()<Utf83Byte>(*this, other); + } + + if (hasSameAlternative.template operator()<Utf84Byte>(*this, other)) { + return getSameAlternative.template operator()<Utf84Byte>(*this, other); + } + + /* The alternatives differ from here on: the operand holding the earlier alternative compares less. */ if (std::holds_alternative<Ascii>(m_variant)) { + return std::strong_ordering::less; + } + + if (std::holds_alternative<Ascii>(other.m_variant)) { + return std::strong_ordering::greater; + } + + if (std::holds_alternative<Utf82Byte>(m_variant)) { + return std::strong_ordering::less; + } + + if (std::holds_alternative<Utf82Byte>(other.m_variant)) { + return std::strong_ordering::greater; + } + + if (std::holds_alternative<Utf83Byte>(m_variant)) { + return std::strong_ordering::less; + } + + /* Only the case of this holding Utf84Byte and other holding Utf83Byte remains. */ return std::strong_ordering::greater; + } + + /** \brief Returns the stored Ascii when that alternative is active, std::nullopt otherwise. */ std::optional<Ascii> getAscii() const + { + if (m_variant.index() == static_cast<size_t>(ExtUtf8Type::Ascii)) { + return std::get<Ascii>(m_variant); + } + return std::nullopt; + } +}; + +/** \brief Checks if a char is ascii. */ +bool ext_lib_char_is_ascii(char character); + +/** \brief Checks if an ExtUtf8 character is ascii. NOTE(review): presumably the answer is reported through isAscii and the FudStatus signals failure - confirm against the definition. */ FudStatus ext_lib_utf8_is_ascii(ExtUtf8& character, bool& isAscii); + +/** \brief Checks if a char is alphanumeric. */ +bool ext_lib_char_is_alphanumeric(char character); + +/** \brief Checks if an ExtUtf8 character is alphanumeric. NOTE(review): presumably the answer is reported through pred - confirm against the definition. */ +FudStatus ext_lib_utf8_is_alphanumeric(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is alphabetic. */ +bool ext_lib_char_is_alpha(char character); + +/** \brief Checks if a character is alphabetic. 
*/ +FudStatus ext_lib_utf8_is_alpha(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is lowercase. */ +bool ext_lib_char_is_lowercase(char character); + +/** \brief Checks if an ExtUtf8 character is lowercase. NOTE(review): presumably the answer is reported through pred and the FudStatus signals failure - confirm against the definition. */ +FudStatus ext_lib_utf8_is_lowercase(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is uppercase. */ +bool ext_lib_char_is_uppercase(char character); + +/** \brief Checks if an ExtUtf8 character is uppercase. NOTE(review): presumably the answer is reported through pred - confirm against the definition. */ +FudStatus ext_lib_utf8_is_uppercase(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is a digit. */ +bool ext_lib_char_is_digit(char character); + +/** \brief Checks if an ExtUtf8 character is a digit. NOTE(review): presumably the answer is reported through pred - confirm against the definition. */ +FudStatus ext_lib_utf8_is_digit(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is a hexadecimal digit. */ +bool ext_lib_char_is_hex_digit(char character); + +/** \brief Checks if an ExtUtf8 character is a hexadecimal digit. NOTE(review): presumably the answer is reported through pred - confirm against the definition. */ +FudStatus ext_lib_utf8_is_hex_digit(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is a control character. */ +bool ext_lib_char_is_control(char character); + +/** \brief Checks if an ExtUtf8 character is a control character. NOTE(review): presumably the answer is reported through pred - confirm against the definition. */ +FudStatus ext_lib_utf8_is_control(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is a graphical character. */ +bool ext_lib_char_is_graphical(char character); + +/** \brief Checks if an ExtUtf8 character is a graphical character. NOTE(review): presumably the answer is reported through pred - confirm against the definition. */ +FudStatus ext_lib_utf8_is_graphical(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is a space character. */ +bool ext_lib_char_is_space(char character); + +/** \brief Checks if an ExtUtf8 character is a space character. NOTE(review): presumably the answer is reported through pred - confirm against the definition. */ +FudStatus ext_lib_utf8_is_space(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is a blank character. */ +bool ext_lib_char_is_blank(char character); + +/** \brief Checks if a character is a blank character. 
*/ +FudStatus ext_lib_utf8_is_blank(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is a printable character. */ +bool ext_lib_char_is_printable(char character); + +/** \brief Checks if an ExtUtf8 character is a printable character. NOTE(review): presumably the answer is reported through pred and the FudStatus signals failure - confirm against the definition. */ +FudStatus ext_lib_utf8_is_printable(ExtUtf8* character, bool* pred); + +/** \brief Checks if a char is a punctuation character. */ +bool ext_lib_char_is_punctuation(char character); + +/** \brief Checks if an ExtUtf8 character is a punctuation character. NOTE(review): presumably the answer is reported through pred - confirm against the definition. */ +FudStatus ext_lib_utf8_is_punctuation(ExtUtf8* character, bool* pred); + +/* The to_lower and to_upper helpers below are only declared here; their case-mapping rules and the meaning of the returned ExtUtf8 pointer are set by the definitions. */ uint8_t ext_lib_char_to_lower(uint8_t character); + +ExtUtf8* ext_lib_utf8_to_lower(ExtUtf8* character); + +uint8_t ext_lib_char_to_upper(uint8_t character); + +ExtUtf8* ext_lib_utf8_to_upper(ExtUtf8* character); + +} // namespace fud + +#endif diff --git a/include/utf8_iterator.hpp b/include/utf8_iterator.hpp new file mode 100644 index 0000000..1f9674b --- /dev/null +++ b/include/utf8_iterator.hpp @@ -0,0 +1,39 @@ +#ifndef FUD_UTF8_ITERATOR_HPP +#define FUD_UTF8_ITERATOR_HPP + +#include "string.hpp" +#include "utf8.hpp" + +#include <cstddef> +#include <optional> + +namespace fud { + +/** \brief Iterator over a StringView that hands out ExtUtf8 values through peek() and next(); both are declared here without a body. */ class Utf8Iterator { + private: + /** Current position; starts at 0 and is set back to 0 by reset(). */ size_t m_index{0}; + // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) + const StringView m_view; + // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) + + public: + /** \brief Initializes the iterated view from extString. */ explicit constexpr Utf8Iterator(const String& extString) : m_view{extString} + { + } + + /** \brief Initializes the iterated view as a copy of view. */ explicit constexpr Utf8Iterator(const StringView& view) : m_view{view} + { + } + + /** \brief Sets the position back to 0. */ constexpr void reset() + { + m_index = 0; + } + + /* peek() is const and next() is not. NOTE(review): presumably next() also advances m_index and an empty optional marks the end - confirm against the definitions. */ [[nodiscard]] std::optional<ExtUtf8> peek() const; + std::optional<ExtUtf8> next(); +}; + +} // namespace fud + +#endif |