From 7da829d48f9059c83ab9cada2c850621e8bbd3f3 Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Sun, 22 Sep 2024 12:41:28 -0500 Subject: Basics of library. --- source/c_file.cpp | 60 -- source/fud_c_file.cpp | 60 ++ source/fud_memory.cpp | 125 ++++ source/fud_string.cpp | 1413 ++++++++++++++++++++++++++++++++++++++++++ source/fud_utf8.cpp | 343 ++++++++++ source/fud_utf8_iterator.cpp | 55 ++ source/libfud.cpp | 4 +- source/memory.cpp | 127 ---- source/string.cpp | 19 - source/utf8.cpp | 343 ---------- source/utf8_iterator.cpp | 38 -- 11 files changed, 1999 insertions(+), 588 deletions(-) delete mode 100644 source/c_file.cpp create mode 100644 source/fud_c_file.cpp create mode 100644 source/fud_memory.cpp create mode 100644 source/fud_string.cpp create mode 100644 source/fud_utf8.cpp create mode 100644 source/fud_utf8_iterator.cpp delete mode 100644 source/memory.cpp delete mode 100644 source/string.cpp delete mode 100644 source/utf8.cpp delete mode 100644 source/utf8_iterator.cpp (limited to 'source') diff --git a/source/c_file.cpp b/source/c_file.cpp deleted file mode 100644 index f64e024..0000000 --- a/source/c_file.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * LibFud - * Copyright 2024 Dominick Allen - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "c_file.hpp" - -namespace fud { - -CBinaryFile::CBinaryFile(const std::string& filename, CFileMode mode) - : m_filename{filename}, - m_mode{CBinaryFileModeFromFlags(mode)}, - m_modeFlags{mode} -{ -} - -CBinaryFile::CBinaryFile(const std::string& filename, CFileMode mode, const std::string& extraFlags) - : m_filename{filename}, - m_extraFlags{extraFlags}, - m_mode{std::string(CBinaryFileModeFromFlags(mode) + extraFlags)}, - m_modeFlags{mode} -{ -} - -CBinaryFile::~CBinaryFile() { - close(); -} - -FileResult CBinaryFile::open() -{ - m_file = fopen(m_filename.c_str(), m_mode.c_str()); - return m_file != nullptr ? FileResult::Success : FileResult::Error; -} - -void CBinaryFile::close() -{ - if (m_file != nullptr) { - fclose(m_file); - m_file = nullptr; - } -} - -const FILE* CBinaryFile::file() const -{ - return m_file; -} - -} // namespace fud diff --git a/source/fud_c_file.cpp b/source/fud_c_file.cpp new file mode 100644 index 0000000..ff54d8e --- /dev/null +++ b/source/fud_c_file.cpp @@ -0,0 +1,60 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fud_c_file.hpp" + +namespace fud { + +CBinaryFile::CBinaryFile(const String& filename, CFileMode mode) + : m_filename{filename}, + m_mode{CBinaryFileModeFromFlags(mode)}, + m_modeFlags{mode} +{ +} + +CBinaryFile::CBinaryFile(const String& filename, CFileMode mode, const String& extraFlags) + : m_filename{filename}, + m_extraFlags{extraFlags}, + m_mode{String(CBinaryFileModeFromFlags(mode)).append(extraFlags)}, + m_modeFlags{mode} +{ +} + +CBinaryFile::~CBinaryFile() { + close(); +} + +FileResult CBinaryFile::open() +{ + m_file = fopen(m_filename.c_str(), m_mode.c_str()); + return m_file != nullptr ? FileResult::Success : FileResult::Error; +} + +void CBinaryFile::close() +{ + if (m_file != nullptr) { + fclose(m_file); + m_file = nullptr; + } +} + +const FILE* CBinaryFile::file() const +{ + return m_file; +} + +} // namespace fud diff --git a/source/fud_memory.cpp b/source/fud_memory.cpp new file mode 100644 index 0000000..fe6dfae --- /dev/null +++ b/source/fud_memory.cpp @@ -0,0 +1,125 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fud_memory.hpp" + +namespace fud { + +FudStatus copyMem(void* destination, size_t destination_size, const void* source, size_t count) +{ + if (anyAreNull(destination, source)) { + return FudStatus::NullPointer; + } + + if (destination_size < count) { + return FudStatus::InvalidInput; + } + + auto* destPtr = static_cast(destination); + const auto* sourcePtr = static_cast(source); + for (decltype(destination_size) idx = 0; idx < count; ++idx) { + destPtr[idx] = sourcePtr[idx]; + } + + return FudStatus::Success; +} + +FudStatus compareMem(const void* lhs, size_t destination_size, const void* rhs, size_t count, int* difference) +{ + if (anyAreNull(lhs, rhs, difference)) { + return FudStatus::NullPointer; + } + + if (destination_size < count) { + return FudStatus::InvalidInput; + } + + int localDifference = 0; + // NOLINTBEGIN(readability-magic-numbers) + for (decltype(destination_size) idx = 0; idx < count; idx++) { + localDifference = static_cast(lhs)[idx] - static_cast(rhs)[idx]; + if (localDifference != 0) { + *difference = localDifference; + return FudStatus::Success; + } + } + *difference = localDifference; + + return FudStatus::Success; +} + +Result compareMem(const void* lhs, size_t destination_size, const void* rhs, size_t count) +{ + int difference = 0; + auto status = compareMem(lhs, destination_size, rhs, count, &difference); + if (status != FudStatus::Success) + { + return Result::error(status); + } + + return Result::okay(difference); +} + +FudStatus setMemory(void* data, size_t dataSize, uint8_t pattern, size_t count) +{ + if (data == nullptr) + { + return FudStatus::NullPointer; + } + + if (count > dataSize) + { + return FudStatus::InvalidInput; + } + + for (size_t idx = 0; idx < count; ++idx) + { + static_cast(data)[idx] = pattern; + } + + return FudStatus::Success; +} + +FudStatus setMemory( + void* data, + size_t collectionCount, + size_t eltOffset, + size_t eltSize, + uint8_t pattern, + size_t eltCount) +{ + if (eltOffset >= collectionCount) + { + return FudStatus::InvalidInput; + } + + if (eltOffset + eltCount > collectionCount) + { + return FudStatus::InvalidInput; + } + + auto dataSize = collectionCount * eltSize; + auto byteOffset = eltOffset * eltSize; + auto byteCount = eltCount * eltSize; + + auto remainingSize = dataSize - byteOffset; + + auto* offsetData = static_cast(data) + byteOffset; + return setMemory(offsetData, remainingSize, pattern, byteCount); +} + +} // namespace fud diff --git a/source/fud_string.cpp b/source/fud_string.cpp new file mode 100644 index 0000000..d690aa9 --- /dev/null +++ b/source/fud_string.cpp @@ -0,0 +1,1413 @@ +/* + * LibFud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fud_string.hpp" + +#include + +namespace fud { + +ssize_t cStringLength(const char* str) +{ + constexpr auto maxLength = SSIZE_MAX - 1; + return cStringLength(str, maxLength); +} + +ssize_t cStringLength(const char* str, size_t maxLength) +{ + if (str == nullptr || maxLength > (SSIZE_MAX - 1)) { + return -1; + } + + ssize_t size = 0; + + while (str[size] != 0 && static_cast(size) < maxLength) { + size++; + } + + if (str[size] != 0 && static_cast(size) == maxLength) { + return static_cast(maxLength) + 1; + } + + return size; +} + +String::String(const utf8* cString) : String(reinterpret_cast(cString)) +{ +} + +String::String(const char* cString) +{ + auto lenResult = cStringLength(cString); + if (lenResult < 0 || lenResult >= SSIZE_MAX) { + m_length = 1; + m_capacity = 0; + } else if (lenResult < SSO_BUF_SIZE) { + m_length = static_cast(lenResult); + assert(copyMem(m_buffer.data(), m_buffer.size(), cString, m_length)); + } else { + m_length = static_cast(lenResult); + m_capacity = m_length + 1; + m_data = static_cast(fudAlloc(m_capacity)); + assert(m_data != nullptr); + assert(copyMem(m_buffer.data(), m_capacity, cString, m_length) == FudStatus::Success); + assert(nullTerminate() == FudStatus::Success); + } +} + +String::String(const String& rhs) : m_length{rhs.m_length}, m_capacity{rhs.m_capacity} +{ + if (rhs.valid()) { + if (isLarge()) { + m_data = static_cast(fudAlloc(m_capacity)); + assert(m_data != nullptr); + } + assert(copyMem(data(), m_capacity, rhs.data(), m_length) == FudStatus::Success); + assert(nullTerminate() == FudStatus::Success); + } +} + +String::String(String&& rhs) : m_length{rhs.m_length}, m_capacity{rhs.m_capacity} +{ + if (rhs.isLarge()) { + m_data = rhs.m_data; + rhs.m_data = nullptr; + } else { + assert(copyMem(m_buffer.data(), m_buffer.size(), rhs.m_buffer.data(), m_length)); + assert(nullTerminate() == FudStatus::Success); + } +} + +String::~String() +{ + if (isLarge() && m_data != nullptr) { + fudFree(m_data); + m_data = nullptr; + } +} + +String& String::operator=(const String& rhs) +{ + m_length = rhs.m_length; + m_capacity = rhs.m_capacity; + if (rhs.valid()) { + if (isLarge()) { + m_data = static_cast(fudAlloc(m_capacity)); + assert(m_data != nullptr); + } + assert(copyMem(data(), m_capacity, rhs.data(), m_length) == FudStatus::Success); + assert(nullTerminate() == FudStatus::Success); + } + return *this; +} + +String& String::operator=(String&& rhs) { + m_length = rhs.m_length; + m_capacity = rhs.m_capacity; + if (rhs.isLarge()) { + m_data = rhs.m_data; + rhs.m_data = nullptr; + } else { + assert(copyMem(m_buffer.data(), m_buffer.size(), rhs.m_buffer.data(), m_length)); + assert(nullTerminate() == FudStatus::Success); + } + return *this; +} + +bool String::nullTerminated() const +{ + return data() != nullptr && m_length < m_capacity && data()[m_length] == '\0'; +} + +bool String::valid() const +{ + return nullTerminated() && m_length < m_capacity; +} + +bool String::utf8Valid() const +{ + if (!valid()) { + return false; + } + + StringView view{*this}; + return view.utf8Valid(); +} + +FudStatus String::nullTerminate() const +{ + if (m_length < m_capacity) { + m_data[m_length] = '\0'; + return FudStatus::Success; + } + return FudStatus::StringInvalid; +} + +std::optional String::pop() +{ + if (m_length < 1) { + return std::nullopt; + } + m_length--; + auto letter = m_data[m_length]; + m_data[m_length] = '\0'; + return letter; +} + +FudStatus String::pushBack(char letter) +{ + return pushBack(static_cast(letter)); +} + +FudStatus String::pushBack(utf8 letter) +{ + if (!valid()) { + return FudStatus::StringInvalid; + } + + if (remainingLength() < 1) { + return FudStatus::OperationInvalid; + } + + m_data[m_length] = letter; + m_length++; + m_data[m_length] = '\0'; + + return FudStatus::Success; +} + +FudStatus String::pushBack(const FudUtf8& letter) +{ + if (!valid()) { + return FudStatus::StringInvalid; + } + + if (!letter.valid()) { + return FudStatus::InvalidInput; + } + + const auto* letterData = letter.data(); + if (letterData == nullptr) { + return FudStatus::InvalidInput; + } + + auto letterSize = letter.size(); + if (letterSize > remainingLength()) { + return FudStatus::OperationInvalid; + } + + auto copyStatus = copyMem(m_data + m_length, remainingLength(), letterData, letterSize); + + if (copyStatus != FudStatus::Success) { + return copyStatus; + } + + m_length += letterSize; + m_data[m_length] = '\0'; + + return FudStatus::Success; +} + +FudStatus String::catenate(StringView source) +{ + if (!valid()) { + return FudStatus::StringInvalid; + } + + if (source.data() == m_data) { + return FudStatus::Aliased; + } + + const auto newLength = m_length + source.length(); + const auto newSize = newLength + 1; + if (newSize >= m_capacity) { + return FudStatus::OperationInvalid; + } + + auto* destPtr = m_data + m_length; + auto status = copyMem(destPtr, m_capacity, source.data(), source.length()); + if (status == FudStatus::Success) { // likely + m_length += source.length(); + status = nullTerminate(); + } + + return status; +} + +String String::append(const String& rhs) const +{ + String output{}; + output.m_length = 1; + output.m_capacity = 0; + + if (!valid()) { + return output; + } + + output.m_length = m_length + rhs.length(); + output.m_capacity = output.m_length + 1; + if (output.isLarge()) { + output.m_data = static_cast(fudAlloc(output.m_capacity)); + } + + auto* destPtr = output.data(); + auto status = copyMem(destPtr, m_capacity, rhs.data(), rhs.length()); + assert(output.nullTerminate() == FudStatus::Success); + + return output; +} + +bool StringView::nullTerminated() const +{ + return m_data != nullptr && m_data[m_length] == '\0'; +} + +bool StringView::utf8Valid() const +{ + if (m_data == nullptr) { + return false; + } + + for (size_t idx = 0; idx < m_length;) { + if (Ascii::valid(m_data[idx])) { + idx++; + } else if (idx + 1 < m_length && Utf82Byte::valid(m_data[idx], m_data[idx + 1])) { + idx += 2; + } else if (idx + 2 < m_length && Utf83Byte::valid(m_data[idx], m_data[idx + 1], m_data[idx + 2])) { + idx += 3; + } else if ( + idx + 3 < m_length && Utf84Byte::valid(m_data[idx], m_data[idx + 1], m_data[idx + 2], m_data[idx + 3])) { + idx += 4; + } else { + return false; + } + } + + return true; +} + +Result StringView::skipWhitespace() +{ + using RetType = Result; + if (m_data == nullptr) { + return RetType::error(FudStatus::NullPointer); + } + size_t index = 0; + while (m_length > 0 && char_is_space(static_cast(m_data[0]))) { + m_data++; + m_length--; + index++; + } + + return RetType::okay(index); +} + +Result StringView::trimWhitespace() +{ + using RetType = Result; + if (m_data == nullptr) { + return RetType::error(FudStatus::NullPointer); + } + + size_t count = 0; + while (m_length > 0 && char_is_space(static_cast(m_data[m_length - 1]))) { + m_length--; + count++; + } + + return RetType::okay(count); +} + +#if 0 +FudStatus ext_string_copy(ExtBasicString* destination, const ExtBasicString* source) +{ + if (anyAreNull(source, destination) || destination->m_data == nullptr) { + return FudStatus::NullPointer; + } + + /* TODO: ensure that destination and source aren't aliased, over the entire length. */ + if (source == destination || source->m_data == destination->m_data) { + return ExtAliased; + } + + if (!String{*source}.valid()) { + return FudStatus::StringInvalid; + } + + if (destination->m_size <= source->m_length) { + return FudStatus::OperationInvalid; + } + + auto status = ExtCopyMem(destination->m_data, destination->m_size, source->m_data, source->m_length); + if (status == FudStatus::Success) { + destination->m_length = source->m_length; + status = StringBorrow{*destination}.nullTerminate(); + } + + return status; +} + +FudStatus ext_string_catenate(ExtBasicString* destination, StringView source) +{ + if (destination == nullptr || anyAreNull(destination->m_data, source.data)) { + return FudStatus::NullPointer; + } + + return StringBorrow{*destination}.catenate(source); +} + +FudStatus ext_string_truncate(ExtBasicString* source, ssize_t newLength) +{ + if (source == nullptr) { + return FudStatus::NullPointer; + } + StringBorrow wrapper{*source}; + if (!wrapper.valid()) { + return FudStatus::StringInvalid; + } + + if ((newLength > 0 && static_cast(newLength) > source->m_length) || + (static_cast(-newLength) > source->m_length)) { + return FudStatus::InvalidInput; + } + + if (newLength < 0) { + source->m_length = source->m_length - static_cast(-newLength); + } else { + source->m_length = static_cast(newLength); + } + + return wrapper.nullTerminate(); +} + +FudStatus ext_string_reverse(ExtBasicString* source) +{ + if (source == nullptr || source->m_data == nullptr) { + return FudStatus::NullPointer; + } + return ext_string_reverse_substring(source, StringView{source->m_length, source->m_data}); +} + +FudStatus ext_string_reverse_substring(ExtBasicString* source, StringView subString) +{ + auto dataOffset = subString.data - source->m_data; + if (dataOffset < 0 || static_cast(dataOffset) > source->m_length) { + return FudStatus::InvalidInput; + } + if (static_cast(dataOffset) + subString.length > source->m_length) { + return FudStatus::InvalidInput; + } + + if (source == nullptr || source->m_data == nullptr) { + return FudStatus::NullPointer; + } + + StringView view{subString}; + + size_t index = 0; + auto* data = source->m_data + dataOffset; + while (index < subString.length) { + if (ext_lib_char_is_ascii(static_cast(data[index]))) { + index++; + continue; + } + auto utf8 = FudUtf8::fromStringView(view, index); + if (!utf8.valid()) { + return ExtUtf8Invalid; + } + const auto* utf8Data = utf8.data(); + if (utf8Data == nullptr) { + return ExtFailure; + } + auto utf8Size = utf8.size(); + switch (utf8Size) { + case 2: + data[index] = utf8Data[1]; + data[index + 1] = utf8Data[0]; + break; + case 3: + data[index] = utf8Data[2]; + data[index + 2] = utf8Data[0]; + break; + case 4: + data[index] = utf8Data[3]; + data[index + 1] = utf8Data[2]; + data[index + 2] = utf8Data[1]; + data[index + 3] = utf8Data[0]; + break; + default: + return ExtFailure; + } + index += utf8Size; + } + + ext_lib::DataView dataView{subString.length, data}; + reverse(dataView); + + return FudStatus::Success; +} + +FudStatus ext_string_compare(StringView levo, StringView dextro, int* difference) +{ + if (anyAreNull(difference, levo.data, dextro.data)) { + return FudStatus::NullPointer; + } + + int diff = 0; + size_t index = 0; + while (diff == 0 && index < levo.length && index < dextro.length) { + diff = levo.data[index] - dextro.data[index]; + index++; + } + + if (diff != 0 || levo.length == dextro.length) { + /* nothing to do */ + } else if (levo.length > dextro.length) { + diff = static_cast(levo.data[index]); + } else { + diff = -static_cast(dextro.data[index]); + } + + *difference = diff; + return FudStatus::Success; +} + +FudStatus ext_string_chr(StringView extStringView, char character, size_t* index) +{ + if (anyAreNull(extStringView.data, index)) { + return FudStatus::NullPointer; + } + + bool found = false; + for (size_t localIndex = 0; localIndex < extStringView.length; ++localIndex) { + if (extStringView.data[localIndex] == static_cast(character)) { + *index = localIndex; + found = true; + break; + } + } + + if (found) { + return FudStatus::Success; + } + + return ExtNotFound; +} + +FudStatus ext_string_unicode_chr(StringView extString, const ExtUtf8* unicode, size_t* index) +{ + if (anyAreNull(extString.data, unicode, index)) { + return FudStatus::NullPointer; + } + + if (!unicode->valid()) { + return ExtUtf8Invalid; + } + + size_t charSize = unicode->size(); + ExtDebugAssert(charSize != 0); + const uint8_t* dataMem = unicode->data(); + ExtDebugAssert(dataMem != nullptr); + + std::array localData{}; + auto copyStatus = ExtCopyMem(localData.data(), localData.size(), dataMem, charSize); + ExtDebugAssert(copyStatus == FudStatus::Success); + + for (size_t sIdx = 0; sIdx + charSize - 1 < extString.length;) { + + auto localChar = FudUtf8::fromStringView(extString, sIdx); + + if (!localChar.valid()) { + return ExtUtf8Invalid; + } + + if (localChar.m_variant == unicode->m_variant) { + *index = sIdx; + return FudStatus::Success; + } + + sIdx += localChar.size(); + } + + return ExtNotFound; +} + +namespace ext_lib { + +FudStatus ext_string_span_c_api( + const StringView& inputView, + const StringView& characterSetString, + StringView& result, + bool inSet) +{ + size_t firstIndex = inputView.length; + + size_t sIdx = 0; + while (sIdx < firstIndex) { + auto stringChar = FudUtf8::fromStringView(inputView, sIdx); + if (!stringChar.valid()) { + return ExtUtf8Invalid; + } + + size_t cIdx = 0; + bool found = false; + while (firstIndex > 0 && cIdx < firstIndex && cIdx < characterSetString.length) { + auto setChar = FudUtf8::fromStringView(characterSetString, cIdx); + if (!setChar.valid()) { + return ExtUtf8Invalid; + } + + if (stringChar == setChar) { + found = true; + } + + cIdx += setChar.size(); + } + + if (!inSet && found) { + firstIndex = sIdx; + } else if (inSet && !found) { + if (sIdx > 0) { + firstIndex = sIdx; + } + break; + } + + sIdx += stringChar.size(); + } + + if (firstIndex < inputView.length) { + result.length = inputView.length - firstIndex; + result.data = inputView.data + firstIndex; + return FudStatus::Success; + } + + return ExtNotFound; +} + +FudStatus ext_string_span_set(StringView inputView, const ExtUtf8Set* characterSet, StringView* stringView, bool inSet) +{ + if (anyAreNull(inputView.data, characterSet, stringView)) { + return FudStatus::NullPointer; + } + + if (!characterSet->valid()) { + return ExtUtf8Invalid; + } + + size_t firstIndex = inputView.length; + size_t sIdx = 0; + while (sIdx < firstIndex) { + auto localChar = FudUtf8::fromStringView(inputView, sIdx); + if (!localChar.valid()) { + return ExtUtf8Invalid; + } + + bool found = characterSet->contains(localChar); + + if (!inSet && found) { + firstIndex = sIdx; + } else if (inSet && !found) { + if (sIdx > 0) { + firstIndex = sIdx; + } + break; + } + + sIdx += localChar.size(); + } + + if (firstIndex < inputView.length) { + stringView->length = inputView.length - firstIndex; + stringView->data = inputView.data + firstIndex; + return FudStatus::Success; + } + + return ExtNotFound; +} + +} // namespace ext_lib + +FudStatus ext_string_span(StringView extString, StringView characterSetString, StringView* result) +{ + if (result == nullptr) { + return FudStatus::NullPointer; + } + + const StringView inputView{extString}; + const StringView characterSet{characterSetString}; + + return ext_string_span_c_api(inputView, characterSet, *result, true); +} + +FudStatus ext_string_c_span(StringView extString, StringView characterSetString, StringView* result) +{ + if (result == nullptr) { + return FudStatus::NullPointer; + } + + const StringView inputView{extString}; + const StringView characterSet{characterSetString}; + + return ext_string_span_c_api(inputView, characterSet, *result, false); +} + +FudStatus ext_string_span_set(StringView extString, const ExtUtf8Set* characterSet, StringView* stringView) +{ + return ext_lib::ext_string_span_set(extString, characterSet, stringView, true); +} + +FudStatus ext_string_c_span_set(StringView extString, const ExtUtf8Set* characterSet, StringView* stringView) +{ + return ext_lib::ext_string_span_set(extString, characterSet, stringView, false); +} + +FudStatus ext_string_find_substring(StringView haystack, StringView needle, StringView* stringView) +{ + if (anyAreNull(haystack.data, needle.data, stringView)) { + return FudStatus::NullPointer; + } + + if (needle.length > haystack.length) { + return ExtNotFound; + } + + if (needle.length == 1) { + size_t index = 0; + auto chrFindStatus = ext_string_chr(haystack, static_cast(needle.data[0]), &index); + if (chrFindStatus == FudStatus::Success) { + stringView->data = haystack.data + index; + stringView->length = 1; + } + return chrFindStatus; + } + + size_t haystackIdx = 0; + while (haystackIdx < haystack.length - needle.length) { + StringView lhs; + lhs.data = haystack.data + haystackIdx; + lhs.length = haystack.length - haystackIdx; + size_t lhsIndex = 0; + auto chrFindStatus = ext_string_chr(lhs, static_cast(needle.data[0]), &lhsIndex); + if (chrFindStatus != FudStatus::Success) { + return chrFindStatus; + } + haystackIdx += lhsIndex; + // GE or GT? + if (haystackIdx + needle.length >= haystack.length) { + break; + } + lhs.data = haystack.data + haystackIdx; + lhs.length = needle.length; + + int difference = -1; + auto cmpStatus = ext_string_compare(lhs, needle, &difference); + ExtDebugAssert(cmpStatus == FudStatus::Success); + if (difference == 0) { + stringView->data = lhs.data; + stringView->length = lhs.length; + return FudStatus::Success; + } + haystackIdx++; + } + + return ExtNotFound; +} + +namespace ext_lib { + +FudStatus skipWhitespace(StringView& view, size_t& skipIndex) +{ + auto skipResult = view.skipWhitespace(); + if (skipResult.isError()) { + return skipResult.getError(); + } + skipIndex = skipResult.getOkay(); + if (view.length < 1) { + return FudStatus::InvalidInput; + } + return FudStatus::Success; +} + +} // namespace ext_lib + +FudStatus ext_string_view_skip_whitespace(StringView* view) +{ + if (view == nullptr) { + return FudStatus::NullPointer; + } + + StringView sView{*view}; + auto skipResult = sView.skipWhitespace(); + if (skipResult.isError()) { + return skipResult.getError(); + } + view->data = sView.data; + view->length = sView.length; + return FudStatus::Success; +} + +FudStatus ext_string_view_trim_whitespace(StringView* view) +{ + if (view == nullptr) { + return FudStatus::NullPointer; + } + + StringView sView{*view}; + auto skipResult = sView.trimWhitespace(); + if (skipResult.isError()) { + return skipResult.getError(); + } + view->data = sView.data; + view->length = sView.length; + return FudStatus::Success; +} + +namespace impl { +constexpr ext_lib::Array AsciiLookup{ + {-1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + -2, -2, -2, -2, -2, -2, -2, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, -2, -2, -2, -2, -2, -2, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3}}; + +// NOLINTBEGIN(readability-magic-numbers) +static_assert(AsciiLookup[static_cast('0')] == 0); +static_assert(AsciiLookup[static_cast('9')] == 9); +static_assert(AsciiLookup[static_cast('a')] == 10); +static_assert(AsciiLookup[static_cast('A')] == 10); +static_assert(AsciiLookup[static_cast('f')] == 15); +static_assert(AsciiLookup[static_cast('F')] == 15); +static_assert(AsciiLookup[127] == -2); +static_assert(AsciiLookup[128] == -3); +static_assert(AsciiLookup[255] == -3); +// NOLINTEND(readability-magic-numbers) + +FudStatus determineRadix(StringView input, uint8_t& radix, size_t& index) +{ + if (input.length < 1) { + return FudStatus::InvalidInput; + } + + if (input.length == 1 && input.data[0] == '0') { + radix = ExtRadixOctal; + return FudStatus::Success; + } + + if (input.length == 1) { + radix = ExtRadixDecimal; + return FudStatus::Success; + } + + if (input.data[0] == '0' && (input.data[1] == 'x' || input.data[1] == 'X')) { + radix = ExtRadixHexadecimal; + index += 2; + return FudStatus::Success; + } + + if (input.data[0] == '0') { + auto nextChar = input.data[1]; + auto nextVal = AsciiLookup[nextChar]; + if (nextVal >= 0 && nextVal < ExtRadixOctal) { + radix = ExtRadixOctal; + return FudStatus::Success; + } + if (nextVal >= ExtRadixOctal) { + return FudStatus::InvalidInput; + } + } + + radix = ExtRadixDecimal; + return FudStatus::Success; +} + +FudStatus getRadix(StringView& view, uint8_t& radix, size_t& skipIndex) +{ + if (radix == 0) { + size_t radixIndex = 0; + auto status = determineRadix(view, radix, radixIndex); + if (status != FudStatus::Success) { + return status; + } + skipIndex += radixIndex; + view.data += radixIndex; + view.length -= radixIndex; + } else if (radix == ExtRadixHexadecimal && view.length > 2 && (view.data[1] == 'x' || view.data[1] == 'X')) { + skipIndex += 2; + view.data += 2; + view.length -= 2; + } + return FudStatus::Success; +} + +FudStatus checkNegative(StringView& view, bool& isNegative, size_t& skipIndex) +{ + isNegative = view.data[0] == '-'; + if (isNegative && view.length == 1) { + return FudStatus::InvalidInput; + } + if (isNegative) { + skipIndex += 1; + view.data++; + view.length--; + } + return FudStatus::Success; +} + +FudStatus checkPlusSigned(StringView& view, size_t& skipIndex) +{ + auto isPlusSigned = view.data[0] == '+'; + if (isPlusSigned && view.length == 1) { + return FudStatus::InvalidInput; + } + if (isPlusSigned) { + skipIndex += 1; + view.data++; + view.length--; + } + return FudStatus::Success; +} + +template +FudStatus stringViewToUnsignedInteger(StringView input, T& number, uint8_t specifiedRadix, size_t& index) +{ + if (input.data == nullptr) { + return FudStatus::NullPointer; + } + + if (specifiedRadix == 1 || specifiedRadix > ExtMaxRadix || input.length < 1) { + return FudStatus::InvalidInput; + } + + uint8_t radix = specifiedRadix; + + StringView view{input}; + size_t skipIndex = 0; + auto status = ext_lib::skipWhitespace(view, skipIndex); + if (status != FudStatus::Success) { + return status; + } + + status = checkPlusSigned(view, skipIndex); + if (status != FudStatus::Success) { + return FudStatus::InvalidInput; + } + + status = getRadix(view, radix, skipIndex); + + T num = 0; + size_t digitIndex = 0; + + while (digitIndex < view.length) { + auto digitResult = impl::AsciiLookup[view.data[digitIndex]]; + if (digitResult >= radix || digitResult < 0) { + break; + } + + auto digit = static_cast(digitResult); + if (std::numeric_limits::max() / radix < num) { + return FudStatus::InvalidInput; + } + num *= radix; + if (std::numeric_limits::max() - digit < num) { + return FudStatus::InvalidInput; + } + num += digit; + digitIndex++; + } + if (digitIndex < 1) { + return FudStatus::InvalidInput; + } + + index = skipIndex + digitIndex; + number = num; + + return FudStatus::Success; +} + +template +FudStatus stringViewToUnsignedInteger(StringView input, T* number, uint8_t specifiedRadix, size_t* index) +{ + if (anyAreNull(input.data, number)) { + return FudStatus::NullPointer; + } + + size_t localIndex = 0; + + auto status = stringViewToUnsignedInteger(input, *number, specifiedRadix, localIndex); + if (status == FudStatus::Success && index != nullptr) { + *index = localIndex; + } + return status; +} + +template +FudStatus viewToSignedIntPositive(StringView view, uint8_t radix, size_t& digitIndex, T& num) +{ + digitIndex = 0; + while (digitIndex < view.length) { + int8_t digitResult = impl::AsciiLookup[view.data[digitIndex]]; + if (digitResult >= radix) { + return FudStatus::InvalidInput; + } + if (digitResult < 0) { + break; + } + auto digit = static_cast(digitResult); + if (std::numeric_limits::max() / radix < num) { + return FudStatus::InvalidInput; + } + num = static_cast(num * radix); + if (std::numeric_limits::max() - digit < num) { + return FudStatus::InvalidInput; + } + num = static_cast(num + digit); + digitIndex++; + } + + return FudStatus::Success; +} + +template +FudStatus viewToSignedIntNegative(StringView view, uint8_t radix, size_t& digitIndex, T& num) +{ + digitIndex = 0; + while (digitIndex < view.length) { + int8_t digitResult = impl::AsciiLookup[view.data[digitIndex]]; + if (digitResult >= radix) { + return FudStatus::InvalidInput; + } + if (digitResult < 0) { + break; + } + auto digit = static_cast(digitResult); + if ((std::numeric_limits::min() / radix > num)) { + return FudStatus::InvalidInput; + } + num = static_cast(num * radix); + if (std::numeric_limits::min() + digit > num) { + return FudStatus::InvalidInput; + } + num = static_cast(num - digit); + digitIndex++; + } + + return FudStatus::Success; +} + +template +FudStatus stringViewToSignedInteger(StringView input, T& number, uint8_t specifiedRadix, size_t& index) +{ + if (input.data == nullptr) { + return FudStatus::NullPointer; + } + + auto radix = specifiedRadix; + + StringView view{input}; + size_t skipIndex = 0; + auto status = ext_lib::skipWhitespace(view, skipIndex); + if (status != FudStatus::Success) { + return status; + } + + bool isNegative = false; + status = checkNegative(view, isNegative, skipIndex); + if (status != FudStatus::Success) { + return FudStatus::InvalidInput; + } + + if (!isNegative) { + status = checkPlusSigned(view, skipIndex); + if (status != FudStatus::Success) { + return FudStatus::InvalidInput; + } + } + + status = getRadix(view, radix, skipIndex); + + T num = 0; + size_t digitIndex = 0; + + if (isNegative) { + status = viewToSignedIntNegative(view, radix, digitIndex, num); + } else { + status = viewToSignedIntPositive(view, radix, digitIndex, num); + } + if (status != FudStatus::Success) { + return status; + } + + if (digitIndex < 1) { + return FudStatus::InvalidInput; + } + + index = skipIndex + digitIndex; + number = num; + return FudStatus::Success; +} + +template +FudStatus stringViewToSignedInteger(StringView input, T* number, uint8_t specifiedRadix, size_t* index) +{ + if (anyAreNull(input.data, number)) { + return FudStatus::NullPointer; + } + + if (specifiedRadix == 1 || specifiedRadix > ExtMaxRadix || input.length < 1) { + return FudStatus::InvalidInput; + } + + size_t localIndex = 0; + auto status = stringViewToSignedInteger(input, *number, specifiedRadix, localIndex); + if (status == FudStatus::Success && index != nullptr) { + *index = localIndex; + } + return status; +} + +} // namespace impl + +FudStatus ext_string_to_uint8(StringView input, uint8_t* number, uint8_t specifiedRadix, size_t* index) +{ + return impl::stringViewToUnsignedInteger(input, number, specifiedRadix, index); +} + +FudStatus ext_string_to_uint16(StringView input, uint16_t* number, uint8_t specifiedRadix, size_t* index) +{ + return impl::stringViewToUnsignedInteger(input, number, specifiedRadix, index); +} + +FudStatus ext_string_to_uint32(StringView input, uint32_t* number, uint8_t specifiedRadix, size_t* index) +{ + return impl::stringViewToUnsignedInteger(input, number, specifiedRadix, index); +} + +FudStatus ext_string_to_uint64(StringView input, uint64_t* number, uint8_t specifiedRadix, size_t* index) +{ + return impl::stringViewToUnsignedInteger(input, number, specifiedRadix, index); +} + +FudStatus ext_string_to_int8(StringView input, int8_t* number, uint8_t specifiedRadix, size_t* index) +{ + return impl::stringViewToSignedInteger(input, number, specifiedRadix, index); +} + +FudStatus ext_string_to_int16(StringView input, int16_t* number, uint8_t specifiedRadix, size_t* index) +{ + return impl::stringViewToSignedInteger(input, number, specifiedRadix, index); +} + +FudStatus ext_string_to_int32(StringView input, int32_t* number, uint8_t specifiedRadix, size_t* index) +{ + return impl::stringViewToSignedInteger(input, number, specifiedRadix, index); +} + +FudStatus ext_string_to_int64(StringView input, int64_t* number, uint8_t specifiedRadix, size_t* index) +{ + return impl::stringViewToSignedInteger(input, number, specifiedRadix, index); +} + +namespace impl { + +template +bool isNanOrInf(T& num, StringView& view, T& sign, size_t& digitIndex) +{ + if (view.length >= 3) { + std::array letters{{view.data[0], view.data[1], view.data[2]}}; + ext_lib::mapMut(letters, ext_lib_char_to_lower); + if (letters[0] == 'i' && letters[1] == 'n' && letters[2] == 'f') { + num = sign * std::numeric_limits::infinity(); + digitIndex = 3; + return true; + } + if (letters[0] == 'n' && letters[1] == 'a' && letters[2] == 'n') { + num = std::numeric_limits::quiet_NaN(); + ; + digitIndex = 3; + return true; + } + } + return false; +} + +template +FudStatus getWhole( + const StringView view, + size_t& digitIndex, + T& num, + T sign, + uint8_t radix, + bool& foundDecimal, + bool& foundExponent) +{ + while (digitIndex < view.length) { + auto nextChar = view.data[digitIndex]; + if (nextChar == '.') { + foundDecimal = true; + digitIndex++; + break; + } + + if (radix == ExtRadixDecimal && (nextChar == 'e' || nextChar == 'E')) { + foundExponent = true; + digitIndex++; + break; + } + + auto digitResult = impl::AsciiLookup[nextChar]; + if (digitResult >= radix) { + return FudStatus::InvalidInput; + } + if (digitResult < 0) { + break; + } + auto digit = static_cast(digitResult) * sign; + num *= static_cast(radix); + + num += digit; + digitIndex++; + } + return FudStatus::Success; +} + +template +FudStatus getExponent(const StringView& view, size_t& digitIndex, T& num, uint8_t radix) +{ + int32_t exponent{}; + StringView tempView{view.length - digitIndex, view.data + digitIndex}; + size_t exponentLength{}; + auto status = tempView.toInt32(exponent, ExtRadixDecimal, exponentLength); + if (status != FudStatus::Success) { + return status; + } + digitIndex += exponentLength; + num = num * std::pow(static_cast(radix), static_cast(exponent)); + return FudStatus::Success; +} + +template +FudStatus getFraction(const StringView view, size_t& digitIndex, T& num, T sign, uint8_t radix, bool& foundExponent) +{ + auto radixDiv = 1.0F / static_cast(radix); + while (digitIndex < view.length) { + auto nextChar = view.data[digitIndex]; + if (radix == ExtRadixDecimal && (nextChar == 'e' || nextChar == 'E')) { + foundExponent = true; + digitIndex++; + break; + } + + auto digitResult = impl::AsciiLookup[nextChar]; + if (digitResult >= radix) { + return FudStatus::InvalidInput; + } + if (digitResult < 0) { + break; + } + auto digit = static_cast(digitResult) * sign; + num += digit * radixDiv; + radixDiv /= static_cast(radix); + digitIndex++; + } + return FudStatus::Success; +} + +template +FudStatus stringViewToFloat(StringView input, T& number, size_t& index) +{ + if (input.data == nullptr) { + return FudStatus::NullPointer; + } + + if (input.length < 1) { + return FudStatus::InvalidInput; + } + + uint8_t radix = 0; + + StringView view{input}; + size_t skipIndex = 0; + + auto status = ext_lib::skipWhitespace(view, skipIndex); + if (status != FudStatus::Success) { + return status; + } + + T sign = 1.0; + bool isNegative = false; + status = impl::checkNegative(view, isNegative, skipIndex); + if (status != FudStatus::Success) { + return FudStatus::InvalidInput; + } + + if (!isNegative) { + status = checkPlusSigned(view, skipIndex); + } else { + sign = -1.0; + } + + if (status != FudStatus::Success) { + return FudStatus::InvalidInput; + } + + T num = 0; + size_t digitIndex = 0; + + auto retSuccess = [&]() { + index = skipIndex + digitIndex; + number = num; + return FudStatus::Success; + }; + + if (impl::isNanOrInf(num, view, sign, digitIndex)) { + return retSuccess(); + } + + status = impl::getRadix(view, radix, skipIndex); + if (status != FudStatus::Success) { + return status; + } + + bool foundDecimal = false; + bool foundExponent = false; + status = getWhole(view, digitIndex, num, sign, radix, foundDecimal, foundExponent); + + if (status == FudStatus::Success && foundExponent) { + status = getExponent(view, digitIndex, num, radix); + } + + if (status != FudStatus::Success) { + return status; + } + + if (!foundDecimal) { + if (digitIndex < 1) { + return FudStatus::InvalidInput; + } + + return retSuccess(); + } + + status = getFraction(view, digitIndex, num, sign, radix, foundExponent); + + if (foundExponent) { + status = getExponent(view, digitIndex, num, radix); + if (status != FudStatus::Success) { + return status; + } + } + + if (digitIndex < 1) { + return FudStatus::InvalidInput; + } + + if (std::isinf(num) || std::isnan(num)) // isnan is dubious here - likely unreachable + { + return ExtRangeError; + } + + return retSuccess(); +} + +template +FudStatus stringViewToFloat(StringView input, T* number, size_t* index) +{ + if (anyAreNull(input.data, number)) { + return FudStatus::NullPointer; + } + + size_t localIndex{0}; + auto status = stringViewToFloat(input, *number, localIndex); + + if (status == FudStatus::Success && index != nullptr) { + *index = localIndex; + } + return status; +} + +} // namespace impl + +FudStatus ext_string_to_float(StringView input, float* number, size_t* index) +{ + return impl::stringViewToFloat(input, number, index); +} + +FudStatus ext_string_to_double(StringView input, double* number, size_t* index) +{ + return impl::stringViewToFloat(input, number, index); +} + +namespace ext_lib { + +FudStatus StringView::toUint8(uint8_t& number, uint8_t specifiedRadix, size_t& strLen) const +{ + return ::impl::stringViewToUnsignedInteger(*this, number, specifiedRadix, strLen); +} + +FudStatus StringView::toUint16(uint16_t& number, uint8_t specifiedRadix, size_t& strLen) const +{ + return ::impl::stringViewToUnsignedInteger(*this, number, specifiedRadix, strLen); +} + +FudStatus StringView::toUint32(uint32_t& number, uint8_t specifiedRadix, size_t& strLen) const +{ + return ::impl::stringViewToUnsignedInteger(*this, number, specifiedRadix, strLen); +} + +FudStatus StringView::toUint64(uint64_t& number, uint8_t specifiedRadix, size_t& strLen) const +{ + return ::impl::stringViewToUnsignedInteger(*this, number, specifiedRadix, strLen); +} + +FudStatus StringView::toInt8(int8_t& number, uint8_t specifiedRadix, size_t& strLen) const +{ + return ::impl::stringViewToSignedInteger(*this, number, specifiedRadix, strLen); +} + +FudStatus StringView::toInt16(int16_t& number, uint8_t specifiedRadix, size_t& strLen) const +{ + return ::impl::stringViewToSignedInteger(*this, number, specifiedRadix, strLen); +} + +FudStatus StringView::toInt32(int32_t& number, uint8_t specifiedRadix, size_t& strLen) const +{ + return ::impl::stringViewToSignedInteger(*this, number, specifiedRadix, strLen); +} + +FudStatus StringView::toInt64(int64_t& number, uint8_t specifiedRadix, size_t& strLen) const +{ + return ::impl::stringViewToSignedInteger(*this, number, specifiedRadix, strLen); +} + +FudStatus StringView::toFloat(float& number, size_t& strLen) const +{ + return ::impl::stringViewToFloat(*this, number, strLen); +} + +FudStatus StringView::toDouble(double& number, size_t& strLen) const +{ + return ::impl::stringViewToFloat(*this, number, strLen); +} + +#endif + +} // namespace fud diff --git a/source/fud_utf8.cpp b/source/fud_utf8.cpp new file mode 100644 index 0000000..5dd5099 --- /dev/null +++ b/source/fud_utf8.cpp @@ -0,0 +1,343 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fud_utf8.hpp" + +#include "fud_string.hpp" + +#include // IWYU pragma: keep - this is for placement new overloads. + +namespace fud { + +FudUtf8 FudUtf8::fromString(const String& fudString, size_t index) noexcept +{ + if (!fudString.valid()) { + return invalidAscii(); + } + + + return fromStringView(StringView{fudString}, index); +} + +FudUtf8 FudUtf8::fromStringView(const StringView& view, size_t index) noexcept +{ + return fromStringView(StringView{view}, index); +} + +FudUtf8 FudUtf8::fromStringView(StringView&& view, size_t index) noexcept +{ + auto len = view.length(); + const auto* data = view.data(); + if (data == nullptr) { + return invalidAscii(); + } + + FudUtf8 localChar{Ascii{data[index]}}; + if (localChar.valid()) { + return localChar; + } + + if (index + 1 < len) { + localChar.m_variant = Utf82Byte{data[index], data[index + 1]}; + } + if (localChar.valid()) { + return localChar; + } + + if (index + 2 < len) { + localChar.m_variant = Utf83Byte{data[index], data[index + 1], data[index + 2]}; + } + if (localChar.valid()) { + return localChar; + } + + if (index + 3 < len) { + localChar.m_variant = Utf84Byte{data[index], data[index + 1], data[index + 2], data[index + 3]}; + } + if (localChar.valid()) { + return localChar; + } + + return invalidAscii(); +} + +bool char_is_ascii(char character) +{ + return static_cast(character & ~ASCII_MASK) == 0; +} + +FudStatus utf8_is_ascii(FudUtf8* character, bool* isAscii) +{ + if (anyAreNull(character, isAscii)) { + return FudStatus::NullPointer; + } + + *isAscii = character->getType() == Utf8Type::Ascii && character->valid(); + + return FudStatus::Success; +} + +namespace impl { + +/* Assumes that predicate is not a null pointer! */ +template +inline FudStatus isAsciiPredicate(FudUtf8* character, bool* pred, Predicate&& predicate) +{ + if (anyAreNull(character, pred)) { + return FudStatus::NullPointer; + } + + auto maybeAscii = character->getAscii(); + if (!maybeAscii.has_value()) { + return FudStatus::InvalidInput; + } + + auto asciiChar = *maybeAscii; + *pred = std::forward(predicate)(asciiChar.asChar()); + + return FudStatus::Success; +} + +} // namespace impl + +bool char_is_alphanumeric(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + if (char_is_alpha(character)) { + return true; + } + + return char_is_digit(character); +} + +FudStatus utf8_is_alphanumeric(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_alphanumeric); +} + +bool char_is_alpha(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + if (char_is_uppercase(character)) { + return true; + } + + return char_is_lowercase(character); +} + +FudStatus utf8_is_alpha(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_alpha); +} + +bool char_is_lowercase(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + return 'a' <= character && character <= 'z'; +} + +FudStatus utf8_is_lowercase(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_lowercase); +} + +bool char_is_uppercase(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + return 'A' <= character && character <= 'Z'; +} + +FudStatus utf8_is_uppercase(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_uppercase); +} + +bool char_is_digit(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + return '0' <= character && character <= '9'; +} + +FudStatus utf8_is_digit(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_digit); +} + +bool char_is_hex_digit(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + return ('0' <= character && character <= '9') || ('a' <= character && character <= 'f') || + ('A' <= character && character <= 'F'); +} + +FudStatus utf8_is_hex_digit(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_hex_digit); +} + +bool char_is_control(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + constexpr char maxControlChar = 0x1F; + constexpr const char deleteChar = 0x7F; + return ((static_cast(character) <= maxControlChar)) || character == deleteChar; +} + +FudStatus utf8_is_control(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_control); +} + +bool char_is_graphical(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + return char_is_alphanumeric(character) || char_is_punctuation(character); +} + +FudStatus utf8_is_graphical(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_graphical); +} + +bool char_is_space(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + return character == ' ' || character == '\t' || character == '\n' || character == '\r' || character == '\v'; +} + +FudStatus utf8_is_space(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_space); +} + +bool char_is_blank(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + return character == ' ' || character == '\t'; +} + +FudStatus utf8_is_blank(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_blank); +} + +bool char_is_printable(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + return (character >= ' ' && character <= '~'); +} + +FudStatus utf8_is_printable(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_printable); +} + +bool char_is_punctuation(char character) +{ + if (!char_is_ascii(character)) { + return false; + } + + return (character >= '!' && character <= '/') || (character >= ':' && character <= '@') || + (character >= '[' && character <= '`') || (character >= '{' && character <= '~'); +} + +FudStatus utf8_is_punctuation(FudUtf8* character, bool* pred) +{ + return impl::isAsciiPredicate(character, pred, char_is_punctuation); +} + +uint8_t char_to_lower(uint8_t character) +{ + if (char_is_uppercase(static_cast(character))) { + constexpr uint8_t lowerA = 'a'; + constexpr uint8_t upperA = 'A'; + return static_cast(character - upperA) + lowerA; + } + return character; +} + +FudUtf8* utf8_to_lower(FudUtf8* character) +{ + if (character == nullptr) { + return character; + } + + static_cast(character->transformAscii([](Ascii& ascii) { + ascii = Ascii{char_to_lower(static_cast(ascii.asChar()))}; + })); + + return character; +} + +uint8_t char_to_upper(uint8_t character) +{ + if (char_is_lowercase(static_cast(character))) { + constexpr uint8_t lowerA = 'a'; + constexpr uint8_t upperA = 'A'; + return static_cast(character - lowerA) + upperA; + } + return character; +} + +FudUtf8* utf8_to_upper(FudUtf8* character) +{ + if (character == nullptr) { + return character; + } + + static_cast(character->transformAscii([](Ascii& ascii) { + ascii = Ascii{char_to_upper(static_cast(ascii.asChar()))}; + })); + + return character; +} + +} // namespace fud diff --git a/source/fud_utf8_iterator.cpp b/source/fud_utf8_iterator.cpp new file mode 100644 index 0000000..4476050 --- /dev/null +++ b/source/fud_utf8_iterator.cpp @@ -0,0 +1,55 @@ +/* + * libfud + * Copyright 2024 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fud_utf8_iterator.hpp" + +namespace fud { + +std::optional Utf8Iterator::peek() const +{ + if (m_index >= m_view.length()) { + return std::nullopt; + } + + auto utf8 = FudUtf8::fromStringView(m_view, m_index); + + if (!utf8.valid()) { + return std::nullopt; + } + + return utf8; +} + +std::optional Utf8Iterator::next() +{ + if (m_index >= m_view.length()) { + m_index = m_view.length(); + return std::nullopt; + } + + auto utf8 = FudUtf8::fromStringView(m_view, m_index); + + if (!utf8.valid()) { + m_index = m_view.length(); + return std::nullopt; + } + + m_index += utf8.size(); + return utf8; +} + +} // namespace fud diff --git a/source/libfud.cpp b/source/libfud.cpp index fa0e3a0..834082e 100644 --- a/source/libfud.cpp +++ b/source/libfud.cpp @@ -1,4 +1,5 @@ /* + * libfud * Copyright 2024 Dominick Allen * * Licensed under the Apache License, Version 2.0 (the "License"); you @@ -18,7 +19,8 @@ namespace fud { -void fud() { +void fud() +{ } } // namespace fud diff --git a/source/memory.cpp b/source/memory.cpp deleted file mode 100644 index 9f5d358..0000000 --- a/source/memory.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * ExtLib - * Copyright 2024 Dominick Allen - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "memory.hpp" - -#include - -namespace fud { - -FudStatus copyMem(void* destination, size_t destination_size, const void* source, size_t count) -{ - if (anyAreNull(destination, source)) { - return FudStatus::NullPointer; - } - - if (destination_size < count) { - return FudStatus::InvalidInput; - } - - auto* destPtr = static_cast(destination); - const auto* sourcePtr = static_cast(source); - for (decltype(destination_size) idx = 0; idx < count; ++idx) { - destPtr[idx] = sourcePtr[idx]; - } - - return FudStatus::Success; -} - -FudStatus compareMem(const void* lhs, size_t destination_size, const void* rhs, size_t count, int* difference) -{ - if (anyAreNull(lhs, rhs, difference)) { - return FudStatus::NullPointer; - } - - if (destination_size < count) { - return FudStatus::InvalidInput; - } - - int localDifference = 0; - // NOLINTBEGIN(readability-magic-numbers) - for (decltype(destination_size) idx = 0; idx < count; idx++) { - localDifference = static_cast(lhs)[idx] - static_cast(rhs)[idx]; - if (localDifference != 0) { - *difference = localDifference; - return FudStatus::Success; - } - } - *difference = localDifference; - - return FudStatus::Success; -} - -Result compareMem(const void* lhs, size_t destination_size, const void* rhs, size_t count) -{ - int difference = 0; - auto status = compareMem(lhs, destination_size, rhs, count, &difference); - if (status != FudStatus::Success) - { - return Result::error(status); - } - - return Result::okay(difference); -} - -FudStatus setMemory(void* data, size_t dataSize, uint8_t pattern, size_t count) -{ - if (data == nullptr) - { - return FudStatus::NullPointer; - } - - if (count > dataSize) - { - return FudStatus::InvalidInput; - } - - for (size_t idx = 0; idx < count; ++idx) - { - static_cast(data)[idx] = pattern; - } - - return FudStatus::Success; -} - -FudStatus setMemory( - void* data, - size_t collectionCount, - size_t eltOffset, - size_t eltSize, - uint8_t pattern, - size_t eltCount) -{ - if (eltOffset >= collectionCount) - { - return FudStatus::InvalidInput; - } - - if (eltOffset + eltCount > collectionCount) - { - return FudStatus::InvalidInput; - } - - auto dataSize = collectionCount * eltSize; - auto byteOffset = eltOffset * eltSize; - auto byteCount = eltCount * eltSize; - - auto remainingSize = dataSize - byteOffset; - - auto* offsetData = static_cast(data) + byteOffset; - return setMemory(offsetData, remainingSize, pattern, byteCount); -} - -} // namespace fud diff --git a/source/string.cpp b/source/string.cpp deleted file mode 100644 index a121418..0000000 --- a/source/string.cpp +++ /dev/null @@ -1,19 +0,0 @@ -/* - * LibFud - * Copyright 2024 Dominick Allen - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include "string.hpp" diff --git a/source/utf8.cpp b/source/utf8.cpp deleted file mode 100644 index c94ac1f..0000000 --- a/source/utf8.cpp +++ /dev/null @@ -1,343 +0,0 @@ -/* - * libfud - * Copyright 2024 Dominick Allen - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "utf8.hpp" - -#include "string.hpp" - -#include // IWYU pragma: keep - this is for placement new overloads. - -namespace fud { - -ExtUtf8 ExtUtf8::fromString(const String& fudString, size_t index) noexcept -{ - if (!fudString.valid()) { - return invalidAscii(); - } - - - return fromStringView(StringView{fudString}, index); -} - -ExtUtf8 ExtUtf8::fromStringView(const StringView& view, size_t index) noexcept -{ - return fromStringView(StringView{view}, index); -} - -ExtUtf8 ExtUtf8::fromStringView(StringView&& view, size_t index) noexcept -{ - auto len = view.length(); - const auto* data = view.data(); - if (data == nullptr) { - return invalidAscii(); - } - - ExtUtf8 localChar{Ascii{data[index]}}; - if (localChar.valid()) { - return localChar; - } - - if (index + 1 < len) { - localChar.m_variant = Utf82Byte{data[index], data[index + 1]}; - } - if (localChar.valid()) { - return localChar; - } - - if (index + 2 < len) { - localChar.m_variant = Utf83Byte{data[index], data[index + 1], data[index + 2]}; - } - if (localChar.valid()) { - return localChar; - } - - if (index + 3 < len) { - localChar.m_variant = Utf84Byte{data[index], data[index + 1], data[index + 2], data[index + 3]}; - } - if (localChar.valid()) { - return localChar; - } - - return invalidAscii(); -} - -bool ext_lib_char_is_ascii(char character) -{ - return static_cast(character & ~ASCII_MASK) == 0; -} - -FudStatus ext_lib_utf8_is_ascii(ExtUtf8* character, bool* isAscii) -{ - if (anyAreNull(character, isAscii)) { - return FudStatus::NullPointer; - } - - *isAscii = character->getType() == ExtUtf8Type::Ascii && character->valid(); - - return FudStatus::Success; -} - -namespace impl { - -/* Assumes that predicate is not a null pointer! */ -template -inline FudStatus isAsciiPredicate(ExtUtf8* character, bool* pred, Predicate&& predicate) -{ - if (anyAreNull(character, pred)) { - return FudStatus::NullPointer; - } - - auto maybeAscii = character->getAscii(); - if (!maybeAscii.has_value()) { - return FudStatus::InvalidInput; - } - - auto asciiChar = *maybeAscii; - *pred = std::forward(predicate)(asciiChar.asChar()); - - return FudStatus::Success; -} - -} // namespace impl - -bool ext_lib_char_is_alphanumeric(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - if (ext_lib_char_is_alpha(character)) { - return true; - } - - return ext_lib_char_is_digit(character); -} - -FudStatus ext_lib_utf8_is_alphanumeric(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_alphanumeric); -} - -bool ext_lib_char_is_alpha(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - if (ext_lib_char_is_uppercase(character)) { - return true; - } - - return ext_lib_char_is_lowercase(character); -} - -FudStatus ext_lib_utf8_is_alpha(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_alpha); -} - -bool ext_lib_char_is_lowercase(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return 'a' <= character && character <= 'z'; -} - -FudStatus ext_lib_utf8_is_lowercase(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_lowercase); -} - -bool ext_lib_char_is_uppercase(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return 'A' <= character && character <= 'Z'; -} - -FudStatus ext_lib_utf8_is_uppercase(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_uppercase); -} - -bool ext_lib_char_is_digit(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return '0' <= character && character <= '9'; -} - -FudStatus ext_lib_utf8_is_digit(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_digit); -} - -bool ext_lib_char_is_hex_digit(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return ('0' <= character && character <= '9') || ('a' <= character && character <= 'f') || - ('A' <= character && character <= 'F'); -} - -FudStatus ext_lib_utf8_is_hex_digit(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_hex_digit); -} - -bool ext_lib_char_is_control(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - constexpr char maxControlChar = 0x1F; - constexpr const char deleteChar = 0x7F; - return ((static_cast(character) <= maxControlChar)) || character == deleteChar; -} - -FudStatus ext_lib_utf8_is_control(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_control); -} - -bool ext_lib_char_is_graphical(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return ext_lib_char_is_alphanumeric(character) || ext_lib_char_is_punctuation(character); -} - -FudStatus ext_lib_utf8_is_graphical(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_graphical); -} - -bool ext_lib_char_is_space(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return character == ' ' || character == '\t' || character == '\n' || character == '\r' || character == '\v'; -} - -FudStatus ext_lib_utf8_is_space(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_space); -} - -bool ext_lib_char_is_blank(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return character == ' ' || character == '\t'; -} - -FudStatus ext_lib_utf8_is_blank(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_blank); -} - -bool ext_lib_char_is_printable(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return (character >= ' ' && character <= '~'); -} - -FudStatus ext_lib_utf8_is_printable(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_printable); -} - -bool ext_lib_char_is_punctuation(char character) -{ - if (!ext_lib_char_is_ascii(character)) { - return false; - } - - return (character >= '!' && character <= '/') || (character >= ':' && character <= '@') || - (character >= '[' && character <= '`') || (character >= '{' && character <= '~'); -} - -FudStatus ext_lib_utf8_is_punctuation(ExtUtf8* character, bool* pred) -{ - return impl::isAsciiPredicate(character, pred, ext_lib_char_is_punctuation); -} - -uint8_t ext_lib_char_to_lower(uint8_t character) -{ - if (ext_lib_char_is_uppercase(static_cast(character))) { - constexpr uint8_t lowerA = 'a'; - constexpr uint8_t upperA = 'A'; - return static_cast(character - upperA) + lowerA; - } - return character; -} - -ExtUtf8* ext_lib_utf8_to_lower(ExtUtf8* character) -{ - if (character == nullptr) { - return character; - } - - static_cast(character->transformAscii([](Ascii& ascii) { - ascii = Ascii{ext_lib_char_to_lower(static_cast(ascii.asChar()))}; - })); - - return character; -} - -uint8_t ext_lib_char_to_upper(uint8_t character) -{ - if (ext_lib_char_is_lowercase(static_cast(character))) { - constexpr uint8_t lowerA = 'a'; - constexpr uint8_t upperA = 'A'; - return static_cast(character - lowerA) + upperA; - } - return character; -} - -ExtUtf8* ext_lib_utf8_to_upper(ExtUtf8* character) -{ - if (character == nullptr) { - return character; - } - - static_cast(character->transformAscii([](Ascii& ascii) { - ascii = Ascii{ext_lib_char_to_upper(static_cast(ascii.asChar()))}; - })); - - return character; -} - -} // namespace ext_lib diff --git a/source/utf8_iterator.cpp b/source/utf8_iterator.cpp deleted file mode 100644 index e439687..0000000 --- a/source/utf8_iterator.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include "utf8_iterator.hpp" - -namespace fud { - -std::optional Utf8Iterator::peek() const -{ - if (m_index >= m_view.length()) { - return std::nullopt; - } - - auto utf8 = ExtUtf8::fromStringView(m_view, m_index); - - if (!utf8.valid()) { - return std::nullopt; - } - - return utf8; -} - -std::optional Utf8Iterator::next() -{ - if (m_index >= m_view.length()) { - m_index = m_view.length(); - return std::nullopt; - } - - auto utf8 = ExtUtf8::fromStringView(m_view, m_index); - - if (!utf8.valid()) { - m_index = m_view.length(); - return std::nullopt; - } - - m_index += utf8.size(); - return utf8; -} - -} // namespace fud -- cgit v1.2.3