/*
 * libfud
 * Copyright 2024 Dominick Allen
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fud_string_view.hpp"

#include "fud_string.hpp"

// NOTE(review): in this copy of the file several template argument lists
// appear to be missing (e.g. `Result` and `static_cast(` with no `<...>`).
// They are reproduced exactly as found; restore them from the upstream
// source before building.

namespace fud {

/**
 * Construct a view over a String by delegating to the (length, data)
 * constructor with fudString.length() and fudString.data().
 */
StringView::StringView(const String& fudString) noexcept : StringView(fudString.length(), fudString.data())
{
}

/**
 * Report whether the element immediately after the viewed range is '\0'.
 *
 * Returns false when m_data is null. Otherwise reads m_data[m_length], i.e.
 * the element one past the last one counted by the view.
 * NOTE(review): that read is outside the viewed range, so the caller
 * presumably must guarantee it is readable - confirm against the header.
 */
bool StringView::nullTerminated() const
{
    return m_data != nullptr && m_data[m_length] == '\0';
}

/**
 * Check that the whole view is a sequence of elements accepted by the
 * Ascii / Utf82Byte / Utf83Byte / Utf84Byte validators.
 *
 * Returns false when m_data is null. Otherwise walks the view from index 0
 * and, at each position, tries the validators in order of increasing width,
 * advancing by the width of the first one that accepts. Returns false at the
 * first position where none accepts, and true once the index reaches
 * m_length (which includes the case m_length == 0).
 */
bool StringView::utf8Valid() const
{
    if (m_data == nullptr) {
        return false;
    }

    // No increment in the for header: each branch advances by its own width.
    for (size_t idx = 0; idx < m_length;) {
        if (Ascii::valid(m_data[idx])) {
            idx++;
        } else if (idx + 1 < m_length && Utf82Byte::valid(m_data[idx], m_data[idx + 1])) {
            // The bound check guarantees the highest index read is < m_length.
            idx += 2;
        } else if (idx + 2 < m_length && Utf83Byte::valid(m_data[idx], m_data[idx + 1], m_data[idx + 2])) {
            idx += 3;
        } else if (
            idx + 3 < m_length &&
            Utf84Byte::valid(m_data[idx], m_data[idx + 1], m_data[idx + 2], m_data[idx + 3])) {
            idx += 4;
        } else {
            // Either no validator accepted, or a multi-element sequence would
            // run past the end of the view.
            return false;
        }
    }

    return true;
}

/**
 * Drop leading elements for which classify::isSpace returns true.
 *
 * Mutates the view in place: m_data is advanced and m_length reduced once
 * per dropped element.
 *
 * @return error(FudStatus::NullPointer) when m_data is null; otherwise
 *         okay(n) where n is the number of elements dropped.
 */
Result StringView::skipWhitespace()
{
    using RetType = Result;
    if (m_data == nullptr) {
        return RetType::error(FudStatus::NullPointer);
    }
    size_t index = 0;
    // Always inspects m_data[0] because the view start moves each iteration.
    while (m_length > 0 && classify::isSpace(static_cast(m_data[0]))) {
        m_data++;
        m_length--;
        index++;
    }

    return RetType::okay(index);
}

/**
 * Drop trailing elements for which classify::isSpace returns true.
 *
 * Mutates the view in place: only m_length is reduced; m_data is unchanged.
 *
 * @return error(FudStatus::NullPointer) when m_data is null; otherwise
 *         okay(n) where n is the number of elements dropped.
 */
Result StringView::trimWhitespace()
{
    using RetType = Result;
    if (m_data == nullptr) {
        return RetType::error(FudStatus::NullPointer);
    }

    size_t count = 0;
    while (m_length > 0 && classify::isSpace(static_cast(m_data[m_length - 1]))) {
        m_length--;
        count++;
    }

    return RetType::okay(count);
}

/**
 * Drop the first element of the view.
 *
 * @return false (view unchanged) when the view is empty; true otherwise.
 */
bool StringView::advance()
{
    if (m_length < 1) {
        return false;
    }
    m_length--;
    m_data++;
    return true;
}

/**
 * Drop the first element of the view; the non-empty precondition is checked
 * with fudAssert instead of being reported to the caller.
 * NOTE(review): what fudAssert does on failure is not visible here.
 */
void StringView::advanceUnsafe()
{
    fudAssert(m_length > 0);
    m_length--;
    m_data++;
}

/**
 * Drop the first `size` elements of the view.
 *
 * @param size number of elements to drop.
 * @return false (view unchanged) when size exceeds m_length; true otherwise.
 */
bool StringView::advance(size_t size)
{
    if (size > m_length) {
        return false;
    }
    m_length -= size;
    m_data += size;
    return true;
}

/**
 * Drop the first `size` elements of the view; the size <= m_length
 * precondition is checked with fudAssert instead of being reported.
 *
 * @param size number of elements to drop.
 */
void StringView::advanceUnsafe(size_t size)
{
    fudAssert(size <= m_length);
    m_length -= size;
    m_data += size;
}

/**
 * Skip leading whitespace in `view` and require that something remains.
 *
 * @param view      view to mutate via StringView::skipWhitespace().
 * @param skipIndex receives the number of elements skipped; left untouched
 *                  when the member call reports an error.
 * @return the error from StringView::skipWhitespace() if it failed;
 *         FudStatus::ArgumentInvalid when the view is empty after skipping
 *         (skipIndex has already been written in that case);
 *         FudStatus::Success otherwise.
 */
FudStatus skipWhitespace(StringView& view, size_t& skipIndex)
{
    auto skipResult = view.skipWhitespace();
    if (skipResult.isError()) {
        return skipResult.getError();
    }
    skipIndex = skipResult.getOkay();
    if (view.length() < 1) {
        return FudStatus::ArgumentInvalid;
    }
    return FudStatus::Success;
}

// Everything from here to the matching #endif is excluded from compilation.
// It uses names (ExtBasicString, FudUtf8, ExtUtf8Set, ExtNotFound, public
// `data`/`length` members on StringView, ...) that are not defined in the
// visible part of this file.
#if 0

/**
 * Change the length of `source`.
 *
 * A negative newLength shortens the string by that many elements; a
 * non-negative newLength becomes the new length. Finishes by calling
 * wrapper.nullTerminate() and returning its status.
 */
FudStatus fud_string_truncate(ExtBasicString* source, ssize_t newLength)
{
    if (source == nullptr) {
        return FudStatus::NullPointer;
    }

    StringBorrow wrapper{*source};
    if (!wrapper.valid()) {
        return FudStatus::StringInvalid;
    }

    // NOTE(review): the second clause is not guarded by `newLength < 0`, so
    // it is also evaluated for positive newLength - confirm that is intended.
    if ((newLength > 0 && static_cast(newLength) > source->m_length) ||
        (static_cast(-newLength) > source->m_length)) {
        return FudStatus::ArgumentInvalid;
    }

    if (newLength < 0) {
        source->m_length = source->m_length - static_cast(-newLength);
    } else {
        source->m_length = static_cast(newLength);
    }

    return wrapper.nullTerminate();
}

/**
 * Reverse the whole of `source` by delegating to
 * fud_string_reverse_substring with a view spanning all of its data.
 */
FudStatus fud_string_reverse(ExtBasicString* source)
{
    if (source == nullptr || source->m_data == nullptr) {
        return FudStatus::NullPointer;
    }
    return fud_string_reverse_substring(source, StringView{source->m_length, source->m_data});
}

/**
 * Reverse, in place, the part of `source` designated by `subString`.
 *
 * Rejects a subString that does not lie within source's data. Each
 * multi-element sequence is first reversed in place and then the whole range
 * is passed to reverse(); presumably the second pass restores the element
 * order inside each sequence - confirm against reverse().
 */
FudStatus fud_string_reverse_substring(ExtBasicString* source, StringView subString)
{
    // NOTE(review): `source` is dereferenced here, before the null check
    // further down.
    auto dataOffset = subString.data - source->m_data;
    if (dataOffset < 0 || static_cast(dataOffset) > source->m_length) {
        return FudStatus::ArgumentInvalid;
    }
    if (static_cast(dataOffset) + subString.length > source->m_length) {
        return FudStatus::ArgumentInvalid;
    }

    if (source == nullptr || source->m_data == nullptr) {
        return FudStatus::NullPointer;
    }

    StringView view{subString};

    size_t index = 0;
    auto* data = source->m_data + dataOffset;
    while (index < subString.length) {
        if (fud_char_is_ascii(static_cast(data[index]))) {
            index++;
            continue;
        }
        auto utf8 = FudUtf8::fromStringView(view, index);
        if (!utf8.valid()) {
            return ExtUtf8Invalid;
        }
        const auto* utf8Data = utf8.data();
        if (utf8Data == nullptr) {
            return ExtFailure;
        }
        auto utf8Size = utf8.size();
        switch (utf8Size) {
        case 2:
            data[index] = utf8Data[1];
            data[index + 1] = utf8Data[0];
            break;
        case 3:
            // The middle element keeps its position; only the ends swap.
            data[index] = utf8Data[2];
            data[index + 2] = utf8Data[0];
            break;
        case 4:
            data[index] = utf8Data[3];
            data[index + 1] = utf8Data[2];
            data[index + 2] = utf8Data[1];
            data[index + 3] = utf8Data[0];
            break;
        default:
            return ExtFailure;
        }
        index += utf8Size;
    }

    DataView dataView{subString.length, data};
    reverse(dataView);

    return FudStatus::Success;
}

/**
 * Compare two views element by element and store the result in *difference.
 *
 * The result is the first non-zero element difference over the common
 * prefix. If the common prefix is equal and the lengths differ, it is the
 * next element of the longer view (negated when `dextro` is the longer one).
 * Equal views give 0.
 */
FudStatus fud_string_compare(StringView levo, StringView dextro, int* difference)
{
    if (anyAreNull(difference, levo.data, dextro.data)) {
        return FudStatus::NullPointer;
    }

    int diff = 0;
    size_t index = 0;
    while (diff == 0 && index < levo.length && index < dextro.length) {
        diff = levo.data[index] - dextro.data[index];
        index++;
    }

    if (diff != 0 || levo.length == dextro.length) {
        /* nothing to do */
    } else if (levo.length > dextro.length) {
        diff = static_cast(levo.data[index]);
    } else {
        diff = -static_cast(dextro.data[index]);
    }

    *difference = diff;
    return FudStatus::Success;
}

/**
 * Linear search for `character`; on success stores the position of the
 * first match in *index. Returns ExtNotFound when there is no match.
 */
FudStatus fud_string_chr(StringView extStringView, char character, size_t* index)
{
    if (anyAreNull(extStringView.data, index)) {
        return FudStatus::NullPointer;
    }

    bool found = false;
    for (size_t localIndex = 0; localIndex < extStringView.length; ++localIndex) {
        if (extStringView.data[localIndex] == static_cast(character)) {
            *index = localIndex;
            found = true;
            break;
        }
    }

    if (found) {
        return FudStatus::Success;
    }

    return ExtNotFound;
}

/**
 * Search `extString` for the character `*unicode`, decoding one FudUtf8 at a
 * time; on success stores the starting position of the match in *index.
 */
FudStatus fud_string_unicode_chr(StringView extString, const ExtUtf8* unicode, size_t* index)
{
    if (anyAreNull(extString.data, unicode, index)) {
        return FudStatus::NullPointer;
    }

    if (!unicode->valid()) {
        return ExtUtf8Invalid;
    }

    size_t charSize = unicode->size();
    ExtDebugAssert(charSize != 0);
    const uint8_t* dataMem = unicode->data();
    ExtDebugAssert(dataMem != nullptr);

    // localData is filled here but not read again in this function.
    Array localData{};
    auto copyStatus = ExtCopyMem(localData.data(), localData.size(), dataMem, charSize);
    ExtDebugAssert(copyStatus == FudStatus::Success);

    // Stop once fewer than charSize elements remain.
    for (size_t sIdx = 0; sIdx + charSize - 1 < extString.length;) {

        auto localChar = FudUtf8::fromStringView(extString, sIdx);

        if (!localChar.valid()) {
            return ExtUtf8Invalid;
        }

        if (localChar.m_variant == unicode->m_variant) {
            *index = sIdx;
            return FudStatus::Success;
        }

        sIdx += localChar.size();
    }

    return ExtNotFound;
}

/**
 * Shared implementation of fud_string_span / fud_string_c_span.
 *
 * Scans inputView one decoded character at a time and tests each one for
 * membership in characterSetString. With inSet == false the scan stops at
 * the first character that is in the set; with inSet == true it stops at the
 * first character that is not. When a stop position below inputView.length
 * was recorded, `result` is set to the remainder of inputView from that
 * position; otherwise ExtNotFound is returned.
 */
FudStatus fud_string_span_c_api(
    const StringView& inputView,
    const StringView& characterSetString,
    StringView& result,
    bool inSet)
{
    size_t firstIndex = inputView.length;

    size_t sIdx = 0;
    while (sIdx < firstIndex) {
        auto stringChar = FudUtf8::fromStringView(inputView, sIdx);
        if (!stringChar.valid()) {
            return ExtUtf8Invalid;
        }

        size_t cIdx = 0;
        bool found = false;

        // NOTE(review): this scan over the character set is also bounded by
        // firstIndex, which is derived from inputView - confirm intended.
        while (firstIndex > 0 && cIdx < firstIndex && cIdx < characterSetString.length) {
            auto setChar = FudUtf8::fromStringView(characterSetString, cIdx);
            if (!setChar.valid()) {
                return ExtUtf8Invalid;
            }

            if (stringChar == setChar) {
                found = true;
            }

            cIdx += setChar.size();
        }

        if (!inSet && found) {
            firstIndex = sIdx;
        } else if (inSet && !found) {
            // A mismatch at position 0 leaves firstIndex at inputView.length,
            // which yields ExtNotFound below.
            if (sIdx > 0) {
                firstIndex = sIdx;
            }
            break;
        }

        sIdx += stringChar.size();
    }

    if (firstIndex < inputView.length) {
        result.length = inputView.length - firstIndex;
        result.data = inputView.data + firstIndex;
        return FudStatus::Success;
    }

    return ExtNotFound;
}

/**
 * Same scan as fud_string_span_c_api, with membership decided by
 * characterSet->contains() instead of a second string.
 */
FudStatus fud_string_span_set(StringView inputView, const ExtUtf8Set* characterSet, StringView* stringView, bool inSet)
{
    if (anyAreNull(inputView.data, characterSet, stringView)) {
        return FudStatus::NullPointer;
    }

    if (!characterSet->valid()) {
        return ExtUtf8Invalid;
    }

    size_t firstIndex = inputView.length;
    size_t sIdx = 0;
    while (sIdx < firstIndex) {
        auto localChar = FudUtf8::fromStringView(inputView, sIdx);
        if (!localChar.valid()) {
            return ExtUtf8Invalid;
        }

        bool found = characterSet->contains(localChar);

        if (!inSet && found) {
            firstIndex = sIdx;
        } else if (inSet && !found) {
            if (sIdx > 0) {
                firstIndex = sIdx;
            }
            break;
        }

        sIdx += localChar.size();
    }

    if (firstIndex < inputView.length) {
        stringView->length = inputView.length - firstIndex;
        stringView->data = inputView.data + firstIndex;
        return FudStatus::Success;
    }

    return ExtNotFound;
}

/** Null-checks `result`, then calls fud_string_span_c_api with inSet == true. */
FudStatus fud_string_span(StringView extString, StringView characterSetString, StringView* result)
{
    if (result == nullptr) {
        return FudStatus::NullPointer;
    }

    const StringView inputView{extString};
    const StringView characterSet{characterSetString};

    return fud_string_span_c_api(inputView, characterSet, *result, true);
}

/** Null-checks `result`, then calls fud_string_span_c_api with inSet == false. */
FudStatus fud_string_c_span(StringView extString, StringView characterSetString, StringView* result)
{
    if (result == nullptr) {
        return FudStatus::NullPointer;
    }

    const StringView inputView{extString};
    const StringView characterSet{characterSetString};

    return fud_string_span_c_api(inputView, characterSet, *result, false);
}

/** Forwards to the four-argument fud_string_span_set with inSet == true. */
FudStatus fud_string_span_set(StringView extString, const ExtUtf8Set* characterSet, StringView* stringView)
{
    return fud_string_span_set(extString, characterSet, stringView, true);
}

/** Forwards to the four-argument fud_string_span_set with inSet == false. */
FudStatus fud_string_c_span_set(StringView extString, const ExtUtf8Set* characterSet, StringView* stringView)
{
    return fud_string_span_set(extString, characterSet, stringView, false);
}

/**
 * Search `haystack` for `needle`; on success *stringView designates the
 * match inside haystack.
 *
 * A needle of length 1 is handled directly by fud_string_chr. Otherwise the
 * function repeatedly uses fud_string_chr to find the next occurrence of the
 * needle's first element and fud_string_compare to test the candidate.
 */
FudStatus fud_string_find_substring(StringView haystack, StringView needle, StringView* stringView)
{
    if (anyAreNull(haystack.data, needle.data, stringView)) {
        return FudStatus::NullPointer;
    }

    if (needle.length > haystack.length) {
        return ExtNotFound;
    }

    if (needle.length == 1) {
        size_t index = 0;
        auto chrFindStatus = fud_string_chr(haystack, static_cast(needle.data[0]), &index);
        if (chrFindStatus == FudStatus::Success) {
            stringView->data = haystack.data + index;
            stringView->length = 1;
        }
        return chrFindStatus;
    }

    size_t haystackIdx = 0;
    // NOTE(review): with `<` here and `>=` in the break below, a candidate
    // starting at haystack.length - needle.length (a needle ending exactly at
    // the end of the haystack) is never compared - confirm intended.
    while (haystackIdx < haystack.length - needle.length) {
        StringView lhs;
        lhs.data = haystack.data + haystackIdx;
        lhs.length = haystack.length - haystackIdx;
        size_t lhsIndex = 0;
        auto chrFindStatus = fud_string_chr(lhs, static_cast(needle.data[0]), &lhsIndex);
        if (chrFindStatus != FudStatus::Success) {
            return chrFindStatus;
        }
        haystackIdx += lhsIndex;
        // GE or GT?
        if (haystackIdx + needle.length >= haystack.length) {
            break;
        }
        lhs.data = haystack.data + haystackIdx;
        lhs.length = needle.length;

        int difference = -1;
        auto cmpStatus = fud_string_compare(lhs, needle, &difference);
        ExtDebugAssert(cmpStatus == FudStatus::Success);
        if (difference == 0) {
            stringView->data = lhs.data;
            stringView->length = lhs.length;
            return FudStatus::Success;
        }
        haystackIdx++;
    }

    return ExtNotFound;
}

#endif

} // namespace fud