diff options
| field | value | date |
|---|---|---|
| author | Dominick Allen <djallen@librehumanitas.org> | 2025-01-02 15:11:51 -0600 |
| committer | Dominick Allen <djallen@librehumanitas.org> | 2025-01-02 15:11:51 -0600 |
| commit | 87071200872c2450c947047350132aee493033c1 (patch) | |
| tree | 49109532d9bbd148b4e59043120037684093be33 /include | |
| parent | 16379362c02a2472f00fac49cad62788547c9519 (diff) | |
Get basic CSV parser operating.
Diffstat (limited to 'include')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | include/fud_csv.hpp | 34 |
| -rw-r--r-- | include/fud_file.hpp | 10 |
| -rw-r--r-- | include/fud_format.hpp | 21 |
| -rw-r--r-- | include/fud_option.hpp | 3 |
| -rw-r--r-- | include/fud_permissions.hpp | 6 |
| -rw-r--r-- | include/fud_print.hpp | 11 |
| -rw-r--r-- | include/fud_result.hpp | 2 |
| -rw-r--r-- | include/fud_string_view.hpp | 9 |
| -rw-r--r-- | include/fud_utf8.hpp | 26 |
| -rw-r--r-- | include/fud_vector.hpp | 35 |
10 files changed, 129 insertions, 28 deletions
diff --git a/include/fud_csv.hpp b/include/fud_csv.hpp index efd37e6..38b1b81 100644 --- a/include/fud_csv.hpp +++ b/include/fud_csv.hpp @@ -22,6 +22,7 @@ #include "fud_status.hpp" #include "fud_string_view.hpp" #include "fud_text.hpp" +#include "fud_utf8.hpp" #include "fud_vector.hpp" #include <functional> // reference_wrapper @@ -29,52 +30,59 @@ namespace fud { using TextBuffer = Vector<std::byte>; -using CsvBuffer = Vector<std::byte>; -using CsvLine = Vector<StringView>; struct Csv { /** \brief The number of lines of data in the CSV. */ - size_t numLines; + size_t numLines{0}; /** \brief The number of columns in the CSV. */ - size_t numColumns; + size_t numColumns{0}; /** \brief Buffer for each line with numColumns of StringView. */ - Vector<CsvLine> lines; + Vector<StringView> entries; /** \brief Backing buffer for data. */ - CsvBuffer buffer; + Vector<utf8> buffer; /** \separator for each column */ Utf8 columnDelimiter{Ascii{','}}; + Utf8 quoteCharacter{Ascii{'"'}}; + /** \separator for each line */ NewlineRepr newlineDelimiter{NewlineRepr::Posix}; - bool strict; + bool strictUtf8{true}; + + bool strictColumns{true}; + + bool strictQuote{false}; + + bool skipInitialSpace{false}; /** \brief Uses global Fud allocator for lines and backing buffer. */ static Csv makeDefault(); /** \brief Specify allocator to use for both lines and backing buffer. */ - static Csv makeSingleAllocator(Allocator& allocator); + static Csv makeWithSingleAllocator(Allocator& allocator); /** \brief Specify allocator. */ - static Csv make(Allocator& lineAllocator, Allocator& bufferAllocator); + static Csv make(Allocator& entryAllocator, Allocator& bufferAllocator); /** Consume and return the CSV. 
*/ - static FudStatus parseCsvFromFilename( + static FudStatus parseFromFilename( Csv& csv, Option<TextBuffer&&> bufferOption, StringView filename, OpenFlags flags = OpenFlags{}, - Option<int> dirFdOption = NullOpt); + Option<int> dirFdOption = NullOpt, + Option<size_t> maxExtraAttempts = NullOpt); // assumes file is at start - static FudStatus parseCsvFromUnbufferedFile(Csv& csv, RegularFile&& file); + static FudStatus parseFromUnbufferedFile(Csv& csv, RegularFile&& file, Option<size_t> maxExtraAttempts); // assumes file is at start - static FudStatus parseCsvFromBufferedFile(Csv& csv, BufferedRegularFile& file); + static FudStatus parseFromBufferedFile(Csv& csv, BufferedRegularFile& file, Option<size_t> maxExtraAttempts); }; } // namespace fud diff --git a/include/fud_file.hpp b/include/fud_file.hpp index e7c485c..6f1acbf 100644 --- a/include/fud_file.hpp +++ b/include/fud_file.hpp @@ -38,8 +38,7 @@ enum class FileAccessMode : uint8_t ReadWrite = Read | Write }; -// enum class OpenFlagEnum : uint32_t -enum class OpenFlagEnum : uint8_t +enum class OpenFlagEnum : uint16_t { Append = 0x01, Truncate = Append << 1, @@ -207,6 +206,9 @@ class BufferedRegularFile { /** \brief Read from file as source to sink. */ DrainResult read(std::byte* sink, size_t length, Option<size_t> maxExtraAttempts); + /** \brief Attempt to read one UTF8 sequence. 
*/ + DrainResult readUtf8(Utf8& sink, Option<size_t> maxExtraAttempts); + FudStatus setBuffer(Vector<std::byte>&& buffer, bool discardOldBuffer); DrainResult flush(size_t maxExtraAttempts = 0); @@ -261,6 +263,10 @@ class BufferedRegularFile { }; Operation m_lastOperation{Operation::None}; + + DrainResult validateBufferedIO(const std::byte* pointer, Operation requestedOperation); + + void drainReadBuffer(std::byte*& sink, size_t& length, DrainResult& result); }; } // namespace fud diff --git a/include/fud_format.hpp b/include/fud_format.hpp index 2102dc9..9be3dd9 100644 --- a/include/fud_format.hpp +++ b/include/fud_format.hpp @@ -32,8 +32,10 @@ #include <concepts> #include <cstdint> +#include <format> #include <limits> #include <variant> +#include <cstdio> namespace fud { @@ -716,21 +718,24 @@ FudStatus fillSignedBuffer(IntCharArray& buffer, T value, uint8_t& bufferLength, { static_assert(sizeof(T) <= sizeof(uint64_t)); static_assert(std::is_signed_v<T>); + uint64_t unsignedValue{}; if (value < 0) { value++; - value = -value; - value++; + unsignedValue = static_cast<uint64_t>(-value); + unsignedValue++; + } else { + unsignedValue = static_cast<uint64_t>(value); } if constexpr (std::is_same_v<T, char>) { - return fillUnsignedBuffer(buffer, static_cast<uint8_t>(value), bufferLength, radix, uppercase); + return fillUnsignedBuffer(buffer, static_cast<uint8_t>(unsignedValue), bufferLength, radix, uppercase); } else if constexpr (std::is_same_v<T, int8_t>) { - return fillUnsignedBuffer(buffer, static_cast<uint8_t>(value), bufferLength, radix, uppercase); + return fillUnsignedBuffer(buffer, static_cast<uint8_t>(unsignedValue), bufferLength, radix, uppercase); } else if constexpr (std::is_same_v<T, int16_t>) { - return fillUnsignedBuffer(buffer, static_cast<uint16_t>(value), bufferLength, radix, uppercase); + return fillUnsignedBuffer(buffer, static_cast<uint16_t>(unsignedValue), bufferLength, radix, uppercase); } else if constexpr (std::is_same_v<T, int32_t>) { - return 
fillUnsignedBuffer(buffer, static_cast<uint32_t>(value), bufferLength, radix, uppercase); + return fillUnsignedBuffer(buffer, static_cast<uint32_t>(unsignedValue), bufferLength, radix, uppercase); } else if constexpr (std::is_same_v<T, int64_t>) { - return fillUnsignedBuffer(buffer, static_cast<uint64_t>(value), bufferLength, radix, uppercase); + return fillUnsignedBuffer(buffer, static_cast<uint64_t>(unsignedValue), bufferLength, radix, uppercase); } } @@ -1441,6 +1446,8 @@ FormatResult format(Sink& sink, FormatCharMode formatMode, const FormatSpec& for return result; } + // printf("From format(sink, mode, spec, stringview arg): Arg contents are %p %zu?\n\n", arg.c_str(), arg.length()); + // printf("From format(sink, mode, spec, stringview arg): What?\n%s\n", std::format("{}", std::string_view{arg.c_str(), arg.length()}).c_str()); auto drainViewResult = sink.drain(arg); result.bytesDrained += drainViewResult.bytesDrained; result.status = drainViewResult.status; diff --git a/include/fud_option.hpp b/include/fud_option.hpp index 3b0eb1b..af2fcd3 100644 --- a/include/fud_option.hpp +++ b/include/fud_option.hpp @@ -229,7 +229,8 @@ class Option { m_data.clear(); } - alignas(alignof(T)) option_detail::DataArray<Size> m_data{}; + static constexpr auto Align = std::max(alignof(T), alignof(std::reference_wrapper<T>)); + alignas(Align) option_detail::DataArray<Size> m_data{}; bool m_engaged; }; diff --git a/include/fud_permissions.hpp b/include/fud_permissions.hpp index 66eec0c..d79fe79 100644 --- a/include/fud_permissions.hpp +++ b/include/fud_permissions.hpp @@ -186,6 +186,12 @@ constexpr PermissionField operator|(PermissionField lhs, PermissionField rhs) constexpr PermissionField PermReadWrite = PermissionType::Read | PermissionType::Write; +constexpr Permissions PermAllReadWrite{PermReadWrite, PermReadWrite, PermReadWrite}; + +constexpr Permissions PermUserGroupReadWrite{PermReadWrite, PermReadWrite, PermissionField{PermissionType::None}}; + +constexpr Permissions 
PermUserRwGroupRead{PermReadWrite, PermReadWrite, PermissionField{PermissionType::None}}; + } // namespace fud #endif diff --git a/include/fud_print.hpp b/include/fud_print.hpp index 592b106..a8e61de 100644 --- a/include/fud_print.hpp +++ b/include/fud_print.hpp @@ -35,6 +35,17 @@ FormatResult print(FormatString fmt, Args&&... args) return format(outSink, FormatCharMode::Unchecked, fmt, std::forward<Args>(args)...); } +inline void debugPrint(FormatString fmt) +{ + static_cast<void>(print(fmt)); +} + +template <typename... Args> +void debugPrint(FormatString fmt, Args&&... args) +{ + static_cast<void>(print(fmt, std::forward<Args>(args)...)); +} + } // namespace fud #endif diff --git a/include/fud_result.hpp b/include/fud_result.hpp index b91a31a..0f501e8 100644 --- a/include/fud_result.hpp +++ b/include/fud_result.hpp @@ -307,7 +307,7 @@ class [[nodiscard]] Result { static constexpr auto Size = std::max(sizeof(T), sizeof(E)); static constexpr auto Align = std::max(alignof(T), alignof(E)); - option_detail::DataArray<Size> m_data{}; + alignas(Align) option_detail::DataArray<Size> m_data{}; enum class Discriminant : uint8_t { diff --git a/include/fud_string_view.hpp b/include/fud_string_view.hpp index 6403c27..327bf20 100644 --- a/include/fud_string_view.hpp +++ b/include/fud_string_view.hpp @@ -68,10 +68,10 @@ struct StringView { explicit StringView(const String& fudString) noexcept; template <size_t N> - constexpr static StringView cStringView(const char (&input)[N]) + constexpr static StringView makeFromCString(const char (&input)[N]) { static_assert(N > 0); - return StringView{N, reinterpret_cast<const utf8*>(input)}; + return StringView{N - 1, reinterpret_cast<const utf8*>(input)}; } [[nodiscard]] constexpr size_t length() const @@ -84,6 +84,11 @@ struct StringView { return m_data; } + [[nodiscard]] inline const char* c_str() const + { + return reinterpret_cast<const char*>(m_data); + } + constexpr const utf8& operator[](size_t index) const { if constexpr 
(fudBoundsChecking) { diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp index 119640c..030164d 100644 --- a/include/fud_utf8.hpp +++ b/include/fud_utf8.hpp @@ -275,12 +275,37 @@ enum class Utf8Type : uint8_t Utf82Byte, Utf83Byte, Utf84Byte, + Invalid }; static_assert(Utf8TypeSet.m_values[0] == static_cast<uint8_t>(Utf8Type::Ascii)); static_assert(Utf8TypeSet.m_values[1] == static_cast<uint8_t>(Utf8Type::Utf82Byte)); static_assert(Utf8TypeSet.m_values[2] == static_cast<uint8_t>(Utf8Type::Utf83Byte)); static_assert(Utf8TypeSet.m_values[3] == static_cast<uint8_t>(Utf8Type::Utf84Byte)); +/* +| B | E | Byte 1 | Byte 2 | Byte 3 | Byte 4 +| U+0000 | U+007F | 0xxxxxxx | | | +| U+0080 | U+07FF | 110xxxxx | 10xxxxxx | | +| U+0800 | U+FFFF | 1110xxxx | 10xxxxxx | 10xxxxxx | +| U+10000 | U+10FFFF | 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx +*/ +constexpr Utf8Type utf8TypeFromByte(utf8 input) { + if ((input >> 7) == 0) { + return Utf8Type::Ascii; + } + if ((input >> 5) == 0b110) { + return Utf8Type::Utf82Byte; + } + if ((input >> 4) == 0b1110) { + return Utf8Type::Utf83Byte; + } + if ((input >> 3) == 0b11110) { + return Utf8Type::Utf84Byte; + } + + return Utf8Type::Invalid; +} + struct Utf8 { Utf8Variant m_variant{Utf8Variant{Ascii{}}}; @@ -445,6 +470,7 @@ struct Utf8 { static_cast<int64_t>(std::get<Utf84Byte>(m_variant).characters[1]) << TwoByteShift | static_cast<int64_t>(std::get<Utf84Byte>(m_variant).characters[2]) << OneByteShift | static_cast<int64_t>(std::get<Utf84Byte>(m_variant).characters[3]); + case Utf8Type::Invalid: default: // unlikely return -1; } diff --git a/include/fud_vector.hpp b/include/fud_vector.hpp index 9159770..1730c50 100644 --- a/include/fud_vector.hpp +++ b/include/fud_vector.hpp @@ -59,16 +59,21 @@ class Vector { Vector& operator=(Vector<T>&& rhs) noexcept { - cleanup(); + if (&rhs == this) { + return *this; + } + static_cast<void>(cleanup()); m_allocator = rhs.m_allocator; m_data = rhs.m_data; m_length = rhs.m_length; m_capacity = 
rhs.m_length; - rhs.m_allocataor = nullptr; + rhs.m_allocator = nullptr; rhs.m_data = nullptr; rhs.m_length = 0; rhs.m_capacity = 0; + + return *this; } static constexpr Vector<T> NullVector() noexcept { @@ -629,6 +634,32 @@ class Vector { return FudStatus::Success; } + template <size_t Size> + FudStatus extend(Span<const T, Size> fixedSpan) + { + if (fixedSpan.data() == nullptr) { + return FudStatus::NullPointer; + } + if (std::numeric_limits<size_t>::max() - Size < m_length) { + return FudStatus::Failure; + } + if (m_length + Size > m_capacity) + { + auto status = grow(); + if (status != FudStatus::Success) { + return status; + } + } + + for (size_t spanIndex = 0; spanIndex < Size; ++spanIndex) { + const auto* ptr = new (m_data + m_length) T(fixedSpan[spanIndex]); + fudAssert(ptr != nullptr); + m_length++; + } + + return FudStatus::Success; + } + FudStatus erase(size_t index) { if (index >= m_length) { |