From 87071200872c2450c947047350132aee493033c1 Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Thu, 2 Jan 2025 15:11:51 -0600 Subject: Get basic CSV parser operating. --- include/fud_csv.hpp | 34 ++-- include/fud_file.hpp | 10 +- include/fud_format.hpp | 21 ++- include/fud_option.hpp | 3 +- include/fud_permissions.hpp | 6 + include/fud_print.hpp | 11 ++ include/fud_result.hpp | 2 +- include/fud_string_view.hpp | 9 +- include/fud_utf8.hpp | 26 +++ include/fud_vector.hpp | 35 +++- source/fud_csv.cpp | 416 ++++++++++++++++++++++++++++++++++++++++---- source/fud_file.cpp | 118 +++++++++---- source/fud_string.cpp | 4 - source/fud_string_view.cpp | 2 +- test/CMakeLists.txt | 1 + test/test_common.cpp | 19 +- test/test_common.hpp | 4 +- test/test_csv.cpp | 74 ++++++++ test/test_directory.cpp | 4 +- test/test_file.cpp | 41 ++--- test/test_format.cpp | 9 + test/test_string.cpp | 14 ++ tools/coverage.sh | 5 +- 23 files changed, 734 insertions(+), 134 deletions(-) create mode 100644 test/test_csv.cpp diff --git a/include/fud_csv.hpp b/include/fud_csv.hpp index efd37e6..38b1b81 100644 --- a/include/fud_csv.hpp +++ b/include/fud_csv.hpp @@ -22,6 +22,7 @@ #include "fud_status.hpp" #include "fud_string_view.hpp" #include "fud_text.hpp" +#include "fud_utf8.hpp" #include "fud_vector.hpp" #include // reference_wrapper @@ -29,52 +30,59 @@ namespace fud { using TextBuffer = Vector; -using CsvBuffer = Vector; -using CsvLine = Vector; struct Csv { /** \brief The number of lines of data in the CSV. */ - size_t numLines; + size_t numLines{0}; /** \brief The number of columns in the CSV. */ - size_t numColumns; + size_t numColumns{0}; /** \brief Buffer for each line with numColumns of StringView. */ - Vector lines; + Vector entries; /** \brief Backing buffer for data. 
*/ - CsvBuffer buffer; + Vector buffer; /** \separator for each column */ Utf8 columnDelimiter{Ascii{','}}; + Utf8 quoteCharacter{Ascii{'"'}}; + /** \separator for each line */ NewlineRepr newlineDelimiter{NewlineRepr::Posix}; - bool strict; + bool strictUtf8{true}; + + bool strictColumns{true}; + + bool strictQuote{false}; + + bool skipInitialSpace{false}; /** \brief Uses global Fud allocator for lines and backing buffer. */ static Csv makeDefault(); /** \brief Specify allocator to use for both lines and backing buffer. */ - static Csv makeSingleAllocator(Allocator& allocator); + static Csv makeWithSingleAllocator(Allocator& allocator); /** \brief Specify allocator. */ - static Csv make(Allocator& lineAllocator, Allocator& bufferAllocator); + static Csv make(Allocator& entryAllocator, Allocator& bufferAllocator); /** Consume and return the CSV. */ - static FudStatus parseCsvFromFilename( + static FudStatus parseFromFilename( Csv& csv, Option bufferOption, StringView filename, OpenFlags flags = OpenFlags{}, - Option dirFdOption = NullOpt); + Option dirFdOption = NullOpt, + Option maxExtraAttempts = NullOpt); // assumes file is at start - static FudStatus parseCsvFromUnbufferedFile(Csv& csv, RegularFile&& file); + static FudStatus parseFromUnbufferedFile(Csv& csv, RegularFile&& file, Option maxExtraAttempts); // assumes file is at start - static FudStatus parseCsvFromBufferedFile(Csv& csv, BufferedRegularFile& file); + static FudStatus parseFromBufferedFile(Csv& csv, BufferedRegularFile& file, Option maxExtraAttempts); }; } // namespace fud diff --git a/include/fud_file.hpp b/include/fud_file.hpp index e7c485c..6f1acbf 100644 --- a/include/fud_file.hpp +++ b/include/fud_file.hpp @@ -38,8 +38,7 @@ enum class FileAccessMode : uint8_t ReadWrite = Read | Write }; -// enum class OpenFlagEnum : uint32_t -enum class OpenFlagEnum : uint8_t +enum class OpenFlagEnum : uint16_t { Append = 0x01, Truncate = Append << 1, @@ -207,6 +206,9 @@ class BufferedRegularFile { /** \brief 
Read from file as source to sink. */ DrainResult read(std::byte* sink, size_t length, Option maxExtraAttempts); + /** \brief Attempt to read one UTF8 sequence. */ + DrainResult readUtf8(Utf8& sink, Option maxExtraAttempts); + FudStatus setBuffer(Vector&& buffer, bool discardOldBuffer); DrainResult flush(size_t maxExtraAttempts = 0); @@ -261,6 +263,10 @@ class BufferedRegularFile { }; Operation m_lastOperation{Operation::None}; + + DrainResult validateBufferedIO(const std::byte* pointer, Operation requestedOperation); + + void drainReadBuffer(std::byte*& sink, size_t& length, DrainResult& result); }; } // namespace fud diff --git a/include/fud_format.hpp b/include/fud_format.hpp index 2102dc9..9be3dd9 100644 --- a/include/fud_format.hpp +++ b/include/fud_format.hpp @@ -32,8 +32,10 @@ #include #include +#include #include #include +#include namespace fud { @@ -716,21 +718,24 @@ FudStatus fillSignedBuffer(IntCharArray& buffer, T value, uint8_t& bufferLength, { static_assert(sizeof(T) <= sizeof(uint64_t)); static_assert(std::is_signed_v); + uint64_t unsignedValue{}; if (value < 0) { value++; - value = -value; - value++; + unsignedValue = static_cast(-value); + unsignedValue++; + } else { + unsignedValue = static_cast(value); } if constexpr (std::is_same_v) { - return fillUnsignedBuffer(buffer, static_cast(value), bufferLength, radix, uppercase); + return fillUnsignedBuffer(buffer, static_cast(unsignedValue), bufferLength, radix, uppercase); } else if constexpr (std::is_same_v) { - return fillUnsignedBuffer(buffer, static_cast(value), bufferLength, radix, uppercase); + return fillUnsignedBuffer(buffer, static_cast(unsignedValue), bufferLength, radix, uppercase); } else if constexpr (std::is_same_v) { - return fillUnsignedBuffer(buffer, static_cast(value), bufferLength, radix, uppercase); + return fillUnsignedBuffer(buffer, static_cast(unsignedValue), bufferLength, radix, uppercase); } else if constexpr (std::is_same_v) { - return fillUnsignedBuffer(buffer, 
static_cast(value), bufferLength, radix, uppercase); + return fillUnsignedBuffer(buffer, static_cast(unsignedValue), bufferLength, radix, uppercase); } else if constexpr (std::is_same_v) { - return fillUnsignedBuffer(buffer, static_cast(value), bufferLength, radix, uppercase); + return fillUnsignedBuffer(buffer, static_cast(unsignedValue), bufferLength, radix, uppercase); } } @@ -1441,6 +1446,8 @@ FormatResult format(Sink& sink, FormatCharMode formatMode, const FormatSpec& for return result; } + // printf("From format(sink, mode, spec, stringview arg): Arg contents are %p %zu?\n\n", arg.c_str(), arg.length()); + // printf("From format(sink, mode, spec, stringview arg): What?\n%s\n", std::format("{}", std::string_view{arg.c_str(), arg.length()}).c_str()); auto drainViewResult = sink.drain(arg); result.bytesDrained += drainViewResult.bytesDrained; result.status = drainViewResult.status; diff --git a/include/fud_option.hpp b/include/fud_option.hpp index 3b0eb1b..af2fcd3 100644 --- a/include/fud_option.hpp +++ b/include/fud_option.hpp @@ -229,7 +229,8 @@ class Option { m_data.clear(); } - alignas(alignof(T)) option_detail::DataArray m_data{}; + static constexpr auto Align = std::max(alignof(T), alignof(std::reference_wrapper)); + alignas(Align) option_detail::DataArray m_data{}; bool m_engaged; }; diff --git a/include/fud_permissions.hpp b/include/fud_permissions.hpp index 66eec0c..d79fe79 100644 --- a/include/fud_permissions.hpp +++ b/include/fud_permissions.hpp @@ -186,6 +186,12 @@ constexpr PermissionField operator|(PermissionField lhs, PermissionField rhs) constexpr PermissionField PermReadWrite = PermissionType::Read | PermissionType::Write; +constexpr Permissions PermAllReadWrite{PermReadWrite, PermReadWrite, PermReadWrite}; + +constexpr Permissions PermUserGroupReadWrite{PermReadWrite, PermReadWrite, PermissionField{PermissionType::None}}; + +constexpr Permissions PermUserRwGroupRead{PermReadWrite, PermReadWrite, PermissionField{PermissionType::None}}; + } // 
namespace fud #endif diff --git a/include/fud_print.hpp b/include/fud_print.hpp index 592b106..a8e61de 100644 --- a/include/fud_print.hpp +++ b/include/fud_print.hpp @@ -35,6 +35,17 @@ FormatResult print(FormatString fmt, Args&&... args) return format(outSink, FormatCharMode::Unchecked, fmt, std::forward(args)...); } +inline void debugPrint(FormatString fmt) +{ + static_cast(print(fmt)); +} + +template +void debugPrint(FormatString fmt, Args&&... args) +{ + static_cast(print(fmt, std::forward(args)...)); +} + } // namespace fud #endif diff --git a/include/fud_result.hpp b/include/fud_result.hpp index b91a31a..0f501e8 100644 --- a/include/fud_result.hpp +++ b/include/fud_result.hpp @@ -307,7 +307,7 @@ class [[nodiscard]] Result { static constexpr auto Size = std::max(sizeof(T), sizeof(E)); static constexpr auto Align = std::max(alignof(T), alignof(E)); - option_detail::DataArray m_data{}; + alignas(Align) option_detail::DataArray m_data{}; enum class Discriminant : uint8_t { diff --git a/include/fud_string_view.hpp b/include/fud_string_view.hpp index 6403c27..327bf20 100644 --- a/include/fud_string_view.hpp +++ b/include/fud_string_view.hpp @@ -68,10 +68,10 @@ struct StringView { explicit StringView(const String& fudString) noexcept; template - constexpr static StringView cStringView(const char (&input)[N]) + constexpr static StringView makeFromCString(const char (&input)[N]) { static_assert(N > 0); - return StringView{N, reinterpret_cast(input)}; + return StringView{N - 1, reinterpret_cast(input)}; } [[nodiscard]] constexpr size_t length() const @@ -84,6 +84,11 @@ struct StringView { return m_data; } + [[nodiscard]] inline const char* c_str() const + { + return reinterpret_cast(m_data); + } + constexpr const utf8& operator[](size_t index) const { if constexpr (fudBoundsChecking) { diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp index 119640c..030164d 100644 --- a/include/fud_utf8.hpp +++ b/include/fud_utf8.hpp @@ -275,12 +275,37 @@ enum class Utf8Type : 
uint8_t Utf82Byte, Utf83Byte, Utf84Byte, + Invalid }; static_assert(Utf8TypeSet.m_values[0] == static_cast(Utf8Type::Ascii)); static_assert(Utf8TypeSet.m_values[1] == static_cast(Utf8Type::Utf82Byte)); static_assert(Utf8TypeSet.m_values[2] == static_cast(Utf8Type::Utf83Byte)); static_assert(Utf8TypeSet.m_values[3] == static_cast(Utf8Type::Utf84Byte)); +/* +| B | E | Byte 1 | Byte 2 | Byte 3 | Byte 4 +| U+0000 | U+007F | 0xxxxxxx | | | +| U+0080 | U+07FF | 110xxxxx | 10xxxxxx | | +| U+0800 | U+FFFF | 1110xxxx | 10xxxxxx | 10xxxxxx | +| U+10000 | U+10FFFF | 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx +*/ +constexpr Utf8Type utf8TypeFromByte(utf8 input) { + if ((input >> 7) == 0) { + return Utf8Type::Ascii; + } + if ((input >> 5) == 0b110) { + return Utf8Type::Utf82Byte; + } + if ((input >> 4) == 0b1110) { + return Utf8Type::Utf83Byte; + } + if ((input >> 3) == 0b11110) { + return Utf8Type::Utf84Byte; + } + + return Utf8Type::Invalid; +} + struct Utf8 { Utf8Variant m_variant{Utf8Variant{Ascii{}}}; @@ -445,6 +470,7 @@ struct Utf8 { static_cast(std::get(m_variant).characters[1]) << TwoByteShift | static_cast(std::get(m_variant).characters[2]) << OneByteShift | static_cast(std::get(m_variant).characters[3]); + case Utf8Type::Invalid: default: // unlikely return -1; } diff --git a/include/fud_vector.hpp b/include/fud_vector.hpp index 9159770..1730c50 100644 --- a/include/fud_vector.hpp +++ b/include/fud_vector.hpp @@ -59,16 +59,21 @@ class Vector { Vector& operator=(Vector&& rhs) noexcept { - cleanup(); + if (&rhs == this) { + return *this; + } + static_cast(cleanup()); m_allocator = rhs.m_allocator; m_data = rhs.m_data; m_length = rhs.m_length; m_capacity = rhs.m_length; - rhs.m_allocataor = nullptr; + rhs.m_allocator = nullptr; rhs.m_data = nullptr; rhs.m_length = 0; rhs.m_capacity = 0; + + return *this; } static constexpr Vector NullVector() noexcept { @@ -629,6 +634,32 @@ class Vector { return FudStatus::Success; } + template + FudStatus extend(Span fixedSpan) + { + if 
(fixedSpan.data() == nullptr) { + return FudStatus::NullPointer; + } + if (std::numeric_limits::max() - Size < m_length) { + return FudStatus::Failure; + } + if (m_length + Size > m_capacity) + { + auto status = grow(); + if (status != FudStatus::Success) { + return status; + } + } + + for (size_t spanIndex = 0; spanIndex < Size; ++spanIndex) { + const auto* ptr = new (m_data + m_length) T(fixedSpan[spanIndex]); + fudAssert(ptr != nullptr); + m_length++; + } + + return FudStatus::Success; + } + FudStatus erase(size_t index) { if (index >= m_length) { diff --git a/source/fud_csv.cpp b/source/fud_csv.cpp index 031fcbc..198aeca 100644 --- a/source/fud_csv.cpp +++ b/source/fud_csv.cpp @@ -17,76 +17,428 @@ #include "fud_csv.hpp" +#include "fud_print.hpp" + namespace fud { -FudStatus Csv::parseCsvFromFilename( +} // namespace fud + +namespace fud { + +Csv Csv::makeDefault() +{ + return Csv::makeWithSingleAllocator(globalFudAllocator); +} + +Csv Csv::makeWithSingleAllocator(Allocator& allocator) +{ + return Csv::make(allocator, allocator); +} + +Csv Csv::make(Allocator& entryAllocator, Allocator& bufferAllocator) +{ + Csv csv{}; + csv.entries = Vector(entryAllocator); + csv.buffer = Vector(bufferAllocator); + return csv; +} + +FudStatus Csv::parseFromFilename( Csv& csv, Option bufferOption, StringView filename, OpenFlags flags, - Option dirFdOption) + Option dirFdOption, + Option maxExtraAttempts) { + debugPrint(u8"Enter parse from filename\n"); auto fileResult{RegularFile::open(filename, FileAccessMode::Read, flags, dirFdOption)}; if (fileResult.isError()) { + debugPrint(u8"Error opening file: {}\n", FudStatusToString(fileResult.getError())); return fileResult.takeError(); } if (bufferOption.hasValue()) { auto bufferedFile{BufferedRegularFile::make(fileResult.takeOkay(), std::move(bufferOption.value()))}; - return parseCsvFromBufferedFile(csv, bufferedFile); + return parseFromBufferedFile(csv, bufferedFile, maxExtraAttempts); } auto unbufferedFile{fileResult.takeOkay()}; 
- return parseCsvFromUnbufferedFile(csv, std::move(unbufferedFile)); + return parseFromUnbufferedFile(csv, std::move(unbufferedFile), maxExtraAttempts); } -enum class CsvTextState : uint8_t -{ - UnquotedField, - QuotedField, - Separator, - Newline, -}; +template +DrainResult readHeader(Csv& csv, File& file, Option maxExtraAttempts, size_t& rawSize); + +template +DrainResult scanLine(const Csv& csv, File& file, Option maxExtraAttempts, size_t& rawSize); -FudStatus Csv::parseCsvFromBufferedFile(Csv& csv, BufferedRegularFile& file) +template +FudStatus fillBuffer(Csv& csv, File& file, Option maxExtraAttempts, size_t rawSize); + +FudStatus Csv::parseFromBufferedFile(Csv& csv, BufferedRegularFile& file, Option maxExtraAttempts) { - auto lineEnding{newlineText(csv.newlineDelimiter)}; - static_cast(lineEnding); - DrainResult readResult{}; - while (true) { - utf8 letter{}; - auto drainResult = file.read(reinterpret_cast(&letter), sizeof(letter), NullOpt); - readResult.status = drainResult.status; - readResult.bytesDrained += drainResult.bytesDrained; - // if (status - // REMOVE - break; + size_t rawSize = 0; + DrainResult readResult{readHeader(csv, file, maxExtraAttempts, rawSize)}; + if (readResult.status == FudStatus::Partial) { + // fix this up with filling out the first row + return readResult.status; } - size_t rawSize = 0; + if (readResult.status != FudStatus::Success && readResult.status != FudStatus::Partial) { + return readResult.status; + } + + debugPrint(u8"Working with {} columns\n", csv.numColumns); - while (true) { - rawSize++; - // REMOVE - break; + csv.numLines = 1; + while (readResult.status == FudStatus::Success) { + auto lineResult = scanLine(csv, file, maxExtraAttempts, rawSize); + readResult.status = lineResult.status; + readResult.bytesDrained += lineResult.bytesDrained; + if (readResult.status == FudStatus::Success || readResult.status == FudStatus::Partial) { + csv.numLines++; + debugPrint(u8"Read additional line - total of {}\n", csv.numLines); + } 
} - auto reserveStatus = csv.buffer.reserve(rawSize); + if (readResult.status == FudStatus::Empty || readResult.status == FudStatus::Partial) { + readResult.status = FudStatus::Success; + } + + if (readResult.status != FudStatus::Success) { + return readResult.status; + } + + if (std::numeric_limits::max() / csv.numLines < csv.numColumns) { + debugPrint(u8"Fail: csv.numlines = {}, csv.numColumns = {}\n", csv.numLines, csv.numColumns); + return FudStatus::Failure; + } + + debugPrint(u8"Working with {} lines\n", csv.numLines); + + auto reserveStatus = csv.entries.reserve(csv.numLines * csv.numColumns); + if (reserveStatus != FudStatus::Success) { + return reserveStatus; + } + + reserveStatus = csv.buffer.reserve(rawSize); if (reserveStatus != FudStatus::Success) { return reserveStatus; } - return FudStatus::NotImplemented; + debugPrint(u8"Reserved space - {}\n", rawSize + 1); + + auto fillStatus = fillBuffer(csv, file, maxExtraAttempts, rawSize); + + if (fillStatus != FudStatus::Success) { + return fillStatus; + } + + return FudStatus::Success; } -FudStatus Csv::parseCsvFromUnbufferedFile(Csv& csv, RegularFile&& file) +FudStatus Csv::parseFromUnbufferedFile(Csv& csv, RegularFile&& file, Option maxExtraAttempts) { static_cast(csv); constexpr size_t BufferSize = 256; SimpleStackAllocator stackAllocator{}; auto bufferedFile{BufferedRegularFile::make(std::move(file), TextBuffer{stackAllocator})}; - return parseCsvFromBufferedFile(csv, bufferedFile); + return parseFromBufferedFile(csv, bufferedFile, maxExtraAttempts); +} + +// NOLINTBEGIN(readability-function-cognitive-complexity) +template +DrainResult readHeader(Csv& csv, File& file, Option maxExtraAttempts, size_t& rawSize) +{ + debugPrint(u8"Entered read header\n"); + + DrainResult readResult{}; + + auto lineEnding{newlineText(csv.newlineDelimiter)}; + bool endOfLine{false}; + bool maybeNewline{false}; + bool inQuote{false}; + bool sawQuote{false}; + + while (not endOfLine) { + Utf8 utf8Char{}; + + auto 
drainResult{file.readUtf8(utf8Char, maxExtraAttempts)}; + readResult.status = drainResult.status; + readResult.bytesDrained += drainResult.bytesDrained; + if (readResult.status != FudStatus::Success) { + debugPrint(u8"Failed to read: {}", FudStatusToString(readResult.status)); + return readResult; + } + + if (csv.strictUtf8 && not utf8Char.valid()) { + debugPrint(u8"UTF8 invalid\n"); + readResult.status = FudStatus::Utf8Invalid; + return readResult; + } + + if (maybeNewline and utf8Char != Utf8{Ascii{lineEnding[1]}}) { + rawSize += 2; + maybeNewline = false; + } else if (inQuote and utf8Char == csv.quoteCharacter) { + inQuote = false; + sawQuote = true; + } else if (inQuote) { + sawQuote = false; + rawSize += utf8Char.size(); + } else if (utf8Char == csv.quoteCharacter) { + inQuote = true; + if (sawQuote) { + rawSize += utf8Char.size(); + sawQuote = false; + } + } else if (utf8Char == csv.columnDelimiter) { + csv.numColumns++; + } else if (not maybeNewline and utf8Char == Utf8{Ascii{lineEnding[0]}}) { + if (lineEnding.length() == 1) { + csv.numColumns++; + endOfLine = true; + } else { + maybeNewline = true; + } + } else if (maybeNewline and utf8Char == Utf8{Ascii{lineEnding[1]}}) { + maybeNewline = false; + endOfLine = true; + } else { + rawSize += utf8Char.size(); + } + } + + return readResult; +} + +template +DrainResult scanLine(const Csv& csv, File& file, Option maxExtraAttempts, size_t& rawSize) +{ + debugPrint(u8"Entered scanLine\n"); + DrainResult readResult{}; + + auto lineEnding{newlineText(csv.newlineDelimiter)}; + bool endOfLine{false}; + bool maybeNewline{false}; + bool inQuote{false}; + bool sawQuote{false}; + bool addToSize{}; + size_t numColumns{0}; + + while (not endOfLine) { + addToSize = false; + Utf8 utf8Char{}; + auto drainResult{file.readUtf8(utf8Char, maxExtraAttempts)}; + readResult.status = drainResult.status; + readResult.bytesDrained += drainResult.bytesDrained; + if (readResult.status != FudStatus::Success) { + break; + } + + if 
(csv.strictUtf8 && not utf8Char.valid()) { + readResult.status = FudStatus::Utf8Invalid; + break; + } + + debugPrint(u8"{}", utf8Char.data()); + + if (maybeNewline and utf8Char != Utf8{Ascii{lineEnding[1]}}) { + rawSize += 2; + maybeNewline = false; + } else if (inQuote and utf8Char == csv.quoteCharacter) { + inQuote = false; + sawQuote = true; + } else if (inQuote) { + addToSize = true; + sawQuote = false; + } else if (utf8Char == csv.quoteCharacter) { + inQuote = true; + if (sawQuote) { + addToSize = true; + sawQuote = false; + } + } else if (utf8Char == csv.columnDelimiter) { + numColumns++; + } else if (not maybeNewline and utf8Char == Utf8{Ascii{lineEnding[0]}}) { + if (lineEnding.length() == 1) { + numColumns++; + endOfLine = true; + } else { + maybeNewline = true; + } + } else if (maybeNewline and utf8Char == Utf8{Ascii{lineEnding[1]}}) { + maybeNewline = false; + endOfLine = true; + } else { + addToSize = true; + } + + if (addToSize and numColumns < csv.numColumns) { + rawSize += utf8Char.size(); + } + + if (numColumns > csv.numColumns and csv.strictColumns) { + readResult.status = FudStatus::FormatInvalid; + break; + } + } + + if (numColumns > 0) { + debugPrint(u8"\n - Read line above with {} columns \n", numColumns); + } + + if (numColumns == 0) { + debugPrint(u8"Read no additional columns\n"); + readResult.status = FudStatus::Empty; + return readResult; + } + + if (numColumns != csv.numColumns and csv.strictColumns) { + readResult.status = FudStatus::FormatInvalid; + return readResult; + } + + return readResult; +} + +template +FudStatus fillBuffer(Csv& csv, File& file, Option maxExtraAttempts, size_t rawSize) +{ + static_cast(rawSize); + + auto seekStatus = file.seekStart(); + if (seekStatus != FudStatus::Success) { + return seekStatus; + } + + auto lineEnding{newlineText(csv.newlineDelimiter)}; + + size_t sizeCounter{0}; + size_t numColumns{0}; + bool maybeNewline{false}; + bool inQuote{false}; + bool sawQuote{false}; + bool addToSize{}; + bool 
terminateEntry{false}; + bool endLine{false}; + + StringView currentEntry{}; + currentEntry.m_data = csv.buffer.data() + sizeCounter; + + size_t numEntries = csv.numLines * csv.numColumns; + debugPrint(u8"Working over {} entries\n", numEntries); + for (size_t entryIndex = 0; entryIndex < numEntries;) { + addToSize = false; + Utf8 utf8Char{}; + auto drainResult{file.readUtf8(utf8Char, maxExtraAttempts)}; + if (drainResult.status != FudStatus::Success) { + return drainResult.status; + } + + if (maybeNewline and utf8Char != Utf8{Ascii{lineEnding[1]}}) { + addToSize = true; + // TODO: this actually needs proper handling still + maybeNewline = false; + } else if (inQuote and utf8Char == csv.quoteCharacter) { + inQuote = false; + sawQuote = true; + } else if (inQuote) { + sawQuote = false; + addToSize = true; + } else if (utf8Char == csv.quoteCharacter) { + inQuote = true; + if (sawQuote) { + addToSize = true; + sawQuote = false; + } + } else if (utf8Char == csv.columnDelimiter) { + terminateEntry = true; + } else if (not maybeNewline and utf8Char == Utf8{Ascii{lineEnding[0]}}) { + if (lineEnding.length() == 1) { + terminateEntry = true; + endLine = true; + } else { + maybeNewline = true; + } + } else if (maybeNewline and utf8Char == Utf8{Ascii{lineEnding[1]}}) { + maybeNewline = false; + terminateEntry = true; + endLine = true; + } else { + addToSize = true; + } + + if (not inQuote and sawQuote and utf8Char != csv.quoteCharacter) { + sawQuote = false; + } + + if (terminateEntry) { + auto pushStatus = csv.entries.pushBack(currentEntry); + if (pushStatus != FudStatus::Success) { + return pushStatus; + } + currentEntry.m_length = 0; + terminateEntry = false; + entryIndex++; + if (endLine) { + numColumns = 0; + endLine = false; + } + } + + if (addToSize and numColumns < csv.numColumns) { + FudStatus extendStatus{FudStatus::Success}; + auto charSize = utf8Char.size(); + switch (charSize) { + case 1: + extendStatus = csv.buffer.extend(Span{utf8Char.data(), 1}); + break; + 
case 2: + extendStatus = csv.buffer.extend(Span{utf8Char.data(), 2}); + break; + case 3: + extendStatus = csv.buffer.extend(Span{utf8Char.data(), 3}); + break; + case 4: + extendStatus = csv.buffer.extend(Span{utf8Char.data(), 4}); + break; + case 0: + default: + debugPrint(u8"Char size is ??? {}\n", charSize); + charSize = 0; + break; + } + if (extendStatus != FudStatus::Success) { + return extendStatus; + } + sizeCounter += charSize; + currentEntry.m_length += charSize; + } + } + + debugPrint( + u8"Buffer, with current size = {} and sizeCounter, = {}:\n-----\n{}\n-----\n", + csv.buffer.size(), + sizeCounter, + StringView{csv.buffer.size(), csv.buffer.data()}); + + const auto* data = csv.buffer.data(); + for (auto& entry : csv.entries) { + entry.m_data = data; + data += entry.m_length; + debugPrint(u8"Entry = {}\n", entry); + } + + if (rawSize != sizeCounter) { + debugPrint(u8"Invalid: rawSize == {}, sizeCounter == {}"); + return FudStatus::Failure; + } + + return FudStatus::Success; } +// NOLINTEND(readability-function-cognitive-complexity) } // namespace fud diff --git a/source/fud_file.cpp b/source/fud_file.cpp index caf0f5a..ca6404d 100644 --- a/source/fud_file.cpp +++ b/source/fud_file.cpp @@ -552,11 +552,11 @@ FudStatus BufferedRegularFile::resizeBuffer(size_t size) return m_buffer.resize(size); } -DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, Option maxExtraAttempts) +DrainResult BufferedRegularFile::validateBufferedIO(const std::byte* pointer, Operation requestedOperation) { DrainResult result{0, FudStatus::Success}; - if (source == nullptr) { + if (pointer == nullptr) { result.status = FudStatus::NullPointer; return result; } @@ -566,9 +566,25 @@ DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, O return result; } - if (m_lastOperation != Operation::Write) { + if (requestedOperation == Operation::Read && m_lastOperation == Operation::Write && m_bufferLength > 0) { + result.status = 
FudStatus::OperationInvalid; + return result; + } + + if (m_lastOperation != requestedOperation) { m_bufferLength = 0; - m_lastOperation = Operation::Write; + m_bufferPosition = 0; + m_lastOperation = requestedOperation; + } + + return result; +} + +DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, Option maxExtraAttempts) +{ + DrainResult result{validateBufferedIO(source, Operation::Write)}; + if (result.status != FudStatus::Success) { + return result; } if (length == 0) { @@ -628,46 +644,17 @@ DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, O DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option maxExtraAttempts) { auto extraAttempts = maxExtraAttempts.valueOr(0); - DrainResult result{0, FudStatus::Success}; + DrainResult result{validateBufferedIO(sink, Operation::Read)}; - if (sink == nullptr) { - result.status = FudStatus::NullPointer; - return result; - } - - if (not m_file.isOpen()) { - result.status = FudStatus::HandleInvalid; - return result; - } - - if (m_lastOperation == Operation::Write && m_bufferLength > 0) { - result.status = FudStatus::OperationInvalid; + if (result.status != FudStatus::Success) { return result; } - if (m_lastOperation != Operation::Read) { - m_lastOperation = Operation::Read; - m_bufferPosition = 0; - m_bufferLength = 0; - } - if (length == 0) { return result; } - if (m_bufferLength > 0 && m_bufferPosition < m_bufferLength) { - auto remainingLength = m_bufferLength - m_bufferPosition; - auto count = min(length, remainingLength); - - auto copyStatus = copyMem(sink, length, m_buffer.data() + m_bufferPosition, count); - fudAssert(copyStatus == FudStatus::Success); - - sink += count; - length -= count; - - m_bufferPosition += count; - result.bytesDrained += count; - } + drainReadBuffer(sink, length, result); fudAssert(length == 0 || m_bufferPosition == m_bufferLength); @@ -727,6 +714,65 @@ DrainResult BufferedRegularFile::read(std::byte* sink, size_t 
length, Option maxExtraAttempts) +{ + size_t extraAttempts{maxExtraAttempts.valueOr(0)}; + Array utf8Data{}; + auto drainResult = read(reinterpret_cast(utf8Data.data()), 1, maxExtraAttempts); + if (drainResult.status != FudStatus::Success) { + return drainResult; + } + + auto utf8Type = utf8TypeFromByte(utf8Data[0]); + uint8_t bytesToRead{0}; + switch (utf8Type) { + case Utf8Type::Ascii: + break; + case Utf8Type::Utf82Byte: + bytesToRead = 1; + break; + case Utf8Type::Utf83Byte: + bytesToRead = 2; + break; + case Utf8Type::Utf84Byte: + bytesToRead = 3; + break; + case Utf8Type::Invalid: + default: + sink = Utf8{Ascii{utf8Data[0]}}; + drainResult.status = FudStatus::Utf8Invalid; + return drainResult; + } + + if (bytesToRead > 0) { + auto utf8ReadResult = read(reinterpret_cast(utf8Data.data() + 1), bytesToRead, extraAttempts); + drainResult.status = utf8ReadResult.status; + drainResult.bytesDrained += utf8ReadResult.bytesDrained; + } + + sink = Utf8::make(utf8Data); + + return drainResult; +} + +void BufferedRegularFile::drainReadBuffer(std::byte*& sink, size_t& length, DrainResult& result) +{ + if (m_bufferLength > 0 && m_bufferPosition < m_bufferLength) { + auto remainingLength = m_bufferLength - m_bufferPosition; + auto count = min(length, remainingLength); + + auto copyStatus = copyMem(sink, length, m_buffer.data() + m_bufferPosition, count); + fudAssert(copyStatus == FudStatus::Success); + + sink += count; + length -= count; + + m_bufferPosition += count; + result.bytesDrained += count; + } +} + FudStatus BufferedRegularFile::setBuffer(Vector&& buffer, bool discardOldBuffer) { static_cast(buffer); diff --git a/source/fud_string.cpp b/source/fud_string.cpp index cd3e918..2a17201 100644 --- a/source/fud_string.cpp +++ b/source/fud_string.cpp @@ -131,10 +131,6 @@ StringResult String::from(StringView view, Allocator* allocator) return StringResult::error(FudStatus::ArgumentInvalid); } - if (view.nullTerminated()) { - return 
StringResult::error(FudStatus::ArgumentInvalid); - } - String output{}; output.m_allocator = reinterpret_cast(allocator); size_t outputCapacity = view.length() + 1U; diff --git a/source/fud_string_view.cpp b/source/fud_string_view.cpp index 090dd6d..ba88ad4 100644 --- a/source/fud_string_view.cpp +++ b/source/fud_string_view.cpp @@ -21,7 +21,7 @@ namespace fud { -StringView::StringView(const String& fudString) noexcept : StringView(fudString.length(), fudString.data()) +StringView::StringView(const String& fudString) noexcept : StringView{fudString.length(), fudString.data()} { } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1ceca71..0a1a1e7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -63,6 +63,7 @@ endfunction() fud_add_test(test_fud SOURCES test_fud.cpp) fud_add_test(test_allocator SOURCES test_allocator.cpp) fud_add_test(test_assert SOURCES test_assert.cpp) +fud_add_test(test_csv SOURCES test_csv.cpp) # fud_add_test(test_c_file SOURCES test_c_file.cpp) fud_add_test(test_directory SOURCES test_directory.cpp) fud_add_test(test_file SOURCES test_file.cpp) diff --git a/test/test_common.cpp b/test/test_common.cpp index 07a0088..f272dad 100644 --- a/test/test_common.cpp +++ b/test/test_common.cpp @@ -74,7 +74,7 @@ int unlink_cb(const char* fpath, const struct stat* sb_unused, int typeflag, str return retValue; } -FudStatus removeRecursive(const String& path) +FudStatus removeRecursive(StringView path) { if (!path.utf8Valid()) { return FudStatus::Utf8Invalid; @@ -82,6 +82,9 @@ FudStatus removeRecursive(const String& path) if (path.length() < 5) { return FudStatus::ArgumentInvalid; } + if (not path.nullTerminated()) { + return FudStatus::StringInvalid; + } auto prefix{String::makeFromCString("/tmp/").takeOkay()}; auto diffResult = compareMem(path.data(), path.length(), prefix.data(), prefix.length()); if (diffResult.isError()) { @@ -92,7 +95,8 @@ FudStatus removeRecursive(const String& path) return FudStatus::ArgumentInvalid; } constexpr 
int maxOpenFd = 64;
-    auto status = nftw(path.c_str(), unlink_cb, maxOpenFd, FTW_DEPTH | FTW_PHYS);
+
+    auto status = nftw(reinterpret_cast<const char*>(path.data()), unlink_cb, maxOpenFd, FTW_DEPTH | FTW_PHYS);
     if (status == 0) {
         return FudStatus::Success;
     }
@@ -104,4 +108,26 @@ FudStatus removeRecursive(const String& path)
     return FudStatus::Failure;
 }
 
+/** \brief Remove filename via unlink(); an already-missing file is not an error.
+ *
+ * \returns 0 on success or if the file does not exist; otherwise -1 with errno
+ * set (EINVAL when filename is not null terminated).
+ */
+auto rmFile(StringView filename) -> int
+{
+    // unlink() reads a C string, so refuse views without a terminator, as
+    // removeRecursive() does before calling nftw().
+    if (not filename.nullTerminated()) {
+        errno = EINVAL;
+        return -1;
+    }
+    auto result = unlink(filename.c_str());
+    if (result == -1) {
+        if (errno == ENOENT) {
+            return 0;
+        }
+    }
+    return result;
+}
+
 } // namespace fud
diff --git a/test/test_common.hpp b/test/test_common.hpp
index 8912e42..5f6828f 100644
--- a/test/test_common.hpp
+++ b/test/test_common.hpp
@@ -81,7 +81,9 @@ struct MockFudAllocator {
 extern MockFudAllocator globalMockFudAlloc;
 
 class String;
-FudStatus removeRecursive(const String& path);
+class StringView;
+auto rmFile(StringView filename) -> int;
+FudStatus removeRecursive(StringView path);
 
 } // namespace fud
 
diff --git a/test/test_csv.cpp b/test/test_csv.cpp
new file mode 100644
index 0000000..6923f6f
--- /dev/null
+++ b/test/test_csv.cpp
@@ -0,0 +1,74 @@
+/*
+ * libfud
+ * Copyright 2024 Dominick Allen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fud_csv.hpp"
+#include "fud_print.hpp"
+
+#include "gtest/gtest.h"
+
+namespace fud {
+
+const StringView happyCsvFilename{u8"fud-happy-test.csv"}; // no directory component - presumably created in the working directory (TODO confirm)
+
+const StringView happyData{ // reads as 3 rows x 3 fields when '"' quotes and "" escapes: covers an escaped quote, a newline inside quotes, and empty fields
+    u8"foo,bar,baz\n"
+    u8"1,Unquoted Text,\"Quoted Text with embedded \"\" quote and embedded newline \n"
+    u8"see\"\n,,\"Prior two fields are empty\"\n"};
+
+auto writeHappyCsv() -> FudStatus // writes happyData to happyCsvFilename; returns the first failing status, else Success
+{
+    auto fileResult{RegularFile::create(
+        happyCsvFilename,
+        FileAccessMode::Write,
+        OpenFlags{OpenFlagEnum::Truncate}, // presumably discards content left by an earlier run - confirm against RegularFile::create
+        PermUserRwGroupRead,
+        false,
+        NullOpt)};
+    if (fileResult.isError()) {
+        debugPrint(u8"Error opening file: {}\n", FudStatusToString(fileResult.getError()));
+        return fileResult.takeError();
+    }
+    auto file{fileResult.takeOkay()};
+
+    auto writeResult = file.write(reinterpret_cast(happyData.data()), happyData.length()); // NOTE(review): the cast's target type is missing from this rendering of the patch
+    if (writeResult.status != FudStatus::Success) {
+        return writeResult.status;
+    }
+
+    if (writeResult.bytesDrained != happyData.length()) { // a short write is reported as Failure
+        return FudStatus::Failure;
+    }
+
+    return FudStatus::Success;
+}
+
+TEST(FudCsv, ParseCsvFromFilename) // writes the fixture file, then requires parseFromFilename to succeed on it
+{
+    Csv csv{Csv::makeDefault()};
+
+    ASSERT_EQ(writeHappyCsv(), FudStatus::Success);
+
+    debugPrint(u8"Wrote happy data:\n-----\n{}\n-----\n", happyData);
+
+    auto parseStatus = Csv::parseFromFilename(csv, NullOpt, happyCsvFilename);
+    if (parseStatus != FudStatus::Success) {
+        debugPrint(u8"Error parsing file: {}\n", FudStatusToString(parseStatus));
+    }
+    ASSERT_EQ(parseStatus, FudStatus::Success); // NOTE(review): only the status is checked; parsed fields are not inspected and the fixture file is not removed
+}
+
+} // namespace fud
diff --git a/test/test_directory.cpp b/test/test_directory.cpp
index 96b9c2d..0f7dc8d 100644
--- a/test/test_directory.cpp
+++ b/test/test_directory.cpp
@@ -41,12 +41,12 @@ TEST(FudDirectory, Basic)
     ASSERT_TRUE(files[0].utf8Valid());
     ASSERT_TRUE(files[1].utf8Valid());
 
-    ASSERT_EQ(removeRecursive(testDirName), FudStatus::Success);
+    ASSERT_EQ(removeRecursive(testDirName.asView()), FudStatus::Success);
 
     auto mkdirResult = mkdir(testDirName.c_str(), pathMode);
     EXPECT_EQ(mkdirResult,
0); if (mkdirResult != 0) { - ASSERT_EQ(removeRecursive(testDirName), FudStatus::Success); + ASSERT_EQ(removeRecursive(testDirName.asView()), FudStatus::Success); return; } diff --git a/test/test_file.cpp b/test/test_file.cpp index 9727e94..12cfb98 100644 --- a/test/test_file.cpp +++ b/test/test_file.cpp @@ -29,38 +29,27 @@ namespace fud { -auto rmFile(const auto& filename) -> int -{ - auto result = unlink(filename.c_str()); - if (result == -1) { - if (errno == ENOENT) { - return 0; - } - } - return result; -} - TEST(FudFile, Basic) { - constexpr const char* testDirCName = "/tmp/fud_directory_test"; - const auto testDirName{String::makeFromCString(testDirCName).takeOkay()}; + StringView testDirCName{StringView::makeFromCString("/tmp/fud_directory_test")}; + const auto testDirName{String::from(testDirCName).takeOkay()}; ASSERT_TRUE(testDirName.utf8Valid()); constexpr mode_t pathMode = 0777; - ASSERT_EQ(removeRecursive(testDirName), FudStatus::Success); + ASSERT_EQ(removeRecursive(testDirName.asView()), FudStatus::Success); - auto mkdirResult = mkdir(testDirName.c_str(), pathMode); + auto mkdirResult = mkdir(testDirCName.c_str(), pathMode); EXPECT_EQ(mkdirResult, 0); if (mkdirResult != 0) { - ASSERT_EQ(removeRecursive(testDirName), FudStatus::Success); + ASSERT_EQ(removeRecursive(testDirCName), FudStatus::Success); return; } - String testName1{String::makeFromCStrings(testDirCName, "/", "test1").takeOkay()}; - String testName2{String::makeFromCStrings(testDirCName, "/", "test2").takeOkay()}; + String testName1{String::makeFromCStrings(testDirCName.c_str(), "/", "test1").takeOkay()}; + String testName2{String::makeFromCStrings(testDirCName.c_str(), "/", "test2").takeOkay()}; - ASSERT_EQ(rmFile(testName1), 0); - ASSERT_EQ(rmFile(testName2), 0); + ASSERT_EQ(rmFile(testName1.asView()), 0); + ASSERT_EQ(rmFile(testName2.asView()), 0); auto fileResult{RegularFile::open(testName1.asView(), FileAccessMode::Read, OpenFlags{}, NullOpt)}; 
EXPECT_EQ(fileResult.takeErrorOr(FudStatus::Success), FudStatus::NotFound); @@ -83,8 +72,8 @@ TEST(FudFile, Basic) auto file{fileResult.takeOkay()}; ASSERT_EQ(file.close(), FudStatus::Success); - ASSERT_EQ(rmFile(testName1), 0); - ASSERT_GE(createFile(testName2), 0); + ASSERT_EQ(rmFile(testName1.asView()), 0); + ASSERT_GE(createFile(testName2.asView()), 0); ASSERT_EQ(symlink(testName2.c_str(), testName1.c_str()), 0); fileResult = RegularFile::open(testName2.asView(), FileAccessMode::Read, OpenFlags{}, NullOpt); @@ -99,9 +88,9 @@ TEST(FudFile, Basic) TEST(FudBufferedFile, OpenReadWrite) { - constexpr const char* testDirCName = "/tmp/fud_directory_test"; - const auto testDirName{String::makeFromCString(testDirCName).takeOkay()}; + StringView testDirName{StringView::makeFromCString("/tmp/fud_directory_test")}; ASSERT_TRUE(testDirName.utf8Valid()); + ASSERT_TRUE(testDirName.nullTerminated()); constexpr mode_t pathMode = 0777; ASSERT_EQ(removeRecursive(testDirName), FudStatus::Success); @@ -113,7 +102,7 @@ TEST(FudBufferedFile, OpenReadWrite) return; } - String testName{String::makeFromCStrings(testDirCName, "/", "test1").takeOkay()}; + String testName{String::makeFromCStrings(testDirName.c_str(), "/", "test1").takeOkay()}; auto fileResult{RegularFile::create( testName.asView(), FileAccessMode::ReadWrite, @@ -171,7 +160,7 @@ TEST(FudBufferedFile, OpenReadWrite) EXPECT_EQ(output[testName.size() - 1], testName.data()[testName.size() - 1]); EXPECT_EQ(bufferedFile.close(true), FudStatus::Success); - ASSERT_EQ(rmFile(testName), 0); + ASSERT_EQ(rmFile(testName.asView()), 0); } } // namespace fud diff --git a/test/test_format.cpp b/test/test_format.cpp index b9d373c..738b551 100644 --- a/test/test_format.cpp +++ b/test/test_format.cpp @@ -709,4 +709,13 @@ TEST(FormatTest, TwoArgFormatTest) EXPECT_STREQ(sink.c_str(), expected.c_str()); } +TEST(FormatTest, StringView) +{ + String sink{}; + auto expected = std::format("Test {}", std::string_view{"Hello, World!"}); + auto 
formatResult = format(sink, FormatCharMode::Unchecked, u8"Test {}", StringView{u8"Hello, World!"});
+    EXPECT_TRUE(formatResult.isOkay());
+    EXPECT_STREQ(sink.c_str(), expected.c_str());
+}
+
 } // namespace fud
diff --git a/test/test_string.cpp b/test/test_string.cpp
index 6bcbd37..ba2df6c 100644
--- a/test/test_string.cpp
+++ b/test/test_string.cpp
@@ -56,11 +56,25 @@ TEST(FudString, BasicStringOps)
 
     StringView view1{};
     ASSERT_FALSE(view1.utf8Valid());
+
     StringView view2{fudString};
     ASSERT_TRUE(view2.utf8Valid());
     ASSERT_TRUE(view2.nullTerminated());
 }
 
+TEST(FudString, ViewFromCString) // a view built from a u8 literal and one built via makeFromCString must both be valid, terminated, and 4 long
+{
+    StringView viewFromU8{u8"Test"};
+    EXPECT_EQ(viewFromU8.length(), 4U); // 4U: length() looks to be size_t, so keep both operands unsigned (no sign-compare noise from EXPECT_EQ)
+    EXPECT_TRUE(viewFromU8.utf8Valid());
+    EXPECT_TRUE(viewFromU8.nullTerminated());
+
+    StringView viewFromCString{StringView::makeFromCString("Test")};
+    EXPECT_EQ(viewFromCString.length(), 4U);
+    EXPECT_TRUE(viewFromCString.utf8Valid());
+    EXPECT_TRUE(viewFromCString.nullTerminated());
+}
+
 TEST(FudString, HeapAlloc)
 {
     constexpr const char filenameLiteral[] = "Amazing Saga Volume 01/000.jpg";
diff --git a/tools/coverage.sh b/tools/coverage.sh
index b41c66e..870e1f9 100755
--- a/tools/coverage.sh
+++ b/tools/coverage.sh
@@ -4,8 +4,9 @@ set -e
 PROJ_ROOT=$(git rev-parse --show-toplevel)
 cd $PROJ_ROOT
 
-HTML_DIR=build/coverage/html
+HTML_DIR=coverage/html
 
 ctest --test-dir build/test -j8
+cd build
 mkdir -p ${HTML_DIR}
-gcovr --exclude-throw-branches --exclude build/_deps/ --exclude test -r . --html-details ${HTML_DIR}/gcovr_report.html
+gcovr --exclude-throw-branches --exclude _deps/ --exclude ../test -r . --html-details ${HTML_DIR}/gcovr_report.html
-- 
cgit v1.2.3