diff options
author | Dominick Allen <djallen@librehumanitas.org> | 2025-01-03 00:08:58 -0600 |
---|---|---|
committer | Dominick Allen <djallen@librehumanitas.org> | 2025-01-03 00:08:58 -0600 |
commit | d93307d810b3f4ee8044f7308e360d9ea9c7cf22 (patch) | |
tree | 08cfe9ae27c2a9d5bd11881089b9a3333a3c11b8 | |
parent | 1ac94c8aff47b549f30b370be2191bcc0157826c (diff) |
More fixups to csv logic.
-rw-r--r-- | include/fud_file.hpp | 4 | ||||
-rw-r--r-- | source/fud_csv.cpp | 20 | ||||
-rw-r--r-- | source/fud_file.cpp | 22 | ||||
-rw-r--r-- | test/CMakeLists.txt | 2 | ||||
-rw-r--r-- | test/test_csv.cpp | 73 | ||||
-rwxr-xr-x | tools/coverage.sh | 6 |
6 files changed, 116 insertions, 11 deletions
diff --git a/include/fud_file.hpp b/include/fud_file.hpp index 66719e4..bf1fe37 100644 --- a/include/fud_file.hpp +++ b/include/fud_file.hpp @@ -158,6 +158,10 @@ class RegularFile { FudStatus seek(size_t position); + [[nodiscard]] constexpr size_t position() const { + return m_position; + } + /** \brief Write from source to file as sink. */ DrainResult write(const std::byte* source, size_t length, size_t maxExtraAttempts = 0); diff --git a/source/fud_csv.cpp b/source/fud_csv.cpp index bcef925..260a4ff 100644 --- a/source/fud_csv.cpp +++ b/source/fud_csv.cpp @@ -144,7 +144,10 @@ FudStatus Csv::parseFromUnbufferedFile(Csv& csv, RegularFile&& file, size_t maxE static_cast<void>(csv); constexpr size_t BufferSize = 256; SimpleStackAllocator<BufferSize> stackAllocator{}; - auto bufferedFile{BufferedRegularFile::make(std::move(file), TextBuffer{stackAllocator})}; + TextBuffer textBuffer{stackAllocator}; + auto resizeResult = textBuffer.resize(BufferSize); + fudAssert(resizeResult == FudStatus::Success); + auto bufferedFile{BufferedRegularFile::make(std::move(file), std::move(textBuffer))}; return parseFromBufferedFile(csv, bufferedFile, maxExtraAttempts); } @@ -222,6 +225,7 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t bool sawQuote{false}; bool addToSize{}; size_t numColumns{0}; + size_t lineLength{0}; while (not endOfLine) { addToSize = false; @@ -239,7 +243,7 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t } if (maybeNewline and utf8Char != Utf8{Ascii{lineEnding[1]}}) { - rawSize += 2; + lineLength += 2; maybeNewline = false; } else if (inQuote and utf8Char == csv.quoteCharacter) { inQuote = false; @@ -270,7 +274,7 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t } if (addToSize and numColumns < csv.numColumns) { - rawSize += utf8Char.size(); + lineLength += utf8Char.size(); } if (numColumns > csv.numColumns and csv.strictColumns) { @@ -279,6 +283,11 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t } } + rawSize += lineLength; + if (numColumns > 0 && lineLength == 0) { + numColumns = 0; + } + if (numColumns == 0) { readResult.status = FudStatus::Empty; return readResult; @@ -297,6 +306,11 @@ FudStatus fillBuffer(Csv& csv, File& file, size_t maxExtraAttempts, size_t rawSi { static_cast<void>(rawSize); + auto flushResult = file.flush(); + if (flushResult.status != FudStatus::Success) { + return flushResult.status; + } + auto seekStatus = file.seekStart(); if (seekStatus != FudStatus::Success) { return seekStatus; diff --git a/source/fud_file.cpp b/source/fud_file.cpp index ca6404d..7219638 100644 --- a/source/fud_file.cpp +++ b/source/fud_file.cpp @@ -28,6 +28,7 @@ namespace fud { +// NOLINTNEXTLINE(performance-unnecessary-value-param) FileResult RegularFile::open(StringView filename, FileAccessMode mode, OpenFlags flags, Option<int> dirFdOption) { if (!filename.nullTerminated()) { @@ -64,6 +65,8 @@ FileResult RegularFile::open(StringView filename, FileAccessMode mode, OpenFlags openHow.flags = openFlags; openHow.resolve = RESOLVE_NO_SYMLINKS; + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) auto status = syscall(SYS_openat2, dirFd, filename.data(), &openHow, sizeof(openHow)); if (status == -1) { if constexpr (EAGAIN != EWOULDBLOCK && status == EWOULDBLOCK) { @@ -119,6 +122,7 @@ FileResult RegularFile::create( OpenFlags flags, Permissions permissions, bool createOnly, + // NOLINTNEXTLINE(performance-unnecessary-value-param) Option<int> dirFdOption) { if (!filename.nullTerminated()) { @@ -156,6 +160,7 @@ FileResult RegularFile::create( openHow.resolve = RESOLVE_NO_SYMLINKS; openHow.mode = permissions.mode(); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) auto status = syscall(SYS_openat2, dirFd, filename.data(), &openHow, sizeof(openHow)); if (status == -1) { if constexpr (EAGAIN != EWOULDBLOCK && status == EWOULDBLOCK) { @@ -580,6 +585,7 @@ DrainResult BufferedRegularFile::validateBufferedIO(const std::byte* pointer, Op return result; } +// NOLINTNEXTLINE(performance-unnecessary-value-param) DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, Option<size_t> maxExtraAttempts) { DrainResult result{validateBufferedIO(source, Operation::Write)}; @@ -593,10 +599,7 @@ DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, O if (m_bufferLength < m_buffer.size()) { auto cap = m_buffer.size() - m_bufferLength; - size_t count = length; - if (count > cap) { - count = cap; - } + size_t count = fud::min(length, cap); auto copyStatus = copyMem(m_buffer.data() + m_bufferLength, cap, source, count); fudAssert(copyStatus == FudStatus::Success); @@ -641,6 +644,7 @@ DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, O return result; } +// NOLINTNEXTLINE(performance-unnecessary-value-param) DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<size_t> maxExtraAttempts) { auto extraAttempts = maxExtraAttempts.valueOr(0); @@ -693,6 +697,10 @@ DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<siz auto drainResult = m_file.read(m_buffer.data(), m_buffer.size(), extraAttempts); result.status = drainResult.status; if (drainResult.status == FudStatus::Success || drainResult.status == FudStatus::Partial) { + if (drainResult.status == FudStatus::Partial && drainResult.bytesDrained >= length) { + result.status = FudStatus::Success; + } + m_bufferLength = drainResult.bytesDrained; auto count = min(length, m_bufferLength); @@ -704,7 +712,7 @@ DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<siz length -= count; if (drainResult.status == FudStatus::Partial && length == 0) { - drainResult.status = FudStatus::Success; + result.status = FudStatus::Success; } m_bufferPosition = count; @@ -715,10 +723,12 @@ DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<siz } /** \brief Attempt to read one UTF8 sequence. */ +// NOLINTNEXTLINE(performance-unnecessary-value-param) DrainResult BufferedRegularFile::readUtf8(Utf8& sink, Option<size_t> maxExtraAttempts) { size_t extraAttempts{maxExtraAttempts.valueOr(0)}; Array<utf8, 4> utf8Data{}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto drainResult = read(reinterpret_cast<std::byte*>(utf8Data.data()), 1, maxExtraAttempts); if (drainResult.status != FudStatus::Success) { return drainResult; @@ -746,6 +756,7 @@ DrainResult BufferedRegularFile::readUtf8(Utf8& sink, Option<size_t> maxExtraAtt } if (bytesToRead > 0) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto utf8ReadResult = read(reinterpret_cast<std::byte*>(utf8Data.data() + 1), bytesToRead, extraAttempts); drainResult.status = utf8ReadResult.status; drainResult.bytesDrained += utf8ReadResult.bytesDrained; @@ -773,6 +784,7 @@ void BufferedRegularFile::drainReadBuffer(std::byte*& sink, size_t& length, Drai } } +// NOLINTNEXTLINE(readability-convert-member-functions-to-static,cppcoreguidelines-rvalue-*) FudStatus BufferedRegularFile::setBuffer(Vector<std::byte>&& buffer, bool discardOldBuffer) { static_cast<void>(buffer); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0a1a1e7..cdc8c6e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -82,3 +82,5 @@ fud_add_test(test_string_convert SOURCES test_string_convert.cpp) # fud_add_test(test_ext_array SOURCES # test_ext_array.cpp # test_ext_unique_array.cpp) + +configure_file(nuclides.csv ${CMAKE_CURRENT_BINARY_DIR} COPYONLY) diff --git a/test/test_csv.cpp b/test/test_csv.cpp index cb93a32..65c02ef 100644 --- a/test/test_csv.cpp +++ b/test/test_csv.cpp @@ -138,6 +138,79 @@ TEST(FudCsv, ParseCsvFromFilename) } } +TEST(FudCsv, ParseNuclides) +{ + Vector<StringView> expectedHeaders{}; + auto pushExpected = [&](StringView item) { + auto pushStatus = expectedHeaders.pushBack(StringView{u8"z"}); + fudAssert(pushStatus == FudStatus::Success); + }; + pushExpected(StringView{u8"z"}); + pushExpected(StringView{u8"n"}); + pushExpected(StringView{u8"symbol"}); + pushExpected(StringView{u8"radius"}); + pushExpected(StringView{u8"unc_r"}); + pushExpected(StringView{u8"abundance"}); + pushExpected(StringView{u8"unc_a"}); + pushExpected(StringView{u8"energy_shift"}); + pushExpected(StringView{u8"energy"}); + pushExpected(StringView{u8"unc_e"}); + pushExpected(StringView{u8"ripl_shift"}); + pushExpected(StringView{u8"jp"}); + pushExpected(StringView{u8"half_life"}); + pushExpected(StringView{u8"operator_hl"}); + pushExpected(StringView{u8"unc_hl"}); + pushExpected(StringView{u8"unit_hl"}); + pushExpected(StringView{u8"half_life_sec"}); + pushExpected(StringView{u8"unc_hls"}); + pushExpected(StringView{u8"decay_1"}); + pushExpected(StringView{u8"decay_1_%"}); + pushExpected(StringView{u8"unc_1"}); + pushExpected(StringView{u8"decay_2"}); + pushExpected(StringView{u8"decay_2_%"}); + pushExpected(StringView{u8"unc_2"}); + pushExpected(StringView{u8"decay_3"}); + pushExpected(StringView{u8"decay_3_%"}); + pushExpected(StringView{u8"unc_3"}); + pushExpected(StringView{u8"isospin"}); + pushExpected(StringView{u8"magnetic_dipole"}); + pushExpected(StringView{u8"unc_md"}); + pushExpected(StringView{u8"electric_quadrupole"}); + pushExpected(StringView{u8"unc_eq"}); + pushExpected(StringView{u8"qbm"}); + pushExpected(StringView{u8"unc_qb"}); + pushExpected(StringView{u8"qbm_n"}); + pushExpected(StringView{u8"unc_qbmn"}); + pushExpected(StringView{u8"qa"}); + pushExpected(StringView{u8"unc_qa"}); + pushExpected(StringView{u8"qec"}); + pushExpected(StringView{u8"unc_qec"}); + pushExpected(StringView{u8"sn"}); + pushExpected(StringView{u8"unc_sn"}); + pushExpected(StringView{u8"sp"}); + pushExpected(StringView{u8"unc_sp"}); + pushExpected(StringView{u8"binding"}); + pushExpected(StringView{u8"unc_ba"}); + pushExpected(StringView{u8"atomic_mass"}); + pushExpected(StringView{u8"unc_am"}); + pushExpected(StringView{u8"massexcess"}); + pushExpected(StringView{u8"unc_me"}); + pushExpected(StringView{u8"me_systematics"}); + pushExpected(StringView{u8"discovery"}); + pushExpected(StringView{u8"ENSDFpublicationcut-off"}); + pushExpected(StringView{u8"ENSDFauthors"}); + pushExpected(StringView{u8"Extraction_date"}); + + StringView nuclidesFilename{u8"test/nuclides.csv"}; + Csv csv{Csv::makeDefault()}; + auto parseStatus = Csv::parseFromFilenameUnbuffered(csv, nuclidesFilename); + if (parseStatus != FudStatus::Success) { + debugPrint(u8"Error parsing file: {}\n", FudStatusToString(parseStatus)); + } + ASSERT_EQ(parseStatus, FudStatus::Success); + +} + } // namespace fud //NOLINTEND(readability-magic-numbers) diff --git a/tools/coverage.sh b/tools/coverage.sh index 870e1f9..0790d68 100755 --- a/tools/coverage.sh +++ b/tools/coverage.sh @@ -4,9 +4,9 @@ set -e PROJ_ROOT=$(git rev-parse --show-toplevel) cd $PROJ_ROOT -HTML_DIR=coverage/html +HTML_DIR=build/coverage/html ctest --test-dir build/test -j8 -cd build +# cd build mkdir -p ${HTML_DIR} -gcovr --exclude-throw-branches --exclude _deps/ --exclude ../test -r . --html-details ${HTML_DIR}/gcovr_report.html +gcovr --exclude-throw-branches --exclude build/_deps/ --exclude test -r . --html-details ${HTML_DIR}/gcovr_report.html |