More fixups to csv logic.

author: Dominick Allen <djallen@librehumanitas.org> 2025-01-03 00:08:58 -0600
committer: Dominick Allen <djallen@librehumanitas.org> 2025-01-03 00:08:58 -0600
commit: d93307d810b3f4ee8044f7308e360d9ea9c7cf22 (patch)
tree: 08cfe9ae27c2a9d5bd11881089b9a3333a3c11b8
parent: 1ac94c8aff47b549f30b370be2191bcc0157826c (diff)
6 files changed, 116 insertions, 11 deletions
diff --git a/include/fud_file.hpp b/include/fud_file.hpp
index 66719e4..bf1fe37 100644
--- a/include/fud_file.hpp
+++ b/include/fud_file.hpp
@@ -158,6 +158,10 @@ class RegularFile {
 
     FudStatus seek(size_t position);
 
+    [[nodiscard]] constexpr size_t position() const { // Read-only accessor; returns m_position unchanged.
+        return m_position; // NOTE(review): presumably the offset maintained by seek()/read()/write() - confirm.
+    }
+
     /** \brief Write from source to file as sink. */
     DrainResult write(const std::byte* source, size_t length, size_t maxExtraAttempts = 0);
 
diff --git a/source/fud_csv.cpp b/source/fud_csv.cpp
index bcef925..260a4ff 100644
--- a/source/fud_csv.cpp
+++ b/source/fud_csv.cpp
@@ -144,7 +144,10 @@ FudStatus Csv::parseFromUnbufferedFile(Csv& csv, RegularFile&& file, size_t maxE
     static_cast<void>(csv);
     constexpr size_t BufferSize = 256;
     SimpleStackAllocator<BufferSize> stackAllocator{};
-    auto bufferedFile{BufferedRegularFile::make(std::move(file), TextBuffer{stackAllocator})};
+    TextBuffer textBuffer{stackAllocator};
+    auto resizeResult = textBuffer.resize(BufferSize);
+    fudAssert(resizeResult == FudStatus::Success);
+    auto bufferedFile{BufferedRegularFile::make(std::move(file), std::move(textBuffer))};
     return parseFromBufferedFile(csv, bufferedFile, maxExtraAttempts);
 }
 
@@ -222,6 +225,7 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t
     bool sawQuote{false};
     bool addToSize{};
     size_t numColumns{0};
+    size_t lineLength{0};
 
     while (not endOfLine) {
         addToSize = false;
@@ -239,7 +243,7 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t
         }
 
         if (maybeNewline and utf8Char != Utf8{Ascii{lineEnding[1]}}) {
-            rawSize += 2;
+            lineLength += 2;
             maybeNewline = false;
         } else if (inQuote and utf8Char == csv.quoteCharacter) {
             inQuote = false;
@@ -270,7 +274,7 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t
         }
 
         if (addToSize and numColumns < csv.numColumns) {
-            rawSize += utf8Char.size();
+            lineLength += utf8Char.size();
         }
 
         if (numColumns > csv.numColumns and csv.strictColumns) {
@@ -279,6 +283,11 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t
         }
     }
 
+    rawSize += lineLength;
+    if (numColumns > 0 && lineLength == 0) {
+        numColumns = 0;
+    }
+
     if (numColumns == 0) {
         readResult.status = FudStatus::Empty;
         return readResult;
@@ -297,6 +306,11 @@ FudStatus fillBuffer(Csv& csv, File& file, size_t maxExtraAttempts, size_t rawSi
 {
     static_cast<void>(rawSize);
 
+    auto flushResult = file.flush();
+    if (flushResult.status != FudStatus::Success) {
+        return flushResult.status;
+    }
+
     auto seekStatus = file.seekStart();
     if (seekStatus != FudStatus::Success) {
         return seekStatus;
diff --git a/source/fud_file.cpp b/source/fud_file.cpp
index ca6404d..7219638 100644
--- a/source/fud_file.cpp
+++ b/source/fud_file.cpp
@@ -28,6 +28,7 @@
 
 namespace fud {
 
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
 FileResult RegularFile::open(StringView filename, FileAccessMode mode, OpenFlags flags, Option<int> dirFdOption)
 {
     if (!filename.nullTerminated()) {
@@ -64,6 +65,8 @@ FileResult RegularFile::open(StringView filename, FileAccessMode mode, OpenFlags
     openHow.flags = openFlags;
     openHow.resolve = RESOLVE_NO_SYMLINKS;
 
+    
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
     auto status = syscall(SYS_openat2, dirFd, filename.data(), &openHow, sizeof(openHow));
     if (status == -1) {
         if constexpr (EAGAIN != EWOULDBLOCK && status == EWOULDBLOCK) {
@@ -119,6 +122,7 @@ FileResult RegularFile::create(
     OpenFlags flags,
     Permissions permissions,
     bool createOnly,
+    // NOLINTNEXTLINE(performance-unnecessary-value-param)
     Option<int> dirFdOption)
 {
     if (!filename.nullTerminated()) {
@@ -156,6 +160,7 @@ FileResult RegularFile::create(
     openHow.resolve = RESOLVE_NO_SYMLINKS;
     openHow.mode = permissions.mode();
 
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
     auto status = syscall(SYS_openat2, dirFd, filename.data(), &openHow, sizeof(openHow));
     if (status == -1) {
         if constexpr (EAGAIN != EWOULDBLOCK && status == EWOULDBLOCK) {
@@ -580,6 +585,7 @@ DrainResult BufferedRegularFile::validateBufferedIO(const std::byte* pointer, Op
     return result;
 }
 
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
 DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, Option<size_t> maxExtraAttempts)
 {
     DrainResult result{validateBufferedIO(source, Operation::Write)};
@@ -593,10 +599,7 @@ DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, O
 
     if (m_bufferLength < m_buffer.size()) {
         auto cap = m_buffer.size() - m_bufferLength;
-        size_t count = length;
-        if (count > cap) {
-            count = cap;
-        }
+        size_t count = fud::min(length, cap);
         auto copyStatus = copyMem(m_buffer.data() + m_bufferLength, cap, source, count);
         fudAssert(copyStatus == FudStatus::Success);
 
@@ -641,6 +644,7 @@ DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, O
     return result;
 }
 
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
 DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<size_t> maxExtraAttempts)
 {
     auto extraAttempts = maxExtraAttempts.valueOr(0);
@@ -693,6 +697,10 @@ DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<siz
     auto drainResult = m_file.read(m_buffer.data(), m_buffer.size(), extraAttempts);
     result.status = drainResult.status;
     if (drainResult.status == FudStatus::Success || drainResult.status == FudStatus::Partial) {
+        if (drainResult.status == FudStatus::Partial && drainResult.bytesDrained >= length) {
+            result.status = FudStatus::Success;
+        }
+
         m_bufferLength = drainResult.bytesDrained;
 
         auto count = min(length, m_bufferLength);
@@ -704,7 +712,7 @@ DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<siz
         length -= count;
 
         if (drainResult.status == FudStatus::Partial && length == 0) {
-            drainResult.status = FudStatus::Success;
+            result.status = FudStatus::Success;
         }
 
         m_bufferPosition = count;
@@ -715,10 +723,12 @@ DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<siz
 }
 
 /** \brief Attempt to read one UTF8 sequence. */
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
 DrainResult BufferedRegularFile::readUtf8(Utf8& sink, Option<size_t> maxExtraAttempts)
 {
     size_t extraAttempts{maxExtraAttempts.valueOr(0)};
     Array<utf8, 4> utf8Data{};
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
     auto drainResult = read(reinterpret_cast<std::byte*>(utf8Data.data()), 1, maxExtraAttempts);
     if (drainResult.status != FudStatus::Success) {
         return drainResult;
@@ -746,6 +756,7 @@ DrainResult BufferedRegularFile::readUtf8(Utf8& sink, Option<size_t> maxExtraAtt
     }
 
     if (bytesToRead > 0) {
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
         auto utf8ReadResult = read(reinterpret_cast<std::byte*>(utf8Data.data() + 1), bytesToRead, extraAttempts);
         drainResult.status = utf8ReadResult.status;
         drainResult.bytesDrained += utf8ReadResult.bytesDrained;
@@ -773,6 +784,7 @@ void BufferedRegularFile::drainReadBuffer(std::byte*& sink, size_t& length, Drai
     }
 }
 
+// NOLINTNEXTLINE(readability-convert-member-functions-to-static,cppcoreguidelines-rvalue-*)
 FudStatus BufferedRegularFile::setBuffer(Vector<std::byte>&& buffer, bool discardOldBuffer)
 {
     static_cast<void>(buffer);
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 0a1a1e7..cdc8c6e 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -82,3 +82,5 @@ fud_add_test(test_string_convert SOURCES test_string_convert.cpp)
 # fud_add_test(test_ext_array SOURCES
 #     test_ext_array.cpp
 #     test_ext_unique_array.cpp)
+
+configure_file(nuclides.csv ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
diff --git a/test/test_csv.cpp b/test/test_csv.cpp
index cb93a32..65c02ef 100644
--- a/test/test_csv.cpp
+++ b/test/test_csv.cpp
@@ -138,6 +138,79 @@ TEST(FudCsv, ParseCsvFromFilename)
     }
 }
 
+TEST(FudCsv, ParseNuclides)
+{
+    Vector<StringView> expectedHeaders{};
+    auto pushExpected = [&](StringView item) { // Appends the given header name to expectedHeaders.
+        auto pushStatus = expectedHeaders.pushBack(StringView{item}); // Push `item` itself, not a fixed "z" literal.
+        fudAssert(pushStatus == FudStatus::Success); // Every push is expected to succeed while building the fixture.
+    };
+    pushExpected(StringView{u8"z"});
+    pushExpected(StringView{u8"n"});
+    pushExpected(StringView{u8"symbol"});
+    pushExpected(StringView{u8"radius"});
+    pushExpected(StringView{u8"unc_r"});
+    pushExpected(StringView{u8"abundance"});
+    pushExpected(StringView{u8"unc_a"});
+    pushExpected(StringView{u8"energy_shift"});
+    pushExpected(StringView{u8"energy"});
+    pushExpected(StringView{u8"unc_e"});
+    pushExpected(StringView{u8"ripl_shift"});
+    pushExpected(StringView{u8"jp"});
+    pushExpected(StringView{u8"half_life"});
+    pushExpected(StringView{u8"operator_hl"});
+    pushExpected(StringView{u8"unc_hl"});
+    pushExpected(StringView{u8"unit_hl"});
+    pushExpected(StringView{u8"half_life_sec"});
+    pushExpected(StringView{u8"unc_hls"});
+    pushExpected(StringView{u8"decay_1"});
+    pushExpected(StringView{u8"decay_1_%"});
+    pushExpected(StringView{u8"unc_1"});
+    pushExpected(StringView{u8"decay_2"});
+    pushExpected(StringView{u8"decay_2_%"});
+    pushExpected(StringView{u8"unc_2"});
+    pushExpected(StringView{u8"decay_3"});
+    pushExpected(StringView{u8"decay_3_%"});
+    pushExpected(StringView{u8"unc_3"});
+    pushExpected(StringView{u8"isospin"});
+    pushExpected(StringView{u8"magnetic_dipole"});
+    pushExpected(StringView{u8"unc_md"});
+    pushExpected(StringView{u8"electric_quadrupole"});
+    pushExpected(StringView{u8"unc_eq"});
+    pushExpected(StringView{u8"qbm"});
+    pushExpected(StringView{u8"unc_qb"});
+    pushExpected(StringView{u8"qbm_n"});
+    pushExpected(StringView{u8"unc_qbmn"});
+    pushExpected(StringView{u8"qa"});
+    pushExpected(StringView{u8"unc_qa"});
+    pushExpected(StringView{u8"qec"});
+    pushExpected(StringView{u8"unc_qec"});
+    pushExpected(StringView{u8"sn"});
+    pushExpected(StringView{u8"unc_sn"});
+    pushExpected(StringView{u8"sp"});
+    pushExpected(StringView{u8"unc_sp"});
+    pushExpected(StringView{u8"binding"});
+    pushExpected(StringView{u8"unc_ba"});
+    pushExpected(StringView{u8"atomic_mass"});
+    pushExpected(StringView{u8"unc_am"});
+    pushExpected(StringView{u8"massexcess"});
+    pushExpected(StringView{u8"unc_me"});
+    pushExpected(StringView{u8"me_systematics"});
+    pushExpected(StringView{u8"discovery"});
+    pushExpected(StringView{u8"ENSDFpublicationcut-off"});
+    pushExpected(StringView{u8"ENSDFauthors"});
+    pushExpected(StringView{u8"Extraction_date"});
+
+    StringView nuclidesFilename{u8"test/nuclides.csv"};
+    Csv csv{Csv::makeDefault()};
+    auto parseStatus = Csv::parseFromFilenameUnbuffered(csv, nuclidesFilename);
+    if (parseStatus != FudStatus::Success) {
+        debugPrint(u8"Error parsing file: {}\n", FudStatusToString(parseStatus));
+    }
+    ASSERT_EQ(parseStatus, FudStatus::Success);
+
+}
+
 } // namespace fud
 
 //NOLINTEND(readability-magic-numbers)
diff --git a/tools/coverage.sh b/tools/coverage.sh
index 870e1f9..0790d68 100755
--- a/tools/coverage.sh
+++ b/tools/coverage.sh
@@ -4,9 +4,9 @@ set -e
 PROJ_ROOT=$(git rev-parse --show-toplevel)
 cd $PROJ_ROOT
 
-HTML_DIR=coverage/html
+HTML_DIR=build/coverage/html
 
 ctest --test-dir build/test -j8
-cd build
+# cd build
 mkdir -p ${HTML_DIR}
-gcovr --exclude-throw-branches --exclude _deps/ --exclude ../test -r  . --html-details ${HTML_DIR}/gcovr_report.html
+gcovr --exclude-throw-branches --exclude build/_deps/ --exclude test -r  . --html-details ${HTML_DIR}/gcovr_report.html
author	Dominick Allen <djallen@librehumanitas.org>	2025-01-03 00:08:58 -0600
committer	Dominick Allen <djallen@librehumanitas.org>	2025-01-03 00:08:58 -0600
commit	d93307d810b3f4ee8044f7308e360d9ea9c7cf22 (patch)
tree	08cfe9ae27c2a9d5bd11881089b9a3333a3c11b8
parent	1ac94c8aff47b549f30b370be2191bcc0157826c (diff)