summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dominick Allen <djallen@librehumanitas.org> 2025-01-03 00:08:58 -0600
committer: Dominick Allen <djallen@librehumanitas.org> 2025-01-03 00:08:58 -0600
commit: d93307d810b3f4ee8044f7308e360d9ea9c7cf22 (patch)
tree: 08cfe9ae27c2a9d5bd11881089b9a3333a3c11b8
parent: 1ac94c8aff47b549f30b370be2191bcc0157826c (diff)
More fixups to csv logic.
-rw-r--r-- include/fud_file.hpp | 4
-rw-r--r-- source/fud_csv.cpp | 20
-rw-r--r-- source/fud_file.cpp | 22
-rw-r--r-- test/CMakeLists.txt | 2
-rw-r--r-- test/test_csv.cpp | 73
-rwxr-xr-x tools/coverage.sh | 6
6 files changed, 116 insertions, 11 deletions
diff --git a/include/fud_file.hpp b/include/fud_file.hpp
index 66719e4..bf1fe37 100644
--- a/include/fud_file.hpp
+++ b/include/fud_file.hpp
@@ -158,6 +158,10 @@ class RegularFile {
FudStatus seek(size_t position);
+ [[nodiscard]] constexpr size_t position() const { // Read-only accessor: returns m_position unchanged.
+ return m_position;
+ }
+
/** \brief Write from source to file as sink. */
DrainResult write(const std::byte* source, size_t length, size_t maxExtraAttempts = 0);
diff --git a/source/fud_csv.cpp b/source/fud_csv.cpp
index bcef925..260a4ff 100644
--- a/source/fud_csv.cpp
+++ b/source/fud_csv.cpp
@@ -144,7 +144,10 @@ FudStatus Csv::parseFromUnbufferedFile(Csv& csv, RegularFile&& file, size_t maxE
static_cast<void>(csv);
constexpr size_t BufferSize = 256;
SimpleStackAllocator<BufferSize> stackAllocator{};
- auto bufferedFile{BufferedRegularFile::make(std::move(file), TextBuffer{stackAllocator})};
+ TextBuffer textBuffer{stackAllocator};
+ auto resizeResult = textBuffer.resize(BufferSize);
+ fudAssert(resizeResult == FudStatus::Success);
+ auto bufferedFile{BufferedRegularFile::make(std::move(file), std::move(textBuffer))};
return parseFromBufferedFile(csv, bufferedFile, maxExtraAttempts);
}
@@ -222,6 +225,7 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t
bool sawQuote{false};
bool addToSize{};
size_t numColumns{0};
+ size_t lineLength{0};
while (not endOfLine) {
addToSize = false;
@@ -239,7 +243,7 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t
}
if (maybeNewline and utf8Char != Utf8{Ascii{lineEnding[1]}}) {
- rawSize += 2;
+ lineLength += 2;
maybeNewline = false;
} else if (inQuote and utf8Char == csv.quoteCharacter) {
inQuote = false;
@@ -270,7 +274,7 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t
}
if (addToSize and numColumns < csv.numColumns) {
- rawSize += utf8Char.size();
+ lineLength += utf8Char.size();
}
if (numColumns > csv.numColumns and csv.strictColumns) {
@@ -279,6 +283,11 @@ DrainResult scanLine(const Csv& csv, File& file, size_t maxExtraAttempts, size_t
}
}
+ rawSize += lineLength;
+ if (numColumns > 0 && lineLength == 0) {
+ numColumns = 0;
+ }
+
if (numColumns == 0) {
readResult.status = FudStatus::Empty;
return readResult;
@@ -297,6 +306,11 @@ FudStatus fillBuffer(Csv& csv, File& file, size_t maxExtraAttempts, size_t rawSi
{
static_cast<void>(rawSize);
+ auto flushResult = file.flush();
+ if (flushResult.status != FudStatus::Success) {
+ return flushResult.status;
+ }
+
auto seekStatus = file.seekStart();
if (seekStatus != FudStatus::Success) {
return seekStatus;
diff --git a/source/fud_file.cpp b/source/fud_file.cpp
index ca6404d..7219638 100644
--- a/source/fud_file.cpp
+++ b/source/fud_file.cpp
@@ -28,6 +28,7 @@
namespace fud {
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
FileResult RegularFile::open(StringView filename, FileAccessMode mode, OpenFlags flags, Option<int> dirFdOption)
{
if (!filename.nullTerminated()) {
@@ -64,6 +65,8 @@ FileResult RegularFile::open(StringView filename, FileAccessMode mode, OpenFlags
openHow.flags = openFlags;
openHow.resolve = RESOLVE_NO_SYMLINKS;
+
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
auto status = syscall(SYS_openat2, dirFd, filename.data(), &openHow, sizeof(openHow));
if (status == -1) {
if constexpr (EAGAIN != EWOULDBLOCK && status == EWOULDBLOCK) {
@@ -119,6 +122,7 @@ FileResult RegularFile::create(
OpenFlags flags,
Permissions permissions,
bool createOnly,
+ // NOLINTNEXTLINE(performance-unnecessary-value-param)
Option<int> dirFdOption)
{
if (!filename.nullTerminated()) {
@@ -156,6 +160,7 @@ FileResult RegularFile::create(
openHow.resolve = RESOLVE_NO_SYMLINKS;
openHow.mode = permissions.mode();
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
auto status = syscall(SYS_openat2, dirFd, filename.data(), &openHow, sizeof(openHow));
if (status == -1) {
if constexpr (EAGAIN != EWOULDBLOCK && status == EWOULDBLOCK) {
@@ -580,6 +585,7 @@ DrainResult BufferedRegularFile::validateBufferedIO(const std::byte* pointer, Op
return result;
}
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, Option<size_t> maxExtraAttempts)
{
DrainResult result{validateBufferedIO(source, Operation::Write)};
@@ -593,10 +599,7 @@ DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, O
if (m_bufferLength < m_buffer.size()) {
auto cap = m_buffer.size() - m_bufferLength;
- size_t count = length;
- if (count > cap) {
- count = cap;
- }
+ size_t count = fud::min(length, cap);
auto copyStatus = copyMem(m_buffer.data() + m_bufferLength, cap, source, count);
fudAssert(copyStatus == FudStatus::Success);
@@ -641,6 +644,7 @@ DrainResult BufferedRegularFile::write(const std::byte* source, size_t length, O
return result;
}
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<size_t> maxExtraAttempts)
{
auto extraAttempts = maxExtraAttempts.valueOr(0);
@@ -693,6 +697,10 @@ DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<siz
auto drainResult = m_file.read(m_buffer.data(), m_buffer.size(), extraAttempts);
result.status = drainResult.status;
if (drainResult.status == FudStatus::Success || drainResult.status == FudStatus::Partial) {
+ if (drainResult.status == FudStatus::Partial && drainResult.bytesDrained >= length) {
+ result.status = FudStatus::Success;
+ }
+
m_bufferLength = drainResult.bytesDrained;
auto count = min(length, m_bufferLength);
@@ -704,7 +712,7 @@ DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<siz
length -= count;
if (drainResult.status == FudStatus::Partial && length == 0) {
- drainResult.status = FudStatus::Success;
+ result.status = FudStatus::Success;
}
m_bufferPosition = count;
@@ -715,10 +723,12 @@ DrainResult BufferedRegularFile::read(std::byte* sink, size_t length, Option<siz
}
/** \brief Attempt to read one UTF8 sequence. */
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
DrainResult BufferedRegularFile::readUtf8(Utf8& sink, Option<size_t> maxExtraAttempts)
{
size_t extraAttempts{maxExtraAttempts.valueOr(0)};
Array<utf8, 4> utf8Data{};
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
auto drainResult = read(reinterpret_cast<std::byte*>(utf8Data.data()), 1, maxExtraAttempts);
if (drainResult.status != FudStatus::Success) {
return drainResult;
@@ -746,6 +756,7 @@ DrainResult BufferedRegularFile::readUtf8(Utf8& sink, Option<size_t> maxExtraAtt
}
if (bytesToRead > 0) {
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
auto utf8ReadResult = read(reinterpret_cast<std::byte*>(utf8Data.data() + 1), bytesToRead, extraAttempts);
drainResult.status = utf8ReadResult.status;
drainResult.bytesDrained += utf8ReadResult.bytesDrained;
@@ -773,6 +784,7 @@ void BufferedRegularFile::drainReadBuffer(std::byte*& sink, size_t& length, Drai
}
}
+// NOLINTNEXTLINE(readability-convert-member-functions-to-static,cppcoreguidelines-rvalue-*)
FudStatus BufferedRegularFile::setBuffer(Vector<std::byte>&& buffer, bool discardOldBuffer)
{
static_cast<void>(buffer);
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 0a1a1e7..cdc8c6e 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -82,3 +82,5 @@ fud_add_test(test_string_convert SOURCES test_string_convert.cpp)
# fud_add_test(test_ext_array SOURCES
# test_ext_array.cpp
# test_ext_unique_array.cpp)
+
+configure_file(nuclides.csv ${CMAKE_CURRENT_BINARY_DIR} COPYONLY)
diff --git a/test/test_csv.cpp b/test/test_csv.cpp
index cb93a32..65c02ef 100644
--- a/test/test_csv.cpp
+++ b/test/test_csv.cpp
@@ -138,6 +138,79 @@ TEST(FudCsv, ParseCsvFromFilename)
}
}
+TEST(FudCsv, ParseNuclides) // Parses the nuclides CSV via Csv::parseFromFilenameUnbuffered and requires FudStatus::Success.
+{
+ Vector<StringView> expectedHeaders{}; // NOTE(review): populated below but not yet compared against the parsed csv.
+ auto pushExpected = [&](StringView item) { // Appends one expected header name; asserts the push succeeded.
+ auto pushStatus = expectedHeaders.pushBack(item); // Push the caller's header, not a fixed literal.
+ fudAssert(pushStatus == FudStatus::Success);
+ };
+ pushExpected(StringView{u8"z"});
+ pushExpected(StringView{u8"n"});
+ pushExpected(StringView{u8"symbol"});
+ pushExpected(StringView{u8"radius"});
+ pushExpected(StringView{u8"unc_r"});
+ pushExpected(StringView{u8"abundance"});
+ pushExpected(StringView{u8"unc_a"});
+ pushExpected(StringView{u8"energy_shift"});
+ pushExpected(StringView{u8"energy"});
+ pushExpected(StringView{u8"unc_e"});
+ pushExpected(StringView{u8"ripl_shift"});
+ pushExpected(StringView{u8"jp"});
+ pushExpected(StringView{u8"half_life"});
+ pushExpected(StringView{u8"operator_hl"});
+ pushExpected(StringView{u8"unc_hl"});
+ pushExpected(StringView{u8"unit_hl"});
+ pushExpected(StringView{u8"half_life_sec"});
+ pushExpected(StringView{u8"unc_hls"});
+ pushExpected(StringView{u8"decay_1"});
+ pushExpected(StringView{u8"decay_1_%"});
+ pushExpected(StringView{u8"unc_1"});
+ pushExpected(StringView{u8"decay_2"});
+ pushExpected(StringView{u8"decay_2_%"});
+ pushExpected(StringView{u8"unc_2"});
+ pushExpected(StringView{u8"decay_3"});
+ pushExpected(StringView{u8"decay_3_%"});
+ pushExpected(StringView{u8"unc_3"});
+ pushExpected(StringView{u8"isospin"});
+ pushExpected(StringView{u8"magnetic_dipole"});
+ pushExpected(StringView{u8"unc_md"});
+ pushExpected(StringView{u8"electric_quadrupole"});
+ pushExpected(StringView{u8"unc_eq"});
+ pushExpected(StringView{u8"qbm"});
+ pushExpected(StringView{u8"unc_qb"});
+ pushExpected(StringView{u8"qbm_n"});
+ pushExpected(StringView{u8"unc_qbmn"});
+ pushExpected(StringView{u8"qa"});
+ pushExpected(StringView{u8"unc_qa"});
+ pushExpected(StringView{u8"qec"});
+ pushExpected(StringView{u8"unc_qec"});
+ pushExpected(StringView{u8"sn"});
+ pushExpected(StringView{u8"unc_sn"});
+ pushExpected(StringView{u8"sp"});
+ pushExpected(StringView{u8"unc_sp"});
+ pushExpected(StringView{u8"binding"});
+ pushExpected(StringView{u8"unc_ba"});
+ pushExpected(StringView{u8"atomic_mass"});
+ pushExpected(StringView{u8"unc_am"});
+ pushExpected(StringView{u8"massexcess"});
+ pushExpected(StringView{u8"unc_me"});
+ pushExpected(StringView{u8"me_systematics"});
+ pushExpected(StringView{u8"discovery"});
+ pushExpected(StringView{u8"ENSDFpublicationcut-off"});
+ pushExpected(StringView{u8"ENSDFauthors"});
+ pushExpected(StringView{u8"Extraction_date"});
+
+ StringView nuclidesFilename{u8"test/nuclides.csv"}; // NOTE(review): relative path; confirm it matches the test working directory.
+ Csv csv{Csv::makeDefault()};
+ auto parseStatus = Csv::parseFromFilenameUnbuffered(csv, nuclidesFilename);
+ if (parseStatus != FudStatus::Success) { // Print the status name before the assertion fails, to aid debugging.
+ debugPrint(u8"Error parsing file: {}\n", FudStatusToString(parseStatus));
+ }
+ ASSERT_EQ(parseStatus, FudStatus::Success);
+
+}
+
} // namespace fud
//NOLINTEND(readability-magic-numbers)
diff --git a/tools/coverage.sh b/tools/coverage.sh
index 870e1f9..0790d68 100755
--- a/tools/coverage.sh
+++ b/tools/coverage.sh
@@ -4,9 +4,9 @@ set -e
PROJ_ROOT=$(git rev-parse --show-toplevel)
cd $PROJ_ROOT
-HTML_DIR=coverage/html
+HTML_DIR=build/coverage/html
ctest --test-dir build/test -j8
-cd build
+# cd build
mkdir -p ${HTML_DIR}
-gcovr --exclude-throw-branches --exclude _deps/ --exclude ../test -r . --html-details ${HTML_DIR}/gcovr_report.html
+gcovr --exclude-throw-branches --exclude build/_deps/ --exclude test -r . --html-details ${HTML_DIR}/gcovr_report.html