summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Dominick Allen <djallen@librehumanitas.org> 2025-01-02 15:11:51 -0600
committer Dominick Allen <djallen@librehumanitas.org> 2025-01-02 15:11:51 -0600
commit 87071200872c2450c947047350132aee493033c1 (patch)
tree 49109532d9bbd148b4e59043120037684093be33 /include
parent 16379362c02a2472f00fac49cad62788547c9519 (diff)
Get basic CSV parser operating.
Diffstat (limited to 'include')
-rw-r--r-- include/fud_csv.hpp 34
-rw-r--r-- include/fud_file.hpp 10
-rw-r--r-- include/fud_format.hpp 21
-rw-r--r-- include/fud_option.hpp 3
-rw-r--r-- include/fud_permissions.hpp 6
-rw-r--r-- include/fud_print.hpp 11
-rw-r--r-- include/fud_result.hpp 2
-rw-r--r-- include/fud_string_view.hpp 9
-rw-r--r-- include/fud_utf8.hpp 26
-rw-r--r-- include/fud_vector.hpp 35
10 files changed, 129 insertions, 28 deletions
diff --git a/include/fud_csv.hpp b/include/fud_csv.hpp
index efd37e6..38b1b81 100644
--- a/include/fud_csv.hpp
+++ b/include/fud_csv.hpp
@@ -22,6 +22,7 @@
#include "fud_status.hpp"
#include "fud_string_view.hpp"
#include "fud_text.hpp"
+#include "fud_utf8.hpp"
#include "fud_vector.hpp"
#include <functional> // reference_wrapper
@@ -29,52 +30,59 @@
namespace fud {
using TextBuffer = Vector<std::byte>;
-using CsvBuffer = Vector<std::byte>;
-using CsvLine = Vector<StringView>;
+/** \brief CSV contents (entry views over a backing buffer) together with parsing options and the static parse entry points. */
struct Csv {
/** \brief The number of lines of data in the CSV. */
- size_t numLines;
+ size_t numLines{0};
/** \brief The number of columns in the CSV. */
- size_t numColumns;
+ size_t numColumns{0};
/** \brief Buffer for each line with numColumns of StringView. */
- Vector<CsvLine> lines;
+ /* NOTE(review): entries is now one flat vector of views rather than one vector per line; presumably indexed as line * numColumns + column — confirm against the parser. */
+ Vector<StringView> entries;
/** \brief Backing buffer for data. */
- CsvBuffer buffer;
+ Vector<utf8> buffer;
/** \separator for each column */
Utf8 columnDelimiter{Ascii{','}};
+ /** \brief Quote character; defaults to an ASCII double quote. */
+ Utf8 quoteCharacter{Ascii{'"'}};
+
/** \separator for each line */
NewlineRepr newlineDelimiter{NewlineRepr::Posix};
- bool strict;
+ /** \brief Defaults to true. NOTE(review): presumably makes invalid UTF-8 a parse error — confirm against the parser. */
+ bool strictUtf8{true};
+
+ /** \brief Defaults to true. NOTE(review): presumably requires every line to have numColumns entries — confirm against the parser. */
+ bool strictColumns{true};
+
+ /** \brief Defaults to false. NOTE(review): presumably governs how malformed quoting is treated — confirm against the parser. */
+ bool strictQuote{false};
+
+ /** \brief Defaults to false. NOTE(review): presumably skips whitespace that follows a column delimiter — confirm against the parser. */
+ bool skipInitialSpace{false};
/** \brief Uses global Fud allocator for lines and backing buffer. */
static Csv makeDefault();
/** \brief Specify allocator to use for both lines and backing buffer. */
- static Csv makeSingleAllocator(Allocator& allocator);
+ static Csv makeWithSingleAllocator(Allocator& allocator);
/** \brief Specify allocator. */
- static Csv make(Allocator& lineAllocator, Allocator& bufferAllocator);
+ static Csv make(Allocator& entryAllocator, Allocator& bufferAllocator);
/** Consume and return the CSV. */
- static FudStatus parseCsvFromFilename(
+ /* NOTE(review): csv is passed by reference and the return value is a FudStatus; maxExtraAttempts is presumably forwarded to the underlying file reads — confirm against the implementation. */
+ static FudStatus parseFromFilename(
Csv& csv,
Option<TextBuffer&&> bufferOption,
StringView filename,
OpenFlags flags = OpenFlags{},
- Option<int> dirFdOption = NullOpt);
+ Option<int> dirFdOption = NullOpt,
+ Option<size_t> maxExtraAttempts = NullOpt);
// assumes file is at start
- static FudStatus parseCsvFromUnbufferedFile(Csv& csv, RegularFile&& file);
+ static FudStatus parseFromUnbufferedFile(Csv& csv, RegularFile&& file, Option<size_t> maxExtraAttempts);
// assumes file is at start
- static FudStatus parseCsvFromBufferedFile(Csv& csv, BufferedRegularFile& file);
+ static FudStatus parseFromBufferedFile(Csv& csv, BufferedRegularFile& file, Option<size_t> maxExtraAttempts);
};
} // namespace fud
diff --git a/include/fud_file.hpp b/include/fud_file.hpp
index e7c485c..6f1acbf 100644
--- a/include/fud_file.hpp
+++ b/include/fud_file.hpp
@@ -38,8 +38,7 @@ enum class FileAccessMode : uint8_t
ReadWrite = Read | Write
};
-// enum class OpenFlagEnum : uint32_t
-enum class OpenFlagEnum : uint8_t
+enum class OpenFlagEnum : uint16_t
{
Append = 0x01,
Truncate = Append << 1,
@@ -207,6 +206,9 @@ class BufferedRegularFile {
/** \brief Read from file as source to sink. */
DrainResult read(std::byte* sink, size_t length, Option<size_t> maxExtraAttempts);
+ /** \brief Attempt to read one UTF8 sequence. */
+ DrainResult readUtf8(Utf8& sink, Option<size_t> maxExtraAttempts);
+
FudStatus setBuffer(Vector<std::byte>&& buffer, bool discardOldBuffer);
DrainResult flush(size_t maxExtraAttempts = 0);
@@ -261,6 +263,10 @@ class BufferedRegularFile {
};
Operation m_lastOperation{Operation::None};
+
+ DrainResult validateBufferedIO(const std::byte* pointer, Operation requestedOperation);
+
+ void drainReadBuffer(std::byte*& sink, size_t& length, DrainResult& result);
};
} // namespace fud
diff --git a/include/fud_format.hpp b/include/fud_format.hpp
index 2102dc9..9be3dd9 100644
--- a/include/fud_format.hpp
+++ b/include/fud_format.hpp
@@ -32,8 +32,10 @@
#include <concepts>
#include <cstdint>
+#include <format>
#include <limits>
#include <variant>
+#include <cstdio>
namespace fud {
@@ -716,21 +718,24 @@ FudStatus fillSignedBuffer(IntCharArray& buffer, T value, uint8_t& bufferLength,
{
static_assert(sizeof(T) <= sizeof(uint64_t));
static_assert(std::is_signed_v<T>);
+ uint64_t unsignedValue{};
if (value < 0) {
value++;
- value = -value;
- value++;
+ unsignedValue = static_cast<uint64_t>(-value);
+ unsignedValue++;
+ } else {
+ unsignedValue = static_cast<uint64_t>(value);
}
if constexpr (std::is_same_v<T, char>) {
- return fillUnsignedBuffer(buffer, static_cast<uint8_t>(value), bufferLength, radix, uppercase);
+ return fillUnsignedBuffer(buffer, static_cast<uint8_t>(unsignedValue), bufferLength, radix, uppercase);
} else if constexpr (std::is_same_v<T, int8_t>) {
- return fillUnsignedBuffer(buffer, static_cast<uint8_t>(value), bufferLength, radix, uppercase);
+ return fillUnsignedBuffer(buffer, static_cast<uint8_t>(unsignedValue), bufferLength, radix, uppercase);
} else if constexpr (std::is_same_v<T, int16_t>) {
- return fillUnsignedBuffer(buffer, static_cast<uint16_t>(value), bufferLength, radix, uppercase);
+ return fillUnsignedBuffer(buffer, static_cast<uint16_t>(unsignedValue), bufferLength, radix, uppercase);
} else if constexpr (std::is_same_v<T, int32_t>) {
- return fillUnsignedBuffer(buffer, static_cast<uint32_t>(value), bufferLength, radix, uppercase);
+ return fillUnsignedBuffer(buffer, static_cast<uint32_t>(unsignedValue), bufferLength, radix, uppercase);
} else if constexpr (std::is_same_v<T, int64_t>) {
- return fillUnsignedBuffer(buffer, static_cast<uint64_t>(value), bufferLength, radix, uppercase);
+ return fillUnsignedBuffer(buffer, static_cast<uint64_t>(unsignedValue), bufferLength, radix, uppercase);
}
}
@@ -1441,6 +1446,8 @@ FormatResult format(Sink& sink, FormatCharMode formatMode, const FormatSpec& for
return result;
}
+ // printf("From format(sink, mode, spec, stringview arg): Arg contents are %p %zu?\n\n", arg.c_str(), arg.length());
+ // printf("From format(sink, mode, spec, stringview arg): What?\n%s\n", std::format("{}", std::string_view{arg.c_str(), arg.length()}).c_str());
auto drainViewResult = sink.drain(arg);
result.bytesDrained += drainViewResult.bytesDrained;
result.status = drainViewResult.status;
diff --git a/include/fud_option.hpp b/include/fud_option.hpp
index 3b0eb1b..af2fcd3 100644
--- a/include/fud_option.hpp
+++ b/include/fud_option.hpp
@@ -229,7 +229,8 @@ class Option {
m_data.clear();
}
- alignas(alignof(T)) option_detail::DataArray<Size> m_data{};
+ static constexpr auto Align = std::max(alignof(T), alignof(std::reference_wrapper<T>));
+ alignas(Align) option_detail::DataArray<Size> m_data{};
bool m_engaged;
};
diff --git a/include/fud_permissions.hpp b/include/fud_permissions.hpp
index 66eec0c..d79fe79 100644
--- a/include/fud_permissions.hpp
+++ b/include/fud_permissions.hpp
@@ -186,6 +186,12 @@ constexpr PermissionField operator|(PermissionField lhs, PermissionField rhs)
constexpr PermissionField PermReadWrite = PermissionType::Read | PermissionType::Write;
+// NOTE(review): the three fields are assumed to be ordered user, group, other, which is what
+// the constant names imply — confirm against the Permissions constructor.
+
+/** \brief Read and write for user, group, and other. */
+constexpr Permissions PermAllReadWrite{PermReadWrite, PermReadWrite, PermReadWrite};
+
+/** \brief Read and write for user and group; no permissions for other. */
+constexpr Permissions PermUserGroupReadWrite{PermReadWrite, PermReadWrite, PermissionField{PermissionType::None}};
+
+/** \brief Read and write for user, read-only for group; no permissions for other. */
+constexpr Permissions PermUserRwGroupRead{PermReadWrite, PermissionField{PermissionType::Read}, PermissionField{PermissionType::None}};
+
} // namespace fud
#endif
diff --git a/include/fud_print.hpp b/include/fud_print.hpp
index 592b106..a8e61de 100644
--- a/include/fud_print.hpp
+++ b/include/fud_print.hpp
@@ -35,6 +35,17 @@ FormatResult print(FormatString fmt, Args&&... args)
return format(outSink, FormatCharMode::Unchecked, fmt, std::forward<Args>(args)...);
}
+/** \brief Print a format string that takes no arguments; the FormatResult from print() is dropped. */
+inline void debugPrint(FormatString fmt)
+{
+    [[maybe_unused]] const auto ignoredResult = print(fmt);
+}
+
+/** \brief Print a format string with forwarded arguments; the FormatResult from print() is dropped. */
+template <typename... Args>
+void debugPrint(FormatString fmt, Args&&... args)
+{
+    [[maybe_unused]] const auto ignoredResult = print(fmt, std::forward<Args>(args)...);
+}
+
} // namespace fud
#endif
diff --git a/include/fud_result.hpp b/include/fud_result.hpp
index b91a31a..0f501e8 100644
--- a/include/fud_result.hpp
+++ b/include/fud_result.hpp
@@ -307,7 +307,7 @@ class [[nodiscard]] Result {
static constexpr auto Size = std::max(sizeof(T), sizeof(E));
static constexpr auto Align = std::max(alignof(T), alignof(E));
- option_detail::DataArray<Size> m_data{};
+ alignas(Align) option_detail::DataArray<Size> m_data{};
enum class Discriminant : uint8_t
{
diff --git a/include/fud_string_view.hpp b/include/fud_string_view.hpp
index 6403c27..327bf20 100644
--- a/include/fud_string_view.hpp
+++ b/include/fud_string_view.hpp
@@ -68,10 +68,10 @@ struct StringView {
explicit StringView(const String& fudString) noexcept;
+ /** \brief Build a view over a char array whose length is the array extent minus one. */
template <size_t N>
- constexpr static StringView cStringView(const char (&input)[N])
+ constexpr static StringView makeFromCString(const char (&input)[N])
{
static_assert(N > 0);
- return StringView{N, reinterpret_cast<const utf8*>(input)};
+ // N counts every array element; for a string literal the last element is the NUL terminator, so it is left out of the view.
+ return StringView{N - 1, reinterpret_cast<const utf8*>(input)};
}
[[nodiscard]] constexpr size_t length() const
@@ -84,6 +84,11 @@ struct StringView {
return m_data;
}
+    /**
+     * \brief Return the view's data pointer reinterpreted as `const char*`.
+     *
+     * Only the pointer type is converted: nothing here appends or checks for a
+     * NUL terminator, so callers should pair the result with length() rather
+     * than treating it as a C string unless they know the viewed data is
+     * terminated.
+     */
+    [[nodiscard]] const char* c_str() const
+    {
+        return reinterpret_cast<const char*>(m_data);
+    }
+
constexpr const utf8& operator[](size_t index) const
{
if constexpr (fudBoundsChecking) {
diff --git a/include/fud_utf8.hpp b/include/fud_utf8.hpp
index 119640c..030164d 100644
--- a/include/fud_utf8.hpp
+++ b/include/fud_utf8.hpp
@@ -275,12 +275,37 @@ enum class Utf8Type : uint8_t
Utf82Byte,
Utf83Byte,
Utf84Byte,
+ Invalid
};
static_assert(Utf8TypeSet.m_values[0] == static_cast<uint8_t>(Utf8Type::Ascii));
static_assert(Utf8TypeSet.m_values[1] == static_cast<uint8_t>(Utf8Type::Utf82Byte));
static_assert(Utf8TypeSet.m_values[2] == static_cast<uint8_t>(Utf8Type::Utf83Byte));
static_assert(Utf8TypeSet.m_values[3] == static_cast<uint8_t>(Utf8Type::Utf84Byte));
+/*
+| B | E | Byte 1 | Byte 2 | Byte 3 | Byte 4
+| U+0000 | U+007F | 0xxxxxxx | | |
+| U+0080 | U+07FF | 110xxxxx | 10xxxxxx | |
+| U+0800 | U+FFFF | 1110xxxx | 10xxxxxx | 10xxxxxx |
+| U+10000 | U+10FFFF | 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx
+*/
+/**
+ * \brief Classify a byte by its high-order bits as the first byte of a UTF-8 sequence.
+ *
+ * \param input The byte to inspect.
+ * \return Ascii, Utf82Byte, Utf83Byte or Utf84Byte when the leading bits match
+ *         0, 110, 1110 or 11110 respectively; Invalid otherwise.
+ */
+constexpr Utf8Type utf8TypeFromByte(utf8 input)
+{
+    // Tested from the shortest prefix to the longest; the first match wins.
+    return ((input >> 7) == 0)         ? Utf8Type::Ascii
+           : ((input >> 5) == 0b110)   ? Utf8Type::Utf82Byte
+           : ((input >> 4) == 0b1110)  ? Utf8Type::Utf83Byte
+           : ((input >> 3) == 0b11110) ? Utf8Type::Utf84Byte
+                                       : Utf8Type::Invalid;
+}
+
struct Utf8 {
Utf8Variant m_variant{Utf8Variant{Ascii{}}};
@@ -445,6 +470,7 @@ struct Utf8 {
static_cast<int64_t>(std::get<Utf84Byte>(m_variant).characters[1]) << TwoByteShift |
static_cast<int64_t>(std::get<Utf84Byte>(m_variant).characters[2]) << OneByteShift |
static_cast<int64_t>(std::get<Utf84Byte>(m_variant).characters[3]);
+ case Utf8Type::Invalid:
default: // unlikely
return -1;
}
diff --git a/include/fud_vector.hpp b/include/fud_vector.hpp
index 9159770..1730c50 100644
--- a/include/fud_vector.hpp
+++ b/include/fud_vector.hpp
@@ -59,16 +59,21 @@ class Vector {
+ /** \brief Move-assign: release this vector's storage, take over rhs's allocator, data, length and capacity, and leave rhs empty. Self-assignment is a no-op. */
Vector& operator=(Vector<T>&& rhs) noexcept
{
- cleanup();
+ if (&rhs == this) {
+ return *this;
+ }
+ static_cast<void>(cleanup());
m_allocator = rhs.m_allocator;
m_data = rhs.m_data;
m_length = rhs.m_length;
- m_capacity = rhs.m_length;
+ // Take the real capacity of the adopted storage; copying the length here would under-report it.
+ m_capacity = rhs.m_capacity;
- rhs.m_allocataor = nullptr;
+ rhs.m_allocator = nullptr;
rhs.m_data = nullptr;
rhs.m_length = 0;
rhs.m_capacity = 0;
+
+ return *this;
}
static constexpr Vector<T> NullVector() noexcept {
@@ -629,6 +634,32 @@ class Vector {
return FudStatus::Success;
}
+    /**
+     * \brief Append a copy of every element of a fixed-size span to the vector.
+     *
+     * \param fixedSpan Elements to copy-construct at the end of the vector.
+     * \retval FudStatus::NullPointer fixedSpan.data() is null.
+     * \retval FudStatus::Failure m_length + Size would overflow size_t, or grow()
+     *         reported success without increasing the capacity.
+     * \retval FudStatus::Success All Size elements were appended.
+     * \return Otherwise, the non-success status reported by grow().
+     */
+    template <size_t Size>
+    FudStatus extend(Span<const T, Size> fixedSpan)
+    {
+        if (fixedSpan.data() == nullptr) {
+            return FudStatus::NullPointer;
+        }
+        if (std::numeric_limits<size_t>::max() - Size < m_length) {
+            return FudStatus::Failure;
+        }
+        // grow() takes no size hint, so a single call is not guaranteed to make room
+        // for Size more elements. Keep growing until there is room, and stop if a
+        // call makes no progress so the loop cannot spin forever.
+        while (m_length + Size > m_capacity) {
+            const auto previousCapacity = m_capacity;
+            const auto status = grow();
+            if (status != FudStatus::Success) {
+                return status;
+            }
+            if (m_capacity <= previousCapacity) {
+                return FudStatus::Failure;
+            }
+        }
+
+        for (size_t spanIndex = 0; spanIndex < Size; ++spanIndex) {
+            const auto* ptr = new (m_data + m_length) T(fixedSpan[spanIndex]);
+            fudAssert(ptr != nullptr);
+            m_length++;
+        }
+
+        return FudStatus::Success;
+    }
+
FudStatus erase(size_t index)
{
if (index >= m_length) {