From cb9fa588ba8144fcdd52ba4b83d69d93fb18066f Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Sun, 30 Mar 2025 23:08:43 -0500 Subject: Add hash map. --- include/fud_hash_map.hpp | 389 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 389 insertions(+) create mode 100644 include/fud_hash_map.hpp (limited to 'include/fud_hash_map.hpp') diff --git a/include/fud_hash_map.hpp b/include/fud_hash_map.hpp new file mode 100644 index 0000000..4333df7 --- /dev/null +++ b/include/fud_hash_map.hpp @@ -0,0 +1,389 @@ +/* + * libfud + * Copyright 2025 Dominick Allen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FUD_HASH_MAP_HPP +#define FUD_HASH_MAP_HPP + +#include "fud_allocator.hpp" +#include "fud_hash.hpp" +#include "fud_option.hpp" +#include "fud_result.hpp" +#include "fud_status.hpp" + +#include + +namespace fud { + +constexpr double hashMapMaxLoadFactor = 0.6; + +/** \brief An open-address hash map using quadratic probing. + * + * Templates on a Hash object to facilitate a variety of hashing function implementations. */ +template > +class HashMap { + public: + struct KeyValuePair { + Key m_key; + Value m_value; + }; + using Node = Option; + static constexpr size_t NodeSize = sizeof(Node); + static constexpr size_t Alignment = alignof(Node); + + constexpr HashMap() noexcept = default; + HashMap(const HashMap&) = delete; + HashMap(HashMap&&) = delete; + HashMap& operator=(const HashMap&) = delete; + HashMap& operator=(HashMap&&) = delete; + constexpr ~HashMap() noexcept + { + static_cast(cleanup()); + } + /** \brief Construct a HashMap explicitly using the specified allocator. */ + constexpr explicit HashMap(Allocator& allocator) noexcept : m_allocator{&allocator} + { + } + + [[nodiscard]] size_t size() noexcept + { + return m_count; + } + + [[nodiscard]] size_t capacity() + { + return m_buckets; + } + + [[nodiscard]] bool contains(const Key& key) const; + [[nodiscard]] bool contains(Key&& key) const; + + FudStatus insert(const Key& key, const Value& value) + { + auto growStatus = checkGrow(); + if (growStatus != FudStatus::Success) { + return growStatus; + } + + auto hashIndexResult = findEmptyBucket(key, m_buckets, m_data); + if (hashIndexResult.isError()) { + return hashIndexResult.takeError(); + } + auto hashIndex{hashIndexResult.takeOkay()}; + m_data[hashIndex].~Node(); + auto* ptr = new (m_data + hashIndex) Node{{key, value}}; + m_count++; + return FudStatus::Success; + } + + FudStatus insert(const Key& key, Value&& value) + { + auto growStatus = checkGrow(); + if (growStatus != FudStatus::Success) { + return growStatus; + } + + auto hashIndexResult = findEmptyBucket(key, m_buckets, m_data); + if (hashIndexResult.isError()) { + return hashIndexResult.takeError(); + } + auto hashIndex{hashIndexResult.takeOkay()}; + m_data[hashIndex].~Node(); + auto* ptr = new (m_data + hashIndex) Node{{key, std::move(value)}}; + m_count++; + return FudStatus::Success; + } + + FudStatus insert(Key&& key, const Value& value) + { + auto growStatus = checkGrow(); + if (growStatus != FudStatus::Success) { + return growStatus; + } + + auto hashIndexResult = findEmptyBucket(key, m_buckets, m_data); + if (hashIndexResult.isError()) { + return hashIndexResult.takeError(); + } + auto hashIndex{hashIndexResult.takeOkay()}; + m_data[hashIndex].~Node(); + auto* ptr = new (m_data + hashIndex) Node{{std::move(key), value}}; + m_count++; + return FudStatus::Success; + } + + FudStatus insert(Key&& key, Value&& value) + { + auto growStatus = checkGrow(); + if (growStatus != FudStatus::Success) { + return growStatus; + } + + auto hashIndexResult = findEmptyBucket(key, m_buckets, m_data); + if (hashIndexResult.isError()) { + return hashIndexResult.takeError(); + } + auto hashIndex{hashIndexResult.takeOkay()}; + m_data[hashIndex].~Node(); + auto* ptr = new (m_data + hashIndex) Node{{std::move(key), std::move(value)}}; + m_count++; + return FudStatus::Success; + } + + FudStatus replace(const Key& key, const Value& value); + FudStatus replace(const Key& key, Value&& value); + FudStatus replace(Key&& key, const Value& value); + FudStatus replace(Key&& key, Value&& value); + + FudStatus remove(const Key& key); + FudStatus remove(Key&& key); + + Option extract(const Key& key); + Option extract(Key& key); + Option extractPair(const Key& key); + Option extractPair(Key& key); + + Option get(const Key& key) const; + Option getRef(const Key& key); + + Option getConstRef(const Key& key) const + { + auto hashIndexOption = lookup(key); + if (hashIndexOption.isNone()) { + return NullOpt; + } + + return m_data[hashIndexOption.value()].value().m_value; + } + + [[nodiscard]] bool empty() const; + + FudStatus clear() + { + if (m_allocator == nullptr || m_data == nullptr) { + if (m_buckets > 0 || m_count > 0) { + return FudStatus::ObjectInvalid; + } + return FudStatus::Success; + } + for (size_t index = 0; index < m_buckets; ++index) { + m_data[index].~Node(); + } + m_count = 0; + return FudStatus::Success; + } + + FudStatus reserve(size_t count) + { + if (count <= m_buckets) { + return FudStatus::Success; + } + + if (m_allocator == nullptr) { + return FudStatus::ObjectInvalid; + } + + if (count > SIZE_MAX / NodeSize) { + return FudStatus::ArgumentInvalid; + } + + size_t requestedSize = count * NodeSize; + auto dataPtrResult = m_allocator->allocate(requestedSize, Alignment); + if (dataPtrResult.isError()) { + return dataPtrResult.takeError(); + } + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + auto* dataPtr = reinterpret_cast(dataPtrResult.takeOkay()); + for (size_t index = 0; index < count; ++index) { + const auto* ptr = new (dataPtr + index) Node(NullOpt); + fudAssert(ptr != nullptr); + } + + for (size_t index = 0; index < m_buckets; ++index) { + if (m_data[index].hasValue()) { + const auto& key = m_data[index].value().m_key; + const auto hash = m_hasher(key, m_seed); + auto newHashIndexResult = findEmptyBucket(key, count, dataPtr); + if (newHashIndexResult.isError()) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + m_allocator->deallocate(reinterpret_cast(dataPtr), requestedSize); + return FudStatus::Failure; + } + const auto newHashIndex{newHashIndexResult.takeOkay()}; + dataPtr[newHashIndex].~Node(); + const auto* ptr = new (dataPtr + newHashIndex) Node(std::move(m_data[index])); + fudAssert(ptr != nullptr); + m_data[index].~Node(); + } + } + + auto status = FudStatus::Success; + if (m_buckets > 0) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + m_allocator->deallocate(reinterpret_cast(m_data), m_buckets * NodeSize); + } + + m_data = dataPtr; + m_buckets = count; + + return status; + } + + Result exchange(const Key& key, const Value& value); + + private: + [[nodiscard]] double loadFactor() const + { + if (m_buckets == 0) { + return hashMapMaxLoadFactor + 1.0; + } + return static_cast(m_count) / static_cast(m_buckets); + } + + FudStatus checkGrow() + { + if (loadFactor() > hashMapMaxLoadFactor) { + auto growStatus = grow(); + if (growStatus != FudStatus::Success) { + return growStatus; + } + } + return FudStatus::Success; + } + + FudStatus grow() + { + size_t additional = m_buckets < 3 ? 3 : m_buckets / 2; + constexpr auto maxSize = std::numeric_limits::max(); + if (maxSize - additional * NodeSize < m_buckets * NodeSize) { + additional = maxSize - m_buckets * NodeSize / 2; + } + while (additional > 0) { + auto reserveStatus = reserve(additional + m_buckets); + if (reserveStatus == FudStatus::Success) { + break; + } + if (reserveStatus == FudStatus::AllocFailure) { + additional /= 2; + } else { + return reserveStatus; + } + } + + if (loadFactor() >= hashMapMaxLoadFactor) { + return FudStatus::AllocFailure; + } + + return FudStatus::Success; + } + + FudStatus cleanup() noexcept + { + auto status = clear(); + if (m_data != nullptr && m_allocator != nullptr) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + m_allocator->deallocate(reinterpret_cast(m_data), m_buckets); + } + + m_allocator = nullptr; + m_data = nullptr; + m_count = 0; + m_buckets = 0; + + return status; + } + + [[nodiscard]] constexpr size_t calculateHashIndex(size_t hash, size_t probe) const + { + return hash + ((probe + probe * probe) / 2); + } + + Option lookup(const Key& key) const + { + const auto hash = m_hasher(key, m_seed); + const auto nearestPowerOf2 = detail::roundToNearest2(m_buckets); + size_t probe = 0; + size_t hashIndex{}; + auto collision = true; + + while (collision and probe < m_buckets) { + hashIndex = calculateHashIndex(hash, probe) % nearestPowerOf2; + if (hashIndex >= m_buckets) { + probe++; + continue; + } + collision = m_data[hashIndex].hasValue(); + if (collision and m_data[hashIndex].value().m_key == key) { + break; + } + if (collision) { + probe++; + } + } + + if (collision and m_data[hashIndex].value().m_key == key) { + return hashIndex; + } + + return NullOpt; + } + + Result findEmptyBucket(const Key& key, size_t buckets, Node* data) + { + const auto hash = m_hasher(key, m_seed); + const auto nearestPowerOf2 = detail::roundToNearest2(buckets); + size_t hashIndex{}; + auto collision = true; + size_t probe = 0; + + while (collision and probe < buckets) { + hashIndex = calculateHashIndex(hash, probe) % nearestPowerOf2; + if (hashIndex >= buckets) { + probe++; + continue; + } + collision = data[hashIndex].hasValue(); + if (collision and data[hashIndex].value().m_key == key) { + break; + } + if (collision) { + probe++; + } + } + + if (collision and data[hashIndex].value().m_key == key) { + return Error{FudStatus::Exists}; + } + + if (collision) { + return Error{FudStatus::Failure}; + } + + return Okay{hashIndex}; + } + + Allocator* m_allocator{&globalFudAllocator}; + Node* m_data{nullptr}; + size_t m_count{0}; + size_t m_buckets{0}; + size_t m_seed{0}; + Hash m_hasher{}; +}; + +} // namespace fud + +#endif -- cgit v1.2.3