// Copyright 2020-2024 Junekey Jeon // // The contents of this file may be used under the terms of // the Apache License v2.0 with LLVM Exceptions. // // (See accompanying file LICENSE-Apache or copy at // https://llvm.org/foundation/relicensing/LICENSE.txt) // // Alternatively, the contents of this file may be used under the terms of // the Boost Software License, Version 1.0. // (See accompanying file LICENSE-Boost or copy at // https://www.boost.org/LICENSE_1_0.txt) // // Unless required by applicable law or agreed to in writing, this software // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. /* * Modifications copyright 2024 Dominick Allen * * Modifications to the contents of this file may be used under the terms of * the Apache License v2.0 with LLVM Exceptions. * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef JKJ_HEADER_DRAGONBOX #define JKJ_HEADER_DRAGONBOX // Attribute for storing static data into a dedicated place, e.g. flash memory. Every ODR-used // static data declaration will be decorated with this macro. The users may define this macro, // before including the library headers, into whatever they want. #ifndef JKJ_STATIC_DATA_SECTION #define JKJ_STATIC_DATA_SECTION #else #define JKJ_STATIC_DATA_SECTION_DEFINED 1 #endif // To use the library with toolchains without standard C++ headers, the users may define this macro // into their custom namespace which contains the definitions of all the standard C++ library // features used in this header. (The list can be found below.) 
#ifndef JKJ_STD_REPLACEMENT_NAMESPACE #define JKJ_STD_REPLACEMENT_NAMESPACE std #include #include #include #include #include #ifdef __has_include #if __has_include() #include #endif #endif #else #define JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED 1 #endif //////////////////////////////////////////////////////////////////////////////////////// // Language feature detections. //////////////////////////////////////////////////////////////////////////////////////// // C++14 constexpr #if defined(__cpp_constexpr) && __cpp_constexpr >= 201304L #define JKJ_HAS_CONSTEXPR14 1 #elif __cplusplus >= 201402L #define JKJ_HAS_CONSTEXPR14 1 #elif defined(_MSC_VER) && _MSC_VER >= 1910 && _MSVC_LANG >= 201402L #define JKJ_HAS_CONSTEXPR14 1 #else #define JKJ_HAS_CONSTEXPR14 0 #endif #if JKJ_HAS_CONSTEXPR14 #define JKJ_CONSTEXPR14 constexpr #else #define JKJ_CONSTEXPR14 #endif // C++17 constexpr lambdas #if defined(__cpp_constexpr) && __cpp_constexpr >= 201603L #define JKJ_HAS_CONSTEXPR17 1 #elif __cplusplus >= 201703L #define JKJ_HAS_CONSTEXPR17 1 #elif defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L #define JKJ_HAS_CONSTEXPR17 1 #else #define JKJ_HAS_CONSTEXPR17 0 #endif // C++17 inline variables #if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L #define JKJ_HAS_INLINE_VARIABLE 1 #elif __cplusplus >= 201703L #define JKJ_HAS_INLINE_VARIABLE 1 #elif defined(_MSC_VER) && _MSC_VER >= 1912 && _MSVC_LANG >= 201703L #define JKJ_HAS_INLINE_VARIABLE 1 #else #define JKJ_HAS_INLINE_VARIABLE 0 #endif #if JKJ_HAS_INLINE_VARIABLE #define JKJ_INLINE_VARIABLE inline constexpr #else #define JKJ_INLINE_VARIABLE static constexpr #endif // C++17 if constexpr #if defined(__cpp_if_constexpr) && __cpp_if_constexpr >= 201606L #define JKJ_HAS_IF_CONSTEXPR 1 #elif __cplusplus >= 201703L #define JKJ_HAS_IF_CONSTEXPR 1 #elif defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L #define JKJ_HAS_IF_CONSTEXPR 1 #else #define JKJ_HAS_IF_CONSTEXPR 0 #endif #if 
JKJ_HAS_IF_CONSTEXPR #define JKJ_IF_CONSTEXPR if constexpr #else #define JKJ_IF_CONSTEXPR if #endif // C++20 std::bit_cast #if JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED #if JKJ_STD_REPLACEMENT_HAS_BIT_CAST #define JKJ_HAS_BIT_CAST 1 #else #define JKJ_HAS_BIT_CAST 0 #endif #elif defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L #include #define JKJ_HAS_BIT_CAST 1 #else #define JKJ_HAS_BIT_CAST 0 #endif // C++23 if consteval or C++20 std::is_constant_evaluated #if defined(__cpp_if_consteval) && __cpp_is_consteval >= 202106L #define JKJ_IF_CONSTEVAL if consteval #define JKJ_IF_NOT_CONSTEVAL if !consteval #define JKJ_CAN_BRANCH_ON_CONSTEVAL 1 #define JKJ_USE_IS_CONSTANT_EVALUATED 0 #elif JKJ_STD_REPLACEMENT_NAMESPACE_DEFINED #if JKJ_STD_REPLACEMENT_HAS_IS_CONSTANT_EVALUATED #define JKJ_IF_CONSTEVAL if (stdr::is_constant_evaluated()) #define JKJ_IF_NOT_CONSTEVAL if (!stdr::is_constant_evaluated()) #define JKJ_CAN_BRANCH_ON_CONSTEVAL 1 #define JKJ_USE_IS_CONSTANT_EVALUATED 1 #elif JKJ_HAS_IF_CONSTEXPR #define JKJ_IF_CONSTEVAL if constexpr (false) #define JKJ_IF_NOT_CONSTEVAL if constexpr (true) #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 #define JKJ_USE_IS_CONSTANT_EVALUATED 0 #else #define JKJ_IF_CONSTEVAL if (false) #define JKJ_IF_NOT_CONSTEVAL if (true) #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 #define JKJ_USE_IS_CONSTANT_EVALUATED 0 #endif #else #if defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L #define JKJ_IF_CONSTEVAL if (stdr::is_constant_evaluated()) #define JKJ_IF_NOT_CONSTEVAL if (!stdr::is_constant_evaluated()) #define JKJ_CAN_BRANCH_ON_CONSTEVAL 1 #define JKJ_USE_IS_CONSTANT_EVALUATED 1 #elif JKJ_HAS_IF_CONSTEXPR #define JKJ_IF_CONSTEVAL if constexpr (false) #define JKJ_IF_NOT_CONSTEVAL if constexpr (true) #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 #define JKJ_USE_IS_CONSTANT_EVALUATED 0 #else #define JKJ_IF_CONSTEVAL if (false) #define JKJ_IF_NOT_CONSTEVAL if (true) #define JKJ_CAN_BRANCH_ON_CONSTEVAL 0 #define 
JKJ_USE_IS_CONSTANT_EVALUATED 0 #endif #endif #if JKJ_CAN_BRANCH_ON_CONSTEVAL && JKJ_HAS_BIT_CAST #define JKJ_CONSTEXPR20 constexpr #else #define JKJ_CONSTEXPR20 #endif // Suppress additional buffer overrun check. // I have no idea why MSVC thinks some functions here are vulnerable to the buffer overrun // attacks. No, they aren't. #if defined(__GNUC__) || defined(__clang__) #define JKJ_SAFEBUFFERS #define JKJ_FORCEINLINE inline __attribute__((always_inline)) #elif defined(_MSC_VER) #define JKJ_SAFEBUFFERS __declspec(safebuffers) #define JKJ_FORCEINLINE __forceinline #else #define JKJ_SAFEBUFFERS #define JKJ_FORCEINLINE inline #endif #if defined(__has_builtin) #define JKJ_HAS_BUILTIN(x) __has_builtin(x) #else #define JKJ_HAS_BUILTIN(x) false #endif #if defined(_MSC_VER) #include #elif defined(__INTEL_COMPILER) #include #endif namespace jkj { namespace dragonbox { //////////////////////////////////////////////////////////////////////////////////////// // The Compatibility layer for toolchains without standard C++ headers. //////////////////////////////////////////////////////////////////////////////////////// namespace detail { namespace stdr { // #if JKJ_HAS_BIT_CAST using JKJ_STD_REPLACEMENT_NAMESPACE::bit_cast; #endif // // We need assert() macro, but it is not namespaced anyway, so nothing to do here. 
// using JKJ_STD_REPLACEMENT_NAMESPACE::int_least8_t; using JKJ_STD_REPLACEMENT_NAMESPACE::int_least16_t; using JKJ_STD_REPLACEMENT_NAMESPACE::int_least32_t; using JKJ_STD_REPLACEMENT_NAMESPACE::int_fast8_t; using JKJ_STD_REPLACEMENT_NAMESPACE::int_fast16_t; using JKJ_STD_REPLACEMENT_NAMESPACE::int_fast32_t; using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least8_t; using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least16_t; using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least32_t; using JKJ_STD_REPLACEMENT_NAMESPACE::uint_least64_t; using JKJ_STD_REPLACEMENT_NAMESPACE::uint_fast8_t; using JKJ_STD_REPLACEMENT_NAMESPACE::uint_fast16_t; using JKJ_STD_REPLACEMENT_NAMESPACE::uint_fast32_t; // We need INT32_C, UINT32_C and UINT64_C macros too, but again there is nothing to do // here. // using JKJ_STD_REPLACEMENT_NAMESPACE::size_t; using JKJ_STD_REPLACEMENT_NAMESPACE::memcpy; // template using numeric_limits = JKJ_STD_REPLACEMENT_NAMESPACE::numeric_limits; // template using enable_if = JKJ_STD_REPLACEMENT_NAMESPACE::enable_if; template using add_rvalue_reference = JKJ_STD_REPLACEMENT_NAMESPACE::add_rvalue_reference; template using conditional = JKJ_STD_REPLACEMENT_NAMESPACE::conditional; #if JKJ_USE_IS_CONSTANT_EVALUATED using JKJ_STD_REPLACEMENT_NAMESPACE::is_constant_evaluated; #endif template using is_same = JKJ_STD_REPLACEMENT_NAMESPACE::is_same; #if !JKJ_HAS_BIT_CAST template using is_trivially_copyable = JKJ_STD_REPLACEMENT_NAMESPACE::is_trivially_copyable; #endif template using is_integral = JKJ_STD_REPLACEMENT_NAMESPACE::is_integral; template using is_signed = JKJ_STD_REPLACEMENT_NAMESPACE::is_signed; template using is_unsigned = JKJ_STD_REPLACEMENT_NAMESPACE::is_unsigned; } } //////////////////////////////////////////////////////////////////////////////////////// // Some general utilities for C++11-compatibility. 
//////////////////////////////////////////////////////////////////////////////////////// namespace detail { #if !JKJ_HAS_CONSTEXPR17 template struct index_sequence {}; template struct make_index_sequence_impl { using type = typename make_index_sequence_impl::type; }; template struct make_index_sequence_impl { using type = index_sequence; }; template using make_index_sequence = typename make_index_sequence_impl<0, N, void>::type; #endif // Available since C++11, but including just for this is an overkill. template typename stdr::add_rvalue_reference::type declval() noexcept; // Similarly, including is an overkill. template struct array { T data_[N]; constexpr T operator[](stdr::size_t idx) const noexcept { return data_[idx]; } JKJ_CONSTEXPR14 T& operator[](stdr::size_t idx) noexcept { return data_[idx]; } }; } //////////////////////////////////////////////////////////////////////////////////////// // Some basic features for encoding/decoding IEEE-754 formats. //////////////////////////////////////////////////////////////////////////////////////// namespace detail { template struct physical_bits { static constexpr stdr::size_t value = sizeof(T) * stdr::numeric_limits::digits; }; template struct value_bits { static constexpr stdr::size_t value = stdr::numeric_limits< typename stdr::enable_if::value, T>::type>::digits; }; template JKJ_CONSTEXPR20 To bit_cast(const From& from) { #if JKJ_HAS_BIT_CAST return stdr::bit_cast(from); #else static_assert(sizeof(From) == sizeof(To), ""); static_assert(stdr::is_trivially_copyable::value, ""); static_assert(stdr::is_trivially_copyable::value, ""); To to; stdr::memcpy(&to, &from, sizeof(To)); return to; #endif } } // These classes expose encoding specs of IEEE-754-like floating-point formats. // Currently available formats are IEEE-754 binary32 & IEEE-754 binary64. 
struct ieee754_binary32 { static constexpr int total_bits = 32; static constexpr int significand_bits = 23; static constexpr int exponent_bits = 8; static constexpr int min_exponent = -126; static constexpr int max_exponent = 127; static constexpr int exponent_bias = -127; static constexpr int decimal_significand_digits = 9; static constexpr int decimal_exponent_digits = 2; }; struct ieee754_binary64 { static constexpr int total_bits = 64; static constexpr int significand_bits = 52; static constexpr int exponent_bits = 11; static constexpr int min_exponent = -1022; static constexpr int max_exponent = 1023; static constexpr int exponent_bias = -1023; static constexpr int decimal_significand_digits = 17; static constexpr int decimal_exponent_digits = 3; }; // A floating-point format traits class defines ways to interpret a bit pattern of given size as // an encoding of floating-point number. This is an implementation of such a traits class, // supporting ways to interpret IEEE-754 binary floating-point numbers. template struct ieee754_binary_traits { // CarrierUInt needs to have enough size to hold the entire contents of floating-point // numbers. The actual bits are assumed to be aligned to the LSB, and every other bits are // assumed to be zeroed. static_assert(detail::value_bits::value >= Format::total_bits, "jkj::dragonbox: insufficient number of bits"); static_assert(detail::stdr::is_unsigned::value, ""); // ExponentUInt needs to be large enough to hold (unsigned) exponent bits as well as the // (signed) actual exponent. // TODO: static overflow guard against intermediate computations. static_assert(detail::value_bits::value >= Format::exponent_bits + 1, "jkj::dragonbox: insufficient number of bits"); static_assert(detail::stdr::is_signed::value, ""); using format = Format; using carrier_uint = CarrierUInt; static constexpr int carrier_bits = int(detail::value_bits::value); using exponent_int = ExponentInt; // Extract exponent bits from a bit pattern. 
// The result must be aligned to the LSB so that there is no additional zero paddings // on the right. This function does not do bias adjustment. static constexpr exponent_int extract_exponent_bits(carrier_uint u) noexcept { return exponent_int((u >> format::significand_bits) & ((exponent_int(1) << format::exponent_bits) - 1)); } // Extract significand bits from a bit pattern. // The result must be aligned to the LSB so that there is no additional zero paddings // on the right. The result does not contain the implicit bit. static constexpr carrier_uint extract_significand_bits(carrier_uint u) noexcept { return carrier_uint(u & ((carrier_uint(1) << format::significand_bits) - 1u)); } // Remove the exponent bits and extract significand bits together with the sign bit. static constexpr carrier_uint remove_exponent_bits(carrier_uint u) noexcept { return carrier_uint(u & ~(((carrier_uint(1) << format::exponent_bits) - 1u) << format::significand_bits)); } // Shift the obtained signed significand bits to the left by 1 to remove the sign bit. static constexpr carrier_uint remove_sign_bit_and_shift(carrier_uint u) noexcept { return carrier_uint((carrier_uint(u) << 1) & ((((carrier_uint(1) << (Format::total_bits - 1)) - 1u) << 1) | 1u)); } // Obtain the actual value of the binary exponent from the extracted exponent bits. static constexpr exponent_int binary_exponent(exponent_int exponent_bits) noexcept { return exponent_int(exponent_bits == 0 ? format::min_exponent : exponent_bits + format::exponent_bias); } // Obtain the actual value of the binary significand from the extracted significand bits // and exponent bits. static constexpr carrier_uint binary_significand(carrier_uint significand_bits, exponent_int exponent_bits) noexcept { return carrier_uint( exponent_bits == 0 ? 
significand_bits : (significand_bits | (carrier_uint(1) << format::significand_bits))); } /* Various boolean observer functions */ static constexpr bool is_nonzero(carrier_uint u) noexcept { return (u & ((carrier_uint(1) << (format::significand_bits + format::exponent_bits)) - 1u)) != 0; } static constexpr bool is_positive(carrier_uint u) noexcept { return u < (carrier_uint(1) << (format::significand_bits + format::exponent_bits)); } static constexpr bool is_negative(carrier_uint u) noexcept { return !is_positive(u); } static constexpr bool is_finite(exponent_int exponent_bits) noexcept { return exponent_bits != ((exponent_int(1) << format::exponent_bits) - 1); } static constexpr bool has_all_zero_significand_bits(carrier_uint u) noexcept { return ((u << 1) & ((((carrier_uint(1) << (Format::total_bits - 1)) - 1u) << 1) | 1u)) == 0; } static constexpr bool has_even_significand_bits(carrier_uint u) noexcept { return u % 2 == 0; } }; // Convert between bit patterns stored in carrier_uint and instances of an actual // floating-point type. Depending on format and carrier_uint, this operation might not // be possible for some specific bit patterns. However, the contract is that u always // denotes a valid bit pattern, so the functions here are assumed to be noexcept. // Users might specialize this class to change the behavior for certain types. // The default provided by the library is to treat the given floating-point type Float as either // IEEE-754 binary32 or IEEE-754 binary64, depending on the bitwise size of Float. template struct default_float_bit_carrier_conversion_traits { // Guards against types that have different internal representations than IEEE-754 // binary32/64. I don't know if there is a truly reliable way of detecting IEEE-754 binary // formats. I just did my best here. Note that in some cases // numeric_limits::is_iec559 may report false even if the internal representation is // IEEE-754 compatible. 
In such a case, the user can specialize this traits template and // remove this static sanity check in order to make Dragonbox work for Float. static_assert(detail::stdr::numeric_limits::is_iec559 && detail::stdr::numeric_limits::radix == 2 && (detail::physical_bits::value == 32 || detail::physical_bits::value == 64), "jkj::dragonbox: Float may not be of IEEE-754 binary32/binary64"); // Specifies the unsigned integer type to hold bitwise value of Float. using carrier_uint = typename detail::stdr::conditional::value == 32, detail::stdr::uint_least32_t, detail::stdr::uint_least64_t>::type; // Specifies the floating-point format. using format = typename detail::stdr::conditional::value == 32, ieee754_binary32, ieee754_binary64>::type; // Converts the floating-point type into the bit-carrier unsigned integer type. static JKJ_CONSTEXPR20 carrier_uint float_to_carrier(Float x) noexcept { return detail::bit_cast(x); } // Converts the bit-carrier unsigned integer type into the floating-point type. static JKJ_CONSTEXPR20 Float carrier_to_float(carrier_uint x) noexcept { return detail::bit_cast(x); } }; // Convenient wrappers for floating-point traits classes. // In order to reduce the argument passing overhead, these classes should be as simple as // possible (e.g., no inheritance, no private non-static data member, etc.; this is an // unfortunate fact about common ABI convention). template struct signed_significand_bits { using format_traits = FormatTraits; using carrier_uint = typename format_traits::carrier_uint; carrier_uint u; signed_significand_bits() = default; constexpr explicit signed_significand_bits(carrier_uint bit_pattern) noexcept : u{bit_pattern} {} // Shift the obtained signed significand bits to the left by 1 to remove the sign bit. 
constexpr carrier_uint remove_sign_bit_and_shift() const noexcept { return format_traits::remove_sign_bit_and_shift(u); } constexpr bool is_positive() const noexcept { return format_traits::is_positive(u); } constexpr bool is_negative() const noexcept { return format_traits::is_negative(u); } constexpr bool has_all_zero_significand_bits() const noexcept { return format_traits::has_all_zero_significand_bits(u); } constexpr bool has_even_significand_bits() const noexcept { return format_traits::has_even_significand_bits(u); } }; template struct float_bits { using format_traits = FormatTraits; using carrier_uint = typename format_traits::carrier_uint; using exponent_int = typename format_traits::exponent_int; carrier_uint u; float_bits() = default; constexpr explicit float_bits(carrier_uint bit_pattern) noexcept : u{bit_pattern} {} // Extract exponent bits from a bit pattern. // The result must be aligned to the LSB so that there is no additional zero paddings // on the right. This function does not do bias adjustment. constexpr exponent_int extract_exponent_bits() const noexcept { return format_traits::extract_exponent_bits(u); } // Extract significand bits from a bit pattern. // The result must be aligned to the LSB so that there is no additional zero paddings // on the right. The result does not contain the implicit bit. constexpr carrier_uint extract_significand_bits() const noexcept { return format_traits::extract_significand_bits(u); } // Remove the exponent bits and extract significand bits together with the sign bit. constexpr signed_significand_bits remove_exponent_bits() const noexcept { return signed_significand_bits(format_traits::remove_exponent_bits(u)); } // Obtain the actual value of the binary exponent from the extracted exponent bits. 
static constexpr exponent_int binary_exponent(exponent_int exponent_bits) noexcept { return format_traits::binary_exponent(exponent_bits); } constexpr exponent_int binary_exponent() const noexcept { return binary_exponent(extract_exponent_bits()); } // Obtain the actual value of the binary exponent from the extracted significand bits // and exponent bits. static constexpr carrier_uint binary_significand(carrier_uint significand_bits, exponent_int exponent_bits) noexcept { return format_traits::binary_significand(significand_bits, exponent_bits); } constexpr carrier_uint binary_significand() const noexcept { return binary_significand(extract_significand_bits(), extract_exponent_bits()); } constexpr bool is_nonzero() const noexcept { return format_traits::is_nonzero(u); } constexpr bool is_positive() const noexcept { return format_traits::is_positive(u); } constexpr bool is_negative() const noexcept { return format_traits::is_negative(u); } constexpr bool is_finite(exponent_int exponent_bits) const noexcept { return format_traits::is_finite(exponent_bits); } constexpr bool is_finite() const noexcept { return format_traits::is_finite(extract_exponent_bits()); } constexpr bool has_even_significand_bits() const noexcept { return format_traits::has_even_significand_bits(u); } }; template , class FormatTraits = ieee754_binary_traits> JKJ_CONSTEXPR20 float_bits make_float_bits(Float x) noexcept { return float_bits(ConversionTraits::float_to_carrier(x)); } namespace detail { //////////////////////////////////////////////////////////////////////////////////////// // Bit operation intrinsics. //////////////////////////////////////////////////////////////////////////////////////// namespace bits { // Most compilers should be able to optimize this into the ROR instruction. // n is assumed to be at most of bit_width bits. 
template JKJ_CONSTEXPR14 UInt rotr(UInt n, unsigned int r) noexcept { static_assert(bit_width > 0, "jkj::dragonbox: rotation bit-width must be positive"); static_assert(bit_width <= value_bits::value, "jkj::dragonbox: rotation bit-width is too large"); r &= (bit_width - 1); return (n >> r) | (n << ((bit_width - r) & (bit_width - 1))); } } //////////////////////////////////////////////////////////////////////////////////////// // Utilities for wide unsigned integer arithmetic. //////////////////////////////////////////////////////////////////////////////////////// namespace wuint { // Compilers might support built-in 128-bit integer types. However, it seems that // emulating them with a pair of 64-bit integers actually produces a better code, // so we avoid using those built-ins. That said, they are still useful for // implementing 64-bit x 64-bit -> 128-bit multiplication. // clang-format off #if defined(__SIZEOF_INT128__) // To silence "error: ISO C++ does not support '__int128' for 'type name' // [-Wpedantic]" #if defined(__GNUC__) __extension__ #endif using builtin_uint128_t = unsigned __int128; #endif // clang-format on struct uint128 { uint128() = default; stdr::uint_least64_t high_; stdr::uint_least64_t low_; constexpr uint128(stdr::uint_least64_t high, stdr::uint_least64_t low) noexcept : high_{high}, low_{low} {} constexpr stdr::uint_least64_t high() const noexcept { return high_; } constexpr stdr::uint_least64_t low() const noexcept { return low_; } JKJ_CONSTEXPR20 uint128& operator+=(stdr::uint_least64_t n) & noexcept { auto const generic_impl = [&] { auto const sum = (low_ + n) & UINT64_C(0xffffffffffffffff); high_ += (sum < low_ ? 1 : 0); low_ = sum; }; // To suppress warning. static_cast(generic_impl); JKJ_IF_CONSTEXPR(value_bits::value > 64) { generic_impl(); return *this; } JKJ_IF_CONSTEVAL { generic_impl(); return *this; } // See https://github.com/fmtlib/fmt/pull/2985. 
#if JKJ_HAS_BUILTIN(__builtin_addcll) && !defined(__ibmxl__) JKJ_IF_CONSTEXPR( stdr::is_same::value) { unsigned long long carry{}; low_ = stdr::uint_least64_t(__builtin_addcll(low_, n, 0, &carry)); high_ = stdr::uint_least64_t(__builtin_addcll(high_, 0, carry, &carry)); return *this; } #endif #if JKJ_HAS_BUILTIN(__builtin_addcl) && !defined(__ibmxl__) JKJ_IF_CONSTEXPR(stdr::is_same::value) { unsigned long carry{}; low_ = stdr::uint_least64_t( __builtin_addcl(static_cast(low_), static_cast(n), 0, &carry)); high_ = stdr::uint_least64_t( __builtin_addcl(static_cast(high_), 0, carry, &carry)); return *this; } #endif #if JKJ_HAS_BUILTIN(__builtin_addc) && !defined(__ibmxl__) JKJ_IF_CONSTEXPR(stdr::is_same::value) { unsigned int carry{}; low_ = stdr::uint_least64_t(__builtin_addc(static_cast(low_), static_cast(n), 0, &carry)); high_ = stdr::uint_least64_t( __builtin_addc(static_cast(high_), 0, carry, &carry)); return *this; } #endif #if JKJ_HAS_BUILTIN(__builtin_ia32_addcarry_u64) // __builtin_ia32_addcarry_u64 is not documented, but it seems it takes unsigned // long long arguments. unsigned long long result{}; auto const carry = __builtin_ia32_addcarry_u64(0, low_, n, &result); low_ = stdr::uint_least64_t(result); __builtin_ia32_addcarry_u64(carry, high_, 0, &result); high_ = stdr::uint_least64_t(result); #elif defined(_MSC_VER) && defined(_M_X64) // On MSVC, uint_least64_t and __int64 must be unsigned long long; see // https://learn.microsoft.com/en-us/cpp/c-runtime-library/standard-types // and https://learn.microsoft.com/en-us/cpp/cpp/int8-int16-int32-int64. static_assert(stdr::is_same::value, ""); auto const carry = _addcarry_u64(0, low_, n, &low_); _addcarry_u64(carry, high_, 0, &high_); #elif defined(__INTEL_COMPILER) && (defined(_M_X64) || defined(__x86_64)) // Cannot find any documentation on how things are defined, but hopefully this // is always true... 
static_assert(stdr::is_same::value, ""); auto const carry = _addcarry_u64(0, low_, n, &low_); _addcarry_u64(carry, high_, 0, &high_); #else generic_impl(); #endif return *this; } }; inline JKJ_CONSTEXPR20 stdr::uint_least64_t umul64(stdr::uint_least32_t x, stdr::uint_least32_t y) noexcept { #if defined(_MSC_VER) && defined(_M_IX86) JKJ_IF_NOT_CONSTEVAL { return __emulu(x, y); } #endif return x * stdr::uint_least64_t(y); } // Get 128-bit result of multiplication of two 64-bit unsigned integers. JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 uint128 umul128(stdr::uint_least64_t x, stdr::uint_least64_t y) noexcept { auto const generic_impl = [=]() -> uint128 { auto const a = stdr::uint_least32_t(x >> 32); auto const b = stdr::uint_least32_t(x); auto const c = stdr::uint_least32_t(y >> 32); auto const d = stdr::uint_least32_t(y); auto const ac = umul64(a, c); auto const bc = umul64(b, c); auto const ad = umul64(a, d); auto const bd = umul64(b, d); auto const intermediate = (bd >> 32) + stdr::uint_least32_t(ad) + stdr::uint_least32_t(bc); return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), (intermediate << 32) + stdr::uint_least32_t(bd)}; }; // To silence warning. static_cast(generic_impl); #if defined(__SIZEOF_INT128__) auto const result = builtin_uint128_t(x) * builtin_uint128_t(y); return {stdr::uint_least64_t(result >> 64), stdr::uint_least64_t(result)}; #elif defined(_MSC_VER) && defined(_M_X64) JKJ_IF_CONSTEVAL { // This redundant variable is to workaround MSVC's codegen bug caused by the // interaction of NRVO and intrinsics. auto const result = generic_impl(); return result; } uint128 result; #if defined(__AVX2__) result.low_ = _mulx_u64(x, y, &result.high_); #else result.low_ = _umul128(x, y, &result.high_); #endif return result; #else return generic_impl(); #endif } // Get high half of the 128-bit result of multiplication of two 64-bit unsigned // integers. 
JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 stdr::uint_least64_t
umul128_upper64(stdr::uint_least64_t x, stdr::uint_least64_t y) noexcept {
    // Portable schoolbook multiplication on 32-bit halves (x = a:b, y = c:d) that keeps
    // only the upper 64 bits of the product.
    auto const generic_impl = [=]() -> stdr::uint_least64_t {
        auto const a = stdr::uint_least32_t(x >> 32);
        auto const b = stdr::uint_least32_t(x);
        auto const c = stdr::uint_least32_t(y >> 32);
        auto const d = stdr::uint_least32_t(y);

        auto const ac = umul64(a, c);
        auto const bc = umul64(b, c);
        auto const ad = umul64(a, d);
        auto const bd = umul64(b, d);

        auto const intermediate =
            (bd >> 32) + stdr::uint_least32_t(ad) + stdr::uint_least32_t(bc);

        return ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32);
    };
    // To silence warning.
    static_cast<void>(generic_impl);

#if defined(__SIZEOF_INT128__)
    auto const result = builtin_uint128_t(x) * builtin_uint128_t(y);
    return stdr::uint_least64_t(result >> 64);
#elif defined(_MSC_VER) && defined(_M_X64)
    JKJ_IF_CONSTEVAL {
        // This redundant variable is to workaround MSVC's codegen bug caused by the
        // interaction of NRVO and intrinsics.
        auto const result = generic_impl();
        return result;
    }
    stdr::uint_least64_t result;
    #if defined(__AVX2__)
    _mulx_u64(x, y, &result);
    #else
    result = __umulh(x, y);
    #endif
    return result;
#else
    return generic_impl();
#endif
}

// Get upper 128-bits of multiplication of a 64-bit unsigned integer and a 128-bit
// unsigned integer.
JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 uint128 umul192_upper128(stdr::uint_least64_t x,
                                                                uint128 y) noexcept {
    // Full product with the high half of y, plus the upper 64 bits of the product with
    // the low half of y.
    auto r = umul128(x, y.high());
    r += umul128_upper64(x, y.low());
    return r;
}

// Get upper 64-bits of multiplication of a 32-bit unsigned integer and a 64-bit
// unsigned integer.
inline JKJ_CONSTEXPR20 stdr::uint_least64_t umul96_upper64(stdr::uint_least32_t x, stdr::uint_least64_t y) noexcept { #if defined(__SIZEOF_INT128__) || (defined(_MSC_VER) && defined(_M_X64)) return umul128_upper64(stdr::uint_least64_t(x) << 32, y); #else auto const yh = stdr::uint_least32_t(y >> 32); auto const yl = stdr::uint_least32_t(y); auto const xyh = umul64(x, yh); auto const xyl = umul64(x, yl); return xyh + (xyl >> 32); #endif } // Get lower 128-bits of multiplication of a 64-bit unsigned integer and a 128-bit // unsigned integer. JKJ_SAFEBUFFERS inline JKJ_CONSTEXPR20 uint128 umul192_lower128(stdr::uint_least64_t x, uint128 y) noexcept { auto const high = x * y.high(); auto const high_low = umul128(x, y.low()); return {(high + high_low.high()) & UINT64_C(0xffffffffffffffff), high_low.low()}; } // Get lower 64-bits of multiplication of a 32-bit unsigned integer and a 64-bit // unsigned integer. constexpr stdr::uint_least64_t umul96_lower64(stdr::uint_least32_t x, stdr::uint_least64_t y) noexcept { return (x * y) & UINT64_C(0xffffffffffffffff); } } //////////////////////////////////////////////////////////////////////////////////////// // Some simple utilities for constexpr computation. //////////////////////////////////////////////////////////////////////////////////////// template constexpr Int compute_power(Int a) noexcept { static_assert(k >= 0, ""); #if JKJ_HAS_CONSTEXPR14 Int p = 1; for (int i = 0; i < k; ++i) { p *= a; } return p; #else return k == 0 ? 1 : k % 2 == 0 ? compute_power(a * a) : a * compute_power(a * a); #endif } template constexpr int count_factors(UInt n) noexcept { static_assert(a > 1, ""); #if JKJ_HAS_CONSTEXPR14 int c = 0; while (n % a == 0) { n /= a; ++c; } return c; #else return n % a == 0 ? count_factors(n / a) + 1 : 0; #endif } //////////////////////////////////////////////////////////////////////////////////////// // Utilities for fast/constexpr log computation. 
//////////////////////////////////////////////////////////////////////////////////////// namespace log { static_assert((stdr::int_fast32_t(-1) >> 1) == stdr::int_fast32_t(-1) && (stdr::int_fast16_t(-1) >> 1) == stdr::int_fast16_t(-1), "jkj::dragonbox: right-shift for signed integers must be arithmetic"); // For constexpr computation. // Returns -1 when n = 0. template constexpr int floor_log2(UInt n) noexcept { #if JKJ_HAS_CONSTEXPR14 int count = -1; while (n != 0) { ++count; n >>= 1; } return count; #else return n == 0 ? -1 : floor_log2(n / 2) + 1; #endif } template