BLI: add fixed width integer type

This is intended to be used in the new exact mesh boolean algorithm by @howardt.

The new `BLI_fixed_width_int.hh` header provides types like `Int256` and `UInt256` which are like e.g. `uint64_t` but with higher precision. The code supports many different integer sizes.

The following operations are supported:
* Addition
* Subtraction
* Multiplication
* Comparisons
* Negation
* Conversion to and from other number types
* Conversion to and from string (based on `GMP`)

Division is not implemented. It could be implemented, but it's more complex and is not required for the new mesh boolean algorithm.

Some alternatives to having a custom implementation have been discussed in https://devtalk.blender.org/t/fixed-length-multiprecision-arithmetic/29189/.

Generally, the implementation is fairly straightforward. The main complexity is the addition/multiplication algorithm, which isn't too complicated. It's nice to have control over this part as it allows us to optimize the code more if necessary. Also, from what I understand, we might be able to benefit from some special cases like multiplying a large integer with a smaller one.

I tried some different ways to optimize this already, but so far the normal compiler optimization turned out to work best. Not sure if the same is true on Windows though, as it doesn't have native support for an `int128`, which helps the compiler understand what I'm doing. Alternatives I tried so far are using intrinsics directly (mainly `_addcarry_u64` and similar), writing inline assembly manually and copying the assembly output from the compiler. I assume the assembly implementation didn't help for me because it prohibited other compiler optimizations.

Pull Request: https://projects.blender.org/blender/blender/pulls/119528
2024-03-25 23:39:42 +01:00 · 2024-03-25 23:39:42 +01:00 · 7314c86869
commit 7314c86869
parent 1681e55114
7 changed files with 781 additions and 15 deletions
--- a/source/blender/blenlib/BLI_fixed_width_int.hh
+++ b/source/blender/blenlib/BLI_fixed_width_int.hh
@ -0,0 +1,563 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include <cmath>
+
+#include "BLI_string_ref.hh"
+#include "BLI_unroll.hh"
+
+namespace blender::fixed_width_int {
+
+/**
+ * An unsigned fixed width integer.
+ *
+ * For some algorithms, the largest cross platform integer type (`uint64_t`) is not large enough.
+ * Then one has the choice to use some big-integer implementation like the one from GMP or one can
+ * use fixed-width-integers as implemented here.
+ *
+ * Internally, this type combines multiple smaller integers into a bigger integer.
+ */
+template<typename T, int S> struct UIntF {
+  /* Digits have to be of an unsigned integer type and there has to be at least one digit. */
+  static_assert(std::is_unsigned_v<T>);
+  static_assert(S >= 1);
+
+  /**
+   * Array of smaller integers that make up the bigger integer. The first element is the least
+   * significant digit.
+   */
+  std::array<T, S> v;
+
+  /** Allow default construction. Note that the value is not initialized in this case. */
+  UIntF() = default;
+
+  /**
+   * Construct from a specific integer. Digits that are not covered by the 64 bits of `value` are
+   * set to zero. If the integer is narrower than 64 bits, the upper bits of `value` are dropped.
+   */
+  explicit UIntF(uint64_t value);
+
+  /**
+   * Construct from a string. The definition forwards to #set_from_str, which is only declared
+   * when `WITH_GMP` is defined.
+   */
+  explicit UIntF(StringRefNull str, int base = 10);
+
+  /**
+   * Convert to a normal integer. Note that this may lose digits: only the least significant 64
+   * bits are kept.
+   */
+  explicit operator uint64_t() const;
+
+  /** Convert to floating point. This may lose precision. */
+  explicit operator double() const;
+  explicit operator float() const;
+
+/* See `BLI_fixed_width_int_str.hh`. */
+#ifdef WITH_GMP
+  /** Update value based on the integer encoded in the string. */
+  void set_from_str(StringRefNull str, int base = 10);
+
+  /** Convert to a string. */
+  std::string to_string(int base = 10) const;
+#endif
+};
+
+/**
+ * A signed fixed width integer. It's mostly the same as #UIntF, but signed.
+ */
+template<typename T, int S> struct IntF {
+  /* Digits have to be of an unsigned integer type and there has to be at least one digit. */
+  static_assert(std::is_unsigned_v<T>);
+  static_assert(S >= 1);
+
+  /**
+   * Array of smaller integers that make up the bigger integer. The first element is the least
+   * significant digit. The sign is stored in the most significant bit of the last element
+   * (two's-complement representation).
+   */
+  std::array<T, S> v;
+
+  /** Allow default construction. Note that the value is not initialized in this case. */
+  IntF() = default;
+
+  /**
+   * Construct from a specific integer. Digits that are not covered by the 64 bits of `value` are
+   * filled with the sign of `value` (sign extension).
+   */
+  explicit IntF(int64_t value);
+
+  /** Support casting unsigned to signed fixed-width-int. The digits are copied unchanged. */
+  explicit IntF(const UIntF<T, S> &value);
+
+  /**
+   * Construct from a string. The definition forwards to #set_from_str, which is only declared
+   * when `WITH_GMP` is defined.
+   */
+  explicit IntF(StringRefNull str, int base = 10);
+
+  /**
+   * Convert to a normal integer. Note that this may lose digits: only the least significant 64
+   * bits are kept.
+   */
+  explicit operator int64_t() const;
+
+  /** Convert to floating point. This may lose precision. */
+  explicit operator double() const;
+  explicit operator float() const;
+
+  /** Support casting from signed to unsigned fixed-width-int. The digits are copied unchanged. */
+  explicit operator UIntF<T, S>() const;
+
+/* See `BLI_fixed_width_int_str.hh`. */
+#ifdef WITH_GMP
+  /** Update value based on the integer encoded in the string. A leading `-` negates the value. */
+  void set_from_str(StringRefNull str, int base = 10);
+
+  /** Convert to a string. */
+  std::string to_string(int base = 10) const;
+#endif
+};
+
+/**
+ * Type trait that maps an unsigned digit type to an unsigned type of twice the width. The primary
+ * template yields `void`, which marks digit types without a wider counterpart. The arithmetic
+ * operators check for `void` in their `BLI_ENABLE_IF` condition.
+ */
+template<typename T> struct DoubleUIntType {
+  using type = void;
+};
+template<> struct DoubleUIntType<uint8_t> {
+  using type = uint16_t;
+};
+template<> struct DoubleUIntType<uint16_t> {
+  using type = uint32_t;
+};
+template<> struct DoubleUIntType<uint32_t> {
+  using type = uint64_t;
+};
+/* `__uint128_t` is only used when not compiling with MSVC. With MSVC, `uint64_t` digits keep the
+ * `void` fallback of the primary template. */
+#ifndef _MSC_VER
+template<> struct DoubleUIntType<uint64_t> {
+  using type = __uint128_t;
+};
+#endif
+
+/** Maps unsigned integer types to a type that's twice the size. E.g. uint16_t to uint32_t. */
+template<typename T> using double_uint_type = typename DoubleUIntType<T>::type;
+
+/* Naming scheme of the aliases below: `[U]Int<total bits>_<bits per digit>`. E.g. `UInt128_32` is
+ * an unsigned 128 bit integer that is made up of four 32 bit digits. */
+
+using UInt64_8 = UIntF<uint8_t, 8>;
+using UInt64_16 = UIntF<uint16_t, 4>;
+using UInt64_32 = UIntF<uint32_t, 2>;
+
+using Int64_8 = IntF<uint8_t, 8>;
+using Int64_16 = IntF<uint16_t, 4>;
+using Int64_32 = IntF<uint32_t, 2>;
+
+using UInt128_8 = UIntF<uint8_t, 16>;
+using UInt128_16 = UIntF<uint16_t, 8>;
+using UInt128_32 = UIntF<uint32_t, 4>;
+using UInt128_64 = UIntF<uint64_t, 2>;
+
+using UInt256_8 = UIntF<uint8_t, 32>;
+using UInt256_16 = UIntF<uint16_t, 16>;
+using UInt256_32 = UIntF<uint32_t, 8>;
+using UInt256_64 = UIntF<uint64_t, 4>;
+
+using Int128_8 = IntF<uint8_t, 16>;
+using Int128_16 = IntF<uint16_t, 8>;
+using Int128_32 = IntF<uint32_t, 4>;
+using Int128_64 = IntF<uint64_t, 2>;
+
+using Int256_8 = IntF<uint8_t, 32>;
+using Int256_16 = IntF<uint16_t, 16>;
+using Int256_32 = IntF<uint32_t, 8>;
+using Int256_64 = IntF<uint64_t, 4>;
+
+/* Default aliases without an explicit digit size. MSVC builds use 32 bit digits because no
+ * double-width type is registered for `uint64_t` there (see #DoubleUIntType). All other compilers
+ * use 64 bit digits. */
+#ifdef _MSC_VER
+using UInt128 = UInt128_32;
+using UInt256 = UInt256_32;
+using Int128 = Int128_32;
+using Int256 = Int256_32;
+#else
+using UInt128 = UInt128_64;
+using UInt256 = UInt256_64;
+using Int128 = Int128_64;
+using Int256 = Int256_64;
+#endif
+
+/**
+ * Split `value` into digits of type `T`, least significant digit first. Digits that are not
+ * covered by the 64 bits of `value` become zero.
+ */
+template<typename T, int S> inline UIntF<T, S>::UIntF(const uint64_t value)
+{
+  /* Number of digits that actually receive bits from `value`. */
+  constexpr int digits_from_value = std::min(S, int(sizeof(uint64_t) / sizeof(T)));
+  constexpr int digit_bits = 8 * sizeof(T);
+
+  for (int digit = 0; digit < S; digit++) {
+    /* The shift is only evaluated for digits inside of `value`, so it never exceeds 63 bits. */
+    this->v[digit] = (digit < digits_from_value) ? T(value >> (digit_bits * digit)) : T(0);
+  }
+}
+
+/**
+ * Split `value` into digits of type `T`, least significant digit first. Digits that are not
+ * covered by the 64 bits of `value` are filled with its sign (all bits set for negative values).
+ */
+template<typename T, int S> inline IntF<T, S>::IntF(const int64_t value)
+{
+  /* Number of digits that actually receive bits from `value`. */
+  constexpr int digits_from_value = std::min(S, int(sizeof(int64_t) / sizeof(T)));
+  constexpr int digit_bits = 8 * sizeof(T);
+
+  const T sign_fill = (value < 0) ? T(-1) : T(0);
+  for (int digit = 0; digit < S; digit++) {
+    /* The shift is only evaluated for digits inside of `value`, so it never exceeds 63 bits. */
+    this->v[digit] = (digit < digits_from_value) ? T(value >> (digit_bits * digit)) : sign_fill;
+  }
+}
+
+/** Reinterpret an unsigned value as a signed one. The digits are copied unchanged. */
+template<typename T, int S> inline IntF<T, S>::IntF(const UIntF<T, S> &value) : v(value.v) {}
+
+/** Parse the value from a string by forwarding to #UIntF::set_from_str. */
+template<typename T, int S> UIntF<T, S>::UIntF(const StringRefNull str, const int base)
+{
+  this->set_from_str(str, base);
+}
+
+/** Parse the value from a string by forwarding to #IntF::set_from_str. */
+template<typename T, int S> IntF<T, S>::IntF(const StringRefNull str, const int base)
+{
+  this->set_from_str(str, base);
+}
+
+/**
+ * Pack the least significant digits back into a `uint64_t`. Digits above the lowest 64 bits are
+ * ignored.
+ */
+template<typename T, int S> inline UIntF<T, S>::operator uint64_t() const
+{
+  constexpr int digit_bits = 8 * sizeof(T);
+  /* Number of digits that fit into the 64 bit result. */
+  constexpr int digits_used = std::min(S, int(sizeof(uint64_t) / sizeof(T)));
+
+  uint64_t packed = 0;
+  int bit_offset = 0;
+  for (int digit = 0; digit < digits_used; digit++) {
+    packed |= uint64_t(this->v[digit]) << bit_offset;
+    bit_offset += digit_bits;
+  }
+  return packed;
+}
+
+/**
+ * Convert to `double` by summing up all digits scaled by their power of two, starting with the
+ * least significant digit. The result may be rounded.
+ */
+template<typename T, int S> inline UIntF<T, S>::operator double() const
+{
+  double sum = double(this->v[0]);
+  for (int digit = 1; digit < S; digit++) {
+    const T digit_value = this->v[digit];
+    if (digit_value != 0) {
+      /* `ldexp` computes `digit_value * 2^(bit offset of the digit)`. */
+      sum += ldexp(digit_value, 8 * sizeof(T) * digit);
+    }
+  }
+  return sum;
+}
+
+/** Convert to `float` by narrowing the result of the `double` conversion. */
+template<typename T, int S> inline UIntF<T, S>::operator float() const
+{
+  const double as_double = double(*this);
+  return float(as_double);
+}
+
+/**
+ * Convert to `int64_t` by reinterpreting the digits as unsigned, taking the `uint64_t` conversion
+ * of that value and casting the result to signed.
+ */
+template<typename T, int S> inline IntF<T, S>::operator int64_t() const
+{
+  return int64_t(uint64_t(UIntF<T, S>(*this)));
+}
+
+/**
+ * Convert to `double`. The result may be rounded.
+ *
+ * The magnitude is converted through the unsigned type. This also handles the most negative
+ * value, whose negation is the value itself in two's-complement: interpreted as unsigned, it is
+ * exactly the magnitude. Negating and converting as a signed value again would never terminate
+ * for that input.
+ */
+template<typename T, int S> inline IntF<T, S>::operator double() const
+{
+  if (is_negative(*this)) {
+    return -double(UIntF<T, S>(-*this));
+  }
+  /* Non-negative values have the same digits as their unsigned counterpart. */
+  return double(UIntF<T, S>(*this));
+}
+
+/** Convert to `float` by narrowing the result of the `double` conversion. */
+template<typename T, int S> inline IntF<T, S>::operator float() const
+{
+  return float(double(*this));
+}
+
+/** Reinterpret the signed value as unsigned. The digits are copied unchanged. */
+template<typename T, int S> inline IntF<T, S>::operator UIntF<T, S>() const
+{
+  UIntF<T, S> result;
+  result.v = this->v;
+  return result;
+}
+
+/**
+ * Adds two fixed-width-integer together using the standard addition with carry algorithm taught
+ * in schools. The main difference is that the digits here are not 0 to 9, but 0 to max(T).
+ *
+ * Due to the design of two's-complement numbers, this works for signed and unsigned
+ * fixed-width-integer. The overflow behavior is wrap-around.
+ *
+ * \param T: Type for individual digits.
+ * \param T2: Integer type that is twice as large as T.
+ * \param S: Number of digits of type T in each fixed-width-integer.
+ * \param dst: Receives the `S` digits of the sum. Declared `__restrict`.
+ * \param a, b: The `S` digits of each operand, least significant digit first.
+ */
+template<typename T, typename T2, int S>
+inline void generic_add(T *__restrict dst, const T *a, const T *b)
+{
+  constexpr int shift = 8 * sizeof(T);
+  /* Overflow of the previous digit that has to be added to the current digit. */
+  T2 carry = 0;
+  unroll<S>([&](auto i) {
+    const T2 ai = T2(a[i]);
+    const T2 bi = T2(b[i]);
+    /* The sum is computed in the wider type, so it keeps the bits above a single digit. */
+    const T2 ri = ai + bi + carry;
+    /* The lower half is the resulting digit, the upper half is the carry for the next digit. */
+    dst[i] = T(ri);
+    carry = ri >> shift;
+  });
+}
+
+/**
+ * Similar to #generic_add, but for subtraction.
+ *
+ * Computes `a - b` digit by digit, least significant digit first, and propagates a borrow to the
+ * next digit.
+ */
+template<typename T, typename T2, int S>
+inline void generic_sub(T *__restrict dst, const T *a, const T *b)
+{
+  /* Despite the name, this is the borrow (0 or 1) that is subtracted from the next digit. */
+  T2 carry = 0;
+  unroll<S>([&](auto i) {
+    const T2 ai = T2(a[i]);
+    const T2 bi = T2(b[i]);
+    const T2 ri = ai - bi - carry;
+    dst[i] = T(ri);
+    /* If more was subtracted than `ai` holds, the unsigned result wraps around and ends up being
+     * larger than `ai`. That is the case in which the next digit has to borrow. */
+    carry = ri > ai;
+  });
+}
+
+/**
+ * Similar to #generic_add, but for unsigned multiplication.
+ *
+ * Only the lowest `S` digits of the product are computed, partial products that would end up in
+ * higher digits are skipped.
+ */
+template<typename T, typename T2, int S>
+inline void generic_unsigned_mul(T *__restrict dst, const T *a, const T *b)
+{
+  constexpr int shift = 8 * sizeof(T);
+
+  /* Per output digit accumulator in the wider type, so that sums of multiple digit sized values
+   * can be stored before they are reduced in the final pass. */
+  T2 r[S] = {};
+
+  for (int i = 0; i < S; i++) {
+    const T2 bi = T2(b[i]);
+    /* Upper half of the previous partial product within this row. */
+    T2 carry = 0;
+    /* `j < S - i` keeps `i + j` below `S`, i.e. inside of the result. */
+    for (int j = 0; j < S - i; j++) {
+      const T2 rji = T2(a[j]) * bi + carry;
+      carry = rji >> shift;
+      /* Only the lower half belongs to output digit `i + j`. */
+      r[i + j] += T2(T(rji));
+    }
+  }
+
+  /* Reduce the accumulators to single digits and propagate what does not fit upwards. */
+  T2 carry = 0;
+  for (int i = 0; i < S; i++) {
+    const T2 ri = r[i] + carry;
+    carry = ri >> shift;
+    dst[i] = T(ri);
+  }
+}
+
+/** Unsigned addition based on #generic_add. Overflow wraps around. */
+template<typename T, int Size, BLI_ENABLE_IF((!std::is_void_v<double_uint_type<T>>))>
+inline UIntF<T, Size> operator+(const UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  using DoubleT = double_uint_type<T>;
+  UIntF<T, Size> sum;
+  generic_add<T, DoubleT, Size>(sum.v.data(), a.v.data(), b.v.data());
+  return sum;
+}
+
+/** Signed addition based on #generic_add. Overflow wraps around. */
+template<typename T, int Size, BLI_ENABLE_IF((!std::is_void_v<double_uint_type<T>>))>
+inline IntF<T, Size> operator+(const IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  using DoubleT = double_uint_type<T>;
+  IntF<T, Size> sum;
+  generic_add<T, DoubleT, Size>(sum.v.data(), a.v.data(), b.v.data());
+  return sum;
+}
+
+/**
+ * Unsigned subtraction based on #generic_sub. Underflow wraps around.
+ *
+ * Like the other arithmetic operators, this is only available when a double-width type exists
+ * for the digit type, because #generic_sub needs it for the intermediate results.
+ */
+template<typename T, int Size, BLI_ENABLE_IF((!std::is_void_v<double_uint_type<T>>))>
+inline UIntF<T, Size> operator-(const UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  UIntF<T, Size> result;
+  generic_sub<T, double_uint_type<T>, Size>(result.v.data(), a.v.data(), b.v.data());
+  return result;
+}
+
+/**
+ * Signed subtraction based on #generic_sub. Overflow wraps around.
+ *
+ * Like the other arithmetic operators, this is only available when a double-width type exists
+ * for the digit type, because #generic_sub needs it for the intermediate results.
+ */
+template<typename T, int Size, BLI_ENABLE_IF((!std::is_void_v<double_uint_type<T>>))>
+inline IntF<T, Size> operator-(const IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  IntF<T, Size> result;
+  generic_sub<T, double_uint_type<T>, Size>(result.v.data(), a.v.data(), b.v.data());
+  return result;
+}
+
+/** Unsigned multiplication based on #generic_unsigned_mul. Overflow wraps around. */
+template<typename T, int Size, BLI_ENABLE_IF((!std::is_void_v<double_uint_type<T>>))>
+inline UIntF<T, Size> operator*(const UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  using DoubleT = double_uint_type<T>;
+  UIntF<T, Size> product;
+  generic_unsigned_mul<T, DoubleT, Size>(product.v.data(), a.v.data(), b.v.data());
+  return product;
+}
+
+/**
+ * Using this function is faster than using the comparison operator. Only a single bit has to be
+ * checked to determine if the value is negative.
+ *
+ * \return True when the most significant bit of the most significant digit is set.
+ */
+template<typename T, int Size> inline bool is_negative(const IntF<T, Size> &a)
+{
+  /* Mask that selects the highest bit of a single digit, which is the sign bit. */
+  constexpr T sign_mask = T(T(1) << (sizeof(T) * 8 - 1));
+  return (a.v[Size - 1] & sign_mask) != 0;
+}
+
+/** \return True when every digit of the unsigned value is zero. */
+template<typename T, int Size> inline bool is_zero(const UIntF<T, Size> &a)
+{
+  bool all_digits_zero = true;
+  /* All digits are checked without an early exit. */
+  const auto check_digit = [&](auto digit) { all_digits_zero &= (a.v[digit] == 0); };
+  unroll<Size>(check_digit);
+  return all_digits_zero;
+}
+
+/** \return True when every digit of the signed value is zero. */
+template<typename T, int Size> inline bool is_zero(const IntF<T, Size> &a)
+{
+  bool all_digits_zero = true;
+  /* All digits are checked without an early exit. */
+  const auto check_digit = [&](auto digit) { all_digits_zero &= (a.v[digit] == 0); };
+  unroll<Size>(check_digit);
+  return all_digits_zero;
+}
+
+/**
+ * Signed multiplication. It is implemented in terms of unsigned multiplication: the magnitudes
+ * of both operands are multiplied and the product is negated when exactly one operand is
+ * negative.
+ */
+template<typename T, int Size, BLI_ENABLE_IF((!std::is_void_v<double_uint_type<T>>))>
+inline IntF<T, Size> operator*(const IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  using UnsignedT = UIntF<T, Size>;
+  using SignedT = IntF<T, Size>;
+
+  const bool a_negative = is_negative(a);
+  const bool b_negative = is_negative(b);
+
+  /* Magnitudes of both operands as unsigned values. */
+  const UnsignedT magnitude_a = a_negative ? UnsignedT(-a) : UnsignedT(a);
+  const UnsignedT magnitude_b = b_negative ? UnsignedT(-b) : UnsignedT(b);
+
+  const SignedT product(magnitude_a * magnitude_b);
+  /* The signs cancel out when both operands are negative. */
+  return (a_negative != b_negative) ? -product : product;
+}
+
+/** Two's-complement negation: invert all bits and add one. */
+template<typename T, int Size> inline IntF<T, Size> operator-(const IntF<T, Size> &a)
+{
+  IntF<T, Size> inverted;
+  int digit = 0;
+  while (digit < Size) {
+    inverted.v[digit] = ~a.v[digit];
+    digit++;
+  }
+  const IntF<T, Size> one(1);
+  return inverted + one;
+}
+
+/* Compound assignment operators. Each one evaluates the corresponding binary operator and assigns
+ * the result to `a`. They return `void`, so the result of an assignment can not be used in a
+ * larger expression. */
+
+template<typename T, int Size> inline void operator+=(UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  a = a + b;
+}
+
+template<typename T, int Size> inline void operator+=(IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  a = a + b;
+}
+
+template<typename T, int Size> inline void operator-=(UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  a = a - b;
+}
+
+template<typename T, int Size> inline void operator-=(IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  a = a - b;
+}
+
+template<typename T, int Size> inline void operator*=(UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  a = a * b;
+}
+
+template<typename T, int Size> inline void operator*=(IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  a = a * b;
+}
+
+/* Equality operators. Two values are equal when all of their digits are equal, which is checked
+ * with the comparison operators of the underlying `std::array`. */
+
+template<typename T, int Size>
+inline bool operator==(const IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  return a.v == b.v;
+}
+
+template<typename T, int Size>
+inline bool operator==(const UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  return a.v == b.v;
+}
+
+template<typename T, int Size>
+inline bool operator!=(const IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  return a.v != b.v;
+}
+
+template<typename T, int Size>
+inline bool operator!=(const UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  return a.v != b.v;
+}
+
+/**
+ * Three-way comparison of two digit arrays that starts at the last element (the most significant
+ * digit) and stops at the first pair of digits that differs.
+ *
+ * \return -1 if `a` is smaller, 1 if `a` is larger and 0 if all digits are equal.
+ */
+template<typename T, size_t Size>
+inline int compare_reversed_order(const std::array<T, Size> &a, const std::array<T, Size> &b)
+{
+  for (size_t remaining = Size; remaining > 0; remaining--) {
+    const size_t digit = remaining - 1;
+    if (a[digit] < b[digit]) {
+      return -1;
+    }
+    if (a[digit] > b[digit]) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* Ordering of signed values. When the signs differ, the negative operand is the smaller one.
+ * Otherwise both operands have the same sign and comparing the digits as unsigned numbers gives
+ * the correct order in two's-complement. */
+
+template<typename T, int Size>
+inline bool operator<(const IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  const bool a_negative = is_negative(a);
+  if (a_negative != is_negative(b)) {
+    return a_negative;
+  }
+  return compare_reversed_order(a.v, b.v) < 0;
+}
+
+template<typename T, int Size>
+inline bool operator<=(const IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  const bool a_negative = is_negative(a);
+  if (a_negative != is_negative(b)) {
+    return a_negative;
+  }
+  return compare_reversed_order(a.v, b.v) <= 0;
+}
+
+template<typename T, int Size>
+inline bool operator>(const IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  const bool a_negative = is_negative(a);
+  if (a_negative != is_negative(b)) {
+    /* The signs differ, so `b` is negative exactly when `a` is not. */
+    return !a_negative;
+  }
+  return compare_reversed_order(a.v, b.v) > 0;
+}
+
+template<typename T, int Size>
+inline bool operator>=(const IntF<T, Size> &a, const IntF<T, Size> &b)
+{
+  const bool a_negative = is_negative(a);
+  if (a_negative != is_negative(b)) {
+    /* The signs differ, so `b` is negative exactly when `a` is not. */
+    return !a_negative;
+  }
+  return compare_reversed_order(a.v, b.v) >= 0;
+}
+
+/* Ordering of unsigned values. The digits are compared from the most to the least significant
+ * one by #compare_reversed_order. */
+
+template<typename T, int Size>
+inline bool operator<(const UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  return compare_reversed_order(a.v, b.v) < 0;
+}
+
+template<typename T, int Size>
+inline bool operator<=(const UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  return compare_reversed_order(a.v, b.v) <= 0;
+}
+
+template<typename T, int Size>
+inline bool operator>(const UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  return compare_reversed_order(a.v, b.v) > 0;
+}
+
+template<typename T, int Size>
+inline bool operator>=(const UIntF<T, Size> &a, const UIntF<T, Size> &b)
+{
+  return compare_reversed_order(a.v, b.v) >= 0;
+}
+
+}  // namespace blender::fixed_width_int
--- a/source/blender/blenlib/BLI_fixed_width_int_str.hh
+++ b/source/blender/blenlib/BLI_fixed_width_int_str.hh
@ -0,0 +1,75 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/** Conversions to and from strings use GMP internally currently. */
+#ifdef WITH_GMP
+
+#  include <gmpxx.h>
+
+#  include "BLI_array.hh"
+#  include "BLI_fixed_width_int.hh"
+
+namespace blender::fixed_width_int {
+
+/**
+ * Parse the string with GMP and copy the lowest `S` digits into this value. The return value of
+ * `mpz_set_str` is not checked.
+ */
+template<typename T, int S>
+inline void UIntF<T, S>::set_from_str(const StringRefNull str, const int base)
+{
+  constexpr int digit_bits = 8 * sizeof(T);
+
+  mpz_t remaining;
+  mpz_init(remaining);
+  mpz_set_str(remaining, str.c_str(), base);
+  /* A single `mpz_get_ui` call has to be able to provide all bits of one digit. */
+  static_assert(sizeof(T) <= sizeof(decltype(mpz_get_ui(remaining))));
+  for (T &digit : this->v) {
+    /* Take the lowest digit, then shift it out so that the next digit becomes the lowest. */
+    digit = T(mpz_get_ui(remaining));
+    mpz_div_2exp(remaining, remaining, digit_bits);
+  }
+  mpz_clear(remaining);
+}
+
+/**
+ * Parse a signed value. The digits after an optional leading `-` are parsed as an unsigned
+ * number, which is negated afterwards if the sign was present.
+ */
+template<typename T, int S>
+inline void IntF<T, S>::set_from_str(const StringRefNull str, const int base)
+{
+  const bool has_minus_sign = (str[0] == '-');
+  /* Skip the sign character so that only the magnitude is parsed. */
+  const char *magnitude_str = str.c_str() + (has_minus_sign ? 1 : 0);
+
+  const UIntF<T, S> magnitude(magnitude_str, base);
+  this->v = magnitude.v;
+  if (has_minus_sign) {
+    *this = -*this;
+  }
+}
+
+/**
+ * Convert to a string in the given base. The value is rebuilt as a GMP integer, starting with
+ * the most significant digit, and then formatted by GMP.
+ */
+template<typename T, int S> inline std::string UIntF<T, S>::to_string(const int base) const
+{
+  constexpr int digit_bits = 8 * sizeof(T);
+
+  mpz_t value;
+  mpz_init(value);
+  /* A single `mpz_add_ui` call has to be able to take all bits of one digit. */
+  static_assert(sizeof(T) <= sizeof(decltype(mpz_get_ui(value))));
+  for (int digit = S; digit-- > 0;) {
+    /* Make room for the next digit and append it. */
+    mpz_mul_2exp(value, value, digit_bits);
+    mpz_add_ui(value, value, this->v[digit]);
+  }
+  /* Add 2 because of possible +/- sign and null terminator. */
+  /* Also see https://gmplib.org/manual/Converting-Integers. */
+  const int buffer_size = mpz_sizeinbase(value, base) + 2;
+  Array<char, 1024> buffer(buffer_size);
+  mpz_get_str(buffer.data(), base, value);
+  mpz_clear(value);
+  return std::string(buffer.data());
+}
+
+/**
+ * Convert to a string in the given base. Negative values are formatted as a `-` followed by the
+ * magnitude.
+ *
+ * \param base: Base that is forwarded to #UIntF::to_string for both signs.
+ */
+template<typename T, int S> inline std::string IntF<T, S>::to_string(const int base) const
+{
+  if (is_negative(*this)) {
+    /* Format the magnitude as unsigned value. This also works for the most negative value: its
+     * negation has the same digits, which are the magnitude when interpreted as unsigned. */
+    std::string str = UIntF<T, S>(-*this).to_string(base);
+    str.insert(str.begin(), '-');
+    return str;
+  }
+  return UIntF<T, S>(*this).to_string(base);
+}
+
+}  // namespace blender::fixed_width_int
+
+#endif /* WITH_GMP */
--- a/source/blender/blenlib/BLI_math_vector_types.hh
+++ b/source/blender/blenlib/BLI_math_vector_types.hh
@ -13,6 +13,7 @@
 #include <ostream>
 #include <type_traits>

+#include "BLI_unroll.hh"
 #include "BLI_utildefines.h"

 namespace blender {
@ -41,21 +42,6 @@ template<typename T> struct vec_struct_base<T, 4> {
  T x, y, z, w;
 };

-template<class Fn, size_t... I> void unroll_impl(Fn fn, std::index_sequence<I...> /*indices*/)
-{
-  (fn(I), ...);
-}
-
-/**
- * Variadic templates are used to unroll loops manually. This helps GCC avoid branching during math
- * operations and makes the code generation more explicit and predictable. Unrolling should always
- * be worth it because the vector size is expected to be small.
- */
-template<int N, class Fn> void unroll(Fn fn)
-{
-  unroll_impl(fn, std::make_index_sequence<N>());
-}
-
 namespace math {

 template<typename T> uint64_t vector_hash(const T &vec)
--- a/source/blender/blenlib/BLI_rand.hh
+++ b/source/blender/blenlib/BLI_rand.hh
@ -56,6 +56,11 @@ class RandomNumberGenerator {
    return int32_t(x_ >> 17);
  }

+  /**
+   * \return A 64 bit value that is built from two consecutive 32 bit draws. The first draw
+   * provides the upper 32 bits, the second draw the lower 32 bits.
+   */
+  uint64_t get_uint64()
+  {
+    /* Both draws are done in separate statements. When they are operands of the same `|`
+     * expression, the order in which they are evaluated is unspecified, so different compilers
+     * could produce different sequences for the same seed. */
+    const uint64_t high = this->get_uint32();
+    const uint64_t low = this->get_uint32();
+    return (high << 32) | low;
+  }
+
  /**
   * \return Random value (0..N), but never N.
   */
--- a/source/blender/blenlib/BLI_unroll.hh
+++ b/source/blender/blenlib/BLI_unroll.hh
@ -0,0 +1,26 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "BLI_utildefines.h"
+
+namespace blender {
+
+/**
+ * Calls `fn` once for every index in the index sequence. The calls are expanded with a fold
+ * expression over the comma operator, so they happen in increasing index order.
+ */
+template<class Fn, size_t... I> void unroll_impl(Fn fn, std::index_sequence<I...> /*indices*/)
+{
+  (fn(I), ...);
+}
+
+/**
+ * Variadic templates are used to unroll loops manually. This helps GCC avoid branching during math
+ * operations and makes the code generation more explicit and predictable. Unrolling should always
+ * be worth it because the vector size is expected to be small.
+ *
+ * Calls `fn(0)`, `fn(1)`, ..., `fn(N - 1)`. Note that `fn` is taken by value.
+ */
+template<int N, class Fn> void unroll(Fn fn)
+{
+  unroll_impl(fn, std::make_index_sequence<N>());
+}
+
+}  // namespace blender
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@ -229,6 +229,8 @@ set(SRC
  BLI_fileops.hh
  BLI_fileops_types.h
  BLI_filereader.h
+  BLI_fixed_width_int.hh
+  BLI_fixed_width_int_str.hh
  BLI_fnmatch.h
  BLI_function_ref.hh
  BLI_generic_array.hh
@ -373,6 +375,7 @@ set(SRC
  BLI_timeit.hh
  BLI_timer.h
  BLI_unique_sorted_indices.hh
+  BLI_unroll.hh
  BLI_utildefines.h
  BLI_utildefines_iter.h
  BLI_utildefines_stack.h
@ -508,6 +511,7 @@ if(WITH_GTESTS)
    tests/BLI_disjoint_set_test.cc
    tests/BLI_expr_pylike_eval_test.cc
    tests/BLI_fileops_test.cc
+    tests/BLI_fixed_width_int_test.cc
    tests/BLI_function_ref_test.cc
    tests/BLI_generic_array_test.cc
    tests/BLI_generic_span_test.cc
--- a/source/blender/blenlib/tests/BLI_fixed_width_int_test.cc
+++ b/source/blender/blenlib/tests/BLI_fixed_width_int_test.cc
@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+#include "BLI_fixed_width_int.hh"
+#include "BLI_fixed_width_int_str.hh"
+#include "BLI_rand.hh"
+#include "BLI_timeit.hh"
+#include "BLI_vector.hh"
+
+#include "testing/testing.h"
+
+/* See `BLI_fixed_width_int_str.hh` for why this is necessary.  */
+#ifdef WITH_GMP
+
+namespace blender::fixed_width_int::tests {
+
+/* Checks #is_zero for signed and unsigned values, including results of arithmetic. */
+TEST(fixed_width_int, IsZero)
+{
+  EXPECT_TRUE(is_zero(UInt256(0)));
+  EXPECT_TRUE(is_zero(UInt256(10) - UInt256(10)));
+  /* The intermediate result `10 - 15` wraps around for unsigned values. */
+  EXPECT_TRUE(is_zero(UInt256(10) - UInt256(15) + UInt256(5)));
+  EXPECT_FALSE(is_zero(UInt256(10)));
+
+  EXPECT_TRUE(is_zero(Int256(0)));
+  EXPECT_TRUE(is_zero(Int256(10) - Int256(10)));
+  EXPECT_TRUE(is_zero(Int256(10) - Int256(15) + Int256(5)));
+  EXPECT_FALSE(is_zero(Int256(10)));
+  EXPECT_FALSE(is_zero(Int256(-10)));
+}
+
+/* Checks unsigned 256 bit addition against precomputed sums. The operands and the expected values
+ * are given as base 10 strings and do not fit into 64 bits. */
+TEST(fixed_width_int, Add256)
+{
+  EXPECT_EQ(UInt256("290213998554153310989149424513459608072") +
+                UInt256("236559186774771353723629567597011581379"),
+            UInt256("526773185328924664712778992110471189451"));
+  EXPECT_EQ(UInt256("211377365172829431692550347604827003294") +
+                UInt256("151035310604094577723885879186052138391"),
+            UInt256("362412675776924009416436226790879141685"));
+  EXPECT_EQ(UInt256("34490924248914309185690728897294455642") +
+                UInt256("151329651396698072567782489740109235288"),
+            UInt256("185820575645612381753473218637403690930"));
+  EXPECT_EQ(UInt256("23020790973174243895398009931650855178") +
+                UInt256("242538071468046767660828531945711005380"),
+            UInt256("265558862441221011556226541877361860558"));
+  EXPECT_EQ(UInt256("220030846719277288761017165278417179519") +
+                UInt256("13817458575896368146281651263001012349"),
+            UInt256("233848305295173656907298816541418191868"));
+  EXPECT_EQ(UInt256("225958958932723616286848406010143428110") +
+                UInt256("309322190961572274983773819144991425669"),
+            UInt256("535281149894295891270622225155134853779"));
+  EXPECT_EQ(UInt256("166851370558999106635673647011389012481") +
+                UInt256("85443075281725354911889976920463997722"),
+            UInt256("252294445840724461547563623931853010203"));
+  EXPECT_EQ(UInt256("274485954517155769304275705148933346392") +
+                UInt256("215279677420695754877443907998549347900"),
+            UInt256("489765631937851524181719613147482694292"));
+  EXPECT_EQ(UInt256("3522191569845770793524407096643088669") +
+                UInt256("100106234023644716469012457480771518776"),
+            UInt256("103628425593490487262536864577414607445"));
+  EXPECT_EQ(UInt256("163994307071630654616433355844082912619") +
+                UInt256("263001956277142014131208604303902541977"),
+            UInt256("426996263348772668747641960147985454596"));
+}
+
+/* Compares the fixed-width-integer operations against the native 64 bit integer operations for
+ * many random inputs. 8 bit digits are used so that every operation has to handle carries across
+ * multiple digits. */
+TEST(fixed_width_int, Fuzzy)
+{
+  RandomNumberGenerator rng;
+  for ([[maybe_unused]] const int i : IndexRange(10000)) {
+    {
+      const uint64_t a = rng.get_uint64();
+      const uint64_t b = rng.get_uint64();
+      EXPECT_EQ(a + b, uint64_t(UInt64_8(a) + UInt64_8(b)));
+      EXPECT_EQ(a * b, uint64_t(UInt64_8(a) * UInt64_8(b)));
+      EXPECT_EQ(a - b, uint64_t(UInt64_8(a) - UInt64_8(b)));
+      EXPECT_EQ(a < b, UInt64_8(a) < UInt64_8(b));
+      EXPECT_EQ(a > b, UInt64_8(a) > UInt64_8(b));
+      EXPECT_EQ(a <= b, UInt64_8(a) <= UInt64_8(b));
+      EXPECT_EQ(a >= b, UInt64_8(a) >= UInt64_8(b));
+      EXPECT_EQ(a == b, UInt64_8(a) == UInt64_8(b));
+      EXPECT_EQ(a != b, UInt64_8(a) != UInt64_8(b));
+      /* Use the double comparison here. The float comparison would narrow both values to
+       * `float`, which only checks the conversion with float precision. */
+      EXPECT_DOUBLE_EQ(double(a), double(UInt64_8(a)));
+      EXPECT_FLOAT_EQ(float(a), float(UInt64_8(a)));
+    }
+    {
+      const int64_t a = int64_t(rng.get_uint64()) * (rng.get_float() < 0.5f ? -1 : 1);
+      const int64_t b = int64_t(rng.get_uint64()) * (rng.get_float() < 0.5f ? -1 : 1);
+      EXPECT_EQ(a + b, int64_t(Int64_8(a) + Int64_8(b)));
+      EXPECT_EQ(a * b, int64_t(Int64_8(a) * Int64_8(b)));
+      EXPECT_EQ(a - b, int64_t(Int64_8(a) - Int64_8(b)));
+      EXPECT_EQ(a < b, Int64_8(a) < Int64_8(b));
+      EXPECT_EQ(a > b, Int64_8(a) > Int64_8(b));
+      EXPECT_EQ(a <= b, Int64_8(a) <= Int64_8(b));
+      EXPECT_EQ(a >= b, Int64_8(a) >= Int64_8(b));
+      EXPECT_EQ(a == b, Int64_8(a) == Int64_8(b));
+      EXPECT_EQ(a != b, Int64_8(a) != Int64_8(b));
+      EXPECT_EQ(a == 0, is_zero(Int64_8(a)));
+      EXPECT_EQ(b == 0, is_zero(Int64_8(b)));
+      EXPECT_EQ(a < 0, is_negative(Int64_8(a)));
+      EXPECT_EQ(b < 0, is_negative(Int64_8(b)));
+      /* See above for why the double comparison is used. */
+      EXPECT_DOUBLE_EQ(double(a), double(Int64_8(a)));
+      EXPECT_FLOAT_EQ(float(a), float(Int64_8(a)));
+    }
+  }
+}
+
+}  // namespace blender::fixed_width_int::tests
+
+#endif /* WITH_GMP */