// Copyright 2002 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS-IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #ifndef S2_UTIL_BITS_BITS_H_ #define S2_UTIL_BITS_BITS_H_ // // Various bit-twiddling functions, all of which are static members of the Bits // class (making it effectively a namespace). Operands are unsigned integers. // Munging bits in _signed_ integers is fraught with peril! For example, // -5 << n has undefined behavior (for some values of n). // // Bits provide the following: // // * Count(Ones.*|LeadingZeros.*)? . In a similar vein, there's also the // Find[LM]SBSetNonZero.* family of functions. You can think of them as // (trailing|leading) zero bit count + 1. Also in a similar vein, // (Capped)?Difference, which count the number of one bits in foo ^ bar. // // * ReverseBits${power_of_two} // // * Log2(Floor|Ceiling)(NonZero)?.* - The NonZero variants have undefined // behavior if argument is 0. // // * Bytes(ContainByte(LessThan)?|AllInRange) - These scan a sequence of bytes // looking for one with(out)? some property. // // * (Get|Set|Copy)Bits // // * GetLowBits - Extract N lowest bits from value. // // The only other thing is BitPattern, which is a trait class template (not in // Bits) containing a few bit patterns (which vary based on value of template // parameter). #include "s2/third_party/absl/base/casts.h" #include "s2/third_party/absl/numeric/int128.h" #if defined(__i386__) || defined(__x86_64__) #include #endif #include #include "s2/base/integral_types.h" #include "s2/base/logging.h" #include "s2/base/port.h" #include "s2/third_party/absl/base/macros.h" class Bits { public: // A traits class template for unsigned integer type sizes. Primary // information contained herein is corresponding (unsigned) integer type. // E.g. UnsignedTypeBySize<32>::Type is uint32. Used by UnsignedType. template struct UnsignedTypeBySize; // Auxiliary struct for figuring out an unsigned type for a given type. template struct UnsignedType { typedef typename UnsignedTypeBySize::Type Type; }; // Return the number of one bits in the given integer. static int CountOnesInByte(unsigned char n); static int CountOnes(uint32 n) { #if defined(__powerpc64__) && defined(__GNUC__) // Use popcount builtin if we know it is inlined and fast. return PopcountWithBuiltin(n); #elif (defined(__i386__) || defined(__x86_64__)) && defined(__POPCNT__) && \ defined(__GNUC__) return PopcountWithBuiltin(n); #else n -= ((n >> 1) & 0x55555555); n = ((n >> 2) & 0x33333333) + (n & 0x33333333); return static_cast((((n + (n >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24); #endif } // Count bits using sideways addition [WWG'57]. See Knuth TAOCP v4 7.1.3(59) static inline int CountOnes64(uint64 n) { #if defined(__powerpc64__) && defined(__GNUC__) return PopcountWithBuiltin(n); #elif defined(__x86_64__) && defined(__POPCNT__) && defined(__GNUC__) return PopcountWithBuiltin(n); #elif defined(_LP64) n -= (n >> 1) & 0x5555555555555555ULL; n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL); return static_cast( (((n + (n >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56); #else return CountOnes(n >> 32) + CountOnes(n & 0xffffffff); #endif } // Count bits in uint128 static inline int CountOnes128(absl::uint128 n) { return Bits::CountOnes64(absl::Uint128High64(n)) + Bits::CountOnes64(absl::Uint128Low64(n)); } // Count leading zeroes. This is similar to wordsize - 1 - floor(log2(n)). // Returns number of bits if n is 0. static inline int CountLeadingZeros32(uint32 n) { // Instead of using __builtin_clz(), we explicitly write target specific // assembly because we want to handle n == 0. If we used __builtin_clz(), // we would need to use something like "n ? __builtin_clz(n) : 32". The // check is not necessary on POWER and aarch64 but we cannot depend on // that because __builtin_clz(0) is documented to be undefined. #if defined(__aarch64__) && defined(__GNUC__) int32 count; asm("clz %w0,%w1" : "=r"(count) : "r"(n)); return count; #elif (defined(__i386__) || defined(__x86_64__)) && defined(__LZCNT__) && \ defined(__GNUC__) return __lzcnt32(n); #elif (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) if (n == 0) return 32; int32 idx; asm("bsr %1, %0" : "=r"(idx) : "ro"(n) : "cc"); // bsr writes Z flag return 31 ^ idx; #elif defined(__powerpc64__) && defined(__GNUC__) int32 count; asm("cntlzw %0,%1" : "=r"(count) : "r"(n)); return count; #elif defined(__GNUC__) return CountLeadingZerosWithBuiltin(n); #else return CountLeadingZeros32_Portable(n); #endif } static inline int CountLeadingZeros64(uint64 n) { #if defined(__aarch64__) && defined(__GNUC__) int64 count; asm("clz %0,%1" : "=r"(count) : "r"(n)); return static_cast(count); #elif defined(__powerpc64__) && defined(__GNUC__) int64 count; asm("cntlzd %0,%1" : "=r"(count) : "r"(n)); return static_cast(count); #elif (defined(__i386__) || defined(__x86_64__)) && defined(__LZCNT__) && \ defined(__GNUC__) return __lzcnt64(n); #elif defined(__x86_64__) && defined(__GNUC__) if (n == 0) return 64; int64 idx; asm ("bsr %1, %0" : "=r"(idx) : "ro"(n) : "cc"); // bsr writes Z flag return static_cast(63 ^ idx); #elif defined(__GNUC__) return CountLeadingZerosWithBuiltin(n); #else return CountLeadingZeros64_Portable(n); #endif } static inline int CountLeadingZeros128(absl::uint128 n) { if (uint64 hi = absl::Uint128High64(n)) return Bits::CountLeadingZeros64(hi); return Bits::CountLeadingZeros64(absl::Uint128Low64(n)) + 64; } // Reverse the bits in the given integer. static uint8 ReverseBits8(uint8 n); static uint32 ReverseBits32(uint32 n); static uint64 ReverseBits64(uint64 n); static absl::uint128 ReverseBits128(absl::uint128 n); // Return the number of one bits in the byte sequence. static int Count(const void *m, int num_bytes); // Return the number of different bits in the given byte sequences. // (i.e., the Hamming distance) static int Difference(const void *m1, const void *m2, int num_bytes); // Return the number of different bits in the given byte sequences, // up to a maximum. Values larger than the maximum may be returned // (because multiple bits are checked at a time), but the function // may exit early if the cap is exceeded. static int CappedDifference(const void *m1, const void *m2, int num_bytes, int cap); // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. static int Log2Floor(uint32 n); static int Log2Floor64(uint64 n); static int Log2Floor128(absl::uint128 n); // Potentially faster version of Log2Floor() that returns an // undefined value if n == 0 static int Log2FloorNonZero(uint32 n); static int Log2FloorNonZero64(uint64 n); static int Log2FloorNonZero128(absl::uint128 n); // Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0. static int Log2Ceiling(uint32 n); static int Log2Ceiling64(uint64 n); static int Log2Ceiling128(absl::uint128 n); // Return the first set least / most significant bit, 0-indexed. Returns an // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except // that it's 0-indexed, while FindMSBSetNonZero() is the same as // Log2FloorNonZero(). static int FindLSBSetNonZero(uint32 n); static int FindLSBSetNonZero64(uint64 n); static int FindLSBSetNonZero128(absl::uint128 n); static int FindMSBSetNonZero(uint32 n) { return Log2FloorNonZero(n); } static int FindMSBSetNonZero64(uint64 n) { return Log2FloorNonZero64(n); } static int FindMSBSetNonZero128(absl::uint128 n) { return Log2FloorNonZero128(n); } // Viewing bytes as a stream of unsigned bytes, does that stream // contain any byte equal to c? template static bool BytesContainByte(T bytes, uint8 c); // Viewing bytes as a stream of unsigned bytes, does that stream // contain any byte b < c? template static bool BytesContainByteLessThan(T bytes, uint8 c); // Viewing bytes as a stream of unsigned bytes, are all elements of that // stream in [lo, hi]? template static bool BytesAllInRange(T bytes, uint8 lo, uint8 hi); // Extract 'nbits' consecutive bits from 'src'. Position of bits are // specified by 'offset' from the LSB. 'T' is a scalar type (integral, // float or pointer) whose size is the same as one of the unsigned types. // The return type is an unsigned type having the same size as T. template static typename UnsignedType::Type GetBits(const T src, const int offset, const int nbits) { typedef typename UnsignedType::Type UnsignedT; const UnsignedT unsigned_src = absl::bit_cast(src); S2_DCHECK_GT(sizeof(UnsignedT) * 8, offset); S2_DCHECK_GE(sizeof(UnsignedT) * 8, offset + nbits); return GetBitsImpl(unsigned_src, offset, nbits); } // Overwrite 'nbits' consecutive bits of 'dest.'. Position of bits are // specified by an offset from the LSB. 'T' is a scalar type (integral, // float or pointer) whose size is the same as one of the unsigned types. template static void SetBits(const typename UnsignedType::Type value, const int offset, const int nbits, T* const dest) { typedef typename UnsignedType::Type UnsignedT; const UnsignedT unsigned_dest = absl::bit_cast(*dest); S2_DCHECK_GT(sizeof(UnsignedT) * 8, offset); S2_DCHECK_GE(sizeof(UnsignedT) * 8, offset + nbits); const UnsignedT mask = NBitsFromLSB(nbits); const UnsignedT unsigned_result = (unsigned_dest & ~(mask << offset)) | ((value & mask) << offset); *dest = absl::bit_cast(unsigned_result); } // Combine SetBits and GetBits for convenience. This is meant to be a // replacement for BitCopy() for some use cases. Unlike BitCopy(), // Bits::CopyBits() operating on multibyte types has the same behavior on // big-endian and little-endian machines. Sample usage: // // uint32 a, b; // Bits::CopyBits(&a, 0, b, 12, 3); template static void CopyBits(DestType* const dest, const int dest_offset, const SrcType src, const int src_offset, const int nbits) { const typename UnsignedType::Type value = GetBits(src, src_offset, nbits); SetBits(value, dest_offset, nbits, dest); } // Extract the lowest 'nbits' consecutive bits from 'src'. // Bits::GetLowBits(13, 3); /* = 5 (0b1101 => 0b101) */ template static typename UnsignedType::Type GetLowBits(const T src, const int nbits) { typedef typename UnsignedType::Type UnsignedT; const UnsignedT unsigned_src = absl::bit_cast(src); S2_DCHECK_GE(sizeof(UnsignedT) * 8, nbits); return GetLowBitsImpl(unsigned_src, nbits); } private: // We only use this for unsigned types and for 0 <= n <= sizeof(UnsignedT). template static UnsignedT NBitsFromLSB(const int nbits) { const UnsignedT all_ones = ~static_cast(0); return nbits == 0 ? static_cast(0) : all_ones >> (sizeof(UnsignedT) * 8 - nbits); } template static inline UnsignedT GetBitsImpl(const UnsignedT src, const int offset, const int nbits); template static inline UnsignedT GetLowBitsImpl(const UnsignedT src, const int nbits); #ifdef __GNUC__ static int CountLeadingZerosWithBuiltin(unsigned n); // NOLINTNEXTLINE(runtime/int) static int CountLeadingZerosWithBuiltin(unsigned long n); // NOLINTNEXTLINE(runtime/int) static int CountLeadingZerosWithBuiltin(unsigned long long n); static int PopcountWithBuiltin(unsigned n); static int PopcountWithBuiltin(unsigned long n); // NOLINT(runtime/int) static int PopcountWithBuiltin(unsigned long long n); // NOLINT(runtime/int) #if defined(__BMI__) && (defined(__i386__) || defined(__x86_64__)) static inline uint32 GetBitsImpl(const uint32 src, const int offset, const int nbits); #endif #if defined(__BMI__) && defined(__x86_64__) static inline uint64 GetBitsImpl(const uint64 src, const int offset, const int nbits); #endif #if defined(__BMI2__) && (defined(__i386__) || defined(__x86_64__)) static inline uint32 GetLowBitsImpl(const uint32 src, const int nbits); #endif #if defined(__BMI2__) && defined(__x86_64__) static inline uint64 GetLowBitsImpl(const uint64 src, const int nbits); #endif #endif // __GNUC__ // Portable implementations. static int Log2Floor_Portable(uint32 n); static int Log2Floor64_Portable(uint64 n); static int Log2FloorNonZero_Portable(uint32 n); static int Log2FloorNonZero64_Portable(uint64 n); static int CountLeadingZeros32_Portable(uint32 n); static int CountLeadingZeros64_Portable(uint64 n); static int FindLSBSetNonZero_Portable(uint32 n); static int FindLSBSetNonZero64_Portable(uint64 n); static const char num_bits[]; Bits(Bits const&) = delete; void operator=(Bits const&) = delete; }; // A utility class for some handy bit patterns. The names l and h // were chosen to match Knuth Volume 4: l is 0x010101... and h is 0x808080...; // half_ones is ones in the lower half only. We assume sizeof(T) is 1 or even. template struct BitPattern { typedef typename std::make_unsigned::type U; static const U half_ones = (static_cast(1) << (sizeof(U) * 4)) - 1; static const U l = (sizeof(U) == 1) ? 1 : (half_ones / 0xff * (half_ones + 2)); static const U h = ~(l * 0x7f); }; // ------------------------------------------------------------------------ // Implementation details follow // ------------------------------------------------------------------------ #if defined(__GNUC__) inline int Bits::Log2Floor(uint32 n) { return n == 0 ? -1 : 31 ^ __builtin_clz(n); } inline int Bits::Log2FloorNonZero(uint32 n) { return 31 ^ __builtin_clz(n); } inline int Bits::FindLSBSetNonZero(uint32 n) { return __builtin_ctz(n); } inline int Bits::Log2Floor64(uint64 n) { return n == 0 ? -1 : 63 ^ __builtin_clzll(n); } inline int Bits::Log2FloorNonZero64(uint64 n) { return 63 ^ __builtin_clzll(n); } inline int Bits::FindLSBSetNonZero64(uint64 n) { return __builtin_ctzll(n); } #elif defined(_MSC_VER) inline int Bits::FindLSBSetNonZero(uint32 n) { return Bits::FindLSBSetNonZero_Portable(n); } inline int Bits::FindLSBSetNonZero64(uint64 n) { return Bits::FindLSBSetNonZero64_Portable(n); } inline int Bits::Log2FloorNonZero(uint32 n) { #ifdef _M_IX86 _asm { bsr ebx, n mov n, ebx } return n; #else return Bits::Log2FloorNonZero_Portable(n); #endif } inline int Bits::Log2Floor(uint32 n) { #ifdef _M_IX86 _asm { xor ebx, ebx mov eax, n and eax, eax jz return_ebx bsr ebx, eax return_ebx: mov n, ebx } return n; #else return Bits::Log2Floor_Portable(n); #endif } inline int Bits::Log2Floor64(uint64 n) { return Bits::Log2Floor64_Portable(n); } inline int Bits::Log2FloorNonZero64(uint64 n) { return Bits::Log2FloorNonZero64_Portable(n); } #else // !__GNUC__ && !_MSC_VER inline int Bits::Log2Floor(uint32 n) { return Bits::Log2Floor_Portable(n); } inline int Bits::Log2FloorNonZero(uint32 n) { return Bits::Log2FloorNonZero_Portable(n); } inline int Bits::FindLSBSetNonZero(uint32 n) { return Bits::FindLSBSetNonZero_Portable(n); } inline int Bits::Log2Floor64(uint64 n) { return Bits::Log2Floor64_Portable(n); } inline int Bits::Log2FloorNonZero64(uint64 n) { return Bits::Log2FloorNonZero64_Portable(n); } inline int Bits::FindLSBSetNonZero64(uint64 n) { return Bits::FindLSBSetNonZero64_Portable(n); } #endif inline int Bits::Log2Floor128(absl::uint128 n) { if (uint64 hi = absl::Uint128High64(n)) return 64 + Log2FloorNonZero64(hi); return Log2Floor64(absl::Uint128Low64(n)); } inline int Bits::Log2FloorNonZero128(absl::uint128 n) { if (uint64 hi = absl::Uint128High64(n)) return 64 + Log2FloorNonZero64(hi); return Log2FloorNonZero64(absl::Uint128Low64(n)); } inline int Bits::FindLSBSetNonZero128(absl::uint128 n) { if (uint64 lo = absl::Uint128Low64(n)) return Bits::FindLSBSetNonZero64(lo); return 64 + Bits::FindLSBSetNonZero64(absl::Uint128High64(n)); } inline int Bits::CountOnesInByte(unsigned char n) { return num_bits[n]; } inline uint8 Bits::ReverseBits8(unsigned char n) { #if defined(__aarch64__) && defined(__GNUC__) // aarch64 has a reverse bits instruction but there is no gcc builtin. uint32 result; const uint32 n_shifted = static_cast(n) << 24; asm("rbit %w0, %w1" : "=r"(result) : "r"(n_shifted)); return static_cast(result); #elif defined (__powerpc64__) uint64 temp = n; // bpermd selects a byte's worth of bits from its second input. Grab one byte // at a time, in reversed order. 0x3f is the lowest order bit of a 64-bit int. // Bits 0x0 through 0x37 will all be zero, and bits 0x38 through 0x3f will // hold the 8 bits from `n`. uint64 result = __builtin_bpermd(0x3f3e3d3c3b3a3938, temp); return static_cast(result); #else n = static_cast(((n >> 1) & 0x55) | ((n & 0x55) << 1)); n = static_cast(((n >> 2) & 0x33) | ((n & 0x33) << 2)); return static_cast(((n >> 4) & 0x0f) | ((n & 0x0f) << 4)); #endif } inline uint32 Bits::ReverseBits32(uint32 n) { #if defined(__aarch64__) && defined(__GNUC__) uint32 result; asm("rbit %w0, %w1" : "=r"(result) : "r"(n)); return result; #elif defined(__powerpc64__) uint64 temp = n; uint64 result_0 = __builtin_bpermd(0x3f3e3d3c3b3a3938, temp) << 24; uint64 result_1 = __builtin_bpermd(0x3736353433323130, temp) << 16; uint64 result_2 = __builtin_bpermd(0x2f2e2d2c2b2a2928, temp) << 8; uint64 result_3 = __builtin_bpermd(0x2726252423222120, temp); return static_cast(result_0 | result_1 | result_2 | result_3); #else n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1); n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2); n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4); return bswap_32(n); #endif } inline uint64 Bits::ReverseBits64(uint64 n) { #if defined(__aarch64__) && defined(__GNUC__) uint64 result; asm("rbit %0, %1" : "=r"(result) : "r"(n)); return result; #elif defined(__powerpc64__) uint64 result_lo0 = __builtin_bpermd(0x3f3e3d3c3b3a3938, n) << 56; uint64 result_lo1 = __builtin_bpermd(0x3736353433323130, n) << 48; uint64 result_lo2 = __builtin_bpermd(0x2f2e2d2c2b2a2928, n) << 40; uint64 result_lo3 = __builtin_bpermd(0x2726252423222120, n) << 32; uint64 result_hi0 = __builtin_bpermd(0x1f1e1d1c1b1a1918, n) << 24; uint64 result_hi1 = __builtin_bpermd(0x1716151413121110, n) << 16; uint64 result_hi2 = __builtin_bpermd(0x0f0e0d0c0b0a0908, n) << 8; uint64 result_hi3 = __builtin_bpermd(0x0706050403020100, n); return (result_lo0 | result_lo1 | result_lo2 | result_lo3 | result_hi0 | result_hi1 | result_hi2 | result_hi3); #elif defined(_LP64) n = ((n >> 1) & 0x5555555555555555ULL) | ((n & 0x5555555555555555ULL) << 1); n = ((n >> 2) & 0x3333333333333333ULL) | ((n & 0x3333333333333333ULL) << 2); n = ((n >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((n & 0x0F0F0F0F0F0F0F0FULL) << 4); return bswap_64(n); #else return ReverseBits32( n >> 32 ) | (static_cast(ReverseBits32(n & 0xffffffff)) << 32); #endif } inline absl::uint128 Bits::ReverseBits128(absl::uint128 n) { return absl::MakeUint128(ReverseBits64(absl::Uint128Low64(n)), ReverseBits64(absl::Uint128High64(n))); } inline int Bits::Log2FloorNonZero_Portable(uint32 n) { // Just use the common routine return Log2Floor(n); } // Log2Floor64() is defined in terms of Log2Floor32(), Log2FloorNonZero32() inline int Bits::Log2Floor64_Portable(uint64 n) { const uint32 topbits = static_cast(n >> 32); if (topbits == 0) { // Top bits are zero, so scan in bottom bits return Log2Floor(static_cast(n)); } else { return 32 + Log2FloorNonZero(topbits); } } // Log2FloorNonZero64() is defined in terms of Log2FloorNonZero32() inline int Bits::Log2FloorNonZero64_Portable(uint64 n) { const uint32 topbits = static_cast(n >> 32); if (topbits == 0) { // Top bits are zero, so scan in bottom bits return Log2FloorNonZero(static_cast(n)); } else { return 32 + Log2FloorNonZero(topbits); } } // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero() inline int Bits::FindLSBSetNonZero64_Portable(uint64 n) { const uint32 bottombits = static_cast(n); if (bottombits == 0) { // Bottom bits are zero, so scan in top bits return 32 + FindLSBSetNonZero(static_cast(n >> 32)); } else { return FindLSBSetNonZero(bottombits); } } template inline bool Bits::BytesContainByteLessThan(T bytes, uint8 c) { auto l = BitPattern::l; auto h = BitPattern::h; // The c <= 0x80 code is straight out of Knuth Volume 4. // Usually c will be manifestly constant. return c <= 0x80 ? ((h & (bytes - l * c) & ~bytes) != 0) : ((((bytes - l * c) | (bytes ^ h)) & h) != 0); } template inline bool Bits::BytesContainByte(T bytes, uint8 c) { // Usually c will be manifestly constant. return Bits::BytesContainByteLessThan(bytes ^ (c * BitPattern::l), 1); } template inline bool Bits::BytesAllInRange(T bytes, uint8 lo, uint8 hi) { auto l = BitPattern::l; auto h = BitPattern::h; // In the common case, lo and hi are manifest constants. if (lo > hi) { return false; } if (hi - lo < 128) { auto x = bytes - l * lo; auto y = bytes + l * (127 - hi); return ((x | y) & h) == 0; } return !Bits::BytesContainByteLessThan(bytes + (255 - hi) * l, lo + (255 - hi)); } // Specializations for Bits::UnsignedTypeBySize. For unsupported type // sizes, a compile-time error will be generated. template<> struct Bits::UnsignedTypeBySize<1> { typedef uint8 Type; }; template<> struct Bits::UnsignedTypeBySize<2> { typedef uint16 Type; }; template<> struct Bits::UnsignedTypeBySize<4> { typedef uint32 Type; }; template<> struct Bits::UnsignedTypeBySize<8> { typedef uint64 Type; }; template<> struct Bits::UnsignedTypeBySize<16> { typedef absl::uint128 Type; }; #ifdef __GNUC__ inline int Bits::CountLeadingZerosWithBuiltin(unsigned n) { if (n == 0) { return sizeof(n) * 8; // __builtin_clz(0) is undefined. } return __builtin_clz(n); } // NOLINTNEXTLINE(runtime/int) inline int Bits::CountLeadingZerosWithBuiltin(unsigned long n) { if (n == 0) { return sizeof(n) * 8; // __builtin_clzl(0) is undefined. } return __builtin_clzl(n); } // NOLINTNEXTLINE(runtime/int) inline int Bits::CountLeadingZerosWithBuiltin(unsigned long long n) { if (n == 0) { return sizeof(n) * 8; // __builtin_clzll(0) is undefined. } return __builtin_clzll(n); } inline int Bits::PopcountWithBuiltin(unsigned n) { return __builtin_popcount(n); } // NOLINTNEXTLINE(runtime/int) inline int Bits::PopcountWithBuiltin(unsigned long n) { return __builtin_popcountl(n); } // NOLINTNEXTLINE(runtime/int) inline int Bits::PopcountWithBuiltin(unsigned long long n) { return __builtin_popcountll(n); } #if defined(__BMI__) && (defined(__i386__) || defined(__x86_64__)) inline uint32 Bits::GetBitsImpl(const uint32 src, const int offset, const int nbits) { return _bextr_u32(src, offset, nbits); } #endif #if defined(__BMI__) && defined(__x86_64__) inline uint64 Bits::GetBitsImpl(const uint64 src, const int offset, const int nbits) { return _bextr_u64(src, offset, nbits); } #endif #if defined(__BMI2__) && (defined(__i386__) || defined(__x86_64__)) inline uint32 Bits::GetLowBitsImpl(const uint32 src, const int nbits) { return _bzhi_u32(src, nbits); } #endif #if defined(__BMI2__) && defined(__x86_64__) inline uint64 Bits::GetLowBitsImpl(const uint64 src, const int nbits) { return _bzhi_u64(src, nbits); } #endif #endif // __GNUC__ template inline UnsignedT Bits::GetBitsImpl(const UnsignedT src, const int offset, const int nbits) { const UnsignedT result = (src >> offset) & NBitsFromLSB(nbits); return result; } template inline UnsignedT Bits::GetLowBitsImpl(const UnsignedT src, const int nbits) { return GetBitsImpl(src, 0, nbits); } #endif // S2_UTIL_BITS_BITS_H_