#pragma once

#include <cstdint>

// Small, portable bit-manipulation helpers.
//
// Each routine is written in plain C++ so that it works on any compiler.
// Hardware intrinsics (__popcnt64, lzcnt64, __builtin_clz, __builtin_ctz,
// _BitScanReverse) could replace them where available, but that is
// considered premature optimization here.

// Counts the bits that are set in `c` (population count).
//
// Returns a value in the range [0, 64].
//
// Works by summing bits in parallel: first within pairs of bits, then within
// nibbles, bytes, 16-bit words, 32-bit words and finally the whole 64-bit word.
inline uint64_t bitcount(uint64_t c) {
    c = c - ((c >> 1) & 0x5555555555555555ULL);                            // 2-bit sums
    c = ((c >> 2) & 0x3333333333333333ULL) + (c & 0x3333333333333333ULL);  // 4-bit sums
    c = ((c >> 4) + c) & 0x0F0F0F0F0F0F0F0FULL;                            // 8-bit sums
    c = ((c >> 8) + c) & 0x00FF00FF00FF00FFULL;                            // 16-bit sums
    c = ((c >> 16) + c) & 0x0000FFFF0000FFFFULL;                           // 32-bit sums
    c = ((c >> 32) + c) & 0x00000000FFFFFFFFULL;                           // 64-bit sum
    return c;
}

// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog "Bit Hacks"
//
// Finds ⌊log2(v)⌋, i.e. the index of the highest set bit of `v`.
// Despite the name, the result is always rounded down (floored).
//
// Returns a value in the range [0, 31]. For v == 0, where the logarithm is
// undefined, the result is 0 (the same as for v == 1).
//
// The algorithm is a binary search for the highest set bit. It extends to
// 64 bits by adding a step and shrinks to 16 bits by removing a step.
inline uint32_t rounded_log2(uint32_t v) {
    // The initialization of r and the first |= are redundant, but keeping them
    // makes every step look the same and the algorithm easier to follow.
    uint32_t r = 0;
    uint32_t s = 0;

    // Each step: if v does not fit in the lower half of the remaining width,
    // shift that half away and record the shift distance in the result.
    s = static_cast<uint32_t>(v > 0xFFFFu) << 4; v >>= s; r |= s;
    s = static_cast<uint32_t>(v > 0x00FFu) << 3; v >>= s; r |= s;
    s = static_cast<uint32_t>(v > 0x000Fu) << 2; v >>= s; r |= s;
    s = static_cast<uint32_t>(v > 0x0003u) << 1; v >>= s; r |= s;
    r |= (v >> 1);  // v is now in [0, 3]; its bit 1 is the last result bit

    return r;  // ⌊log2(v)⌋
}

// For trailing bits, consider int __builtin_ctz (unsigned int x)
// http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightLinear
//
// Counts the consecutive zero bits at the least-significant end of `v`.
//
// Returns a value in the range [0, 63] for non-zero `v`, and 64 for v == 0.
//
// Two strategies are combined: a short linear scan when a set bit is known to
// be within the low six bits, and a binary search otherwise.
inline unsigned int trailing_zero_bits(uint64_t v) {
    unsigned int c = 0;

    if (v & 0x3F) {
        // A set bit exists in the low six bits, so there are at most five
        // trailing zeros: the linear scan below is short.
        v = (v ^ (v - 1)) >> 1;  // Set v's trailing 0s to 1s and zero the rest
        for (c = 0; v; c++) {
            v >>= 1;
        }
    } else {
        // Binary search. c starts at 1 and is corrected by the final
        // subtraction once the lowest set bit has been moved to bit 0.
        c = 1;
        if ((v & 0xFFFFFFFFu) == 0) {
            v >>= 32;
            c += 32;
        }
        if ((v & 0xFFFFu) == 0) {
            v >>= 16;
            c += 16;
        }
        if ((v & 0xFFu) == 0) {
            v >>= 8;
            c += 8;
        }
        if ((v & 0xFu) == 0) {
            v >>= 4;
            c += 4;
        }
        if ((v & 0x3u) == 0) {
            v >>= 2;
            c += 2;
        }
        if ((v & 0x1u) == 0) {
            v >>= 1;
            c += 1;
        }
        // For non-zero v, bit 0 is now set and this removes the initial 1.
        // For v == 0 nothing is subtracted and c stays at 64.
        c -= static_cast<unsigned int>(v & 0x01u);
    }

    return c;
}