// Bit-manipulation helpers: population count, floor(log2) and trailing-zero count.
#pragma once
|
|||
|
|
|||
|
// Population count: returns the number of bits set to 1 in c (0..64).
// We should template this to use __popcnt64 if available,
// but that is premature optimization.
inline uint64_t bitcount(uint64_t c) {
    // Each step adds neighbouring fields together, doubling the field width,
    // until a single 64-bit field holds the total.
    c = c - ((c >> 1) & 0x5555555555555555);      // 2-bit fields, each 0..2
    c = ((c >> 2) & 0x3333333333333333) +
        (c & 0x3333333333333333);                 // 4-bit fields, each 0..4
    c = ((c >> 4) + c) & 0x0F0F0F0F0F0F0F0F;      // 8-bit fields, each 0..8
    c = ((c >> 8) + c) & 0x00FF00FF00FF00FF;      // 16-bit fields, each 0..16
    c = ((c >> 16) + c) & 0x0000FFFF0000FFFF;     // 32-bit fields, each 0..32
    c = ((c >> 32) + c) & 0x00000000FFFFFFFF;     // final sum, 0..64
    return c;
}
|
|||
|
|
|||
|
// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog "Bit Hacks"
// Find floor(log2(v)), i.e. the index of the highest set bit of v.
// Despite the name the result is rounded down; v == 0 yields 0.
// We should template this to use lzcnt64, __builtin_clz or _BitScanReverse if available,
// but that is premature optimization.
inline auto rounded_log2(uint32_t v) {
    // This algorithm extends to 64 bits by adding a step, and shrinks to sixteen bits by removing a step.
    decltype(v) r{ 0 }, s;
    // This redundant initialization and redundant |= of r can be eliminated,
    // but eliminating it obfuscates the simplicity of the algorithm.
    // Each step tests whether the upper half of the remaining range is occupied;
    // if so, s holds that half's width, v is shifted down by it and s is recorded in r.
    s = (v > 0xFFFF) << 4; v >>= s; r |= s;
    s = (v > 0x00FF) << 3; v >>= s; r |= s;
    s = (v > 0x000F) << 2; v >>= s; r |= s;
    s = (v > 0x0003) << 1; v >>= s; r |= s;
    // v is now in 0..3; its bit 1 is the last bit of the result.
    r |= (v >> 1);
    // result of floor(log2(v)) is in r
    return r;
}
|
|||
|
|
|||
|
// For trailing bits, consider int __builtin_ctz (unsigned int x)
// http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightLinear

// Count the consecutive trailing zero bits of v.
// Returns an unsigned int in 0..63 for non-zero v, and 64 when v == 0.
inline auto trailing_zero_bits(uint64_t v) {
    unsigned int c;
    if (v & 0x3F) {
        // The lowest set bit is within the low six bits: count linearly (at most 5 iterations).
        v = (v ^ (v - 1)) >> 1; // Set v's trailing 0s to 1s and zero rest
        for (c = 0; v; c++) {
            v >>= 1;
        }
    }
    else {
        // Binary search: whenever the low half of the remaining range is all zero,
        // discard it and add its width to the count.
        // c starts at 1; the final subtraction below compensates when the low bit ends up set.
        c = 1;
        if ((v & 0xffffffff) == 0) {
            v >>= 32;
            c += 32;
        }
        if ((v & 0xffff) == 0) {
            v >>= 16;
            c += 16;
        }
        if ((v & 0xff) == 0) {
            v >>= 8;
            c += 8;
        }
        if ((v & 0xf) == 0) {
            v >>= 4;
            c += 4;
        }
        if ((v & 0x3) == 0) {
            v >>= 2;
            c += 2;
        }
        if ((v & 0x1) == 0) {
            v >>= 1;
            c += 1;
        }
        // For v == 0 nothing is subtracted here, so the result is 64.
        c -= static_cast<unsigned int>(v & 0x01);
    }
    return c;
}
|