clang-r487747c/python3/include/internal/pycore_bitutils.h - platform/prebuilts/clang/host/windows-x86 - Gitiles

 /* Bit and bytes utilities.

    Bytes swap functions, reverse order of bytes:

    - _Py_bswap16(uint16_t)
    - _Py_bswap32(uint32_t)
    - _Py_bswap64(uint64_t)
 */

 #ifndef Py_INTERNAL_BITUTILS_H
 #define Py_INTERNAL_BITUTILS_H
 #ifdef __cplusplus
 extern "C" {
 #endif

 #ifndef Py_BUILD_CORE
 #  error "this header requires Py_BUILD_CORE define"
 #endif

 /* Define _PY_HAVE_BUILTIN_BSWAP when the compiler reports itself as
    GCC 4.8 or newer.  The threshold is 4.8 because __builtin_bswap16()
    first appeared in that release; __builtin_bswap32() and
    __builtin_bswap64() have been available since GCC 4.3. */
 #if defined(__GNUC__) \
       && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)))
 #  define _PY_HAVE_BUILTIN_BSWAP
 #endif

 #ifdef _MSC_VER
    /* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
 #  include <intrin.h>
 #endif

 // Return 'word' with its two bytes exchanged.
 static inline uint16_t
 _Py_bswap16(uint16_t word)
 {
 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
     // Compiler builtin
     return __builtin_bswap16(word);
 #elif defined(_MSC_VER)
     // _byteswap_ushort() works on unsigned short: check the sizes agree
     Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
     return _byteswap_ushort(word);
 #else
     // Generic fallback: move each byte with a mask and a plain shift,
     // no rotate instruction needed
     const uint16_t low_byte = word & UINT16_C(0x00FF);
     const uint16_t high_byte = word & UINT16_C(0xFF00);
     return (low_byte << 8) | (high_byte >> 8);
 #endif
 }

 // Return 'word' with its four bytes in reverse order.
 static inline uint32_t
 _Py_bswap32(uint32_t word)
 {
 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
     // Compiler builtin
     return __builtin_bswap32(word);
 #elif defined(_MSC_VER)
     // _byteswap_ulong() works on unsigned long: check the sizes agree
     Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
     return _byteswap_ulong(word);
 #else
     // Generic fallback: isolate each byte, then shift it to its mirrored
     // position (byte 0 <-> byte 3, byte 1 <-> byte 2)
     const uint32_t byte0 = word & UINT32_C(0x000000FF);
     const uint32_t byte1 = word & UINT32_C(0x0000FF00);
     const uint32_t byte2 = word & UINT32_C(0x00FF0000);
     const uint32_t byte3 = word & UINT32_C(0xFF000000);
     return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | (byte3 >> 24);
 #endif
 }

 // Return 'word' with its eight bytes in reverse order.
 static inline uint64_t
 _Py_bswap64(uint64_t word)
 {
 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
     // Compiler builtin
     return __builtin_bswap64(word);
 #elif defined(_MSC_VER)
     return _byteswap_uint64(word);
 #else
     // Generic fallback: accumulate the result one byte at a time, each
     // byte masked out and shifted to its mirrored position
     uint64_t swapped = 0;
     swapped |= (word & UINT64_C(0x00000000000000FF)) << 56;
     swapped |= (word & UINT64_C(0x000000000000FF00)) << 40;
     swapped |= (word & UINT64_C(0x0000000000FF0000)) << 24;
     swapped |= (word & UINT64_C(0x00000000FF000000)) <<  8;
     swapped |= (word & UINT64_C(0x000000FF00000000)) >>  8;
     swapped |= (word & UINT64_C(0x0000FF0000000000)) >> 24;
     swapped |= (word & UINT64_C(0x00FF000000000000)) >> 40;
     swapped |= (word & UINT64_C(0xFF00000000000000)) >> 56;
     return swapped;
 #endif
 }


 // Population count: count the number of 1's in 'x'
 // (number of bits set to 1), also known as the hamming weight.
 //
 // Implementation note: to keep the implementation simple, CPUID is not used
 // to test whether the x86 POPCNT instruction is available. For this reason,
 // the Visual Studio __popcnt() intrinsic is not used either. The clang and
 // GCC builtin functions can use the x86 POPCNT instruction if the target
 // architecture has SSE4a or newer.
 static inline int
 _Py_popcount32(uint32_t x)
 {
 #if (defined(__clang__) || defined(__GNUC__))

 #if SIZEOF_INT >= 4
     // unsigned int is wide enough to receive a uint32_t
     Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
     return __builtin_popcount(x);
 #else
     // int is too narrow here; the C standard guarantees that unsigned long
     // can hold any uint32_t value without loss
     Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
     return __builtin_popcountl(x);
 #endif

 #else
     // 32-bit SWAR (SIMD Within A Register) popcount

     // 0b0101...: low bit of every 2-bit group
     const uint32_t PAIR_MASK = 0x55555555;
     // 0b00110011...: low 2 bits of every 4-bit group
     const uint32_t NIBBLE_MASK = 0x33333333;
     // 0b0000111100001111...: low 4 bits of every byte
     const uint32_t BYTE_MASK = 0x0F0F0F0F;
     // 256**3 + 256**2 + 256**1 + 256**0: a 1 in every byte
     const uint32_t BYTE_ONES = 0x01010101;

     // Each 2-bit group now holds the number of bits set in that group
     uint32_t counts = x - ((x >> 1) & PAIR_MASK);
     // Each 4-bit group now holds the number of bits set in that group
     counts = (counts & NIBBLE_MASK) + ((counts >> 2) & NIBBLE_MASK);
     // Each byte now holds the number of bits set in that byte
     counts = (counts + (counts >> 4)) & BYTE_MASK;
     // Multiplying by BYTE_ONES adds the four byte counts together in the
     // most significant byte of the low 32 bits
     const uint64_t product = (uint64_t)counts * (uint64_t)BYTE_ONES;
     return (uint32_t)product >> 24;
 #endif
 }


 // Return the index of the most significant 1 bit in 'x'. This is the smallest
 // integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
 static inline int
 _Py_bit_length(unsigned long x)
 {
 #if (defined(__clang__) || defined(__GNUC__))
     // __builtin_clzl() (available since GCC 3.4) has undefined behavior
     // for a zero argument, so handle zero before calling it.
     if (x == 0) {
         return 0;
     }
     // Total width in bits minus the number of leading zero bits
     return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
 #elif defined(_MSC_VER)
     // _BitScanReverse() is documented to search 32 bits.
     Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
     unsigned long top_index;
     if (!_BitScanReverse(&top_index, x)) {
         // No bit is set
         return 0;
     }
     // Convert the zero-based bit index into a bit count
     return (int)top_index + 1;
 #else
     // Bit length of every value in the range 0..31
     const int SMALL_BIT_LENGTH[32] = {
         0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
     };
     int length = 0;
     // Drop 6 bits per step until the remainder fits the table; a value
     // in 32..63 becomes 0 and contributes exactly those 6 bits.
     for (; x >= 32; x >>= 6) {
         length += 6;
     }
     return length + SMALL_BIT_LENGTH[x];
 #endif
 }


 #ifdef __cplusplus
 }
 #endif
 #endif /* !Py_INTERNAL_BITUTILS_H */
	/* Bit and bytes utilities.

	Bytes swap functions, reverse order of bytes:

	- _Py_bswap16(uint16_t)
	- _Py_bswap32(uint32_t)
	- _Py_bswap64(uint64_t)
	*/

	#ifndef Py_INTERNAL_BITUTILS_H
	#define Py_INTERNAL_BITUTILS_H
	#ifdef __cplusplus
	extern "C" {
	#endif

	#ifndef Py_BUILD_CORE
	# error "this header requires Py_BUILD_CORE define"
	#endif

	/* Define _PY_HAVE_BUILTIN_BSWAP when the compiler reports itself as
	   GCC 4.8 or newer. */
	#if defined(__GNUC__) \
	    && ((__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)))
	/* __builtin_bswap16() is available since GCC 4.8,
	   __builtin_bswap32() is available since GCC 4.3,
	   __builtin_bswap64() is available since GCC 4.3. */
	#  define _PY_HAVE_BUILTIN_BSWAP
	#endif

	#ifdef _MSC_VER
	/* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
	# include <intrin.h>
	#endif

	static inline uint16_t
	_Py_bswap16(uint16_t word)
	{
	#if defined(_PY_HAVE_BUILTIN_BSWAP) \|\| _Py__has_builtin(__builtin_bswap16)
	return __builtin_bswap16(word);
	#elif defined(_MSC_VER)
	Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
	return _byteswap_ushort(word);
	#else
	// Portable implementation which doesn't rely on circular bit shift
	return ( ((word & UINT16_C(0x00FF)) << 8)
	\| ((word & UINT16_C(0xFF00)) >> 8));
	#endif
	}

	static inline uint32_t
	_Py_bswap32(uint32_t word)
	{
	#if defined(_PY_HAVE_BUILTIN_BSWAP) \|\| _Py__has_builtin(__builtin_bswap32)
	return __builtin_bswap32(word);
	#elif defined(_MSC_VER)
	Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
	return _byteswap_ulong(word);
	#else
	// Portable implementation which doesn't rely on circular bit shift
	return ( ((word & UINT32_C(0x000000FF)) << 24)
	\| ((word & UINT32_C(0x0000FF00)) << 8)
	\| ((word & UINT32_C(0x00FF0000)) >> 8)
	\| ((word & UINT32_C(0xFF000000)) >> 24));
	#endif
	}

	static inline uint64_t
	_Py_bswap64(uint64_t word)
	{
	#if defined(_PY_HAVE_BUILTIN_BSWAP) \|\| _Py__has_builtin(__builtin_bswap64)
	return __builtin_bswap64(word);
	#elif defined(_MSC_VER)
	return _byteswap_uint64(word);
	#else
	// Portable implementation which doesn't rely on circular bit shift
	return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
	\| ((word & UINT64_C(0x000000000000FF00)) << 40)
	\| ((word & UINT64_C(0x0000000000FF0000)) << 24)
	\| ((word & UINT64_C(0x00000000FF000000)) << 8)
	\| ((word & UINT64_C(0x000000FF00000000)) >> 8)
	\| ((word & UINT64_C(0x0000FF0000000000)) >> 24)
	\| ((word & UINT64_C(0x00FF000000000000)) >> 40)
	\| ((word & UINT64_C(0xFF00000000000000)) >> 56));
	#endif
	}


	// Population count: count the number of 1's in 'x'
	// (number of bits set to 1), also known as the hamming weight.
	//
	// Implementation note: to keep the implementation simple, CPUID is not used
	// to test whether the x86 POPCNT instruction is available. For this reason,
	// the Visual Studio __popcnt() intrinsic is not used either. The clang and
	// GCC builtin functions can use the x86 POPCNT instruction if the target
	// architecture has SSE4a or newer.
	static inline int
	_Py_popcount32(uint32_t x)
	{
	#if (defined(__clang__) || defined(__GNUC__))

	#if SIZEOF_INT >= 4
	    // unsigned int is wide enough to receive a uint32_t
	    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
	    return __builtin_popcount(x);
	#else
	    // The C standard guarantees that unsigned long will always be big enough
	    // to hold a uint32_t value without losing information.
	    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
	    return __builtin_popcountl(x);
	#endif

	#else
	    // 32-bit SWAR (SIMD Within A Register) popcount

	    // Binary: 0 1 0 1 ...
	    const uint32_t M1 = 0x55555555;
	    // Binary: 00 11 00 11 ...
	    const uint32_t M2 = 0x33333333;
	    // Binary: 0000 1111 0000 1111 ...
	    const uint32_t M4 = 0x0F0F0F0F;
	    // 256**3 + 256**2 + 256**1 + 256**0: a 1 in every byte
	    const uint32_t SUM = 0x01010101;

	    // Put count of each 2 bits into those 2 bits
	    x = x - ((x >> 1) & M1);
	    // Put count of each 4 bits into those 4 bits
	    x = (x & M2) + ((x >> 2) & M2);
	    // Put count of each 8 bits into those 8 bits
	    x = (x + (x >> 4)) & M4;
	    // Sum of the 4 byte counts: the multiplication adds them together in
	    // the most significant byte of the low 32 bits
	    return (uint32_t)((uint64_t)x * (uint64_t)SUM) >> 24;
	#endif
	}


	// Return the index of the most significant 1 bit in 'x'. This is the smallest
	// integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
	static inline int
	_Py_bit_length(unsigned long x)
	{
	#if (defined(__clang__) || defined(__GNUC__))
	    if (x != 0) {
	        // __builtin_clzl() is available since GCC 3.4.
	        // Undefined behavior for x == 0.
	        return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
	    }
	    else {
	        return 0;
	    }
	#elif defined(_MSC_VER)
	    // _BitScanReverse() is documented to search 32 bits.
	    Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
	    unsigned long msb;
	    if (_BitScanReverse(&msb, x)) {
	        // msb is a zero-based bit index: convert it to a bit count
	        return (int)msb + 1;
	    }
	    else {
	        return 0;
	    }
	#else
	    // Bit length of every value in the range 0..31
	    const int BIT_LENGTH_TABLE[32] = {
	        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
	        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
	    };
	    int msb = 0;
	    // Drop 6 bits per step until the remainder fits the table; a value
	    // in 32..63 becomes 0 and contributes exactly those 6 bits.
	    while (x >= 32) {
	        msb += 6;
	        x >>= 6;
	    }
	    msb += BIT_LENGTH_TABLE[x];
	    return msb;
	#endif
	}


	#ifdef __cplusplus
	}
	#endif
	#endif /* !Py_INTERNAL_BITUTILS_H */