23 #ifndef INCLUDED_volk_64u_popcnt_a_H
24 #define INCLUDED_volk_64u_popcnt_a_H
30 #ifdef LV_HAVE_GENERIC
/*
 * Portable population count for a 64-bit word.
 *
 * The word is split into its two 32-bit halves. Each half is reduced with
 * the same branch-free sequence of masked adds, and the two partial counts
 * are summed.
 *
 * ret   - output; receives the number of set bits in value (0..64).
 * value - the 64-bit word whose set bits are counted.
 */
static inline void volk_64u_popcnt_generic(uint64_t* ret, const uint64_t value)
{
    /* Low half: bits 0..31. */
    uint32_t retVal = (uint32_t)(value & 0x00000000FFFFFFFFull);

    /* Sum adjacent bits into 2-bit fields, then 2-bit fields into 4-bit fields. */
    retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
    retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
    /* Fold nibbles into per-byte counts (each at most 8, so no carry out). */
    retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
    /* Fold the four byte counts together; only the low byte is meaningful. */
    retVal = (retVal + (retVal >> 8));
    /* A 32-bit half has at most 32 set bits, which fits in 6 bits. */
    retVal = (retVal + (retVal >> 16)) & 0x0000003F;
    uint64_t retVal64 = retVal;

    /*
     * High half: bits 32..63. The shift must be the full 32 bits; a shift
     * of 31 would push bit 63 out of the 32-bit result and lose it.
     */
    retVal = (uint32_t)(value >> 32);
    retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
    retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
    retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
    retVal = (retVal + (retVal >> 8));
    retVal = (retVal + (retVal >> 16)) & 0x0000003F;
    retVal64 += retVal;

    *ret = retVal64;
}
63 #if LV_HAVE_SSE4_2 && LV_HAVE_64
65 #include <nmmintrin.h>
/*
 * Population count for a 64-bit word using the _mm_popcnt_u64 intrinsic.
 *
 * ret   - output; receives the value returned by _mm_popcnt_u64(value).
 * value - the 64-bit word passed to the intrinsic.
 */
static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value)
{
    const uint64_t bit_count = (uint64_t)_mm_popcnt_u64(value);

    *ret = bit_count;
}
76 static inline void volk_64u_popcnt_neon(
uint64_t* ret,
const uint64_t value) {
77 uint8x8_t input_val, count8x8_val;
78 uint16x4_t count16x4_val;
79 uint32x2_t count32x2_val;
80 uint64x1_t count64x1_val;
82 input_val = vld1_u8((
unsigned char *) &value);
83 count8x8_val = vcnt_u8(input_val);
84 count16x4_val = vpaddl_u8(count8x8_val);
85 count32x2_val = vpaddl_u16(count16x4_val);
86 count64x1_val = vpaddl_u32(count32x2_val);
87 vst1_u64(ret, count64x1_val);
unsigned int uint32_t
Definition: stdint.h:80
unsigned __int64 uint64_t
Definition: stdint.h:90