23 #ifndef INCLUDED_volk_32f_invsqrt_32f_a_H
24 #define INCLUDED_volk_32f_invsqrt_32f_a_H
/**
 * Fast approximate reciprocal square root of a single float.
 *
 * The float's bit pattern is reinterpreted as a 32-bit integer, halved and
 * subtracted from the constant 0x5f3759df to obtain a first estimate, which
 * is then refined with one Newton-Raphson step
 * (y = y * (1.5 - 0.5 * number * y * y)).
 *
 * Requires int32_t (from <stdint.h> / <inttypes.h>) to be in scope.
 *
 * NOTE(review): the declarations of x2 and u, the initial assignments and the
 * return statement were missing from the listing this block was recovered
 * from; they are restored from their visible uses. Confirm against upstream.
 *
 * \param number Input value. NOTE(review): presumably expected to be a
 *               positive finite number; other inputs are not handled
 *               specially here.
 * \return Approximation of 1 / sqrt(number).
 */
static inline float Q_rsqrt(float number)
{
  const float threehalfs = 1.5F;
  const float x2 = number * 0.5F;

  /* The union gives access to the same 32 bits as float and as integer. */
  union f32_to_i32 {
    int32_t i;
    float f;
  } u;

  u.f = number;
  u.i = 0x5f3759df - ( u.i >> 1 );                 /* initial estimate from the bit pattern */
  u.f = u.f * ( threehalfs - ( x2 * u.f * u.f ) ); /* one Newton-Raphson refinement */

  return u.f;
}
50 #include <immintrin.h>
/**
 * Writes an approximate reciprocal square root of every element of aVector
 * into cVector, processing eight floats per iteration with AVX.
 *
 * Uses the aligned load/store intrinsics (_mm256_load_ps / _mm256_store_ps),
 * so both buffers must satisfy the alignment those intrinsics require.
 * Elements left over after the last full group of eight are handled one at a
 * time with Q_rsqrt().
 *
 * NOTE(review): the vector declarations, loop braces, pointer advances and the
 * tail-loop body were missing from the listing this block was recovered from;
 * they are restored from the visible statements. Confirm against upstream.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    Input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_invsqrt_32f_a_avx(float* cVector, const float* aVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  __m256 aVal, cVal;

  /* Vector part: eight elements per iteration. */
  for (; number < eighthPoints; number++)
  {
    aVal = _mm256_load_ps(aPtr);
    cVal = _mm256_rsqrt_ps(aVal);
    _mm256_store_ps(cPtr, cVal);
    aPtr += 8;
    cPtr += 8;
  }

  /* Scalar tail: the remaining num_points % 8 elements. */
  number = eighthPoints * 8;
  for(;number < num_points; number++)
  {
    *cPtr++ = Q_rsqrt(*aPtr++);
  }
}
81 #include <xmmintrin.h>
/**
 * Writes an approximate reciprocal square root of every element of aVector
 * into cVector, processing four floats per iteration with SSE.
 *
 * Uses the aligned load/store intrinsics (_mm_load_ps / _mm_store_ps), so
 * both buffers must satisfy the alignment those intrinsics require. Elements
 * left over after the last full group of four are handled one at a time with
 * Q_rsqrt().
 *
 * NOTE(review): the vector declarations, pointer advances, closing braces and
 * the tail-loop body were missing from the listing this block was recovered
 * from; they are restored from the visible statements. Confirm against
 * upstream.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    Input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_invsqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  __m128 aVal, cVal;

  /* Vector part: four elements per iteration. */
  for(;number < quarterPoints; number++){
    aVal = _mm_load_ps(aPtr);
    cVal = _mm_rsqrt_ps(aVal);
    _mm_store_ps(cPtr,cVal);
    aPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail: the remaining num_points % 4 elements. */
  number = quarterPoints * 4;
  for(;number < num_points; number++){
    *cPtr++ = Q_rsqrt(*aPtr++);
  }
}
116 #include <arm_neon.h>
/**
 * Writes an approximate reciprocal square root of every element of aVector
 * into cVector, processing four floats per iteration with NEON.
 *
 * Each group of four is loaded with vld1q_f32, estimated with vrsqrteq_f32
 * and stored with vst1q_f32; no refinement step is applied to the estimate.
 * Elements left over after the last full group of four are handled one at a
 * time with Q_rsqrt().
 *
 * NOTE(review): the declaration of `number`, the loop braces, pointer advances
 * and the tail-loop body were missing from the listing this block was
 * recovered from; they are restored from the visible statements. Confirm
 * against upstream.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    Input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_invsqrt_32f_neon(float* cVector, const float* aVector, unsigned int num_points){
  unsigned int number;
  const unsigned int quarter_points = num_points / 4;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  float32x4_t a_val, c_val;

  /* Vector part: four elements per iteration. */
  for (number = 0; number < quarter_points; ++number)
  {
    a_val = vld1q_f32(aPtr);
    c_val = vrsqrteq_f32(a_val);
    vst1q_f32(cPtr, c_val);
    aPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail: the remaining num_points % 4 elements. */
  for(number=quarter_points * 4;number < num_points; number++)
  {
    *cPtr++ = Q_rsqrt(*aPtr++);
  }
}
146 #ifdef LV_HAVE_GENERIC
/**
 * Portable fallback: writes an approximate reciprocal square root of every
 * element of aVector into cVector, one element at a time.
 *
 * NOTE(review): the loop body and closing braces were missing from the
 * listing this block was recovered from. The body is restored as a call to
 * Q_rsqrt(), the scalar helper this file defines and cross-references;
 * confirm against upstream.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    Input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_invsqrt_32f_generic(float* cVector, const float* aVector, unsigned int num_points){
  float* cPtr = cVector;
  const float* aPtr = aVector;
  unsigned int number = 0;
  for(number = 0; number < num_points; number++){
    *cPtr++ = Q_rsqrt(*aPtr++);
  }
}
164 #include <immintrin.h>
/**
 * Writes an approximate reciprocal square root of every element of aVector
 * into cVector, processing eight floats per iteration with AVX.
 *
 * Uses the unaligned load/store intrinsics (_mm256_loadu_ps /
 * _mm256_storeu_ps), so the buffers need no particular alignment. Elements
 * left over after the last full group of eight are handled one at a time
 * with Q_rsqrt().
 *
 * NOTE(review): the vector declarations, loop braces, pointer advances and the
 * tail-loop body were missing from the listing this block was recovered from;
 * they are restored from the visible statements. Confirm against upstream.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    Input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_invsqrt_32f_u_avx(float* cVector, const float* aVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  __m256 aVal, cVal;

  /* Vector part: eight elements per iteration. */
  for (; number < eighthPoints; number++)
  {
    aVal = _mm256_loadu_ps(aPtr);
    cVal = _mm256_rsqrt_ps(aVal);
    _mm256_storeu_ps(cPtr, cVal);
    aPtr += 8;
    cPtr += 8;
  }

  /* Scalar tail: the remaining num_points % 8 elements. */
  number = eighthPoints * 8;
  for(;number < num_points; number++)
  {
    *cPtr++ = Q_rsqrt(*aPtr++);
  }
}
/*
 * Cross-references from the generated source listing:
 *   static float Q_rsqrt(float number) -- definition: volk_32f_invsqrt_32f.h:31
 *   signed int int32_t                 -- definition: stdint.h:77
 */