23 #ifndef INCLUDED_volk_32f_sqrt_32f_a_H
24 #define INCLUDED_volk_32f_sqrt_32f_a_H
31 #include <xmmintrin.h>
38 static inline void volk_32f_sqrt_32f_a_sse(
float* cVector,
const float* aVector,
unsigned int num_points){
39 unsigned int number = 0;
40 const unsigned int quarterPoints = num_points / 4;
42 float* cPtr = cVector;
43 const float* aPtr = aVector;
46 for(;number < quarterPoints; number++){
48 aVal = _mm_load_ps(aPtr);
50 cVal = _mm_sqrt_ps(aVal);
52 _mm_store_ps(cPtr,cVal);
58 number = quarterPoints * 4;
59 for(;number < num_points; number++){
60 *cPtr++ = sqrtf(*aPtr++);
/*!
  \brief Computes the element-wise square root of a float vector using NEON.

  The vector path derives sqrt(x) as the reciprocal of the reciprocal square
  root, using the vrsqrteq_f32 / vrecpeq_f32 estimate intrinsics with no
  refinement step, so those lanes hold an approximation of the square root.
  The remaining (num_points % 4) elements are computed with sqrtf().

  \param cVector    Output buffer; receives the result for every i in
                    [0, num_points).
  \param aVector    Input buffer.
  \param num_points Number of floats to process.
*/
static inline void volk_32f_sqrt_32f_neon(
    float* cVector,
    const float* aVector,
    unsigned int num_points){
    float* cPtr = cVector;
    const float* aPtr = aVector;
    unsigned int number = 0;
    unsigned int quarter_points = num_points / 4;
    float32x4_t in_vec, out_vec;

    for(number = 0; number < quarter_points; number++){
        in_vec = vld1q_f32(aPtr);
        /* sqrt(x) ~= 1 / (1 / sqrt(x)), both steps are hardware estimates. */
        out_vec = vrecpeq_f32(vrsqrteq_f32(in_vec) );
        vst1q_f32(cPtr, out_vec);

        /* Advance both cursors past the four lanes just processed. */
        aPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the pointers already sit on the first unprocessed element. */
    for(number = quarter_points * 4; number < num_points; number++){
        *cPtr++ = sqrtf(*aPtr++);
    }
}
96 #ifdef LV_HAVE_GENERIC
103 static inline void volk_32f_sqrt_32f_generic(
float* cVector,
const float* aVector,
unsigned int num_points){
104 float* cPtr = cVector;
105 const float* aPtr = aVector;
106 unsigned int number = 0;
108 for(number = 0; number < num_points; number++){
109 *cPtr++ = sqrtf(*aPtr++);
/*!
  \brief Declaration of the square-root kernel implemented outside this header.

  Parameter names mirror the arguments passed by volk_32f_sqrt_32f_u_orc().
  NOTE(review): presumably (output, input, element count) like the sibling
  kernels in this file - confirm against the implementation.
*/
extern void volk_32f_sqrt_32f_a_orc_impl(
    float* cVector,
    const float* aVector,
    unsigned int num_points);
/*!
  \brief Square-root kernel entry point that forwards to the ORC implementation.

  All three arguments are passed through unchanged to
  volk_32f_sqrt_32f_a_orc_impl().

  NOTE(review): this wrapper is named "_u_" but delegates to an "_a_" impl;
  confirm whether the implementation places alignment requirements on the
  buffers.

  \param cVector    Output buffer handed to the implementation.
  \param aVector    Input buffer handed to the implementation.
  \param num_points Element count handed to the implementation.
*/
static inline void volk_32f_sqrt_32f_u_orc(
    float* cVector,
    const float* aVector,
    unsigned int num_points){
    volk_32f_sqrt_32f_a_orc_impl(cVector, aVector, num_points);
}