23 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
24 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
31 #include <smmintrin.h>
33 #ifdef LV_HAVE_LIB_SIMDMATH
44 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(
float* outputVector,
const lv_32fc_t* complexVector,
const float normalizeFactor,
unsigned int num_points){
45 const float* complexVectorPtr = (
float*)complexVector;
46 float* outPtr = outputVector;
48 unsigned int number = 0;
49 const float invNormalizeFactor = 1.0 / normalizeFactor;
51 #ifdef LV_HAVE_LIB_SIMDMATH
52 const unsigned int quarterPoints = num_points / 4;
53 __m128 testVector = _mm_set_ps1(2*
M_PI);
54 __m128 correctVector = _mm_set_ps1(
M_PI);
55 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
57 __m128 complex1, complex2, iValue, qValue;
60 for (; number < quarterPoints; number++) {
62 complex1 = _mm_load_ps(complexVectorPtr);
63 complexVectorPtr += 4;
64 complex2 = _mm_load_ps(complexVectorPtr);
65 complexVectorPtr += 4;
67 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
68 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
70 phase = atan2f4(qValue, iValue);
73 keepMask = _mm_cmpneq_ps(phase,testVector);
74 phase = _mm_blendv_ps(correctVector, phase, keepMask);
76 phase = _mm_mul_ps(phase, vNormalizeFactor);
77 _mm_store_ps((
float*)outPtr, phase);
80 number = quarterPoints * 4;
83 for (; number < num_points; number++) {
84 const float real = *complexVectorPtr++;
85 const float imag = *complexVectorPtr++;
86 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
93 #include <xmmintrin.h>
95 #ifdef LV_HAVE_LIB_SIMDMATH
106 static inline void volk_32fc_s32f_atan2_32f_a_sse(
float* outputVector,
const lv_32fc_t* complexVector,
const float normalizeFactor,
unsigned int num_points){
107 const float* complexVectorPtr = (
float*)complexVector;
108 float* outPtr = outputVector;
110 unsigned int number = 0;
111 const float invNormalizeFactor = 1.0 / normalizeFactor;
113 #ifdef LV_HAVE_LIB_SIMDMATH
114 const unsigned int quarterPoints = num_points / 4;
115 __m128 testVector = _mm_set_ps1(2*
M_PI);
116 __m128 correctVector = _mm_set_ps1(
M_PI);
117 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
119 __m128 complex1, complex2, iValue, qValue;
123 for (; number < quarterPoints; number++) {
125 complex1 = _mm_load_ps(complexVectorPtr);
126 complexVectorPtr += 4;
127 complex2 = _mm_load_ps(complexVectorPtr);
128 complexVectorPtr += 4;
130 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
131 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
133 phase = atan2f4(qValue, iValue);
136 keepMask = _mm_cmpneq_ps(phase,testVector);
137 phase = _mm_and_ps(phase, keepMask);
138 mask = _mm_andnot_ps(keepMask, correctVector);
139 phase = _mm_or_ps(phase, mask);
141 phase = _mm_mul_ps(phase, vNormalizeFactor);
142 _mm_store_ps((
float*)outPtr, phase);
145 number = quarterPoints * 4;
148 for (; number < num_points; number++) {
149 const float real = *complexVectorPtr++;
150 const float imag = *complexVectorPtr++;
151 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
156 #ifdef LV_HAVE_GENERIC
164 static inline void volk_32fc_s32f_atan2_32f_generic(
float* outputVector,
const lv_32fc_t* inputVector,
const float normalizeFactor,
unsigned int num_points){
165 float* outPtr = outputVector;
166 const float* inPtr = (
float*)inputVector;
167 const float invNormalizeFactor = 1.0 / normalizeFactor;
169 for ( number = 0; number < num_points; number++) {
170 const float real = *inPtr++;
171 const float imag = *inPtr++;
172 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
/*
 * Cross-references for identifiers used above:
 *   M_PI      - "#define M_PI"; definition: volk/cmake/msvc/config.h:42
 *   lv_32fc_t - "float complex lv_32fc_t"; definition: volk_complex.h:56
 */