GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32fc_s32f_atan2_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
24 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 #include <math.h>
29 
30 #ifdef LV_HAVE_SSE4_1
31 #include <smmintrin.h>
32 
33 #ifdef LV_HAVE_LIB_SIMDMATH
34 #include <simdmath.h>
35 #endif /* LV_HAVE_LIB_SIMDMATH */
36 
37 /*!
38  \brief performs the atan2 on the input vector and stores the results in the output vector.
39  \param outputVector The byte-aligned vector where the results will be stored.
40  \param inputVector The byte-aligned input vector containing interleaved IQ data (I = cos, Q = sin).
41  \param normalizeFactor The atan2 results will be divided by this normalization factor.
42  \param num_points The number of complex values in the input vector.
43 */
44 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
45  const float* complexVectorPtr = (float*)complexVector;
46  float* outPtr = outputVector;
47 
48  unsigned int number = 0;
49  const float invNormalizeFactor = 1.0 / normalizeFactor;
50 
51 #ifdef LV_HAVE_LIB_SIMDMATH
52  const unsigned int quarterPoints = num_points / 4;
53  __m128 testVector = _mm_set_ps1(2*M_PI);
54  __m128 correctVector = _mm_set_ps1(M_PI);
55  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
56  __m128 phase;
57  __m128 complex1, complex2, iValue, qValue;
58  __m128 keepMask;
59 
60  for (; number < quarterPoints; number++) {
61  // Load IQ data:
62  complex1 = _mm_load_ps(complexVectorPtr);
63  complexVectorPtr += 4;
64  complex2 = _mm_load_ps(complexVectorPtr);
65  complexVectorPtr += 4;
66  // Deinterleave IQ data:
67  iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
68  qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
69  // Arctan to get phase:
70  phase = atan2f4(qValue, iValue);
71  // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
72  // Compare to 2pi:
73  keepMask = _mm_cmpneq_ps(phase,testVector);
74  phase = _mm_blendv_ps(correctVector, phase, keepMask);
75  // done with above correction.
76  phase = _mm_mul_ps(phase, vNormalizeFactor);
77  _mm_store_ps((float*)outPtr, phase);
78  outPtr += 4;
79  }
80  number = quarterPoints * 4;
81 #endif /* LV_HAVE_SIMDMATH_H */
82 
83  for (; number < num_points; number++) {
84  const float real = *complexVectorPtr++;
85  const float imag = *complexVectorPtr++;
86  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
87  }
88 }
89 #endif /* LV_HAVE_SSE4_1 */
90 
91 
92 #ifdef LV_HAVE_SSE
93 #include <xmmintrin.h>
94 
95 #ifdef LV_HAVE_LIB_SIMDMATH
96 #include <simdmath.h>
97 #endif /* LV_HAVE_LIB_SIMDMATH */
98 
99 /*!
100  \brief performs the atan2 on the input vector and stores the results in the output vector.
101  \param outputVector The byte-aligned vector where the results will be stored.
102  \param inputVector The byte-aligned input vector containing interleaved IQ data (I = cos, Q = sin).
103  \param normalizeFactor The atan2 results will be divided by this normalization factor.
104  \param num_points The number of complex values in the input vector.
105 */
106 static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
107  const float* complexVectorPtr = (float*)complexVector;
108  float* outPtr = outputVector;
109 
110  unsigned int number = 0;
111  const float invNormalizeFactor = 1.0 / normalizeFactor;
112 
113 #ifdef LV_HAVE_LIB_SIMDMATH
114  const unsigned int quarterPoints = num_points / 4;
115  __m128 testVector = _mm_set_ps1(2*M_PI);
116  __m128 correctVector = _mm_set_ps1(M_PI);
117  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
118  __m128 phase;
119  __m128 complex1, complex2, iValue, qValue;
120  __m128 mask;
121  __m128 keepMask;
122 
123  for (; number < quarterPoints; number++) {
124  // Load IQ data:
125  complex1 = _mm_load_ps(complexVectorPtr);
126  complexVectorPtr += 4;
127  complex2 = _mm_load_ps(complexVectorPtr);
128  complexVectorPtr += 4;
129  // Deinterleave IQ data:
130  iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
131  qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
132  // Arctan to get phase:
133  phase = atan2f4(qValue, iValue);
134  // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
135  // Compare to 2pi:
136  keepMask = _mm_cmpneq_ps(phase,testVector);
137  phase = _mm_and_ps(phase, keepMask);
138  mask = _mm_andnot_ps(keepMask, correctVector);
139  phase = _mm_or_ps(phase, mask);
140  // done with above correction.
141  phase = _mm_mul_ps(phase, vNormalizeFactor);
142  _mm_store_ps((float*)outPtr, phase);
143  outPtr += 4;
144  }
145  number = quarterPoints * 4;
146 #endif /* LV_HAVE_SIMDMATH_H */
147 
148  for (; number < num_points; number++) {
149  const float real = *complexVectorPtr++;
150  const float imag = *complexVectorPtr++;
151  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
152  }
153 }
154 #endif /* LV_HAVE_SSE */
155 
156 #ifdef LV_HAVE_GENERIC
157 /*!
158  \brief performs the atan2 on the input vector and stores the results in the output vector.
159  \param outputVector The vector where the results will be stored.
160  \param inputVector Input vector containing interleaved IQ data (I = cos, Q = sin).
161  \param normalizeFactor The atan2 results will be divided by this normalization factor.
162  \param num_points The number of complex values in the input vector.
163 */
164 static inline void volk_32fc_s32f_atan2_32f_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){
165  float* outPtr = outputVector;
166  const float* inPtr = (float*)inputVector;
167  const float invNormalizeFactor = 1.0 / normalizeFactor;
168  unsigned int number;
169  for ( number = 0; number < num_points; number++) {
170  const float real = *inPtr++;
171  const float imag = *inPtr++;
172  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
173  }
174 }
175 #endif /* LV_HAVE_GENERIC */
176 
177 
178 
179 
180 #endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */
#define M_PI
Definition: volk/cmake/msvc/config.h:42
float complex lv_32fc_t
Definition: volk_complex.h:56