GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32fc_magnitude_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
24 #define INCLUDED_volk_32fc_magnitude_32f_u_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 #include <math.h>
29 
30 #ifdef LV_HAVE_AVX
31 #include <immintrin.h>
32  /*!
33  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
34  \param complexVector The vector containing the complex input values
35  \param magnitudeVector The vector containing the real output values
36  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
37  */
38 static inline void volk_32fc_magnitude_32f_u_avx(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
39  unsigned int number = 0;
40  const unsigned int eighthPoints = num_points / 8;
41 
42  const float* complexVectorPtr = (float*)complexVector;
43  float* magnitudeVectorPtr = magnitudeVector;
44 
45  __m256 cplxValue1, cplxValue2, complex1, complex2, result;
46  for(;number < eighthPoints; number++){
47  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
48  complexVectorPtr += 8;
49 
50  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
51  complexVectorPtr += 8;
52 
53  cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
54  cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
55 
56  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
57  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
58 
59  result = _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
60 
61  result = _mm256_sqrt_ps(result);
62 
63  _mm256_storeu_ps(magnitudeVectorPtr, result);
64  magnitudeVectorPtr += 8;
65  }
66 
67  number = eighthPoints * 8;
68  for(; number < num_points; number++){
69  float val1Real = *complexVectorPtr++;
70  float val1Imag = *complexVectorPtr++;
71  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
72  }
73 }
74 #endif /* LV_HAVE_AVX */
75 
76 #ifdef LV_HAVE_SSE3
77 #include <pmmintrin.h>
78  /*!
79  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
80  \param complexVector The vector containing the complex input values
81  \param magnitudeVector The vector containing the real output values
82  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
83  */
84 static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
85  unsigned int number = 0;
86  const unsigned int quarterPoints = num_points / 4;
87 
88  const float* complexVectorPtr = (float*)complexVector;
89  float* magnitudeVectorPtr = magnitudeVector;
90 
91  __m128 cplxValue1, cplxValue2, result;
92  for(;number < quarterPoints; number++){
93  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
94  complexVectorPtr += 4;
95 
96  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
97  complexVectorPtr += 4;
98 
99  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
100  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
101 
102  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
103 
104  result = _mm_sqrt_ps(result);
105 
106  _mm_storeu_ps(magnitudeVectorPtr, result);
107  magnitudeVectorPtr += 4;
108  }
109 
110  number = quarterPoints * 4;
111  for(; number < num_points; number++){
112  float val1Real = *complexVectorPtr++;
113  float val1Imag = *complexVectorPtr++;
114  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
115  }
116 }
117 #endif /* LV_HAVE_SSE3 */
118 
119 #ifdef LV_HAVE_SSE
120 #include <xmmintrin.h>
121  /*!
122  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
123  \param complexVector The vector containing the complex input values
124  \param magnitudeVector The vector containing the real output values
125  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
126  */
127 static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
128  unsigned int number = 0;
129  const unsigned int quarterPoints = num_points / 4;
130 
131  const float* complexVectorPtr = (float*)complexVector;
132  float* magnitudeVectorPtr = magnitudeVector;
133 
134  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
135  for(;number < quarterPoints; number++){
136  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
137  complexVectorPtr += 4;
138 
139  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
140  complexVectorPtr += 4;
141 
142  // Arrange in i1i2i3i4 format
143  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
144  // Arrange in q1q2q3q4 format
145  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
146 
147  iValue = _mm_mul_ps(iValue, iValue); // Square the I values
148  qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
149 
150  result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
151 
152  result = _mm_sqrt_ps(result);
153 
154  _mm_storeu_ps(magnitudeVectorPtr, result);
155  magnitudeVectorPtr += 4;
156  }
157 
158  number = quarterPoints * 4;
159  for(; number < num_points; number++){
160  float val1Real = *complexVectorPtr++;
161  float val1Imag = *complexVectorPtr++;
162  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
163  }
164 }
165 #endif /* LV_HAVE_SSE */
166 
167 #ifdef LV_HAVE_GENERIC
168  /*!
169  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
170  \param complexVector The vector containing the complex input values
171  \param magnitudeVector The vector containing the real output values
172  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
173  */
174 static inline void volk_32fc_magnitude_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
175  const float* complexVectorPtr = (float*)complexVector;
176  float* magnitudeVectorPtr = magnitudeVector;
177  unsigned int number = 0;
178  for(number = 0; number < num_points; number++){
179  const float real = *complexVectorPtr++;
180  const float imag = *complexVectorPtr++;
181  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
182  }
183 }
184 #endif /* LV_HAVE_GENERIC */
185 
186 #endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
187 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
188 #define INCLUDED_volk_32fc_magnitude_32f_a_H
189 
190 #include <inttypes.h>
191 #include <stdio.h>
192 #include <math.h>
193 
194 #ifdef LV_HAVE_AVX
195 #include <immintrin.h>
196  /*!
197  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
198  \param complexVector The vector containing the complex input values
199  \param magnitudeVector The vector containing the real output values
200  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
201  */
202 static inline void volk_32fc_magnitude_32f_a_avx(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
203  unsigned int number = 0;
204  const unsigned int eighthPoints = num_points / 8;
205 
206  const float* complexVectorPtr = (float*)complexVector;
207  float* magnitudeVectorPtr = magnitudeVector;
208 
209  __m256 cplxValue1, cplxValue2, complex1, complex2, result;
210  for(;number < eighthPoints; number++){
211  cplxValue1 = _mm256_load_ps(complexVectorPtr);
212  complexVectorPtr += 8;
213 
214  cplxValue2 = _mm256_load_ps(complexVectorPtr);
215  complexVectorPtr += 8;
216 
217  cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
218  cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
219 
220  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
221  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
222 
223  result = _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
224 
225  result = _mm256_sqrt_ps(result);
226 
227  _mm256_store_ps(magnitudeVectorPtr, result);
228  magnitudeVectorPtr += 8;
229  }
230 
231  number = eighthPoints * 8;
232  for(; number < num_points; number++){
233  float val1Real = *complexVectorPtr++;
234  float val1Imag = *complexVectorPtr++;
235  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
236  }
237 }
238 #endif /* LV_HAVE_AVX */
239 
240 #ifdef LV_HAVE_SSE3
241 #include <pmmintrin.h>
242  /*!
243  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
244  \param complexVector The vector containing the complex input values
245  \param magnitudeVector The vector containing the real output values
246  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
247  */
248 static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
249  unsigned int number = 0;
250  const unsigned int quarterPoints = num_points / 4;
251 
252  const float* complexVectorPtr = (float*)complexVector;
253  float* magnitudeVectorPtr = magnitudeVector;
254 
255  __m128 cplxValue1, cplxValue2, result;
256  for(;number < quarterPoints; number++){
257  cplxValue1 = _mm_load_ps(complexVectorPtr);
258  complexVectorPtr += 4;
259 
260  cplxValue2 = _mm_load_ps(complexVectorPtr);
261  complexVectorPtr += 4;
262 
263  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
264  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
265 
266  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
267 
268  result = _mm_sqrt_ps(result);
269 
270  _mm_store_ps(magnitudeVectorPtr, result);
271  magnitudeVectorPtr += 4;
272  }
273 
274  number = quarterPoints * 4;
275  for(; number < num_points; number++){
276  float val1Real = *complexVectorPtr++;
277  float val1Imag = *complexVectorPtr++;
278  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
279  }
280 }
281 #endif /* LV_HAVE_SSE3 */
282 
283 #ifdef LV_HAVE_SSE
284 #include <xmmintrin.h>
285  /*!
286  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
287  \param complexVector The vector containing the complex input values
288  \param magnitudeVector The vector containing the real output values
289  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
290  */
291 static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
292  unsigned int number = 0;
293  const unsigned int quarterPoints = num_points / 4;
294 
295  const float* complexVectorPtr = (float*)complexVector;
296  float* magnitudeVectorPtr = magnitudeVector;
297 
298  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
299  for(;number < quarterPoints; number++){
300  cplxValue1 = _mm_load_ps(complexVectorPtr);
301  complexVectorPtr += 4;
302 
303  cplxValue2 = _mm_load_ps(complexVectorPtr);
304  complexVectorPtr += 4;
305 
306  // Arrange in i1i2i3i4 format
307  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
308  // Arrange in q1q2q3q4 format
309  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
310 
311  iValue = _mm_mul_ps(iValue, iValue); // Square the I values
312  qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
313 
314  result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
315 
316  result = _mm_sqrt_ps(result);
317 
318  _mm_store_ps(magnitudeVectorPtr, result);
319  magnitudeVectorPtr += 4;
320  }
321 
322  number = quarterPoints * 4;
323  for(; number < num_points; number++){
324  float val1Real = *complexVectorPtr++;
325  float val1Imag = *complexVectorPtr++;
326  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
327  }
328 }
329 #endif /* LV_HAVE_SSE */
330 
331 #ifdef LV_HAVE_GENERIC
332  /*!
333  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
334  \param complexVector The vector containing the complex input values
335  \param magnitudeVector The vector containing the real output values
336  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
337  */
338 static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
339  const float* complexVectorPtr = (float*)complexVector;
340  float* magnitudeVectorPtr = magnitudeVector;
341  unsigned int number = 0;
342  for(number = 0; number < num_points; number++){
343  const float real = *complexVectorPtr++;
344  const float imag = *complexVectorPtr++;
345  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
346  }
347 }
348 #endif /* LV_HAVE_GENERIC */
349 
350 #ifdef LV_HAVE_NEON
351 #include <arm_neon.h>
352 
353  /*!
354  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
355  \param complexVector The vector containing the complex input values
356  \param magnitudeVector The vector containing the real output values
357  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
358  */
359 static inline void volk_32fc_magnitude_32f_neon(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
360  unsigned int number;
361  unsigned int quarter_points = num_points / 4;
362  const float* complexVectorPtr = (float*)complexVector;
363  float* magnitudeVectorPtr = magnitudeVector;
364 
365  float32x4x2_t complex_vec;
366  float32x4_t magnitude_vec;
367  for(number = 0; number < quarter_points; number++){
368  complex_vec = vld2q_f32(complexVectorPtr);
369  complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
370  magnitude_vec = vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);
371  magnitude_vec = vrsqrteq_f32(magnitude_vec);
372  magnitude_vec = vrecpeq_f32( magnitude_vec ); // no plain ol' sqrt
373  vst1q_f32(magnitudeVectorPtr, magnitude_vec);
374 
375  complexVectorPtr += 8;
376  magnitudeVectorPtr += 4;
377  }
378 
379  for(number = quarter_points*4; number < num_points; number++){
380  const float real = *complexVectorPtr++;
381  const float imag = *complexVectorPtr++;
382  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
383  }
384 }
385 #endif /* LV_HAVE_NEON */
386 
387 #ifdef LV_HAVE_NEON
388  /*!
389  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
390 
391  This is an approximation from "Streamlining Digital Signal Processing" by
392  Richard Lyons. Apparently max error is about 1% and mean error is about 0.6%.
393  The basic idea is to do a weighted sum of the abs. value of imag and real parts
394  where weight A is always assigned to max(imag, real) and B is always min(imag,real).
395  There are two pairs of cofficients chosen based on whether min <= 0.4142 max.
396  This method is called equiripple-error magnitude estimation proposed by Filip in '73
397 
398  \param complexVector The vector containing the complex input values
399  \param magnitudeVector The vector containing the real output values
400  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
401  */
402 static inline void volk_32fc_magnitude_32f_neon_fancy_sweet(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
403  unsigned int number;
404  unsigned int quarter_points = num_points / 4;
405  const float* complexVectorPtr = (float*)complexVector;
406  float* magnitudeVectorPtr = magnitudeVector;
407 
408  const float threshold = 0.4142135;
409 
410  float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
411  a_high = vdupq_n_f32( 0.84 );
412  b_high = vdupq_n_f32( 0.561);
413  a_low = vdupq_n_f32( 0.99 );
414  b_low = vdupq_n_f32( 0.197);
415 
416  uint32x4_t comp0, comp1;
417 
418  float32x4x2_t complex_vec;
419  float32x4_t min_vec, max_vec, magnitude_vec;
420  float32x4_t real_abs, imag_abs;
421  for(number = 0; number < quarter_points; number++){
422  complex_vec = vld2q_f32(complexVectorPtr);
423 
424  real_abs = vabsq_f32(complex_vec.val[0]);
425  imag_abs = vabsq_f32(complex_vec.val[1]);
426 
427  min_vec = vminq_f32(real_abs, imag_abs);
428  max_vec = vmaxq_f32(real_abs, imag_abs);
429 
430  // effective branch to choose coefficient pair.
431  comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
432  comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
433 
434  // and 0s or 1s with coefficients from previous effective branch
435  a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)a_high), vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
436  b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)b_high), vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
437 
438  // coefficients chosen, do the weighted sum
439  min_vec = vmulq_f32(min_vec, b_vec);
440  max_vec = vmulq_f32(max_vec, a_vec);
441 
442  magnitude_vec = vaddq_f32(min_vec, max_vec);
443  vst1q_f32(magnitudeVectorPtr, magnitude_vec);
444 
445  complexVectorPtr += 8;
446  magnitudeVectorPtr += 4;
447  }
448 
449  for(number = quarter_points*4; number < num_points; number++){
450  const float real = *complexVectorPtr++;
451  const float imag = *complexVectorPtr++;
452  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
453  }
454 }
455 #endif /* LV_HAVE_NEON */
456 
457 
458 #ifdef LV_HAVE_ORC
459  /*!
460  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
461  \param complexVector The vector containing the complex input values
462  \param magnitudeVector The vector containing the real output values
463  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
464  */
465 extern void volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points);
466 static inline void volk_32fc_magnitude_32f_u_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
467  volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
468 }
469 #endif /* LV_HAVE_ORC */
470 
471 
472 #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
float complex lv_32fc_t
Definition: volk_complex.h:56