GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_atan_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #include <stdio.h>
24 #include <math.h>
25 #include <inttypes.h>
26 
27 /* This is the number of terms of Taylor series to evaluate, increase this for more accuracy*/
28 #define TERMS 2
29 
30 #ifndef INCLUDED_volk_32f_atan_32f_a_H
31 #define INCLUDED_volk_32f_atan_32f_a_H
32 
33 #ifdef LV_HAVE_SSE4_1
34 #include <smmintrin.h>
35 /*!
36  \brief Computes arctangent of input vector and stores results in output vector
37  \param bVector The vector where results will be stored
38  \param aVector The input vector of floats
39  \param num_points Number of points for which arctangent is to be computed
40 */
41 static inline void volk_32f_atan_32f_a_sse4_1(float* bVector, const float* aVector, unsigned int num_points){
42 
43  float* bPtr = bVector;
44  const float* aPtr = aVector;
45 
46  unsigned int number = 0;
47  unsigned int quarterPoints = num_points / 4;
48  int i, j;
49 
50  __m128 aVal, pio2, x, y, z, arctangent;
51  __m128 fzeroes, fones, ftwos, ffours, condition;
52 
53  pio2 = _mm_set1_ps(3.14159265358979323846/2);
54  fzeroes = _mm_setzero_ps();
55  fones = _mm_set1_ps(1.0);
56  ftwos = _mm_set1_ps(2.0);
57  ffours = _mm_set1_ps(4.0);
58 
59  for(;number < quarterPoints; number++){
60  aVal = _mm_load_ps(aPtr);
61  z = aVal;
62  condition = _mm_cmplt_ps(z, fzeroes);
63  z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
64  x = z;
65  condition = _mm_cmplt_ps(z, fones);
66  x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), condition));
67 
68  for(i = 0; i < 2; i++){
69  x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
70  }
71  x = _mm_div_ps(fones, x);
72  y = fzeroes;
73  for(j = TERMS - 1; j >=0 ; j--){
74  y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), _mm_set1_ps(pow(-1,j)/(2*j+1)));
75  }
76 
77  y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
78  condition = _mm_cmpgt_ps(z, fones);
79 
80  y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), condition));
81  arctangent = y;
82  condition = _mm_cmplt_ps(aVal, fzeroes);
83  arctangent = _mm_sub_ps(arctangent, _mm_and_ps(_mm_mul_ps(arctangent, ftwos), condition));
84 
85  _mm_store_ps(bPtr, arctangent);
86  aPtr += 4;
87  bPtr += 4;
88  }
89 
90  number = quarterPoints * 4;
91  for(;number < num_points; number++){
92  *bPtr++ = atan(*aPtr++);
93  }
94 }
95 
96 #endif /* LV_HAVE_SSE4_1 for aligned */
97 
98 #endif /* INCLUDED_volk_32f_atan_32f_a_H */
99 
100 #ifndef INCLUDED_volk_32f_atan_32f_u_H
101 #define INCLUDED_volk_32f_atan_32f_u_H
102 
103 #ifdef LV_HAVE_SSE4_1
104 #include <smmintrin.h>
105 /*!
106  \brief Computes arctangent of input vector and stores results in output vector
107  \param bVector The vector where results will be stored
108  \param aVector The input vector of floats
109  \param num_points Number of points for which arctangent is to be computed
110 */
111 static inline void volk_32f_atan_32f_u_sse4_1(float* bVector, const float* aVector, unsigned int num_points){
112 
113  float* bPtr = bVector;
114  const float* aPtr = aVector;
115 
116  unsigned int number = 0;
117  unsigned int quarterPoints = num_points / 4;
118  int i, j;
119 
120  __m128 aVal, pio2, x, y, z, arctangent;
121  __m128 fzeroes, fones, ftwos, ffours, condition;
122 
123  pio2 = _mm_set1_ps(3.14159265358979323846/2);
124  fzeroes = _mm_setzero_ps();
125  fones = _mm_set1_ps(1.0);
126  ftwos = _mm_set1_ps(2.0);
127  ffours = _mm_set1_ps(4.0);
128 
129  for(;number < quarterPoints; number++){
130  aVal = _mm_loadu_ps(aPtr);
131  z = aVal;
132  condition = _mm_cmplt_ps(z, fzeroes);
133  z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
134  x = z;
135  condition = _mm_cmplt_ps(z, fones);
136  x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), condition));
137 
138  for(i = 0; i < 2; i++) x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
139  x = _mm_div_ps(fones, x);
140  y = fzeroes;
141  for(j = TERMS - 1; j >= 0; j--) y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), _mm_set1_ps(pow(-1,j)/(2*j+1)));
142 
143  y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
144  condition = _mm_cmpgt_ps(z, fones);
145 
146  y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), condition));
147  arctangent = y;
148  condition = _mm_cmplt_ps(aVal, fzeroes);
149  arctangent = _mm_sub_ps(arctangent, _mm_and_ps(_mm_mul_ps(arctangent, ftwos), condition));
150 
151  _mm_storeu_ps(bPtr, arctangent);
152  aPtr += 4;
153  bPtr += 4;
154  }
155 
156  number = quarterPoints * 4;
157  for(;number < num_points; number++){
158  *bPtr++ = atan(*aPtr++);
159  }
160 }
161 
162 #endif /* LV_HAVE_SSE4_1 for unaligned */
163 
164 #ifdef LV_HAVE_GENERIC
165 /*!
166  \brief Computes arctangent of input vector and stores results in output vector
167  \param bVector The vector where results will be stored
168  \param aVector The input vector of floats
169  \param num_points Number of points for which arctangent is to be computed
170 */
171 static inline void volk_32f_atan_32f_generic(float* bVector, const float* aVector, unsigned int num_points){
172  float* bPtr = bVector;
173  const float* aPtr = aVector;
174  unsigned int number = 0;
175 
176  for(number = 0; number < num_points; number++){
177  *bPtr++ = atan(*aPtr++);
178  }
179 
180 }
181 #endif /* LV_HAVE_GENERIC */
182 
183 #endif /* INCLUDED_volk_32f_atan_32f_u_H */
#define TERMS
Definition: volk_32f_atan_32f.h:28