GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_asin_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #include <stdio.h>
24 #include <math.h>
25 #include <inttypes.h>
26 
27 /* This is the number of terms of Taylor series to evaluate, increase this for more accuracy*/
28 #define ASIN_TERMS 2
29 
30 #ifndef INCLUDED_volk_32f_asin_32f_a_H
31 #define INCLUDED_volk_32f_asin_32f_a_H
32 
33 #ifdef LV_HAVE_SSE4_1
34 #include <smmintrin.h>
35 /*!
36  \brief Computes arcsine of input vector and stores results in output vector
37  \param bVector The vector where results will be stored
38  \param aVector The input vector of floats
39  \param num_points Number of points for which arcsine is to be computed
40 */
41 static inline void volk_32f_asin_32f_a_sse4_1(float* bVector, const float* aVector, unsigned int num_points){
42 
43  float* bPtr = bVector;
44  const float* aPtr = aVector;
45 
46  unsigned int number = 0;
47  unsigned int quarterPoints = num_points / 4;
48  int i, j;
49 
50  __m128 aVal, pio2, x, y, z, arcsine;
51  __m128 fzeroes, fones, ftwos, ffours, condition;
52 
53  pio2 = _mm_set1_ps(3.14159265358979323846/2);
54  fzeroes = _mm_setzero_ps();
55  fones = _mm_set1_ps(1.0);
56  ftwos = _mm_set1_ps(2.0);
57  ffours = _mm_set1_ps(4.0);
58 
59  for(;number < quarterPoints; number++){
60  aVal = _mm_load_ps(aPtr);
61  aVal = _mm_div_ps(aVal, _mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal), _mm_sub_ps(fones, aVal))));
62  z = aVal;
63  condition = _mm_cmplt_ps(z, fzeroes);
64  z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
65  x = z;
66  condition = _mm_cmplt_ps(z, fones);
67  x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), condition));
68 
69  for(i = 0; i < 2; i++){
70  x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
71  }
72  x = _mm_div_ps(fones, x);
73  y = fzeroes;
74  for(j = ASIN_TERMS - 1; j >=0 ; j--){
75  y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), _mm_set1_ps(pow(-1,j)/(2*j+1)));
76  }
77 
78  y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
79  condition = _mm_cmpgt_ps(z, fones);
80 
81  y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), condition));
82  arcsine = y;
83  condition = _mm_cmplt_ps(aVal, fzeroes);
84  arcsine = _mm_sub_ps(arcsine, _mm_and_ps(_mm_mul_ps(arcsine, ftwos), condition));
85 
86  _mm_store_ps(bPtr, arcsine);
87  aPtr += 4;
88  bPtr += 4;
89  }
90 
91  number = quarterPoints * 4;
92  for(;number < num_points; number++){
93  *bPtr++ = asin(*aPtr++);
94  }
95 }
96 
97 #endif /* LV_HAVE_SSE4_1 for aligned */
98 
99 #endif /* INCLUDED_volk_32f_asin_32f_a_H */
100 
101 #ifndef INCLUDED_volk_32f_asin_32f_u_H
102 #define INCLUDED_volk_32f_asin_32f_u_H
103 
104 #ifdef LV_HAVE_SSE4_1
105 #include <smmintrin.h>
106 /*!
107  \brief Computes arcsine of input vector and stores results in output vector
108  \param bVector The vector where results will be stored
109  \param aVector The input vector of floats
110  \param num_points Number of points for which arcsine is to be computed
111 */
112 static inline void volk_32f_asin_32f_u_sse4_1(float* bVector, const float* aVector, unsigned int num_points){
113 
114  float* bPtr = bVector;
115  const float* aPtr = aVector;
116 
117  unsigned int number = 0;
118  unsigned int quarterPoints = num_points / 4;
119  int i, j;
120 
121  __m128 aVal, pio2, x, y, z, arcsine;
122  __m128 fzeroes, fones, ftwos, ffours, condition;
123 
124  pio2 = _mm_set1_ps(3.14159265358979323846/2);
125  fzeroes = _mm_setzero_ps();
126  fones = _mm_set1_ps(1.0);
127  ftwos = _mm_set1_ps(2.0);
128  ffours = _mm_set1_ps(4.0);
129 
130  for(;number < quarterPoints; number++){
131  aVal = _mm_loadu_ps(aPtr);
132  aVal = _mm_div_ps(aVal, _mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal), _mm_sub_ps(fones, aVal))));
133  z = aVal;
134  condition = _mm_cmplt_ps(z, fzeroes);
135  z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
136  x = z;
137  condition = _mm_cmplt_ps(z, fones);
138  x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), condition));
139 
140  for(i = 0; i < 2; i++){
141  x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
142  }
143  x = _mm_div_ps(fones, x);
144  y = fzeroes;
145  for(j = ASIN_TERMS - 1; j >=0 ; j--){
146  y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), _mm_set1_ps(pow(-1,j)/(2*j+1)));
147  }
148 
149  y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
150  condition = _mm_cmpgt_ps(z, fones);
151 
152  y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), condition));
153  arcsine = y;
154  condition = _mm_cmplt_ps(aVal, fzeroes);
155  arcsine = _mm_sub_ps(arcsine, _mm_and_ps(_mm_mul_ps(arcsine, ftwos), condition));
156 
157  _mm_storeu_ps(bPtr, arcsine);
158  aPtr += 4;
159  bPtr += 4;
160  }
161 
162  number = quarterPoints * 4;
163  for(;number < num_points; number++){
164  *bPtr++ = asin(*aPtr++);
165  }
166 }
167 
168 #endif /* LV_HAVE_SSE4_1 for unaligned */
169 
170 #ifdef LV_HAVE_GENERIC
171 /*!
172  \brief Computes arcsine of input vector and stores results in output vector
173  \param bVector The vector where results will be stored
174  \param aVector The input vector of floats
175  \param num_points Number of points for which arcsine is to be computed
176 */
177 static inline void volk_32f_asin_32f_u_generic(float* bVector, const float* aVector, unsigned int num_points){
178  float* bPtr = bVector;
179  const float* aPtr = aVector;
180  unsigned int number = 0;
181 
182  for(number = 0; number < num_points; number++){
183  *bPtr++ = asin(*aPtr++);
184  }
185 
186 }
187 #endif /* LV_HAVE_GENERIC */
188 
189 #endif /* INCLUDED_volk_32f_asin_32f_u_H */
#define ASIN_TERMS
Definition: volk_32f_asin_32f.h:28