GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_expfast_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #include <stdio.h>
24 #include <math.h>
25 #include <inttypes.h>
26 
27 #define Mln2 0.6931471805f /* natural logarithm of 2 */
28 #define A 8388608.0f /* 2^23; the SIMD kernels scale the input by A/Mln2 before converting it to an integer */
29 #define B 1065353216.0f /* 127 * 2^23 = 0x3F800000, i.e. the IEEE-754 single-precision bit pattern of 1.0f */
30 #define C 60801.0f /* subtracted from B in the SIMD kernels (B-C); NOTE(review): presumably an error-tuning correction term -- confirm its derivation */
31 
32 
33 #ifndef INCLUDED_volk_32f_expfast_32f_a_H
34 #define INCLUDED_volk_32f_expfast_32f_a_H
35 
36 #ifdef LV_HAVE_AVX
37 #include <immintrin.h>
38 /*!
39  \brief Computes fast exp (max 7% error) of input vector and stores results in output vector
40  \param bVector The vector where results will be stored
41  \param aVector The input vector of floats
42  \param num_points Number of points for which exp is to be computed
43 */
44 static inline void volk_32f_expfast_32f_a_avx(float* bVector, const float* aVector, unsigned int num_points){
45 
46  float* bPtr = bVector;
47  const float* aPtr = aVector;
48 
49  unsigned int number = 0;
50  const unsigned int eighthPoints = num_points / 8;
51 
52  __m256 aVal, bVal, a, b;
53  __m256i exp;
54  a = _mm256_set1_ps(A/Mln2);
55  b = _mm256_set1_ps(B-C);
56 
57  for(;number < eighthPoints; number++){
58  aVal = _mm256_load_ps(aPtr);
59  exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a,aVal), b));
60  bVal = _mm256_castsi256_ps(exp);
61 
62  _mm256_store_ps(bPtr, bVal);
63  aPtr += 8;
64  bPtr += 8;
65  }
66 
67  number = eighthPoints * 8;
68  for(;number < num_points; number++){
69  *bPtr++ = expf(*aPtr++);
70  }
71 }
72 
73 #endif /* LV_HAVE_AVX for aligned */
74 
75 #ifdef LV_HAVE_SSE4_1
76 #include <smmintrin.h>
77 /*!
78  \brief Computes fast exp (max 7% error) of input vector and stores results in output vector
79  \param bVector The vector where results will be stored
80  \param aVector The input vector of floats
81  \param num_points Number of points for which exp is to be computed
82 */
83 static inline void volk_32f_expfast_32f_a_sse4_1(float* bVector, const float* aVector, unsigned int num_points){
84 
85  float* bPtr = bVector;
86  const float* aPtr = aVector;
87 
88  unsigned int number = 0;
89  const unsigned int quarterPoints = num_points / 4;
90 
91  __m128 aVal, bVal, a, b;
92  __m128i exp;
93  a = _mm_set1_ps(A/Mln2);
94  b = _mm_set1_ps(B-C);
95 
96  for(;number < quarterPoints; number++){
97  aVal = _mm_load_ps(aPtr);
98  exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
99  bVal = _mm_castsi128_ps(exp);
100 
101  _mm_store_ps(bPtr, bVal);
102  aPtr += 4;
103  bPtr += 4;
104  }
105 
106  number = quarterPoints * 4;
107  for(;number < num_points; number++){
108  *bPtr++ = expf(*aPtr++);
109  }
110 }
111 
112 #endif /* LV_HAVE_SSE4_1 for aligned */
113 
114 #endif /* INCLUDED_volk_32f_expfast_32f_a_H */
115 
116 #ifndef INCLUDED_volk_32f_expfast_32f_u_H
117 #define INCLUDED_volk_32f_expfast_32f_u_H
118 
119 #ifdef LV_HAVE_AVX
120 #include <immintrin.h>
121 /*!
122  \brief Computes fast exp (max 7% error) of input vector and stores results in output vector
123  \param bVector The vector where results will be stored
124  \param aVector The input vector of floats
125  \param num_points Number of points for which exp is to be computed
126 */
127 static inline void volk_32f_expfast_32f_u_avx(float* bVector, const float* aVector, unsigned int num_points){
128 
129  float* bPtr = bVector;
130  const float* aPtr = aVector;
131 
132  unsigned int number = 0;
133  const unsigned int eighthPoints = num_points / 8;
134 
135  __m256 aVal, bVal, a, b;
136  __m256i exp;
137  a = _mm256_set1_ps(A/Mln2);
138  b = _mm256_set1_ps(B-C);
139 
140  for(;number < eighthPoints; number++){
141  aVal = _mm256_loadu_ps(aPtr);
142  exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a,aVal), b));
143  bVal = _mm256_castsi256_ps(exp);
144 
145  _mm256_storeu_ps(bPtr, bVal);
146  aPtr += 8;
147  bPtr += 8;
148  }
149 
150  number = eighthPoints * 8;
151  for(;number < num_points; number++){
152  *bPtr++ = expf(*aPtr++);
153  }
154 }
155 
156 #endif /* LV_HAVE_AVX for unaligned */
157 
158 #ifdef LV_HAVE_SSE4_1
159 #include <smmintrin.h>
160 /*!
161  \brief Computes fast exp (max 7% error) of input vector and stores results in output vector
162  \param bVector The vector where results will be stored
163  \param aVector The input vector of floats
164  \param num_points Number of points for which log is to be computed
165 */
166 static inline void volk_32f_expfast_32f_u_sse4_1(float* bVector, const float* aVector, unsigned int num_points){
167 
168  float* bPtr = bVector;
169  const float* aPtr = aVector;
170 
171  unsigned int number = 0;
172  const unsigned int quarterPoints = num_points / 4;
173 
174  __m128 aVal, bVal, a, b;
175  __m128i exp;
176  a = _mm_set1_ps(A/Mln2);
177  b = _mm_set1_ps(B-C);
178 
179  for(;number < quarterPoints; number++){
180  aVal = _mm_loadu_ps(aPtr);
181  exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
182  bVal = _mm_castsi128_ps(exp);
183 
184  _mm_storeu_ps(bPtr, bVal);
185  aPtr += 4;
186  bPtr += 4;
187  }
188 
189  number = quarterPoints * 4;
190  for(;number < num_points; number++){
191  *bPtr++ = expf(*aPtr++);
192  }
193 }
194 
195 #endif /* LV_HAVE_SSE4_1 for unaligned */
196 
197 #ifdef LV_HAVE_GENERIC
198 /*!
199  \brief Computes fast exp (max 7% error) of input vector and stores results in output vector
200  \param bVector The vector where results will be stored
201  \param aVector The input vector of floats
202  \param num_points Number of points for which log is to be computed
203 */
204 static inline void volk_32f_expfast_32f_generic(float* bVector, const float* aVector, unsigned int num_points){
205  float* bPtr = bVector;
206  const float* aPtr = aVector;
207  unsigned int number = 0;
208 
209  for(number = 0; number < num_points; number++){
210  *bPtr++ = expf(*aPtr++);
211  }
212 
213 }
214 #endif /* LV_HAVE_GENERIC */
215 
216 #endif /* INCLUDED_volk_32f_expfast_32f_u_H */
#define B
Definition: volk_32f_expfast_32f.h:29
#define C
Definition: volk_32f_expfast_32f.h:30
#define Mln2
Definition: volk_32f_expfast_32f.h:27
#define A
Definition: volk_32f_expfast_32f.h:28