/* Natural logarithm of 2. The SIMD kernels scale their input by A / Mln2. */
#define Mln2 0.6931471805f
/*
 * 127 * 2^23 (0x3F800000 as an integer), i.e. the IEEE-754 single-precision
 * bit pattern of 1.0f expressed as a float value. The SIMD kernels add
 * (B - C) before converting the scaled input to an integer.
 */
#define B 1065353216.0f
33 #ifndef INCLUDED_volk_32f_expfast_32f_a_H
34 #define INCLUDED_volk_32f_expfast_32f_a_H
37 #include <immintrin.h>
44 static inline void volk_32f_expfast_32f_a_avx(
float* bVector,
const float* aVector,
unsigned int num_points){
46 float* bPtr = bVector;
47 const float* aPtr = aVector;
49 unsigned int number = 0;
50 const unsigned int eighthPoints = num_points / 8;
52 __m256 aVal, bVal, a, b;
54 a = _mm256_set1_ps(
A/
Mln2);
55 b = _mm256_set1_ps(
B-
C);
57 for(;number < eighthPoints; number++){
58 aVal = _mm256_load_ps(aPtr);
59 exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a,aVal), b));
60 bVal = _mm256_castsi256_ps(exp);
62 _mm256_store_ps(bPtr, bVal);
67 number = eighthPoints * 8;
68 for(;number < num_points; number++){
69 *bPtr++ = expf(*aPtr++);
76 #include <smmintrin.h>
83 static inline void volk_32f_expfast_32f_a_sse4_1(
float* bVector,
const float* aVector,
unsigned int num_points){
85 float* bPtr = bVector;
86 const float* aPtr = aVector;
88 unsigned int number = 0;
89 const unsigned int quarterPoints = num_points / 4;
91 __m128 aVal, bVal, a, b;
93 a = _mm_set1_ps(
A/
Mln2);
96 for(;number < quarterPoints; number++){
97 aVal = _mm_load_ps(aPtr);
98 exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
99 bVal = _mm_castsi128_ps(exp);
101 _mm_store_ps(bPtr, bVal);
106 number = quarterPoints * 4;
107 for(;number < num_points; number++){
108 *bPtr++ = expf(*aPtr++);
116 #ifndef INCLUDED_volk_32f_expfast_32f_u_H
117 #define INCLUDED_volk_32f_expfast_32f_u_H
120 #include <immintrin.h>
127 static inline void volk_32f_expfast_32f_u_avx(
float* bVector,
const float* aVector,
unsigned int num_points){
129 float* bPtr = bVector;
130 const float* aPtr = aVector;
132 unsigned int number = 0;
133 const unsigned int eighthPoints = num_points / 8;
135 __m256 aVal, bVal, a, b;
137 a = _mm256_set1_ps(
A/
Mln2);
138 b = _mm256_set1_ps(
B-
C);
140 for(;number < eighthPoints; number++){
141 aVal = _mm256_loadu_ps(aPtr);
142 exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a,aVal), b));
143 bVal = _mm256_castsi256_ps(exp);
145 _mm256_storeu_ps(bPtr, bVal);
150 number = eighthPoints * 8;
151 for(;number < num_points; number++){
152 *bPtr++ = expf(*aPtr++);
158 #ifdef LV_HAVE_SSE4_1
159 #include <smmintrin.h>
166 static inline void volk_32f_expfast_32f_u_sse4_1(
float* bVector,
const float* aVector,
unsigned int num_points){
168 float* bPtr = bVector;
169 const float* aPtr = aVector;
171 unsigned int number = 0;
172 const unsigned int quarterPoints = num_points / 4;
174 __m128 aVal, bVal, a, b;
176 a = _mm_set1_ps(
A/
Mln2);
177 b = _mm_set1_ps(
B-
C);
179 for(;number < quarterPoints; number++){
180 aVal = _mm_loadu_ps(aPtr);
181 exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
182 bVal = _mm_castsi128_ps(exp);
184 _mm_storeu_ps(bPtr, bVal);
189 number = quarterPoints * 4;
190 for(;number < num_points; number++){
191 *bPtr++ = expf(*aPtr++);
197 #ifdef LV_HAVE_GENERIC
204 static inline void volk_32f_expfast_32f_generic(
float* bVector,
const float* aVector,
unsigned int num_points){
205 float* bPtr = bVector;
206 const float* aPtr = aVector;
207 unsigned int number = 0;
209 for(number = 0; number < num_points; number++){
210 *bPtr++ = expf(*aPtr++);
/*
 * Macro cross-reference (from the generated source listing):
 *   B    - defined at volk_32f_expfast_32f.h:29
 *   C    - defined at volk_32f_expfast_32f.h:30
 *   Mln2 - defined at volk_32f_expfast_32f.h:27
 *   A    - defined at volk_32f_expfast_32f.h:28
 */