GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_s32f_power_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
24 #define INCLUDED_volk_32f_s32f_power_32f_a_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 #include <math.h>
29 
30 #ifdef LV_HAVE_SSE4_1
#include <tmmintrin.h>
#include <smmintrin.h> /* SSE4.1 intrinsics: declares _mm_blendv_ps used by the kernel below */
32 
33 #ifdef LV_HAVE_LIB_SIMDMATH
34 #include <simdmath.h>
35 #endif /* LV_HAVE_LIB_SIMDMATH */
36 
37 /*!
38  \brief Takes each the input vector value to the specified power and stores the results in the return vector
39  \param cVector The vector where the results will be stored
40  \param aVector The vector of values to be taken to a power
41  \param power The power value to be applied to each data point
42  \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
43 */
44 static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){
45  unsigned int number = 0;
46 
47  float* cPtr = cVector;
48  const float* aPtr = aVector;
49 
50 #ifdef LV_HAVE_LIB_SIMDMATH
51  const unsigned int quarterPoints = num_points / 4;
52  __m128 vPower = _mm_set_ps1(power);
53  __m128 zeroValue = _mm_setzero_ps();
54  __m128 signMask;
55  __m128 negatedValues;
56  __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
57  __m128 onesMask = _mm_set_ps1(1);
58 
59  __m128 aVal, cVal;
60  for(;number < quarterPoints; number++){
61 
62  aVal = _mm_load_ps(aPtr);
63  signMask = _mm_cmplt_ps(aVal, zeroValue);
64  negatedValues = _mm_sub_ps(zeroValue, aVal);
65  aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
66 
67  // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
68  cVal = powf4(aVal, vPower); // Takes each input value to the specified power
69 
70  cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
71 
72  _mm_store_ps(cPtr,cVal); // Store the results back into the C container
73 
74  aPtr += 4;
75  cPtr += 4;
76  }
77 
78  number = quarterPoints * 4;
79 #endif /* LV_HAVE_LIB_SIMDMATH */
80 
81  for(;number < num_points; number++){
82  *cPtr++ = powf((*aPtr++), power);
83  }
84 }
85 #endif /* LV_HAVE_SSE4_1 */
86 
87 #ifdef LV_HAVE_SSE
88 #include <xmmintrin.h>
89 
90 #ifdef LV_HAVE_LIB_SIMDMATH
91 #include <simdmath.h>
92 #endif /* LV_HAVE_LIB_SIMDMATH */
93 
94 /*!
95  \brief Takes each the input vector value to the specified power and stores the results in the return vector
96  \param cVector The vector where the results will be stored
97  \param aVector The vector of values to be taken to a power
98  \param power The power value to be applied to each data point
99  \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
100 */
101 static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){
102  unsigned int number = 0;
103 
104  float* cPtr = cVector;
105  const float* aPtr = aVector;
106 
107 #ifdef LV_HAVE_LIB_SIMDMATH
108  const unsigned int quarterPoints = num_points / 4;
109  __m128 vPower = _mm_set_ps1(power);
110  __m128 zeroValue = _mm_setzero_ps();
111  __m128 signMask;
112  __m128 negatedValues;
113  __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
114  __m128 onesMask = _mm_set_ps1(1);
115 
116  __m128 aVal, cVal;
117  for(;number < quarterPoints; number++){
118 
119  aVal = _mm_load_ps(aPtr);
120  signMask = _mm_cmplt_ps(aVal, zeroValue);
121  negatedValues = _mm_sub_ps(zeroValue, aVal);
122  aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) );
123 
124  // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
125  cVal = powf4(aVal, vPower); // Takes each input value to the specified power
126 
127  cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal);
128 
129  _mm_store_ps(cPtr,cVal); // Store the results back into the C container
130 
131  aPtr += 4;
132  cPtr += 4;
133  }
134 
135  number = quarterPoints * 4;
136 #endif /* LV_HAVE_LIB_SIMDMATH */
137 
138  for(;number < num_points; number++){
139  *cPtr++ = powf((*aPtr++), power);
140  }
141 }
142 #endif /* LV_HAVE_SSE */
143 
144 #ifdef LV_HAVE_GENERIC
145  /*!
146  \brief Takes each the input vector value to the specified power and stores the results in the return vector
147  \param cVector The vector where the results will be stored
148  \param aVector The vector of values to be taken to a power
149  \param power The power value to be applied to each data point
150  \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
151  */
152 static inline void volk_32f_s32f_power_32f_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){
153  float* cPtr = cVector;
154  const float* aPtr = aVector;
155  unsigned int number = 0;
156 
157  for(number = 0; number < num_points; number++){
158  *cPtr++ = powf((*aPtr++), power);
159  }
160 }
161 #endif /* LV_HAVE_GENERIC */
162 
163 
164 
165 
166 #endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */