23 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
24 #define INCLUDED_volk_32f_s32f_power_32f_a_H
31 #include <tmmintrin.h>
33 #ifdef LV_HAVE_LIB_SIMDMATH
/*!
  \brief Raises each element of aVector to the exponent `power` and writes the result to cVector.

  When LV_HAVE_LIB_SIMDMATH is defined, elements are first processed four at a
  time with powf4(); the remaining elements (or all of them when the macro is
  not defined) are processed one at a time with powf().

  \param cVector    Output buffer; results are written through cPtr.
  \param aVector    Input buffer; values are read through aPtr.
  \param power      Exponent applied to every input value.
  \param num_points Number of elements to process.

  NOTE(review): _mm_load_ps/_mm_store_ps are the aligned load/store intrinsics,
  so both buffers are presumably expected to be 16-byte aligned - confirm
  against callers.
  NOTE(review): _mm_blendv_ps is an SSE4.1 intrinsic declared in <smmintrin.h>;
  the include visible above this function is <tmmintrin.h> - confirm that the
  right header is in scope.
  NOTE(review): the declarations of aVal, cVal, signMask and negatedValues, the
  per-iteration advance of aPtr/cPtr, and the closing braces are not visible in
  this listing - confirm against the full file.
*/
44 static inline void volk_32f_s32f_power_32f_a_sse4_1(
float* cVector,
const float* aVector,
const float power,
unsigned int num_points){
/* Element index shared by the vector loop and the scalar tail loop. */
45 unsigned int number = 0;
47 float* cPtr = cVector;
48 const float* aPtr = aVector;
50 #ifdef LV_HAVE_LIB_SIMDMATH
/* Number of complete groups of four floats. */
51 const unsigned int quarterPoints = num_points / 4;
/* Exponent broadcast to all four lanes. */
52 __m128 vPower = _mm_set_ps1(power);
53 __m128 zeroValue = _mm_setzero_ps();
/* Loop-invariant factor (-1)^power, applied below to lanes whose input was
   negative. For a non-integer exponent powf(-1, power) is NaN, which matches
   what scalar powf() gives for a negative base. */
56 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
/* Factor 1.0 used for lanes whose input was not negative. */
57 __m128 onesMask = _mm_set_ps1(1);
60 for(;number < quarterPoints; number++){
62 aVal = _mm_load_ps(aPtr);
/* All-ones in each lane where the input is < 0. */
63 signMask = _mm_cmplt_ps(aVal, zeroValue);
64 negatedValues = _mm_sub_ps(zeroValue, aVal);
/* Replace negative lanes by their negation, i.e. take the magnitude. */
65 aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
/* powf4 is not defined in this listing; presumably the four-lane power
   function from simdmath, called on magnitudes because it does not handle
   negative bases - TODO confirm. */
68 cVal = powf4(aVal, vPower);
/* Multiply by (-1)^power in lanes that were negative, by 1 elsewhere. */
70 cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
72 _mm_store_ps(cPtr,cVal);
/* Resume the scalar loop at the first element not covered by a full group. */
78 number = quarterPoints * 4;
/* Scalar path: handles the leftover elements after the vector loop, or every
   element when LV_HAVE_LIB_SIMDMATH is not defined. */
81 for(;number < num_points; number++){
82 *cPtr++ = powf((*aPtr++), power);
88 #include <xmmintrin.h>
90 #ifdef LV_HAVE_LIB_SIMDMATH
/*!
  \brief Raises each element of aVector to the exponent `power` and writes the result to cVector.

  When LV_HAVE_LIB_SIMDMATH is defined, elements are first processed four at a
  time with powf4(); the remaining elements (or all of them when the macro is
  not defined) are processed one at a time with powf(). Lane selection is done
  with and/andnot/or masks rather than a blend intrinsic.

  \param cVector    Output buffer; results are written through cPtr.
  \param aVector    Input buffer; values are read through aPtr.
  \param power      Exponent applied to every input value.
  \param num_points Number of elements to process.

  NOTE(review): _mm_load_ps/_mm_store_ps are the aligned load/store intrinsics,
  so both buffers are presumably expected to be 16-byte aligned - confirm
  against callers.
  NOTE(review): the declarations of aVal, cVal and signMask, the per-iteration
  advance of aPtr/cPtr, and the closing braces are not visible in this listing -
  confirm against the full file.
*/
101 static inline void volk_32f_s32f_power_32f_a_sse(
float* cVector,
const float* aVector,
const float power,
unsigned int num_points){
/* Element index shared by the vector loop and the scalar tail loop. */
102 unsigned int number = 0;
104 float* cPtr = cVector;
105 const float* aPtr = aVector;
107 #ifdef LV_HAVE_LIB_SIMDMATH
/* Number of complete groups of four floats. */
108 const unsigned int quarterPoints = num_points / 4;
/* Exponent broadcast to all four lanes. */
109 __m128 vPower = _mm_set_ps1(power);
110 __m128 zeroValue = _mm_setzero_ps();
112 __m128 negatedValues;
/* Loop-invariant factor (-1)^power, applied below to lanes whose input was
   negative. For a non-integer exponent powf(-1, power) is NaN, which matches
   what scalar powf() gives for a negative base. */
113 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
/* Factor 1.0 used for lanes whose input was not negative. */
114 __m128 onesMask = _mm_set_ps1(1);
117 for(;number < quarterPoints; number++){
119 aVal = _mm_load_ps(aPtr);
/* All-ones in each lane where the input is < 0. */
120 signMask = _mm_cmplt_ps(aVal, zeroValue);
121 negatedValues = _mm_sub_ps(zeroValue, aVal);
/* Mask-based select: keep aVal where the mask is clear, take the negated
   value where it is set, i.e. take the magnitude. */
122 aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) );
/* powf4 is not defined in this listing; presumably the four-lane power
   function from simdmath, called on magnitudes because it does not handle
   negative bases - TODO confirm. */
125 cVal = powf4(aVal, vPower);
/* Select 1 for lanes that were not negative and (-1)^power for lanes that
   were, then scale the result by that factor. */
127 cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal);
129 _mm_store_ps(cPtr,cVal);
/* Resume the scalar loop at the first element not covered by a full group. */
135 number = quarterPoints * 4;
/* Scalar path: handles the leftover elements after the vector loop, or every
   element when LV_HAVE_LIB_SIMDMATH is not defined. */
138 for(;number < num_points; number++){
139 *cPtr++ = powf((*aPtr++), power);
144 #ifdef LV_HAVE_GENERIC
/*!
  \brief Raises each element of aVector to the exponent `power` and writes the result to cVector.

  Plain scalar implementation: one powf() call per element, no SIMD intrinsics.

  \param cVector    Output buffer; results are written through cPtr.
  \param aVector    Input buffer; values are read through aPtr.
  \param power      Exponent applied to every input value.
  \param num_points Number of elements to process.

  NOTE(review): the closing braces of the loop and of the function are not
  visible in this listing - confirm against the full file.
*/
152 static inline void volk_32f_s32f_power_32f_generic(
float* cVector,
const float* aVector,
const float power,
unsigned int num_points){
153 float* cPtr = cVector;
154 const float* aPtr = aVector;
155 unsigned int number = 0;
/* Both pointers advance by one element per iteration. */
157 for(number = 0; number < num_points; number++){
158 *cPtr++ = powf((*aPtr++), power);