23 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
24 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
32 #include <pmmintrin.h>
40 static inline void volk_16ic_s32f_magnitude_32f_a_sse3(
float* magnitudeVector,
const lv_16sc_t* complexVector,
const float scalar,
unsigned int num_points){
41 unsigned int number = 0;
42 const unsigned int quarterPoints = num_points / 4;
45 float* magnitudeVectorPtr = magnitudeVector;
47 __m128 invScalar = _mm_set_ps1(1.0/scalar);
49 __m128 cplxValue1, cplxValue2, result;
53 for(;number < quarterPoints; number++){
55 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
56 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
57 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
58 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
60 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
61 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
62 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
63 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
65 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
66 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
68 complexVectorPtr += 8;
70 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
71 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
73 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
74 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
76 result = _mm_hadd_ps(cplxValue1, cplxValue2);
78 result = _mm_sqrt_ps(result);
80 _mm_store_ps(magnitudeVectorPtr, result);
82 magnitudeVectorPtr += 4;
85 number = quarterPoints * 4;
86 magnitudeVectorPtr = &magnitudeVector[number];
87 complexVectorPtr = (
const int16_t*)&complexVector[number];
88 for(; number < num_points; number++){
89 float val1Real = (float)(*complexVectorPtr++) / scalar;
90 float val1Imag = (float)(*complexVectorPtr++) / scalar;
91 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
97 #include <xmmintrin.h>
105 static inline void volk_16ic_s32f_magnitude_32f_a_sse(
float* magnitudeVector,
const lv_16sc_t* complexVector,
const float scalar,
unsigned int num_points){
106 unsigned int number = 0;
107 const unsigned int quarterPoints = num_points / 4;
110 float* magnitudeVectorPtr = magnitudeVector;
112 const float iScalar = 1.0 / scalar;
113 __m128 invScalar = _mm_set_ps1(iScalar);
115 __m128 cplxValue1, cplxValue2, result, re, im;
119 for(;number < quarterPoints; number++){
120 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
121 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
122 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
123 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
125 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
126 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
127 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
128 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
130 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
131 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
133 re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
134 im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
136 complexVectorPtr += 8;
138 cplxValue1 = _mm_mul_ps(re, invScalar);
139 cplxValue2 = _mm_mul_ps(im, invScalar);
141 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
142 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
144 result = _mm_add_ps(cplxValue1, cplxValue2);
146 result = _mm_sqrt_ps(result);
148 _mm_store_ps(magnitudeVectorPtr, result);
150 magnitudeVectorPtr += 4;
153 number = quarterPoints * 4;
154 magnitudeVectorPtr = &magnitudeVector[number];
155 complexVectorPtr = (
const int16_t*)&complexVector[number];
156 for(; number < num_points; number++){
157 float val1Real = (float)(*complexVectorPtr++) * iScalar;
158 float val1Imag = (float)(*complexVectorPtr++) * iScalar;
159 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
#ifdef LV_HAVE_GENERIC
/*!
  \brief Scales each 16-bit complex sample by 1/scalar and stores its magnitude as a float (portable C kernel)
  \param magnitudeVector Output buffer receiving num_points floats
  \param complexVector Input buffer holding num_points complex samples, read as interleaved (real, imag) int16_t pairs
  \param scalar Divisor applied to the real and imaginary parts before the magnitude is taken
  \param num_points Number of complex samples to process
*/
static inline void volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
  /* View the complex input as a flat stream of int16_t: real, imag, real, imag, ... */
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  float* magnitudeVectorPtr = magnitudeVector;
  unsigned int number;
  /* Multiply by the reciprocal instead of dividing twice per sample. */
  const float invScalar = 1.0 / scalar;
  for(number = 0; number < num_points; number++){
    float real = ( (float) (*complexVectorPtr++)) * invScalar;
    float imag = ( (float) (*complexVectorPtr++)) * invScalar;
    *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
  }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC_DISABLED
/*!
  \brief Scales each 16-bit complex sample by 1/scalar and stores its magnitude as a float (ORC wrapper)

  Only compiled when LV_HAVE_ORC_DISABLED is defined. The work is done by
  volk_16ic_s32f_magnitude_32f_a_orc_impl, which is declared here and defined
  outside this header; all arguments are forwarded unchanged.

  \param magnitudeVector Output buffer receiving the magnitudes
  \param complexVector Input buffer of complex 16-bit samples
  \param scalar Scaling value passed through to the implementation
  \param num_points Number of complex samples to process
*/
extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
static inline void volk_16ic_s32f_magnitude_32f_u_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
  volk_16ic_s32f_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
}
#endif /* LV_HAVE_ORC_DISABLED */
/*
 * Cross-references for identifiers used in this file:
 *   lv_16sc_t               typedef of `short complex`   (volk_complex.h:53)
 *   int16_t                 typedef of `signed short`    (stdint.h:76)
 *   __VOLK_ATTR_ALIGNED(x)  alignment attribute macro    (volk_common.h:27)
 */