23 #ifndef INCLUDED_volk_16ic_magnitude_16i_a_H
24 #define INCLUDED_volk_16ic_magnitude_16i_a_H
32 #include <pmmintrin.h>
/*!
  \brief SSE3 kernel: writes the magnitude of each 16-bit complex input point as a 16-bit integer.

  For every point the two int16_t components are converted to float, scaled by
  1/32768, squared and summed; the square root of that sum is scaled back by
  32768 and converted to int16_t with a plain C cast (truncation toward zero).
  The vector loop handles four points per iteration; the remaining
  num_points % 4 points are handled by the scalar loop at the end.

  \param magnitudeVector Output buffer; one int16_t is written per point
  \param complexVector Input points, read as consecutive int16_t values (two per point; the scalar tail names them real, then imaginary)
  \param num_points Number of complex points to process

  NOTE(review): the declarations of complexVectorPtr, inputFloatBuffer and
  outputFloatBuffer, and the closing braces of the loops and of the function,
  are not visible in this listing. From their use below, complexVectorPtr is
  presumably a const int16_t* over complexVector, inputFloatBuffer holds at
  least 8 floats and outputFloatBuffer at least 4 -- confirm against the
  original header.

  NOTE(review): for large inputs the magnitude exceeds the int16_t range
  (e.g. both components equal to -32768 gives about 46341). Converting an
  out-of-range float to an integer type is undefined in C -- confirm that
  callers bound their inputs, or consider clamping.
*/
39 static inline void volk_16ic_magnitude_16i_a_sse3(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points){
40 unsigned int number = 0;
/* Number of full groups of four points handled by the vector loop. */
41 const unsigned int quarterPoints = num_points / 4;
44 int16_t* magnitudeVectorPtr = magnitudeVector;
/* Scale factors, each broadcast to all four lanes. */
46 __m128 vScalar = _mm_set_ps1(32768.0);
47 __m128 invScalar = _mm_set_ps1(1.0/32768.0);
49 __m128 cplxValue1, cplxValue2, result;
54 for(;number < quarterPoints; number++){
/* Convert the next 8 int16_t values (four points) to float in a staging buffer. */
56 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
57 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
58 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
59 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
61 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
62 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
63 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
64 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
/* _mm_load_ps is an aligned load, so the staging buffer must be 16-byte aligned
   (its declaration is not visible here -- TODO confirm). */
66 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
67 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
69 complexVectorPtr += 8;
/* Scale every component by 1/32768. */
71 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
72 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
/* Square every component in place. */
74 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
75 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
/* Horizontal add sums adjacent lanes, giving one sum of squares per point:
   lanes 0-1 come from cplxValue1, lanes 2-3 from cplxValue2. */
77 result = _mm_hadd_ps(cplxValue1, cplxValue2);
79 result = _mm_sqrt_ps(result);
/* Undo the 1/32768 input scaling. */
81 result = _mm_mul_ps(result, vScalar);
/* Spill the four magnitudes (aligned store) and convert each to int16_t. */
83 _mm_store_ps(outputFloatBuffer, result);
84 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[0]);
85 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[1]);
86 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[2]);
87 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[3]);
/* Scalar tail: restart at the first point not covered by the vector loop and
   re-derive both pointers from that index. */
90 number = quarterPoints * 4;
91 magnitudeVectorPtr = &magnitudeVector[number];
92 complexVectorPtr = (
const int16_t*)&complexVector[number];
93 for(; number < num_points; number++){
/* 32768.0 is a double literal, so these divisions and the final multiply are
   evaluated in double and then narrowed to float. */
94 const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
95 const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
96 const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
97 *magnitudeVectorPtr++ = (
int16_t)(val1Result);
103 #include <xmmintrin.h>
/*!
  \brief SSE kernel: writes the magnitude of each 16-bit complex input point as a 16-bit integer.

  For every point the two int16_t components are converted to float, scaled by
  1/32768, squared and summed; the square root of that sum is scaled back by
  32768 and converted to int16_t with a plain C cast (truncation toward zero).
  This variant separates the first and second component of each point with
  shuffles before squaring, instead of using a horizontal add. The vector loop
  handles four points per iteration; the remaining num_points % 4 points are
  handled by the scalar loop at the end.

  \param magnitudeVector Output buffer; one int16_t is written per point
  \param complexVector Input points, read as consecutive int16_t values (two per point; the scalar tail names them real, then imaginary)
  \param num_points Number of complex points to process

  NOTE(review): the declarations of complexVectorPtr, inputFloatBuffer and
  outputFloatBuffer, and the closing braces of the loops and of the function,
  are not visible in this listing. From their use below, complexVectorPtr is
  presumably a const int16_t* over complexVector, and both float buffers hold
  at least 4 floats -- confirm against the original header.

  NOTE(review): for large inputs the magnitude exceeds the int16_t range
  (e.g. both components equal to -32768 gives about 46341). Converting an
  out-of-range float to an integer type is undefined in C -- confirm that
  callers bound their inputs, or consider clamping.
*/
110 static inline void volk_16ic_magnitude_16i_a_sse(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points){
111 unsigned int number = 0;
/* Number of full groups of four points handled by the vector loop. */
112 const unsigned int quarterPoints = num_points / 4;
115 int16_t* magnitudeVectorPtr = magnitudeVector;
/* Scale factors, each broadcast to all four lanes. */
117 __m128 vScalar = _mm_set_ps1(32768.0);
118 __m128 invScalar = _mm_set_ps1(1.0/32768.0);
120 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
125 for(;number < quarterPoints; number++){
/* First two points: convert 4 int16_t values to float and load them.
   _mm_load_ps is an aligned load, so the staging buffer must be 16-byte
   aligned (its declaration is not visible here -- TODO confirm). */
127 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
128 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
129 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
130 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
132 cplxValue1 = _mm_load_ps(inputFloatBuffer);
133 complexVectorPtr += 4;
/* Next two points: the same staging buffer is reused, which is safe because
   cplxValue1 has already been loaded from it. */
135 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
136 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
137 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
138 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
140 cplxValue2 = _mm_load_ps(inputFloatBuffer);
141 complexVectorPtr += 4;
/* Scale every component by 1/32768. */
143 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
144 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
/* De-interleave: elements 0 and 2 of each register (first component of each
   point) go to iValue ... */
147 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
/* ... and elements 1 and 3 (second component of each point) go to qValue. */
149 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
/* Square both components and add them lane-wise: one sum of squares per point. */
151 iValue = _mm_mul_ps(iValue, iValue);
152 qValue = _mm_mul_ps(qValue, qValue);
154 result = _mm_add_ps(iValue, qValue);
156 result = _mm_sqrt_ps(result);
/* Undo the 1/32768 input scaling. */
158 result = _mm_mul_ps(result, vScalar);
/* Spill the four magnitudes (aligned store) and convert each to int16_t. */
160 _mm_store_ps(outputFloatBuffer, result);
161 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[0]);
162 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[1]);
163 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[2]);
164 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[3]);
/* Scalar tail: restart at the first point not covered by the vector loop and
   re-derive both pointers from that index. */
167 number = quarterPoints * 4;
168 magnitudeVectorPtr = &magnitudeVector[number];
169 complexVectorPtr = (
const int16_t*)&complexVector[number];
170 for(; number < num_points; number++){
/* 32768.0 is a double literal, so these divisions and the final multiply are
   evaluated in double and then narrowed to float. */
171 const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
172 const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
173 const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
174 *magnitudeVectorPtr++ = (
int16_t)(val1Result);
179 #ifdef LV_HAVE_GENERIC
/*!
  \brief Portable scalar kernel: writes the magnitude of each 16-bit complex input point as a 16-bit integer.

  Each point is read as two consecutive int16_t values, both divided by 32768;
  the square root of the sum of their squares is multiplied by 32768 and
  converted to int16_t with a plain C cast (truncation toward zero).

  \param magnitudeVector Output buffer; one int16_t is written per point
  \param complexVector Input points, read through complexVectorPtr as int16_t pairs
  \param num_points Number of complex points to process

  NOTE(review): the declaration of complexVectorPtr and the closing braces are
  not visible in this listing; the pointer is presumably a const int16_t* over
  complexVector -- confirm against the original header.

  NOTE(review): for large inputs the magnitude exceeds the int16_t range
  (e.g. both components equal to -32768 gives about 46341). Converting an
  out-of-range float to an integer type is undefined in C -- confirm that
  callers bound their inputs, or consider clamping.
*/
186 static inline void volk_16ic_magnitude_16i_generic(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points){
188 int16_t* magnitudeVectorPtr = magnitudeVector;
189 unsigned int number = 0;
/* Scale factor applied on the way in (division) and on the way out (multiplication). */
190 const float scalar = 32768.0;
191 for(number = 0; number < num_points; number++){
/* Two consecutive int16_t reads per point: named real first, then imaginary. */
192 float real = ((float)(*complexVectorPtr++)) / scalar;
193 float imag = ((float)(*complexVectorPtr++)) / scalar;
194 *magnitudeVectorPtr++ = (
int16_t)(sqrtf((real*real) + (imag*imag)) * scalar);
199 #ifdef LV_HAVE_ORC_DISABLED
/*!
  \brief Declaration of the ORC implementation; its definition is not in this file.

  \param magnitudeVector Output buffer passed through by the wrapper below
  \param complexVector Input buffer passed through by the wrapper below
  \param scalar Scale factor; the only caller visible here passes 32768.0
  \param num_points Point count passed through by the wrapper below

  NOTE(review): what the implementation does with these arguments cannot be
  determined from this listing -- presumably the same magnitude computation as
  the other kernels; verify against the ORC source.
*/
206 extern void volk_16ic_magnitude_16i_a_orc_impl(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
float scalar,
unsigned int num_points);
/*!
  \brief ORC entry point: forwards its arguments to volk_16ic_magnitude_16i_a_orc_impl with the scalar fixed at 32768.0.

  \param magnitudeVector Output buffer, forwarded unchanged
  \param complexVector Input buffer, forwarded unchanged
  \param num_points Number of points, forwarded unchanged

  NOTE(review): this wrapper is named "_u_orc" while the implementation it
  calls is named "_a_orc_impl" -- confirm the naming mismatch is intentional.
  The closing brace of this function is not visible in this listing.
*/
207 static inline void volk_16ic_magnitude_16i_u_orc(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points){
208 volk_16ic_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
short complex lv_16sc_t
Definition: volk_complex.h:53
signed short int16_t
Definition: stdint.h:76
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27