23 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
24 #define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
32 #include <pmmintrin.h>
40 static inline void volk_32fc_s32f_magnitude_16i_a_sse3(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
const float scalar,
unsigned int num_points){
41 unsigned int number = 0;
42 const unsigned int quarterPoints = num_points / 4;
44 const float* complexVectorPtr = (
const float*)complexVector;
45 int16_t* magnitudeVectorPtr = magnitudeVector;
47 __m128 vScalar = _mm_set_ps1(scalar);
49 __m128 cplxValue1, cplxValue2, result;
53 for(;number < quarterPoints; number++){
54 cplxValue1 = _mm_load_ps(complexVectorPtr);
55 complexVectorPtr += 4;
57 cplxValue2 = _mm_load_ps(complexVectorPtr);
58 complexVectorPtr += 4;
60 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
61 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
63 result = _mm_hadd_ps(cplxValue1, cplxValue2);
65 result = _mm_sqrt_ps(result);
67 result = _mm_mul_ps(result, vScalar);
69 _mm_store_ps(floatBuffer, result);
70 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[0]);
71 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[1]);
72 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[2]);
73 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[3]);
76 number = quarterPoints * 4;
77 magnitudeVectorPtr = &magnitudeVector[number];
78 for(; number < num_points; number++){
79 float val1Real = *complexVectorPtr++;
80 float val1Imag = *complexVectorPtr++;
81 *magnitudeVectorPtr++ = (
int16_t)(sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * scalar);
87 #include <xmmintrin.h>
95 static inline void volk_32fc_s32f_magnitude_16i_a_sse(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
const float scalar,
unsigned int num_points){
96 unsigned int number = 0;
97 const unsigned int quarterPoints = num_points / 4;
99 const float* complexVectorPtr = (
const float*)complexVector;
100 int16_t* magnitudeVectorPtr = magnitudeVector;
102 __m128 vScalar = _mm_set_ps1(scalar);
104 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
108 for(;number < quarterPoints; number++){
109 cplxValue1 = _mm_load_ps(complexVectorPtr);
110 complexVectorPtr += 4;
112 cplxValue2 = _mm_load_ps(complexVectorPtr);
113 complexVectorPtr += 4;
116 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
118 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
120 iValue = _mm_mul_ps(iValue, iValue);
121 qValue = _mm_mul_ps(qValue, qValue);
123 result = _mm_add_ps(iValue, qValue);
125 result = _mm_sqrt_ps(result);
127 result = _mm_mul_ps(result, vScalar);
129 _mm_store_ps(floatBuffer, result);
130 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[0]);
131 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[1]);
132 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[2]);
133 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[3]);
136 number = quarterPoints * 4;
137 magnitudeVectorPtr = &magnitudeVector[number];
138 for(; number < num_points; number++){
139 float val1Real = *complexVectorPtr++;
140 float val1Imag = *complexVectorPtr++;
141 *magnitudeVectorPtr++ = (
int16_t)(sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * scalar);
146 #ifdef LV_HAVE_GENERIC
154 static inline void volk_32fc_s32f_magnitude_16i_generic(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
const float scalar,
unsigned int num_points){
155 const float* complexVectorPtr = (
float*)complexVector;
156 int16_t* magnitudeVectorPtr = magnitudeVector;
157 unsigned int number = 0;
158 for(number = 0; number < num_points; number++){
159 const float real = *complexVectorPtr++;
160 const float imag = *complexVectorPtr++;
161 *magnitudeVectorPtr++ = (
int16_t)(sqrtf((real*real) + (imag*imag)) * scalar);
174 extern void volk_32fc_s32f_magnitude_16i_a_orc_impl(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
const float scalar,
unsigned int num_points);
175 static inline void volk_32fc_s32f_magnitude_16i_u_orc(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
const float scalar,
unsigned int num_points){
176 volk_32fc_s32f_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
/*
 * Cross-references emitted by the documentation generator:
 *   int16_t                 - signed short int16_t    (Definition: stdint.h:76)
 *   __VOLK_ATTR_ALIGNED(x)  - macro                   (Definition: volk_common.h:27)
 *   lv_32fc_t               - float complex lv_32fc_t (Definition: volk_complex.h:56)
 */