23 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
24 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
31 #include <smmintrin.h>
39 static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(
float* iBuffer,
const lv_16sc_t* complexVector,
const float scalar,
unsigned int num_points){
40 float* iBufferPtr = iBuffer;
42 unsigned int number = 0;
43 const unsigned int quarterPoints = num_points / 4;
47 const float iScalar= 1.0 / scalar;
48 __m128 invScalar = _mm_set_ps1(iScalar);
49 __m128i complexVal, iIntVal;
52 __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
54 for(;number < quarterPoints; number++){
55 complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
56 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
58 iIntVal = _mm_cvtepi16_epi32(complexVal);
59 iFloatValue = _mm_cvtepi32_ps(iIntVal);
61 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
63 _mm_store_ps(iBufferPtr, iFloatValue);
68 number = quarterPoints * 4;
69 int16_t* sixteenTComplexVectorPtr = (
int16_t*)&complexVector[number];
70 for(; number < num_points; number++){
71 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
72 sixteenTComplexVectorPtr++;
79 #include <xmmintrin.h>
87 static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse(
float* iBuffer,
const lv_16sc_t* complexVector,
const float scalar,
unsigned int num_points){
88 float* iBufferPtr = iBuffer;
90 unsigned int number = 0;
91 const unsigned int quarterPoints = num_points / 4;
94 const float iScalar = 1.0/scalar;
95 __m128 invScalar = _mm_set_ps1(iScalar);
100 for(;number < quarterPoints; number++){
101 floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
102 floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2;
103 floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
104 floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2;
106 iValue = _mm_load_ps(floatBuffer);
108 iValue = _mm_mul_ps(iValue, invScalar);
110 _mm_store_ps(iBufferPtr, iValue);
115 number = quarterPoints * 4;
116 complexVectorPtr = (
int16_t*)&complexVector[number];
117 for(; number < num_points; number++){
118 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
125 #ifdef LV_HAVE_GENERIC
133 static inline void volk_16ic_s32f_deinterleave_real_32f_generic(
float* iBuffer,
const lv_16sc_t* complexVector,
const float scalar,
unsigned int num_points){
134 unsigned int number = 0;
136 float* iBufferPtr = iBuffer;
137 const float invScalar = 1.0 / scalar;
138 for(number = 0; number < num_points; number++){
139 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
short complex lv_16sc_t
Definition: volk_complex.h:53
signed short int16_t
Definition: stdint.h:76
signed char int8_t
Definition: stdint.h:75
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27