23 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
24 #define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
31 #include <smmintrin.h>
40 static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(
float* iBuffer,
float* qBuffer,
const lv_8sc_t* complexVector,
const float scalar,
unsigned int num_points){
41 float* iBufferPtr = iBuffer;
42 float* qBufferPtr = qBuffer;
44 unsigned int number = 0;
45 const unsigned int eighthPoints = num_points / 8;
46 __m128 iFloatValue, qFloatValue;
48 const float iScalar= 1.0 / scalar;
49 __m128 invScalar = _mm_set_ps1(iScalar);
50 __m128i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
53 __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
54 __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
56 for(;number < eighthPoints; number++){
57 complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
58 iComplexVal = _mm_shuffle_epi8(complexVal, iMoveMask);
59 qComplexVal = _mm_shuffle_epi8(complexVal, qMoveMask);
61 iIntVal = _mm_cvtepi8_epi32(iComplexVal);
62 iFloatValue = _mm_cvtepi32_ps(iIntVal);
63 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
64 _mm_store_ps(iBufferPtr, iFloatValue);
67 iComplexVal = _mm_srli_si128(iComplexVal, 4);
69 iIntVal = _mm_cvtepi8_epi32(iComplexVal);
70 iFloatValue = _mm_cvtepi32_ps(iIntVal);
71 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
72 _mm_store_ps(iBufferPtr, iFloatValue);
75 qIntVal = _mm_cvtepi8_epi32(qComplexVal);
76 qFloatValue = _mm_cvtepi32_ps(qIntVal);
77 qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
78 _mm_store_ps(qBufferPtr, qFloatValue);
81 qComplexVal = _mm_srli_si128(qComplexVal, 4);
83 qIntVal = _mm_cvtepi8_epi32(qComplexVal);
84 qFloatValue = _mm_cvtepi32_ps(qIntVal);
85 qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
86 _mm_store_ps(qBufferPtr, qFloatValue);
91 number = eighthPoints * 8;
92 for(; number < num_points; number++){
93 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
94 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
101 #include <xmmintrin.h>
110 static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse(
float* iBuffer,
float* qBuffer,
const lv_8sc_t* complexVector,
const float scalar,
unsigned int num_points){
111 float* iBufferPtr = iBuffer;
112 float* qBufferPtr = qBuffer;
114 unsigned int number = 0;
115 const unsigned int quarterPoints = num_points / 4;
116 __m128 cplxValue1, cplxValue2, iValue, qValue;
118 __m128 invScalar = _mm_set_ps1(1.0/scalar);
123 for(;number < quarterPoints; number++){
124 floatBuffer[0] = (float)(complexVectorPtr[0]);
125 floatBuffer[1] = (float)(complexVectorPtr[1]);
126 floatBuffer[2] = (float)(complexVectorPtr[2]);
127 floatBuffer[3] = (float)(complexVectorPtr[3]);
129 floatBuffer[4] = (float)(complexVectorPtr[4]);
130 floatBuffer[5] = (float)(complexVectorPtr[5]);
131 floatBuffer[6] = (float)(complexVectorPtr[6]);
132 floatBuffer[7] = (float)(complexVectorPtr[7]);
134 cplxValue1 = _mm_load_ps(&floatBuffer[0]);
135 cplxValue2 = _mm_load_ps(&floatBuffer[4]);
137 complexVectorPtr += 8;
139 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
140 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
143 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
144 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
146 _mm_store_ps(iBufferPtr, iValue);
147 _mm_store_ps(qBufferPtr, qValue);
153 number = quarterPoints * 4;
154 complexVectorPtr = (
int8_t*)&complexVector[number];
155 for(; number < num_points; number++){
156 *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
157 *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
#ifdef LV_HAVE_GENERIC
/*!
  \brief Deinterleaves a complex 8-bit vector into scaled I and Q float vectors (portable C)

  The input is read as a stream of interleaved signed bytes (I0, Q0, I1, Q1, ...).
  Every sample is converted to float and multiplied by 1/scalar.

  \param iBuffer       Output buffer receiving the scaled I (even-indexed) samples
  \param qBuffer       Output buffer receiving the scaled Q (odd-indexed) samples
  \param complexVector Interleaved complex 8-bit input
  \param scalar        Divisor applied to every sample (applied as a multiply by 1/scalar)
  \param num_points    Number of complex points to convert
*/
static inline void volk_8ic_s32f_deinterleave_32f_x2_generic(
    float* iBuffer,
    float* qBuffer,
    const lv_8sc_t* complexVector,
    const float scalar,
    unsigned int num_points)
{
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    float* iBufferPtr = iBuffer;
    float* qBufferPtr = qBuffer;
    unsigned int number;
    /* Hoist the division out of the loop; each sample then needs one multiply. */
    const float invScalar = 1.0 / scalar;

    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
        *qBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
    }
}
#endif /* LV_HAVE_GENERIC */
/*
 * Cross-reference notes carried over from the generated source listing:
 *   int8_t                  - signed char; defined at stdint.h:75
 *   __VOLK_ATTR_ALIGNED(x)  - macro; defined at volk_common.h:27
 *   lv_8sc_t                - char complex; defined at volk_complex.h:52
 *                             (volk_complex.h provides typedefs and operators
 *                             for all complex types in C and C++)
 */