23 #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
24 #define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
30 #include <immintrin.h>
38 static inline void volk_32fc_deinterleave_32f_x2_a_avx(
float* iBuffer,
float* qBuffer,
const lv_32fc_t* complexVector,
unsigned int num_points){
39 const float* complexVectorPtr = (
float*)complexVector;
40 float* iBufferPtr = iBuffer;
41 float* qBufferPtr = qBuffer;
43 unsigned int number = 0;
45 const unsigned int eighthPoints = num_points / 8;
46 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
47 for(;number < eighthPoints; number++){
49 cplxValue1 = _mm256_load_ps(complexVectorPtr);
50 complexVectorPtr += 8;
52 cplxValue2 = _mm256_load_ps(complexVectorPtr);
53 complexVectorPtr += 8;
55 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
56 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
59 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
61 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
63 _mm256_store_ps(iBufferPtr, iValue);
64 _mm256_store_ps(qBufferPtr, qValue);
70 number = eighthPoints * 8;
71 for(; number < num_points; number++){
72 *iBufferPtr++ = *complexVectorPtr++;
73 *qBufferPtr++ = *complexVectorPtr++;
79 #include <xmmintrin.h>
87 static inline void volk_32fc_deinterleave_32f_x2_a_sse(
float* iBuffer,
float* qBuffer,
const lv_32fc_t* complexVector,
unsigned int num_points){
88 const float* complexVectorPtr = (
float*)complexVector;
89 float* iBufferPtr = iBuffer;
90 float* qBufferPtr = qBuffer;
92 unsigned int number = 0;
93 const unsigned int quarterPoints = num_points / 4;
94 __m128 cplxValue1, cplxValue2, iValue, qValue;
95 for(;number < quarterPoints; number++){
97 cplxValue1 = _mm_load_ps(complexVectorPtr);
98 complexVectorPtr += 4;
100 cplxValue2 = _mm_load_ps(complexVectorPtr);
101 complexVectorPtr += 4;
104 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
106 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
108 _mm_store_ps(iBufferPtr, iValue);
109 _mm_store_ps(qBufferPtr, qValue);
115 number = quarterPoints * 4;
116 for(; number < num_points; number++){
117 *iBufferPtr++ = *complexVectorPtr++;
118 *qBufferPtr++ = *complexVectorPtr++;
124 #include <arm_neon.h>
/*!
  \brief Deinterleaves the complex vector into separate I and Q float vectors (NEON)
  \param iBuffer The I (first component) output buffer, num_points floats
  \param qBuffer The Q (second component) output buffer, num_points floats
  \param complexVector The complex input vector, read as interleaved float pairs
  \param num_points The number of complex values to deinterleave
*/
static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
  unsigned int number = 0;
  /* Each iteration consumes 4 complex values (8 floats) */
  unsigned int quarter_points = num_points / 4;
  const float* complexVectorPtr = (const float*)complexVector;
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;
  float32x4x2_t complexInput;

  for(number = 0; number < quarter_points; number++){
    /* vld2q_f32 is a 2-way de-interleaving load: val[0] receives the
       even-indexed floats and val[1] the odd-indexed floats */
    complexInput = vld2q_f32(complexVectorPtr);
    vst1q_f32( iBufferPtr, complexInput.val[0] );
    vst1q_f32( qBufferPtr, complexInput.val[1] );
    complexVectorPtr += 8;
    /* Advance the outputs; without this every iteration would overwrite
       the first 4 results and the tail loop would start at the wrong offset */
    iBufferPtr += 4;
    qBufferPtr += 4;
  }

  /* Scalar tail for the remaining (num_points % 4) values */
  for(number = quarter_points*4; number < num_points; number++){
    *iBufferPtr++ = *complexVectorPtr++;
    *qBufferPtr++ = *complexVectorPtr++;
  }
}
156 #ifdef LV_HAVE_GENERIC
164 static inline void volk_32fc_deinterleave_32f_x2_generic(
float* iBuffer,
float* qBuffer,
const lv_32fc_t* complexVector,
unsigned int num_points){
165 const float* complexVectorPtr = (
float*)complexVector;
166 float* iBufferPtr = iBuffer;
167 float* qBufferPtr = qBuffer;
169 for(number = 0; number < num_points; number++){
170 *iBufferPtr++ = *complexVectorPtr++;
171 *qBufferPtr++ = *complexVectorPtr++;
/* Cross-reference: lv_32fc_t is declared as "float complex" (volk_complex.h, line 56). */