23 #ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
24 #define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
30 #include <immintrin.h>
/*!
 * \brief Extracts the imaginary (Q) component of every complex sample (AVX, aligned).
 *
 * The input is read as interleaved floats (real, imag, real, imag, ...). Eight
 * complex samples are handled per vector iteration; any remaining samples are
 * handled by a scalar tail loop.
 *
 * Aligned AVX loads and stores are used, so both buffers must be 32-byte aligned.
 *
 * \param qBuffer       Output buffer receiving num_points imaginary components.
 * \param complexVector Input buffer of num_points complex samples.
 * \param num_points    Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_imag_32f_a_avx(float* qBuffer,
                                                         const lv_32fc_t* complexVector,
                                                         unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;
    const float* complexVectorPtr = (const float*)complexVector;
    float* qBufferPtr = qBuffer;

    __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
    for (; number < eighthPoints; number++) {
        /* Two loads cover eight complex samples (sixteen floats). */
        cplxValue1 = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        cplxValue2 = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /* Regroup the 128-bit lanes so that the per-lane shuffle below
         * produces the imaginary parts in their original sample order. */
        complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
        complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);

        /* 0xdd selects elements 1 and 3 of each lane: the imaginary parts. */
        qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);

        _mm256_store_ps(qBufferPtr, qValue);
        /* Advance the output, otherwise every store lands on the same slots. */
        qBufferPtr += 8;
    }

    /* Scalar tail: take the imaginary float of each pair and skip the real one. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *qBufferPtr++ = complexVectorPtr[1];
        complexVectorPtr += 2;
    }
}
72 #include <xmmintrin.h>
/*!
 * \brief Extracts the imaginary (Q) component of every complex sample (SSE, aligned).
 *
 * The input is read as interleaved floats (real, imag, real, imag, ...). Four
 * complex samples are handled per vector iteration; any remaining samples are
 * handled by a scalar tail loop.
 *
 * Aligned SSE loads and stores are used, so both buffers must be 16-byte aligned.
 *
 * \param qBuffer       Output buffer receiving num_points imaginary components.
 * \param complexVector Input buffer of num_points complex samples.
 * \param num_points    Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer,
                                                         const lv_32fc_t* complexVector,
                                                         unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* complexVectorPtr = (const float*)complexVector;
    float* qBufferPtr = qBuffer;

    __m128 cplxValue1, cplxValue2, qValue;
    for (; number < quarterPoints; number++) {
        /* Two loads cover four complex samples (eight floats). */
        cplxValue1 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        cplxValue2 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* Pick elements 1 and 3 of each input: the four imaginary parts in order. */
        qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));

        _mm_store_ps(qBufferPtr, qValue);
        /* Advance the output, otherwise every store lands on the same slots. */
        qBufferPtr += 4;
    }

    /* Scalar tail: take the imaginary float of each pair and skip the real one. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *qBufferPtr++ = complexVectorPtr[1];
        complexVectorPtr += 2;
    }
}
112 #include <arm_neon.h>
/*!
 * \brief Extracts the imaginary (Q) component of every complex sample (NEON).
 *
 * The input is read as interleaved floats (real, imag, real, imag, ...). Four
 * complex samples are handled per vector iteration using a de-interleaving
 * load; any remaining samples are handled by a scalar tail loop.
 *
 * \param qBuffer       Output buffer receiving num_points imaginary components.
 * \param complexVector Input buffer of num_points complex samples.
 * \param num_points    Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_imag_32f_neon(float* qBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    unsigned int number = 0;
    unsigned int quarter_points = num_points / 4;
    const float* complexVectorPtr = (const float*)complexVector;
    float* qBufferPtr = qBuffer;
    float32x4x2_t complexInput;

    for (number = 0; number < quarter_points; number++) {
        /* vld2q_f32 splits eight floats into even (val[0]) and odd (val[1])
         * elements; the odd elements are the imaginary parts. */
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(qBufferPtr, complexInput.val[1]);
        complexVectorPtr += 8;
        /* Advance the output, otherwise every store lands on the same slots. */
        qBufferPtr += 4;
    }

    /* Scalar tail: take the imaginary float of each pair and skip the real one. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *qBufferPtr++ = complexVectorPtr[1];
        complexVectorPtr += 2;
    }
}
140 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Extracts the imaginary (Q) component of every complex sample (portable C).
 *
 * The input is read as interleaved floats (real, imag, real, imag, ...); for
 * each sample the second float of the pair is written to the output.
 *
 * \param qBuffer       Output buffer receiving num_points imaginary components.
 * \param complexVector Input buffer of num_points complex samples.
 * \param num_points    Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_imag_32f_generic(float* qBuffer,
                                                           const lv_32fc_t* complexVector,
                                                           unsigned int num_points)
{
    unsigned int number = 0;
    const float* complexVectorPtr = (const float*)complexVector;
    float* qBufferPtr = qBuffer;

    for (number = 0; number < num_points; number++) {
        /* Take the imaginary float of the pair, then step over the whole pair. */
        *qBufferPtr++ = complexVectorPtr[1];
        complexVectorPtr += 2;
    }
}
/* lv_32fc_t is a float complex type; definition: volk_complex.h:56 */