23 #ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
24 #define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
30 #include <tmmintrin.h>
38 static inline void volk_16ic_deinterleave_16i_x2_a_ssse3(
int16_t* iBuffer,
int16_t* qBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points){
39 unsigned int number = 0;
44 __m128i iMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
45 __m128i iMoveMask2 = _mm_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
47 __m128i qMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 14, 11, 10, 7, 6, 3, 2);
48 __m128i qMoveMask2 = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
50 __m128i complexVal1, complexVal2, iOutputVal, qOutputVal;
52 unsigned int eighthPoints = num_points / 8;
54 for(number = 0; number < eighthPoints; number++){
55 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
56 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
58 iOutputVal = _mm_or_si128( _mm_shuffle_epi8(complexVal1, iMoveMask1) , _mm_shuffle_epi8(complexVal2, iMoveMask2));
59 qOutputVal = _mm_or_si128( _mm_shuffle_epi8(complexVal1, qMoveMask1) , _mm_shuffle_epi8(complexVal2, qMoveMask2));
61 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
62 _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
68 number = eighthPoints * 8;
70 for(; number < num_points; number++){
71 *iBufferPtr++ = *int16ComplexVectorPtr++;
72 *qBufferPtr++ = *int16ComplexVectorPtr++;
78 #include <emmintrin.h>
86 static inline void volk_16ic_deinterleave_16i_x2_a_sse2(
int16_t* iBuffer,
int16_t* qBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points){
87 unsigned int number = 0;
91 __m128i complexVal1, complexVal2, iComplexVal1, iComplexVal2, qComplexVal1, qComplexVal2, iOutputVal, qOutputVal;
92 __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF);
93 __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
95 unsigned int eighthPoints = num_points / 8;
97 for(number = 0; number < eighthPoints; number++){
98 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 8;
99 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 8;
101 iComplexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3,1,2,0));
103 iComplexVal1 = _mm_shufflehi_epi16(iComplexVal1, _MM_SHUFFLE(3,1,2,0));
105 iComplexVal1 = _mm_shuffle_epi32(iComplexVal1, _MM_SHUFFLE(3,1,2,0));
107 iComplexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3,1,2,0));
109 iComplexVal2 = _mm_shufflehi_epi16(iComplexVal2, _MM_SHUFFLE(3,1,2,0));
111 iComplexVal2 = _mm_shuffle_epi32(iComplexVal2, _MM_SHUFFLE(2,0,3,1));
113 iOutputVal = _mm_or_si128(_mm_and_si128(iComplexVal1, lowMask), _mm_and_si128(iComplexVal2, highMask));
115 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
117 qComplexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(2,0,3,1));
119 qComplexVal1 = _mm_shufflehi_epi16(qComplexVal1, _MM_SHUFFLE(2,0,3,1));
121 qComplexVal1 = _mm_shuffle_epi32(qComplexVal1, _MM_SHUFFLE(3,1,2,0));
123 qComplexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(2,0,3,1));
125 qComplexVal2 = _mm_shufflehi_epi16(qComplexVal2, _MM_SHUFFLE(2,0,3,1));
127 qComplexVal2 = _mm_shuffle_epi32(qComplexVal2, _MM_SHUFFLE(2,0,3,1));
129 qOutputVal = _mm_or_si128(_mm_and_si128(qComplexVal1, lowMask), _mm_and_si128(qComplexVal2, highMask));
131 _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
137 number = eighthPoints * 8;
138 for(; number < num_points; number++){
139 *iBufferPtr++ = *complexVectorPtr++;
140 *qBufferPtr++ = *complexVectorPtr++;
#ifdef LV_HAVE_GENERIC
/*!
  \brief Splits an interleaved complex 16-bit vector into separate I and Q
         vectors, one complex point at a time (portable scalar version).
  \param iBuffer       Output: receives the first int16 of every complex pair.
  \param qBuffer       Output: receives the second int16 of every complex pair.
  \param complexVector Input: num_points interleaved (I, Q) int16 pairs.
  \param num_points    Number of complex points to process.

  NOTE(review): the local declarations, the closing braces and the matching
  #endif were absent from the text under review; they are restored here from
  the names the surviving statements use. Confirm against the upstream kernel.
*/
static inline void volk_16ic_deinterleave_16i_x2_generic(
    int16_t* iBuffer,
    int16_t* qBuffer,
    const lv_16sc_t* complexVector,
    unsigned int num_points){
  /* View the complex input as a flat run of int16 values: I, Q, I, Q, ... */
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  int16_t* iBufferPtr = iBuffer;
  int16_t* qBufferPtr = qBuffer;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    *iBufferPtr++ = *complexVectorPtr++;
    *qBufferPtr++ = *complexVectorPtr++;
  }
}
#endif /* LV_HAVE_GENERIC */
173 extern void volk_16ic_deinterleave_16i_x2_a_orc_impl(
int16_t* iBuffer,
int16_t* qBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points);
174 static inline void volk_16ic_deinterleave_16i_x2_u_orc(
int16_t* iBuffer,
int16_t* qBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points){
175 volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, num_points);
/*
 * Type references used above (from the generated documentation):
 *   lv_16sc_t : short complex  (definition: volk_complex.h:53)
 *   int16_t   : signed short   (definition: stdint.h:76)
 *   int8_t    : signed char    (definition: stdint.h:75)
 */