23 #ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
24 #define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
30 #include <smmintrin.h>
39 static inline void volk_8ic_deinterleave_16i_x2_a_sse4_1(
int16_t* iBuffer,
int16_t* qBuffer,
const lv_8sc_t* complexVector,
unsigned int num_points){
40 unsigned int number = 0;
44 __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
45 __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
46 __m128i complexVal, iOutputVal, qOutputVal;
48 unsigned int eighthPoints = num_points / 8;
50 for(number = 0; number < eighthPoints; number++){
51 complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
53 iOutputVal = _mm_shuffle_epi8(complexVal, iMoveMask);
54 qOutputVal = _mm_shuffle_epi8(complexVal, qMoveMask);
56 iOutputVal = _mm_cvtepi8_epi16(iOutputVal);
57 iOutputVal = _mm_slli_epi16(iOutputVal, 8);
59 qOutputVal = _mm_cvtepi8_epi16(qOutputVal);
60 qOutputVal = _mm_slli_epi16(qOutputVal, 8);
62 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
63 _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
69 number = eighthPoints * 8;
70 for(; number < num_points; number++){
71 *iBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 256;
72 *qBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 256;
78 #include <immintrin.h>
86 static inline void volk_8ic_deinterleave_16i_x2_a_avx(
int16_t* iBuffer,
int16_t* qBuffer,
const lv_8sc_t* complexVector,
unsigned int num_points){
87 unsigned int number = 0;
91 __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
92 __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
93 __m256i complexVal, iOutputVal, qOutputVal;
94 __m128i complexVal1, complexVal0;
95 __m128i iOutputVal1, iOutputVal0, qOutputVal1, qOutputVal0;
97 unsigned int sixteenthPoints = num_points / 16;
99 for(number = 0; number < sixteenthPoints; number++){
100 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
103 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
104 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
106 iOutputVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask);
107 iOutputVal0 = _mm_shuffle_epi8(complexVal0, iMoveMask);
108 qOutputVal1 = _mm_shuffle_epi8(complexVal1, qMoveMask);
109 qOutputVal0 = _mm_shuffle_epi8(complexVal0, qMoveMask);
111 iOutputVal1 = _mm_cvtepi8_epi16(iOutputVal1);
112 iOutputVal1 = _mm_slli_epi16(iOutputVal1, 8);
113 iOutputVal0 = _mm_cvtepi8_epi16(iOutputVal0);
114 iOutputVal0 = _mm_slli_epi16(iOutputVal0, 8);
116 qOutputVal1 = _mm_cvtepi8_epi16(qOutputVal1);
117 qOutputVal1 = _mm_slli_epi16(qOutputVal1, 8);
118 qOutputVal0 = _mm_cvtepi8_epi16(qOutputVal0);
119 qOutputVal0 = _mm_slli_epi16(qOutputVal0, 8);
122 __m256i dummy = _mm256_setzero_si256();
123 iOutputVal = _mm256_insertf128_si256(dummy, iOutputVal0, 0);
124 iOutputVal = _mm256_insertf128_si256(iOutputVal, iOutputVal1, 1);
125 qOutputVal = _mm256_insertf128_si256(dummy, qOutputVal0, 0);
126 qOutputVal = _mm256_insertf128_si256(qOutputVal, qOutputVal1, 1);
128 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
129 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
135 number = sixteenthPoints * 16;
136 for(; number < num_points; number++){
137 *iBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 256;
138 *qBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 256;
143 #ifdef LV_HAVE_GENERIC
151 static inline void volk_8ic_deinterleave_16i_x2_generic(
int16_t* iBuffer,
int16_t* qBuffer,
const lv_8sc_t* complexVector,
unsigned int num_points){
152 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
156 for(number = 0; number < num_points; number++){
157 *iBufferPtr++ = (
int16_t)(*complexVectorPtr++)*256;
158 *qBufferPtr++ = (
int16_t)(*complexVectorPtr++)*256;
/*
 * Referenced types:
 *   int16_t  - signed short (definition: stdint.h:76)
 *   int8_t   - signed char (definition: stdint.h:75)
 *   lv_8sc_t - char complex (definition: volk_complex.h:52, which provides
 *              typedefs and operators for all complex types in C and C++)
 */