23 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
24 #define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
30 #include <smmintrin.h>
37 static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(
int16_t* iBuffer,
const lv_8sc_t* complexVector,
unsigned int num_points){
38 unsigned int number = 0;
41 __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
42 __m128i complexVal, outputVal;
44 unsigned int eighthPoints = num_points / 8;
46 for(number = 0; number < eighthPoints; number++){
47 complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
49 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
51 outputVal = _mm_cvtepi8_epi16(complexVal);
52 outputVal = _mm_slli_epi16(outputVal, 7);
54 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
58 number = eighthPoints * 8;
59 for(; number < num_points; number++){
60 *iBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 128;
67 #include <immintrin.h>
74 static inline void volk_8ic_deinterleave_real_16i_a_avx(
int16_t* iBuffer,
const lv_8sc_t* complexVector,
unsigned int num_points){
75 unsigned int number = 0;
78 __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
79 __m256i complexVal, outputVal;
80 __m128i complexVal1, complexVal0, outputVal1, outputVal0;
82 unsigned int sixteenthPoints = num_points / 16;
84 for(number = 0; number < sixteenthPoints; number++){
85 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
87 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
88 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
90 outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
91 outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);
93 outputVal1 = _mm_cvtepi8_epi16(outputVal1);
94 outputVal1 = _mm_slli_epi16(outputVal1, 7);
95 outputVal0 = _mm_cvtepi8_epi16(outputVal0);
96 outputVal0 = _mm_slli_epi16(outputVal0, 7);
98 __m256i dummy = _mm256_setzero_si256();
99 outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
100 outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
101 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
106 number = sixteenthPoints * 16;
107 for(; number < num_points; number++){
108 *iBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 128;
115 #ifdef LV_HAVE_GENERIC
122 static inline void volk_8ic_deinterleave_real_16i_generic(
int16_t* iBuffer,
const lv_8sc_t* complexVector,
unsigned int num_points){
123 unsigned int number = 0;
124 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
126 for(number = 0; number < num_points; number++){
127 *iBufferPtr++ = ((
int16_t)(*complexVectorPtr++)) * 128;
/*
 * Types referenced above:
 *   int16_t  - signed short  (definition: stdint.h:76)
 *   int8_t   - signed char   (definition: stdint.h:75)
 *   lv_8sc_t - char complex  (definition: volk_complex.h:52)
 *              volk_complex.h provides typedefs and operators for all
 *              complex types in C and C++.
 */