GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_8ic_deinterleave_16i_x2.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
24 #define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
  \brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data

  Every 8 bit component is sign extended to 16 bits and then shifted left by
  8 bits (multiplied by 256) before it is written to its output buffer.

  \param iBuffer The I buffer output data
  \param qBuffer The Q buffer output data
  \param complexVector The complex input vector
  \param num_points The number of complex data values to be deinterleaved

  \note The vectorized loop uses the aligned load/store intrinsics
        (_mm_load_si128 / _mm_store_si128) on all three buffers.
*/
static inline void volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
  // Keep the const qualifier of the input, as the generic kernel does
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int16_t* iBufferPtr = iBuffer;
  int16_t* qBufferPtr = qBuffer;

  // Shuffle masks: move the even (I) or odd (Q) bytes into the lower 8 bytes;
  // a selector of 0x80 writes a zero byte
  const __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
  const __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);

  // Each iteration consumes 16 input bytes = 8 complex points
  const unsigned int eighthPoints = num_points / 8;
  unsigned int number;

  for(number = 0; number < eighthPoints; number++){
    const __m128i complexVal = _mm_load_si128((const __m128i*)complexVectorPtr); // aligned load

    // Gather the 8 I bytes and the 8 Q bytes into the low half of a register
    const __m128i iBytes = _mm_shuffle_epi8(complexVal, iMoveMask);
    const __m128i qBytes = _mm_shuffle_epi8(complexVal, qMoveMask);

    // Sign extend the lower 8 bytes to 8 16-bit values, then shift each left by 8 bits
    const __m128i iOutputVal = _mm_slli_epi16(_mm_cvtepi8_epi16(iBytes), 8);
    const __m128i qOutputVal = _mm_slli_epi16(_mm_cvtepi8_epi16(qBytes), 8);

    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal); // aligned store
    _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);

    complexVectorPtr += 16;
    iBufferPtr += 8;
    qBufferPtr += 8;
  }

  // Scalar tail for the points that do not fill a whole 128 bit register
  for(number = eighthPoints * 8; number < num_points; number++){
    *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
    *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
  }
}
#endif /* LV_HAVE_SSE4_1 */
76 
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
  \brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data

  Every 8 bit component is sign extended to 16 bits and then shifted left by
  8 bits (multiplied by 256) before it is written to its output buffer.

  \param iBuffer The I buffer output data
  \param qBuffer The Q buffer output data
  \param complexVector The complex input vector
  \param num_points The number of complex data values to be deinterleaved

  \note The vectorized loop uses the aligned load/store intrinsics
        (_mm256_load_si256 / _mm256_store_si256) on all three buffers.
*/
static inline void volk_8ic_deinterleave_16i_x2_a_avx(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
  // Keep the const qualifier of the input, as the generic kernel does
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int16_t* iBufferPtr = iBuffer;
  int16_t* qBufferPtr = qBuffer;

  // Shuffle masks: move the even (I) or odd (Q) bytes into the lower 8 bytes;
  // a selector of 0x80 writes a zero byte
  const __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
  const __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);

  // Each iteration consumes 32 input bytes = 16 complex points
  const unsigned int sixteenthPoints = num_points / 16;
  unsigned int number;

  for(number = 0; number < sixteenthPoints; number++){
    const __m256i complexVal = _mm256_load_si256((const __m256i*)complexVectorPtr); // aligned load

    // The byte shuffle and widening below operate on 128 bit registers,
    // so split the 256 bit input into its two halves
    const __m128i complexVal0 = _mm256_extractf128_si256(complexVal, 0);
    const __m128i complexVal1 = _mm256_extractf128_si256(complexVal, 1);

    // Gather the I bytes and the Q bytes of each half into its lower 8 bytes
    const __m128i iBytes0 = _mm_shuffle_epi8(complexVal0, iMoveMask);
    const __m128i iBytes1 = _mm_shuffle_epi8(complexVal1, iMoveMask);
    const __m128i qBytes0 = _mm_shuffle_epi8(complexVal0, qMoveMask);
    const __m128i qBytes1 = _mm_shuffle_epi8(complexVal1, qMoveMask);

    // Sign extend the lower 8 bytes to 8 16-bit values, then shift each left by 8 bits
    const __m128i iOutputVal0 = _mm_slli_epi16(_mm_cvtepi8_epi16(iBytes0), 8);
    const __m128i iOutputVal1 = _mm_slli_epi16(_mm_cvtepi8_epi16(iBytes1), 8);
    const __m128i qOutputVal0 = _mm_slli_epi16(_mm_cvtepi8_epi16(qBytes0), 8);
    const __m128i qOutputVal1 = _mm_slli_epi16(_mm_cvtepi8_epi16(qBytes1), 8);

    // Recombine the halves: the cast places half 0 in the low lane (the high
    // lane is undefined at that point) and the insert then fills the high
    // lane with half 1, so no zeroed temporary is needed
    const __m256i iOutputVal = _mm256_insertf128_si256(_mm256_castsi128_si256(iOutputVal0), iOutputVal1, 1);
    const __m256i qOutputVal = _mm256_insertf128_si256(_mm256_castsi128_si256(qOutputVal0), qOutputVal1, 1);

    _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal); // aligned store
    _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);

    complexVectorPtr += 32;
    iBufferPtr += 16;
    qBufferPtr += 16;
  }

  // Scalar tail for the points that do not fill a whole 256 bit register
  for(number = sixteenthPoints * 16; number < num_points; number++){
    *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
    *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
  }
}
#endif /* LV_HAVE_AVX */
142 
#ifdef LV_HAVE_GENERIC
/*!
  \brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data

  Plain C implementation: for every complex point the first byte goes to the
  I buffer and the second byte to the Q buffer, each widened to 16 bits and
  multiplied by 256.

  \param iBuffer The I buffer output data
  \param qBuffer The Q buffer output data
  \param complexVector The complex input vector
  \param num_points The number of complex data values to be deinterleaved
*/
static inline void volk_8ic_deinterleave_16i_x2_generic(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
  const int8_t* src = (const int8_t*)complexVector; // input viewed as interleaved I,Q bytes
  int16_t* iOut = iBuffer;
  int16_t* qOut = qBuffer;
  unsigned int remaining;

  for(remaining = num_points; remaining != 0; remaining--){
    // Same order as always: read I, write I, read Q, write Q
    iOut[0] = (int16_t)src[0] * 256;
    qOut[0] = (int16_t)src[1] * 256;

    src += 2;
    iOut += 1;
    qOut += 1;
  }
}
#endif /* LV_HAVE_GENERIC */
162 
163 
164 
165 
166 #endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a_H */
signed short int16_t
Definition: stdint.h:76
signed char int8_t
Definition: stdint.h:75
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:52