GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_16u_byteswap.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_16u_byteswap_u_H
24 #define INCLUDED_volk_16u_byteswap_u_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 
29 #ifdef LV_HAVE_SSE2
30 #include <emmintrin.h>
31 
/*!
 \brief Byteswaps (in-place) a vector of uint16_t's using unaligned SSE2 loads/stores.
 \param intsToSwap The vector of data to byte swap; overwritten with the result
 \param num_points The number of 16-bit data points in the vector
*/
static inline void volk_16u_byteswap_u_sse2(uint16_t* intsToSwap, unsigned int num_points){
  const unsigned int vectorCount = num_points / 8;
  const unsigned int vectorizedPoints = vectorCount * 8;
  uint16_t* cursor = intsToSwap;
  unsigned int idx;

  // Main loop: one 128-bit register holds eight 16-bit values.
  for(idx = 0; idx < vectorCount; idx++){
    const __m128i words = _mm_loadu_si128((__m128i*)cursor);
    // Within every 16-bit lane move the low byte up and the high byte down,
    // then merge the two halves to obtain the swapped value.
    const __m128i lowToHigh = _mm_slli_epi16(words, 8);
    const __m128i highToLow = _mm_srli_epi16(words, 8);
    _mm_storeu_si128((__m128i*)cursor, _mm_or_si128(lowToHigh, highToLow));
    cursor += 8;
  }

  // Scalar tail for the (num_points % 8) values the vector loop did not reach.
  for(idx = vectorizedPoints; idx < num_points; idx++){
    const uint16_t value = intsToSwap[idx];
    intsToSwap[idx] = (uint16_t)(((value >> 8) & 0xff) | ((value << 8) & 0xff00));
  }
}
65 #endif /* LV_HAVE_SSE2 */
66 
67 #ifdef LV_HAVE_GENERIC
/*!
 \brief Byteswaps (in-place) a vector of uint16_t's with plain C, one value at a time.
 \param intsToSwap The vector of data to byte swap; overwritten with the result
 \param num_points The number of 16-bit data points in the vector
*/
static inline void volk_16u_byteswap_generic(uint16_t* intsToSwap, unsigned int num_points){
  unsigned int idx;
  for(idx = 0; idx < num_points; idx++){
    const uint16_t value = intsToSwap[idx];
    const uint16_t highToLow = (uint16_t)((value >> 8) & 0xff);
    const uint16_t lowToHigh = (uint16_t)((value << 8) & 0xff00);
    intsToSwap[idx] = (uint16_t)(highToLow | lowToHigh);
  }
}
83 #endif /* LV_HAVE_GENERIC */
84 
85 #endif /* INCLUDED_volk_16u_byteswap_u_H */
86 #ifndef INCLUDED_volk_16u_byteswap_a_H
87 #define INCLUDED_volk_16u_byteswap_a_H
88 
89 #include <inttypes.h>
90 #include <stdio.h>
91 
92 #ifdef LV_HAVE_SSE2
93 #include <emmintrin.h>
94 
/*!
 \brief Byteswaps (in-place) a vector of uint16_t's using aligned SSE2 loads/stores.
 \param intsToSwap The vector of data to byte swap; overwritten with the result.
        Accessed with _mm_load_si128/_mm_store_si128, so it must be 16-byte aligned.
 \param num_points The number of 16-bit data points in the vector
*/
static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int num_points){
  const unsigned int vectorCount = num_points / 8;
  __m128i* vectorPtr = (__m128i*)intsToSwap;
  unsigned int idx;

  // Main loop: one 128-bit register holds eight 16-bit values.
  for(idx = 0; idx < vectorCount; idx++){
    const __m128i words = _mm_load_si128(vectorPtr);
    // Shift each 16-bit lane both ways by one byte and OR the halves together.
    const __m128i swapped = _mm_or_si128(_mm_slli_epi16(words, 8),
                                         _mm_srli_epi16(words, 8));
    _mm_store_si128(vectorPtr, swapped);
    vectorPtr++;
  }

  // Scalar tail for the (num_points % 8) values the vector loop did not reach.
  for(idx = vectorCount * 8; idx < num_points; idx++){
    const uint16_t value = intsToSwap[idx];
    intsToSwap[idx] = (uint16_t)(((value >> 8) & 0xff) | ((value << 8) & 0xff00));
  }
}
129 #endif /* LV_HAVE_SSE2 */
130 
131 #ifdef LV_HAVE_NEON
132 #include <arm_neon.h>
/*!
 \brief Byteswaps (in-place) a vector of uint16_t's using NEON.
 \param intsToSwap The vector of data to byte swap; overwritten with the result
 \param num_points The number of 16-bit data points in the vector
*/
static inline void volk_16u_byteswap_neon(uint16_t* intsToSwap, unsigned int num_points){
  unsigned int number;
  const unsigned int eighth_points = num_points / 8;
  uint16_t* inputPtr = intsToSwap;

  // Main loop: one 128-bit register holds eight 16-bit values.
  for(number = 0; number < eighth_points; number++){
    const uint16x8_t input = vld1q_u16(inputPtr);
    // Reverse the two bytes inside every 16-bit lane. The result depends only
    // on the loaded data, so no uninitialized register is ever read (the
    // previous shift-and-insert sequence used an unset accumulator as its
    // first operand).
    const uint16x8_t output =
      vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(input)));
    vst1q_u16(inputPtr, output);
    inputPtr += 8;
  }

  // Scalar tail for the (num_points % 8) values the vector loop did not reach.
  for(number = eighth_points * 8; number < num_points; number++){
    const uint16_t value = *inputPtr;
    *inputPtr = (uint16_t)(((value >> 8) & 0xff) | ((value << 8) & 0xff00));
    inputPtr++;
  }
}
159 #endif /* LV_HAVE_NEON */
160 
161 #ifdef LV_HAVE_NEON
162 #include <arm_neon.h>
/*!
 \brief Byteswaps (in-place) a vector of uint16_t's using NEON table lookups.
 \param intsToSwap The vector of data to byte swap; overwritten with the result
 \param num_points The number of 16-bit data points in the vector
*/
static inline void volk_16u_byteswap_neon_table(uint16_t* intsToSwap, unsigned int num_points){
  const unsigned int blockCount = num_points / 16;
  uint16_t* cursor = intsToSwap;
  unsigned int idx;

  /* Each constant packs eight table indices, least-significant byte first.
     vld4_u8 de-interleaves the 32 loaded bytes into four 8-byte vectors, so
     table entry t holds source byte 4*(t%8) + t/8. The indices below therefore
     select source bytes 1,0,3,2,5,4,7,6 (lookup01), 9,8,...,15,14 (lookup23),
     17,16,...,23,22 (lookup45) and 25,24,...,31,30 (lookup67): every adjacent
     byte pair exchanged. */
  const uint8x8_t lookup01 = vcreate_u8(0x1119010910180008ULL); /*  8, 0,24,16, 9, 1,25,17 */
  const uint8x8_t lookup23 = vcreate_u8(0x131B030B121A020AULL); /* 10, 2,26,18,11, 3,27,19 */
  const uint8x8_t lookup45 = vcreate_u8(0x151D050D141C040CULL); /* 12, 4,28,20,13, 5,29,21 */
  const uint8x8_t lookup67 = vcreate_u8(0x171F070F161E060EULL); /* 14, 6,30,22,15, 7,31,23 */

  // Main loop: 32 bytes (sixteen 16-bit values) per iteration.
  for(idx = 0; idx < blockCount; ++idx){
    uint8_t* bytes = (uint8_t*)cursor;
    const uint8x8x4_t table = vld4_u8(bytes);
    // All four lookups read the table loaded above, so they are computed
    // before any of the stores overwrite the source bytes.
    const uint8x8_t swapped01 = vtbl4_u8(table, lookup01);
    const uint8x8_t swapped23 = vtbl4_u8(table, lookup23);
    const uint8x8_t swapped45 = vtbl4_u8(table, lookup45);
    const uint8x8_t swapped67 = vtbl4_u8(table, lookup67);
    vst1_u8(bytes, swapped01);
    vst1_u8(bytes + 8, swapped23);
    vst1_u8(bytes + 16, swapped45);
    vst1_u8(bytes + 24, swapped67);
    cursor += 16;
  }

  // Scalar tail for the (num_points % 16) values the vector loop did not reach.
  for(idx = blockCount * 16; idx < num_points; ++idx){
    const uint16_t value = *cursor;
    *cursor = (uint16_t)(((value >> 8) & 0xff) | ((value << 8) & 0xff00));
    cursor++;
  }
}
211 #endif /* LV_HAVE_NEON */
212 
213 #ifdef LV_HAVE_GENERIC
/*!
 \brief Byteswaps (in-place) a vector of uint16_t's with plain C, one value at a time.
 \param intsToSwap The vector of data to byte swap; overwritten with the result
 \param num_points The number of 16-bit data points in the vector
*/
static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap, unsigned int num_points){
  uint16_t* cursor = intsToSwap;
  unsigned int remaining = num_points;

  while(remaining > 0){
    const uint16_t value = *cursor;
    // Exchange the high and low bytes of the current value.
    *cursor = (uint16_t)(((value << 8) & 0xff00) | ((value >> 8) & 0xff));
    cursor++;
    remaining--;
  }
}
229 #endif /* LV_HAVE_GENERIC */
230 
231 #ifdef LV_HAVE_ORC
/*!
 \brief ORC byteswap routine for a vector of uint16_t's; declared here, defined outside this header.
 \param intsToSwap The vector of data to byte swap
 \param num_points The number of data points
*/
extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int num_points);

/*!
 \brief Byteswaps a vector of uint16_t's by forwarding both arguments unchanged to volk_16u_byteswap_a_orc_impl.
 \param intsToSwap The vector of data to byte swap
 \param num_points The number of data points
*/
static inline void volk_16u_byteswap_u_orc(uint16_t* intsToSwap, unsigned int num_points)
{
  volk_16u_byteswap_a_orc_impl(intsToSwap, num_points);
}
241 #endif /* LV_HAVE_ORC */
242 
243 
244 #endif /* INCLUDED_volk_16u_byteswap_a_H */
unsigned short uint16_t
Definition: stdint.h:79
unsigned char uint8_t
Definition: stdint.h:78