GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_64u_byteswap.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_64u_byteswap_u_H
24 #define INCLUDED_volk_64u_byteswap_u_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 
29 #ifdef LV_HAVE_SSE2
30 #include <emmintrin.h>
31 
/*!
  \brief Byteswaps (in-place) a vector of uint64_t's using unaligned SSE2 loads/stores.
  \param intsToSwap The vector of data to byte swap (no 16-byte alignment is required)
  \param num_points The number of 64-bit data points in the vector
*/
static inline void volk_64u_byteswap_u_sse2(uint64_t* intsToSwap, unsigned int num_points){
  __m128i* vectorPtr = (__m128i*)intsToSwap;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
  const unsigned int halfPoints = num_points / 2;
  unsigned int number;

  // Each 128-bit register holds two 64-bit points.
  for(number = 0; number < halfPoints; number++){
    __m128i byte1, byte2, byte3, byte4, output;
    const __m128i input = _mm_loadu_si128(vectorPtr);

    // Reverse the bytes inside every 32-bit lane: shift each byte to its
    // mirrored position, mask the two middle bytes, and OR the pieces.
    byte1 = _mm_slli_epi32(input, 24);
    byte2 = _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask);
    byte3 = _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask);
    byte4 = _mm_srli_epi32(input, 24);
    output = _mm_or_si128(_mm_or_si128(byte1, byte4), _mm_or_si128(byte2, byte3));

    // Exchange the two 32-bit words of each 64-bit point to finish the swap.
    output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

    // Store in place, then move on to the next pair of points.
    _mm_storeu_si128(vectorPtr, output);
    vectorPtr++;
  }

  // Byteswap the remaining point, if any. The value is accessed as a
  // uint64_t (not through a uint32_t*) so the access matches the object's type.
  for(number = halfPoints * 2; number < num_points; number++){
    uint64_t value = intsToSwap[number];
    value = ((value & 0x00FF00FF00FF00FFULL) << 8) | ((value >> 8) & 0x00FF00FF00FF00FFULL);
    value = ((value & 0x0000FFFF0000FFFFULL) << 16) | ((value >> 16) & 0x0000FFFF0000FFFFULL);
    intsToSwap[number] = (value << 32) | (value >> 32);
  }
}
82 #endif /* LV_HAVE_SSE2 */
83 
84 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint64_t's.
  \param intsToSwap The vector of data to byte swap
  \param num_points The number of 64-bit data points in the vector
*/
static inline void volk_64u_byteswap_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  for(point = 0; point < num_points; point++){
    // Operate on the 64-bit value directly rather than through a uint32_t*,
    // so the access matches the object's type (no strict-aliasing violation).
    uint64_t value = intsToSwap[point];

    // Swap adjacent bytes, then adjacent 16-bit pairs, then the 32-bit halves.
    value = ((value & 0x00FF00FF00FF00FFULL) << 8) | ((value >> 8) & 0x00FF00FF00FF00FFULL);
    value = ((value & 0x0000FFFF0000FFFFULL) << 16) | ((value >> 16) & 0x0000FFFF0000FFFFULL);
    intsToSwap[point] = (value << 32) | (value >> 32);
  }
}
105 #endif /* LV_HAVE_GENERIC */
106 
107 #ifdef LV_HAVE_NEON
108 #include <arm_neon.h>
/*!
  \brief Byteswaps (in-place) a vector of uint64_t's.
  \param intsToSwap The vector of data to byte swap
  \param num_points The number of 64-bit data points in the vector
*/
static inline void volk_64u_byteswap_neon(uint64_t* intsToSwap, unsigned int num_points){
  uint8_t* bytePtr = (uint8_t*)intsToSwap;
  unsigned int number;
  const unsigned int quarterPoints = num_points / 4;

  uint8x8x4_t input_table;
  uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
  uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;

  /* These constants are byte indices into the 32-byte table that vld4_u8
     loads. vld4_u8 de-interleaves with a stride of 4, so table entry
     (8*k + j) holds source byte (4*j + k). Reversing the first 64-bit point
     (source bytes 7,6,...,0) therefore needs the indices
        uint8_t chars[8] = {25, 17, 9, 1, 24, 16, 8, 0};
     packed least-significant byte first:
        for(ii=0; ii < 8; ++ii) {
          index += ((uint64_t)(*(chars+ii))) << (ii*8);
        }
     Each following point adds 2 to every index.
  */
  int_lookup01 = vcreate_u8(0x0008101801091119ULL);
  int_lookup23 = vcreate_u8(0x020A121A030B131BULL);
  int_lookup45 = vcreate_u8(0x040C141C050D151DULL);
  int_lookup67 = vcreate_u8(0x060E161E070F171FULL);

  // Each iteration byteswaps four 64-bit points (32 bytes).
  for(number = 0; number < quarterPoints; ++number){
    input_table = vld4_u8(bytePtr);
    swapped_int01 = vtbl4_u8(input_table, int_lookup01);
    swapped_int23 = vtbl4_u8(input_table, int_lookup23);
    swapped_int45 = vtbl4_u8(input_table, int_lookup45);
    swapped_int67 = vtbl4_u8(input_table, int_lookup67);
    vst1_u8(bytePtr, swapped_int01);
    vst1_u8(bytePtr + 8, swapped_int23);
    vst1_u8(bytePtr + 16, swapped_int45);
    vst1_u8(bytePtr + 24, swapped_int67);

    // Advance past all 32 bytes just processed. Advancing by only 16 would
    // make the next iteration swap two of these points back again.
    bytePtr += 32;
  }

  // Byteswap the remaining (up to three) points one at a time, indexing the
  // uint64_t buffer directly so the position cannot drift from the loop above.
  for(number = quarterPoints * 4; number < num_points; ++number){
    uint64_t value = intsToSwap[number];
    value = ((value & 0x00FF00FF00FF00FFULL) << 8) | ((value >> 8) & 0x00FF00FF00FF00FFULL);
    value = ((value & 0x0000FFFF0000FFFFULL) << 16) | ((value >> 16) & 0x0000FFFF0000FFFFULL);
    intsToSwap[number] = (value << 32) | (value >> 32);
  }

}
162 #endif /* LV_HAVE_NEON */
163 
164 
165 #endif /* INCLUDED_volk_64u_byteswap_u_H */
166 #ifndef INCLUDED_volk_64u_byteswap_a_H
167 #define INCLUDED_volk_64u_byteswap_a_H
168 
169 #include <inttypes.h>
170 #include <stdio.h>
171 
172 #ifdef LV_HAVE_SSE2
173 #include <emmintrin.h>
174 
/*!
  \brief Byteswaps (in-place) an aligned vector of uint64_t's.
  \param intsToSwap The vector of data to byte swap; must be 16-byte aligned because aligned SSE2 loads/stores are used
  \param num_points The number of 64-bit data points in the vector
*/
static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int num_points){
  __m128i* vectorPtr = (__m128i*)intsToSwap;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
  const unsigned int halfPoints = num_points / 2;
  unsigned int number;

  // Each 128-bit register holds two 64-bit points.
  for(number = 0; number < halfPoints; number++){
    __m128i byte1, byte2, byte3, byte4, output;
    const __m128i input = _mm_load_si128(vectorPtr);

    // Reverse the bytes inside every 32-bit lane: shift each byte to its
    // mirrored position, mask the two middle bytes, and OR the pieces.
    byte1 = _mm_slli_epi32(input, 24);
    byte2 = _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask);
    byte3 = _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask);
    byte4 = _mm_srli_epi32(input, 24);
    output = _mm_or_si128(_mm_or_si128(byte1, byte4), _mm_or_si128(byte2, byte3));

    // Exchange the two 32-bit words of each 64-bit point to finish the swap.
    output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

    // Store in place, then move on to the next pair of points.
    _mm_store_si128(vectorPtr, output);
    vectorPtr++;
  }

  // Byteswap the remaining point, if any. The value is accessed as a
  // uint64_t (not through a uint32_t*) so the access matches the object's type.
  for(number = halfPoints * 2; number < num_points; number++){
    uint64_t value = intsToSwap[number];
    value = ((value & 0x00FF00FF00FF00FFULL) << 8) | ((value >> 8) & 0x00FF00FF00FF00FFULL);
    value = ((value & 0x0000FFFF0000FFFFULL) << 16) | ((value >> 16) & 0x0000FFFF0000FFFFULL);
    intsToSwap[number] = (value << 32) | (value >> 32);
  }
}
225 #endif /* LV_HAVE_SSE2 */
226 
227 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint64_t's (generic implementation of the aligned kernel).
  \param intsToSwap The vector of data to byte swap
  \param num_points The number of 64-bit data points in the vector
*/
static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  for(point = 0; point < num_points; point++){
    // Operate on the 64-bit value directly rather than through a uint32_t*,
    // so the access matches the object's type (no strict-aliasing violation).
    uint64_t value = intsToSwap[point];

    // Swap adjacent bytes, then adjacent 16-bit pairs, then the 32-bit halves.
    value = ((value & 0x00FF00FF00FF00FFULL) << 8) | ((value >> 8) & 0x00FF00FF00FF00FFULL);
    value = ((value & 0x0000FFFF0000FFFFULL) << 16) | ((value >> 16) & 0x0000FFFF0000FFFFULL);
    intsToSwap[point] = (value << 32) | (value >> 32);
  }
}
248 #endif /* LV_HAVE_GENERIC */
249 
250 
251 
252 
253 #endif /* INCLUDED_volk_64u_byteswap_a_H */
unsigned char uint8_t
Definition: stdint.h:78
unsigned int uint32_t
Definition: stdint.h:80
unsigned __int64 uint64_t
Definition: stdint.h:90