GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32u_byteswap.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32u_byteswap_u_H
24 #define INCLUDED_volk_32u_byteswap_u_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 
29 #ifdef LV_HAVE_SSE2
30 #include <emmintrin.h>
31 
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using SSE2.

  Uses unaligned 128-bit loads and stores, so the buffer does not need to be
  16-byte aligned.

  \param intsToSwap The vector of data to byte swap (modified in place)
  \param num_points The number of 32-bit data points in intsToSwap
*/
static inline void volk_32u_byteswap_u_sse2(uint32_t* intsToSwap, unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;
  __m128i input, byte1, byte2, byte3, byte4, output;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);

  /* Number of full 4-word (128-bit) groups handled by the vector loop. */
  const unsigned int quarterPoints = num_points / 4;
  unsigned int number;

  for(number = 0; number < quarterPoints; number++){
    /* Load four 32-bit values; the pointer advances only after the store
       because the swap is done in place. */
    input = _mm_loadu_si128((__m128i*)inputPtr);

    /* Move every byte of each 32-bit lane to its mirrored position.
       The 24-bit shifts leave only the wanted byte, the 8-bit shifts
       need masking. */
    byte1 = _mm_slli_epi32(input, 24);
    byte2 = _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask);
    byte3 = _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask);
    byte4 = _mm_srli_epi32(input, 24);

    /* Combine the four byte positions. */
    output = _mm_or_si128(byte1, byte4);
    output = _mm_or_si128(output, byte2);
    output = _mm_or_si128(output, byte3);

    _mm_storeu_si128((__m128i*)inputPtr, output);
    inputPtr += 4;
  }

  /* Byteswap the (at most three) remaining points one at a time. */
  for(number = quarterPoints * 4; number < num_points; number++){
    const uint32_t value = *inputPtr;
    *inputPtr = ((value >> 24) & 0x000000ff) |
                ((value >>  8) & 0x0000ff00) |
                ((value <<  8) & 0x00ff0000) |
                ((value << 24) & 0xff000000);
    inputPtr++;
  }
}
74 #endif /* LV_HAVE_SSE2 */
75 
76 #ifdef LV_HAVE_NEON
77 #include <arm_neon.h>
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using NEON.
  \param intsToSwap The vector of data to byte swap (modified in place)
  \param num_points The number of 32-bit data points in intsToSwap
*/
static inline void volk_32u_byteswap_neon(uint32_t* intsToSwap, unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;
  unsigned int number = 0;
  /* Number of full 8-word (32-byte) groups handled by the vector loop. */
  unsigned int n8points = num_points / 8;

  uint8x8x4_t input_table;
  uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
  uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;

  /* These constants are byte indices into the 32-byte lookup table built by
     vld4_u8 below. They are pre-computed to save time: byte ii of each
     64-bit constant is the table index used for output byte ii. For
     example lookup01 packs {24, 16, 8, 0, 25, 17, 9, 1}:
       uint8_t chars[8] = {24, 16, 8, 0, 25, 17, 9, 1};
       for(ii=0; ii < 8; ++ii) {
         index += ((uint64_t)(*(chars+ii))) << (ii*8);
       }
     Written in hex, each byte of the constant is one index. */
  int_lookup01 = vcreate_u8(0x0109111900081018ULL); /* {24,16, 8,0, 25,17, 9,1} */
  int_lookup23 = vcreate_u8(0x030B131B020A121AULL); /* {26,18,10,2, 27,19,11,3} */
  int_lookup45 = vcreate_u8(0x050D151D040C141CULL); /* {28,20,12,4, 29,21,13,5} */
  int_lookup67 = vcreate_u8(0x070F171F060E161EULL); /* {30,22,14,6, 31,23,15,7} */

  for(number = 0; number < n8points; ++number){
    /* Load 32 bytes de-interleaved by 4, then pick them back out in
       byte-reversed order, two 32-bit words per table lookup. */
    input_table = vld4_u8((uint8_t*) inputPtr);
    swapped_int01 = vtbl4_u8(input_table, int_lookup01);
    swapped_int23 = vtbl4_u8(input_table, int_lookup23);
    swapped_int45 = vtbl4_u8(input_table, int_lookup45);
    swapped_int67 = vtbl4_u8(input_table, int_lookup67);
    vst1_u8((uint8_t*) inputPtr, swapped_int01);
    vst1_u8((uint8_t*) (inputPtr+2), swapped_int23);
    vst1_u8((uint8_t*) (inputPtr+4), swapped_int45);
    vst1_u8((uint8_t*) (inputPtr+6), swapped_int67);

    inputPtr += 8;
  }

  /* Byteswap the (at most seven) remaining points one at a time. */
  for(number = n8points * 8; number < num_points; ++number){
    uint32_t output = *inputPtr;
    output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | ((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));

    *inputPtr = output;
    inputPtr++;
  }
}
127 #endif /* LV_HAVE_NEON */
128 
129 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using portable C.
  \param intsToSwap The vector of data to byte swap (modified in place)
  \param num_points The number of 32-bit data points in intsToSwap
*/
static inline void volk_32u_byteswap_generic(uint32_t* intsToSwap, unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;
  unsigned int point;

  for(point = 0; point < num_points; point++){
    const uint32_t value = *inputPtr;
    /* Mirror the four bytes: 0<->3 and 1<->2. */
    *inputPtr = ((value >> 24) & 0x000000ff) |
                ((value >>  8) & 0x0000ff00) |
                ((value <<  8) & 0x00ff0000) |
                ((value << 24) & 0xff000000);
    inputPtr++;
  }
}
147 #endif /* LV_HAVE_GENERIC */
148 
149 
150 #endif /* INCLUDED_volk_32u_byteswap_u_H */
151 #ifndef INCLUDED_volk_32u_byteswap_a_H
152 #define INCLUDED_volk_32u_byteswap_a_H
153 
154 #include <inttypes.h>
155 #include <stdio.h>
156 
157 #ifdef LV_HAVE_SSE2
158 #include <emmintrin.h>
159 
/*!
  \brief Byteswaps (in-place) an aligned vector of uint32_t's using SSE2.

  Uses aligned 128-bit loads and stores (_mm_load_si128 / _mm_store_si128),
  so intsToSwap must be 16-byte aligned.

  \param intsToSwap The 16-byte aligned vector of data to byte swap (modified in place)
  \param num_points The number of 32-bit data points in intsToSwap
*/
static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;
  __m128i input, byte1, byte2, byte3, byte4, output;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);

  /* Number of full 4-word (128-bit) groups handled by the vector loop. */
  const unsigned int quarterPoints = num_points / 4;
  unsigned int number;

  for(number = 0; number < quarterPoints; number++){
    /* Load four 32-bit values; the pointer advances only after the store
       because the swap is done in place. */
    input = _mm_load_si128((__m128i*)inputPtr);

    /* Move every byte of each 32-bit lane to its mirrored position.
       The 24-bit shifts leave only the wanted byte, the 8-bit shifts
       need masking. */
    byte1 = _mm_slli_epi32(input, 24);
    byte2 = _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask);
    byte3 = _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask);
    byte4 = _mm_srli_epi32(input, 24);

    /* Combine the four byte positions. */
    output = _mm_or_si128(byte1, byte4);
    output = _mm_or_si128(output, byte2);
    output = _mm_or_si128(output, byte3);

    _mm_store_si128((__m128i*)inputPtr, output);
    inputPtr += 4;
  }

  /* Byteswap the (at most three) remaining points one at a time. */
  for(number = quarterPoints * 4; number < num_points; number++){
    const uint32_t value = *inputPtr;
    *inputPtr = ((value >> 24) & 0x000000ff) |
                ((value >>  8) & 0x0000ff00) |
                ((value <<  8) & 0x00ff0000) |
                ((value << 24) & 0xff000000);
    inputPtr++;
  }
}
202 #endif /* LV_HAVE_SSE2 */
203 
204 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using portable C.
  \param intsToSwap The vector of data to byte swap (modified in place)
  \param num_points The number of 32-bit data points in intsToSwap
*/
static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;
  unsigned int point;

  for(point = 0; point < num_points; point++){
    const uint32_t value = *inputPtr;
    /* Mirror the four bytes: 0<->3 and 1<->2. */
    *inputPtr = ((value >> 24) & 0x000000ff) |
                ((value >>  8) & 0x0000ff00) |
                ((value <<  8) & 0x00ff0000) |
                ((value << 24) & 0xff000000);
    inputPtr++;
  }
}
222 #endif /* LV_HAVE_GENERIC */
223 
224 
225 
226 
227 #endif /* INCLUDED_volk_32u_byteswap_a_H */
unsigned char uint8_t
Definition: stdint.h:78
unsigned int uint32_t
Definition: stdint.h:80
unsigned __int64 uint64_t
Definition: stdint.h:90