23 #ifndef INCLUDED_volk_16u_byteswap_u_H
24 #define INCLUDED_volk_16u_byteswap_u_H
30 #include <emmintrin.h>
/*
 * Swaps the two bytes of every 16-bit value, in place, using SSE2 intrinsics
 * with unaligned loads and stores.
 *
 * intsToSwap - buffer of 16-bit values to byte-swap
 * num_points - number of 16-bit values to process
 *
 * NOTE(review): this listing is an excerpt. The declaration of inputPtr, the
 * pointer increments, the declaration of outputVal and the closing braces are
 * not visible here. inputPtr presumably starts at intsToSwap - confirm
 * against the full source.
 */
37 static inline void volk_16u_byteswap_u_sse2(
uint16_t* intsToSwap,
unsigned int num_points){
38 unsigned int number = 0;
40 __m128i input, left, right, output;
/* One 128-bit register holds eight 16-bit values, so the vector loop runs
 * once per full group of eight. */
42 const unsigned int eighthPoints = num_points / 8;
43 for(;number < eighthPoints; number++){
/* Unaligned 128-bit load of the next eight values. */
45 input = _mm_loadu_si128((__m128i*)inputPtr);
/* Per 16-bit lane: left holds the old low byte moved to the high byte,
 * right holds the old high byte moved to the low byte. */
47 left = _mm_slli_epi16(input, 8);
48 right = _mm_srli_epi16(input, 8);
/* OR-ing the two halves yields the byte-swapped lane. */
50 output = _mm_or_si128(left, right);
/* Unaligned store back through the same pointer the data was loaded from. */
52 _mm_storeu_si128((__m128i*)inputPtr, output);
/* Index of the first value not covered by the vector loop. */
57 number = eighthPoints*8;
/* Scalar tail for the remaining (num_points % 8) values. */
58 for(; number < num_points; number++){
/* Exchange the low and high bytes; outputVal is declared on a line that is
 * not part of this excerpt (presumably loaded from *inputPtr). */
60 outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
61 *inputPtr = outputVal;
67 #ifdef LV_HAVE_GENERIC
/*
 * Scalar byte swap of 16-bit values, one value per loop iteration.
 *
 * intsToSwap - buffer of 16-bit values to byte-swap
 * num_points - number of 16-bit values to process
 *
 * NOTE(review): the declarations of point and output, the load/store of each
 * value and the closing braces are not visible in this excerpt.
 */
73 static inline void volk_16u_byteswap_generic(
uint16_t* intsToSwap,
unsigned int num_points){
76 for(point = 0; point < num_points; point++){
/* Move the high byte down and the low byte up; the masks keep only the
 * 8 bits that belong in each half of the result. */
78 output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
86 #ifndef INCLUDED_volk_16u_byteswap_a_H
87 #define INCLUDED_volk_16u_byteswap_a_H
93 #include <emmintrin.h>
/*
 * Swaps the two bytes of every 16-bit value, in place, using SSE2 intrinsics
 * with aligned loads and stores.
 *
 * intsToSwap - buffer of 16-bit values to byte-swap
 * num_points - number of 16-bit values to process
 *
 * The aligned intrinsics used below (_mm_load_si128 / _mm_store_si128)
 * require the accessed address to be 16-byte aligned.
 *
 * NOTE(review): this listing is an excerpt. The declaration of inputPtr, the
 * pointer increments, the declaration of outputVal and the closing braces are
 * not visible here. inputPtr presumably starts at intsToSwap - confirm
 * against the full source.
 */
100 static inline void volk_16u_byteswap_a_sse2(
uint16_t* intsToSwap,
unsigned int num_points){
101 unsigned int number = 0;
103 __m128i input, left, right, output;
/* One 128-bit register holds eight 16-bit values, so the vector loop runs
 * once per full group of eight. */
105 const unsigned int eighthPoints = num_points / 8;
106 for(;number < eighthPoints; number++){
/* Aligned 128-bit load of the next eight values. */
108 input = _mm_load_si128((__m128i*)inputPtr);
/* Per 16-bit lane: left holds the old low byte moved to the high byte,
 * right holds the old high byte moved to the low byte. */
110 left = _mm_slli_epi16(input, 8);
111 right = _mm_srli_epi16(input, 8);
/* OR-ing the two halves yields the byte-swapped lane. */
113 output = _mm_or_si128(left, right);
/* Aligned store back through the same pointer the data was loaded from. */
115 _mm_store_si128((__m128i*)inputPtr, output);
/* Index of the first value not covered by the vector loop. */
121 number = eighthPoints*8;
/* Scalar tail for the remaining (num_points % 8) values. */
122 for(; number < num_points; number++){
/* Exchange the low and high bytes; outputVal is declared on a line that is
 * not part of this excerpt (presumably loaded from *inputPtr). */
124 outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
125 *inputPtr = outputVal;
132 #include <arm_neon.h>
/*
 * Swaps the two bytes of every 16-bit value, in place, using NEON
 * intrinsics: eight values per vector iteration, then a scalar loop for
 * whatever is left.
 *
 * intsToSwap - buffer of 16-bit values to byte-swap
 * num_points - number of 16-bit values to process
 *
 * NOTE(review): this listing is an excerpt. The declarations of inputPtr and
 * number, the pointer increments, the scalar load/store and the closing
 * braces are not visible here.
 */
138 static inline void volk_16u_byteswap_neon(
uint16_t* intsToSwap,
unsigned int num_points){
/* A uint16x8_t holds eight 16-bit values. */
140 unsigned int eighth_points = num_points / 8;
141 uint16x8_t input, output;
144 for(number = 0; number < eighth_points; number++) {
145 input = vld1q_u16(inputPtr);
/* Shift-right-and-insert: writes (input >> 8) into the low byte of every
 * lane of output and leaves the high byte of output as it was.
 * NOTE(review): no assignment to output is visible before this first read.
 * The bits kept from the old output are all overwritten by the following
 * shift-left-and-insert, so the stored result depends only on input, but
 * compilers may still warn about an uninitialized read - confirm. */
146 output = vsriq_n_u16(output, input, 8);
/* Shift-left-and-insert: writes (input << 8) into the high byte of every
 * lane and keeps the low byte produced by the previous line. */
147 output = vsliq_n_u16(output, input, 8);
/* Store back through the same pointer the data was loaded from. */
148 vst1q_u16(inputPtr, output);
/* Scalar tail, starting at the first value the vector loop did not cover. */
152 for(number = eighth_points * 8; number < num_points; number++){
/* NOTE(review): output here is used as a scalar; presumably a uint16_t
 * declared inside this loop on a line not shown, shadowing the vector. */
154 output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
162 #include <arm_neon.h>
/*
 * Swaps the two bytes of every 16-bit value, in place, using a NEON
 * table lookup: sixteen values (32 bytes) per vector iteration, then a
 * scalar loop for whatever is left.
 *
 * intsToSwap - buffer of 16-bit values to byte-swap
 * num_points - number of 16-bit values to process
 *
 * NOTE(review): this listing is an excerpt. The declaration of inputPtr, the
 * pointer increments, the scalar load/store and the closing braces are not
 * visible here.
 */
168 static inline void volk_16u_byteswap_neon_table(
uint16_t* intsToSwap,
unsigned int num_points){
170 unsigned int number = 0;
171 unsigned int n16points = num_points / 16;
173 uint8x8x4_t input_table;
174 uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
175 uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;
/*
 * Table indices, packed least-significant byte first into 64-bit constants:
 *   int_lookup01 = { 8, 0, 24, 16,  9, 1, 25, 17}
 *   int_lookup23 = {10, 2, 26, 18, 11, 3, 27, 19}
 *   int_lookup45 = {12, 4, 28, 20, 13, 5, 29, 21}
 *   int_lookup67 = {14, 6, 30, 22, 15, 7, 31, 23}
 * Each constant is the previous one plus 0x0202020202020202.
 *
 * vld4_u8 de-interleaves the 32 loaded bytes, so table entry j holds source
 * byte 4*(j % 8) + j / 8. Index 8 therefore selects source byte 1, index 0
 * selects byte 0, index 24 selects byte 3, index 16 selects byte 2, and so
 * on: every lookup vector yields eight source bytes with each adjacent pair
 * exchanged.
 */
185 int_lookup01 = vcreate_u8(1232017111498883080);
186 int_lookup23 = vcreate_u8(1376697457175036426);
187 int_lookup45 = vcreate_u8(1521377802851189772);
188 int_lookup67 = vcreate_u8(1666058148527343118);
190 for(number = 0; number < n16points; ++number){
/* Load 32 bytes as a 4 x 8-byte lookup table (de-interleaved by 4). */
191 input_table = vld4_u8((
uint8_t*) inputPtr);
/* Each lookup produces the swapped bytes for four consecutive values. */
192 swapped_int01 = vtbl4_u8(input_table, int_lookup01);
193 swapped_int23 = vtbl4_u8(input_table, int_lookup23);
194 swapped_int45 = vtbl4_u8(input_table, int_lookup45);
195 swapped_int67 = vtbl4_u8(input_table, int_lookup67);
/* Four 8-byte stores write the results back over the region that was
 * loaded (assuming inputPtr is a uint16_t*, so +4 elements is +8 bytes -
 * its declaration is not shown). */
196 vst1_u8((
uint8_t*)inputPtr, swapped_int01);
197 vst1_u8((
uint8_t*)(inputPtr+4), swapped_int23);
198 vst1_u8((
uint8_t*)(inputPtr+8), swapped_int45);
199 vst1_u8((
uint8_t*)(inputPtr+12), swapped_int67);
/* Scalar tail, starting at the first value the vector loop did not cover. */
204 for(number = n16points * 16; number < num_points; ++number){
/* Exchange the low and high bytes; output is declared on a line that is
 * not part of this excerpt. */
206 output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
213 #ifdef LV_HAVE_GENERIC
/*
 * Scalar byte swap of 16-bit values, one value per loop iteration.
 *
 * intsToSwap - buffer of 16-bit values to byte-swap
 * num_points - number of 16-bit values to process
 *
 * NOTE(review): the declarations of point and output, the load/store of each
 * value and the closing braces are not visible in this excerpt.
 */
219 static inline void volk_16u_byteswap_a_generic(
uint16_t* intsToSwap,
unsigned int num_points){
222 for(point = 0; point < num_points; point++){
/* Move the high byte down and the low byte up; the masks keep only the
 * 8 bits that belong in each half of the result. */
224 output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
/* Implementation defined outside this header; only its prototype is
 * declared here. */
237 extern void volk_16u_byteswap_a_orc_impl(
uint16_t* intsToSwap,
unsigned int num_points);
/*
 * Thin wrapper that forwards both arguments unchanged to
 * volk_16u_byteswap_a_orc_impl.
 *
 * intsToSwap - buffer of 16-bit values, passed straight through
 * num_points - number of values, passed straight through
 *
 * NOTE(review): the wrapper is named _u_ while the callee is named _a_;
 * whether the callee has alignment requirements cannot be determined from
 * this excerpt. The closing brace is not visible here.
 */
238 static inline void volk_16u_byteswap_u_orc(
uint16_t* intsToSwap,
unsigned int num_points){
239 volk_16u_byteswap_a_orc_impl(intsToSwap, num_points);
unsigned short uint16_t
Definition: stdint.h:79
unsigned char uint8_t
Definition: stdint.h:78