23 #ifndef INCLUDED_volk_8i_convert_16i_u_H
24 #define INCLUDED_volk_8i_convert_16i_u_H
30 #include <smmintrin.h>
/*!
  \brief Converts 8-bit signed samples to 16-bit signed samples scaled by 256 (SSE4.1, unaligned buffers)
  \param outputVector Receives num_points 16-bit results; written with unaligned stores
  \param inputVector The num_points 8-bit samples to convert; read with unaligned loads
  \param num_points Number of samples to convert
*/
static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  const __m128i* inputVectorPtr = (const __m128i*)inputVector;
  __m128i* outputVectorPtr = (__m128i*)outputVector;
  __m128i inputVal;
  __m128i ret;

  // Each iteration consumes 16 input bytes and produces two 8 x int16 output vectors
  for(;number < sixteenthPoints; number++){
    inputVal = _mm_loadu_si128(inputVectorPtr);

    // Low 8 bytes: sign-extend to 16 bits, then shift left by 8 (multiply by 256)
    ret = _mm_cvtepi8_epi16(inputVal);
    ret = _mm_slli_epi16(ret, 8);
    _mm_storeu_si128(outputVectorPtr, ret);
    outputVectorPtr++;

    // Move the high 8 bytes into the low half and convert them the same way
    inputVal = _mm_srli_si128(inputVal, 8);
    ret = _mm_cvtepi8_epi16(inputVal);
    ret = _mm_slli_epi16(ret, 8);
    _mm_storeu_si128(outputVectorPtr, ret);
    outputVectorPtr++;

    inputVectorPtr++;
  }

  // Scalar tail for the samples that do not fill a whole 16-sample group
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (int16_t)(inputVector[number])*256;
  }
}
73 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts 8-bit signed samples to 16-bit signed samples scaled by 256 (portable C)
  \param outputVector Receives num_points 16-bit results
  \param inputVector The num_points 8-bit samples to convert
  \param num_points Number of samples to convert
*/
static inline void volk_8i_convert_16i_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
  unsigned int number;

  for(number = 0; number < num_points; number++){
    // Widen first so the multiply by 256 places the sample in the upper byte of the 16-bit result
    outputVector[number] = (int16_t)(((int16_t)inputVector[number]) * 256);
  }
}
96 #ifndef INCLUDED_volk_8i_convert_16i_a_H
97 #define INCLUDED_volk_8i_convert_16i_a_H
102 #ifdef LV_HAVE_SSE4_1
103 #include <smmintrin.h>
/*!
  \brief Converts 8-bit signed samples to 16-bit signed samples scaled by 256 (SSE4.1, aligned buffers)
  \param outputVector Receives num_points 16-bit results; written with aligned stores, so it must be 16-byte aligned
  \param inputVector The num_points 8-bit samples to convert; read with aligned loads, so it must be 16-byte aligned
  \param num_points Number of samples to convert
*/
static inline void volk_8i_convert_16i_a_sse4_1(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  const __m128i* inputVectorPtr = (const __m128i*)inputVector;
  __m128i* outputVectorPtr = (__m128i*)outputVector;
  __m128i inputVal;
  __m128i ret;

  // Each iteration consumes 16 input bytes and produces two 8 x int16 output vectors
  for(;number < sixteenthPoints; number++){
    inputVal = _mm_load_si128(inputVectorPtr);

    // Low 8 bytes: sign-extend to 16 bits, then shift left by 8 (multiply by 256)
    ret = _mm_cvtepi8_epi16(inputVal);
    ret = _mm_slli_epi16(ret, 8);
    _mm_store_si128(outputVectorPtr, ret);
    outputVectorPtr++;

    // Move the high 8 bytes into the low half and convert them the same way
    inputVal = _mm_srli_si128(inputVal, 8);
    ret = _mm_cvtepi8_epi16(inputVal);
    ret = _mm_slli_epi16(ret, 8);
    _mm_store_si128(outputVectorPtr, ret);
    outputVectorPtr++;

    inputVectorPtr++;
  }

  // Scalar tail for the samples that do not fill a whole 16-sample group
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (int16_t)(inputVector[number])*256;
  }
}
145 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts 8-bit signed samples to 16-bit signed samples scaled by 256 (portable C)
  \param outputVector Receives num_points 16-bit results
  \param inputVector The num_points 8-bit samples to convert
  \param num_points Number of samples to convert
*/
static inline void volk_8i_convert_16i_a_generic(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
  unsigned int number;

  for(number = 0; number < num_points; number++){
    // Widen first so the multiply by 256 places the sample in the upper byte of the 16-bit result
    outputVector[number] = (int16_t)(((int16_t)inputVector[number]) * 256);
  }
}
164 #include <arm_neon.h>
/*!
  \brief Converts 8-bit signed samples to 16-bit signed samples scaled by 256 (ARM NEON)
  \param outputVector Receives num_points 16-bit results
  \param inputVector The num_points 8-bit samples to convert
  \param num_points Number of samples to convert
*/
static inline void volk_8i_convert_16i_neon(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
  int16_t* outputVectorPtr = outputVector;
  const int8_t* inputVectorPtr = inputVector;
  unsigned int number;
  const unsigned int eighth_points = num_points / 8;

  int8x8_t input_vec;
  int16x8_t converted_vec;

  // Each iteration widens 8 samples to 16 bits and shifts them left by 8 (multiply by 256)
  for(number = 0; number < eighth_points; ++number) {
    input_vec = vld1_s8(inputVectorPtr);
    converted_vec = vmovl_s8(input_vec);
    converted_vec = vshlq_n_s16(converted_vec, 8);
    vst1q_s16(outputVectorPtr, converted_vec);

    // Both cursors must advance so the scalar tail below resumes at the right sample
    inputVectorPtr += 8;
    outputVectorPtr += 8;
  }

  // Scalar tail for the samples that do not fill a whole 8-sample group
  for(number = eighth_points * 8; number < num_points; number++){
    *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
  }
}
/* ORC-backed implementation; only declared here, its definition lives outside this file. */
extern void volk_8i_convert_16i_a_orc_impl(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points);

/*!
  \brief Forwards the conversion request unchanged to volk_8i_convert_16i_a_orc_impl
  \param outputVector Destination buffer, passed through as-is
  \param inputVector Source buffer, passed through as-is
  \param num_points Number of samples, passed through as-is
*/
static inline void volk_8i_convert_16i_u_orc(int16_t* outputVector, const int8_t* inputVector, unsigned int num_points){
  volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points);
}
/* Doxygen cross-reference notes carried over from the generated listing:
 *   int16_t - signed short (definition: stdint.h:76)
 *   int8_t  - signed char  (definition: stdint.h:75)
 */