23 #ifndef INCLUDED_volk_16i_convert_8i_u_H
24 #define INCLUDED_volk_16i_convert_8i_u_H
30 #include <emmintrin.h>
/*!
  \brief Converts 16 bit integer data into 8 bit integer data by keeping the
  upper byte of every input value (arithmetic shift right by 8).
  SSE2 version using unaligned loads and stores.
  \param outputVector Buffer that receives num_points 8 bit results
  \param inputVector The num_points 16 bit values to convert
  \param num_points The number of values to convert
*/
static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  int8_t* outputVectorPtr = outputVector;
  const int16_t* inputPtr = inputVector;
  __m128i inputVal1;
  __m128i inputVal2;
  __m128i ret;

  for(;number < sixteenthPoints; number++){
    // Load 16 input values as two vectors of eight 16 bit lanes
    inputVal1 = _mm_loadu_si128((const __m128i*)inputPtr); inputPtr += 8;
    inputVal2 = _mm_loadu_si128((const __m128i*)inputPtr); inputPtr += 8;

    // Arithmetic shift keeps the sign while moving the high byte down
    inputVal1 = _mm_srai_epi16(inputVal1, 8);
    inputVal2 = _mm_srai_epi16(inputVal2, 8);

    // After the shift every lane already fits in 8 bits, so the saturating
    // pack only narrows the lanes and never clamps
    ret = _mm_packs_epi16(inputVal1, inputVal2);

    _mm_storeu_si128((__m128i*)outputVectorPtr, ret);

    outputVectorPtr += 16;
  }

  // Scalar tail for the points that do not fill a whole group of 16
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (int8_t)(inputVector[number] >> 8);
  }
}
71 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts 16 bit integer data into 8 bit integer data by keeping the
  upper byte of every input value (shift right by 8). Portable C version.
  \param outputVector Buffer that receives num_points 8 bit results
  \param inputVector The num_points 16 bit values to convert
  \param num_points The number of values to convert
*/
static inline void volk_16i_convert_8i_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
  int8_t* outputVectorPtr = outputVector;
  const int16_t* inputVectorPtr = inputVector;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    // Both pointers advance one element per iteration
    *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8));
  }
}
94 #ifndef INCLUDED_volk_16i_convert_8i_a_H
95 #define INCLUDED_volk_16i_convert_8i_a_H
101 #include <emmintrin.h>
/*!
  \brief Converts 16 bit integer data into 8 bit integer data by keeping the
  upper byte of every input value (arithmetic shift right by 8).
  SSE2 version using aligned loads and stores, so both buffers must be
  16-byte aligned.
  \param outputVector Buffer that receives num_points 8 bit results
  \param inputVector The num_points 16 bit values to convert
  \param num_points The number of values to convert
*/
static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  int8_t* outputVectorPtr = outputVector;
  const int16_t* inputPtr = inputVector;
  __m128i inputVal1;
  __m128i inputVal2;
  __m128i ret;

  for(;number < sixteenthPoints; number++){
    // Load 16 input values as two vectors of eight 16 bit lanes
    inputVal1 = _mm_load_si128((const __m128i*)inputPtr); inputPtr += 8;
    inputVal2 = _mm_load_si128((const __m128i*)inputPtr); inputPtr += 8;

    // Arithmetic shift keeps the sign while moving the high byte down
    inputVal1 = _mm_srai_epi16(inputVal1, 8);
    inputVal2 = _mm_srai_epi16(inputVal2, 8);

    // After the shift every lane already fits in 8 bits, so the saturating
    // pack only narrows the lanes and never clamps
    ret = _mm_packs_epi16(inputVal1, inputVal2);

    _mm_store_si128((__m128i*)outputVectorPtr, ret);

    outputVectorPtr += 16;
  }

  // Scalar tail for the points that do not fill a whole group of 16
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (int8_t)(inputVector[number] >> 8);
  }
}
142 #include <arm_neon.h>
/*!
  \brief Converts 16 bit integer data into 8 bit integer data by keeping the
  upper byte of every input value (shift right by 8). NEON version.
  \param outputVector Buffer that receives num_points 8 bit results
  \param inputVector The num_points 16 bit values to convert
  \param num_points The number of values to convert
*/
static inline void volk_16i_convert_8i_neon(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
  int8_t* outputVectorPtr = outputVector;
  const int16_t* inputVectorPtr = inputVector;
  unsigned int number = 0;
  unsigned int sixteenth_points = num_points / 16;

  int16x8_t inputVal0;
  int16x8_t inputVal1;
  int8x8_t outputVal0;
  int8x8_t outputVal1;
  int8x16_t outputVal;

  for(number = 0; number < sixteenth_points; number++){
    // Load 16 input values as two vectors of eight 16 bit lanes
    inputVal0 = vld1q_s16(inputVectorPtr);
    inputVal1 = vld1q_s16(inputVectorPtr+8);

    // Shift right by 8 and narrow each lane to 8 bits in one step
    outputVal0 = vshrn_n_s16(inputVal0, 8);
    outputVal1 = vshrn_n_s16(inputVal1, 8);

    // Join the two 8-lane halves and store 16 results at once
    outputVal = vcombine_s8(outputVal0, outputVal1);
    vst1q_s8(outputVectorPtr, outputVal);
    inputVectorPtr += 16;
    outputVectorPtr += 16;
  }

  // Scalar tail; the pointers already sit just past the last full group of 16
  for(number = sixteenth_points * 16; number < num_points; number++){
    *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8));
  }
}
181 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts 16 bit integer data into 8 bit integer data by keeping the
  upper byte of every input value (shift right by 8). Portable C version
  of the aligned kernel; the code itself makes no use of alignment.
  \param outputVector Buffer that receives num_points 8 bit results
  \param inputVector The num_points 16 bit values to convert
  \param num_points The number of values to convert
*/
static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector, const int16_t* inputVector, unsigned int num_points){
  int8_t* outputVectorPtr = outputVector;
  const int16_t* inputVectorPtr = inputVector;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    // Both pointers advance one element per iteration
    *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8));
  }
}
signed short int16_t
Definition: stdint.h:76
signed char int8_t
Definition: stdint.h:75