23 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
24 #define INCLUDED_volk_32f_s32f_convert_16i_u_H
31 #include <immintrin.h>
/*
 * Scale a float vector by `scalar`, clamp to the int16_t range and write the
 * result as int16_t, processing 8 samples per iteration with AVX.
 * Unaligned variant: uses _mm256_loadu_ps / _mm_storeu_si128, so neither
 * buffer needs any particular alignment.
 *
 * outputVector  destination for the converted 16-bit samples
 * inputVector   source float samples
 * scalar        factor applied to every input sample before clamping
 * num_points    number of samples to convert
 *
 * NOTE(review): this listing omits several source lines (the declarations of
 * inputVal, ret, intInputVal and r, the closing braces, and the body of the
 * tail loop after the multiply) -- confirm against the complete file.
 */
39 static inline void volk_32f_s32f_convert_16i_u_avx(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
40 unsigned int number = 0;
// Number of complete 8-sample groups handled by the vector loop.
42 const unsigned int eighthPoints = num_points / 8;
44 const float* inputVectorPtr = (
const float*)inputVector;
45 int16_t* outputVectorPtr = outputVector;
// Clamp bounds: the lowest and highest values an int16_t can hold.
47 float min_val = -32768;
48 float max_val = 32767;
// Broadcast the scale factor and both clamp bounds to all 8 lanes.
51 __m256 vScalar = _mm256_set1_ps(scalar);
54 __m128i intInputVal1, intInputVal2;
55 __m256 vmin_val = _mm256_set1_ps(min_val);
56 __m256 vmax_val = _mm256_set1_ps(max_val);
58 for(;number < eighthPoints; number++){
// Unaligned load of 8 floats, then step the input pointer past them.
59 inputVal = _mm256_loadu_ps(inputVectorPtr); inputVectorPtr += 8;
// Scale, then clip: min() against the upper bound, max() against the lower.
62 ret = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal, vScalar), vmax_val), vmin_val);
// Convert the 8 floats to 32-bit integers (rounds per the current MXCSR mode).
64 intInputVal = _mm256_cvtps_epi32(ret);
// Split the 256-bit result into its low and high 128-bit halves.
66 intInputVal1 = _mm256_extractf128_si256(intInputVal, 0);
67 intInputVal2 = _mm256_extractf128_si256(intInputVal, 1);
// Narrow 2 x four 32-bit ints into eight 16-bit ints with signed saturation.
69 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
// Unaligned store of the 8 packed int16 results.
// NOTE(review): no advance of outputVectorPtr is visible after this store in
// this listing (the other SIMD variants show `outputVectorPtr += 8;`) --
// verify it exists in the complete source.
71 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
// Resume at the first sample not covered by the vector loop (presumably after
// the loop's closing brace, which the listing omits).
75 number = eighthPoints * 8;
// Scalar tail for the remaining (num_points % 8) samples.
76 for(; number < num_points; number++){
// Scale one sample; the clamp and store that follow are not visible here.
77 r = inputVector[number] * scalar;
88 #include <emmintrin.h>
/*
 * Scale a float vector by `scalar`, clamp to the int16_t range and write the
 * result as int16_t, processing 8 samples per iteration as two 4-float SSE
 * registers. Unaligned variant: uses _mm_loadu_ps / _mm_storeu_si128.
 *
 * outputVector  destination for the converted 16-bit samples
 * inputVector   source float samples
 * scalar        factor applied to every input sample before clamping
 * num_points    number of samples to convert
 *
 * NOTE(review): this listing omits several source lines (the declarations of
 * ret1, ret2 and r, the closing braces, and the body of the tail loop after
 * the multiply) -- confirm against the complete file.
 */
97 static inline void volk_32f_s32f_convert_16i_u_sse2(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
98 unsigned int number = 0;
// Number of complete 8-sample groups handled by the vector loop.
100 const unsigned int eighthPoints = num_points / 8;
102 const float* inputVectorPtr = (
const float*)inputVector;
103 int16_t* outputVectorPtr = outputVector;
// Clamp bounds: the lowest and highest values an int16_t can hold.
105 float min_val = -32768;
106 float max_val = 32767;
// Broadcast the scale factor and both clamp bounds to all 4 lanes.
109 __m128 vScalar = _mm_set_ps1(scalar);
110 __m128 inputVal1, inputVal2;
111 __m128i intInputVal1, intInputVal2;
113 __m128 vmin_val = _mm_set_ps1(min_val);
114 __m128 vmax_val = _mm_set_ps1(max_val);
116 for(;number < eighthPoints; number++){
// Two unaligned 4-float loads, advancing the input pointer after each.
117 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
118 inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
// Scale, then clip: min() against the upper bound, max() against the lower.
121 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
122 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
// Convert each group of 4 floats to 32-bit integers (rounds per the current
// MXCSR mode).
124 intInputVal1 = _mm_cvtps_epi32(ret1);
125 intInputVal2 = _mm_cvtps_epi32(ret2);
// Narrow 2 x four 32-bit ints into eight 16-bit ints with signed saturation.
127 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
// Unaligned store of the 8 packed int16 results, then step the output pointer.
129 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
130 outputVectorPtr += 8;
// Resume at the first sample not covered by the vector loop (presumably after
// the loop's closing brace, which the listing omits).
133 number = eighthPoints * 8;
// Scalar tail for the remaining (num_points % 8) samples.
134 for(; number < num_points; number++){
// Scale one sample; the clamp and store that follow are not visible here.
135 r = inputVector[number] * scalar;
146 #include <xmmintrin.h>
/*
 * Scale a float vector by `scalar`, clamp to the int16_t range and write the
 * result as int16_t, processing 4 samples per iteration with SSE.
 * The multiply and clamp are vectorised; the float-to-int16 conversion is done
 * per element with rintf() through a small float staging buffer.
 * Unaligned variant: the input is read with _mm_loadu_ps.
 *
 * outputVector  destination for the converted 16-bit samples
 * inputVector   source float samples
 * scalar        factor applied to every input sample before clamping
 * num_points    number of samples to convert
 *
 * NOTE(review): this listing omits several source lines (the declarations of
 * ret, outputFloatBuffer and r, any advance of inputVectorPtr inside the loop,
 * the closing braces, and the body of the tail loop after the multiply) --
 * confirm against the complete file.
 */
155 static inline void volk_32f_s32f_convert_16i_u_sse(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
156 unsigned int number = 0;
// Number of complete 4-sample groups handled by the vector loop.
158 const unsigned int quarterPoints = num_points / 4;
160 const float* inputVectorPtr = (
const float*)inputVector;
161 int16_t* outputVectorPtr = outputVector;
// Clamp bounds: the lowest and highest values an int16_t can hold.
163 float min_val = -32768;
164 float max_val = 32767;
// Broadcast the scale factor and both clamp bounds to all 4 lanes.
167 __m128 vScalar = _mm_set_ps1(scalar);
169 __m128 vmin_val = _mm_set_ps1(min_val);
170 __m128 vmax_val = _mm_set_ps1(max_val);
174 for(;number < quarterPoints; number++){
// Unaligned load of 4 floats.
175 ret = _mm_loadu_ps(inputVectorPtr);
// Scale, then clip: min() against the upper bound, max() against the lower.
179 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
// Spill the 4 clamped floats to the staging buffer. _mm_store_ps is the
// aligned store, so outputFloatBuffer must be 16-byte aligned
// (its declaration is not visible in this listing -- TODO confirm).
181 _mm_store_ps(outputFloatBuffer, ret);
// Round each staged float with rintf(), narrow to int16_t, and append it to
// the output through the post-incremented pointer.
182 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[0]);
183 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[1]);
184 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[2]);
185 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[3]);
// Resume at the first sample not covered by the vector loop (presumably after
// the loop's closing brace, which the listing omits).
188 number = quarterPoints * 4;
// Scalar tail for the remaining (num_points % 4) samples.
189 for(; number < num_points; number++){
// Scale one sample; the clamp and store that follow are not visible here.
190 r = inputVector[number] * scalar;
200 #ifdef LV_HAVE_GENERIC
/*
 * Portable (non-SIMD) variant: walks the input one sample at a time and
 * multiplies each sample by `scalar`.
 *
 * outputVector  destination buffer (int16_t samples)
 * inputVector   source float samples
 * scalar        factor applied to every input sample
 * num_points    number of samples to process
 *
 * NOTE(review): the listing ends right after the multiply -- the declaration
 * of r, the use of min_val / max_val / outputVectorPtr, and the closing braces
 * are not visible here. Presumably r is clamped and stored as in the SIMD
 * variants; confirm against the complete file.
 */
209 static inline void volk_32f_s32f_convert_16i_generic(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
210 int16_t* outputVectorPtr = outputVector;
211 const float* inputVectorPtr = inputVector;
212 unsigned int number = 0;
// The lowest and highest values an int16_t can hold.
213 float min_val = -32768;
214 float max_val = 32767;
217 for(number = 0; number < num_points; number++){
// Read the next input sample (advancing the pointer) and scale it.
218 r = *inputVectorPtr++ * scalar;
232 #ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H
233 #define INCLUDED_volk_32f_s32f_convert_16i_a_H
241 #include <immintrin.h>
/*
 * Scale a float vector by `scalar`, clamp to the int16_t range and write the
 * result as int16_t, processing 8 samples per iteration with AVX.
 * Aligned variant: uses _mm256_load_ps (32-byte aligned source required) and
 * _mm_store_si128 (16-byte aligned destination required).
 *
 * outputVector  destination for the converted 16-bit samples (16-byte aligned)
 * inputVector   source float samples (32-byte aligned)
 * scalar        factor applied to every input sample before clamping
 * num_points    number of samples to convert
 *
 * NOTE(review): this listing omits several source lines (the declarations of
 * intInputVal and r, the closing braces, and the body of the tail loop after
 * the multiply) -- confirm against the complete file.
 */
249 static inline void volk_32f_s32f_convert_16i_a_avx(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
250 unsigned int number = 0;
// Number of complete 8-sample groups handled by the vector loop.
252 const unsigned int eighthPoints = num_points / 8;
254 const float* inputVectorPtr = (
const float*)inputVector;
255 int16_t* outputVectorPtr = outputVector;
// Clamp bounds: the lowest and highest values an int16_t can hold.
257 float min_val = -32768;
258 float max_val = 32767;
// Broadcast the scale factor and both clamp bounds to all 8 lanes.
261 __m256 vScalar = _mm256_set1_ps(scalar);
262 __m256 inputVal, ret;
264 __m128i intInputVal1, intInputVal2;
265 __m256 vmin_val = _mm256_set1_ps(min_val);
266 __m256 vmax_val = _mm256_set1_ps(max_val);
268 for(;number < eighthPoints; number++){
// Aligned load of 8 floats, then step the input pointer past them.
269 inputVal = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
// Scale, then clip: min() against the upper bound, max() against the lower.
272 ret = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal, vScalar), vmax_val), vmin_val);
// Convert the 8 floats to 32-bit integers (rounds per the current MXCSR mode).
274 intInputVal = _mm256_cvtps_epi32(ret);
// Split the 256-bit result into its low and high 128-bit halves.
276 intInputVal1 = _mm256_extractf128_si256(intInputVal, 0);
277 intInputVal2 = _mm256_extractf128_si256(intInputVal, 1);
// Narrow 2 x four 32-bit ints into eight 16-bit ints with signed saturation.
279 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
// Aligned store of the 8 packed int16 results, then step the output pointer.
281 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
282 outputVectorPtr += 8;
// Resume at the first sample not covered by the vector loop (presumably after
// the loop's closing brace, which the listing omits).
285 number = eighthPoints * 8;
// Scalar tail for the remaining (num_points % 8) samples.
286 for(; number < num_points; number++){
// Scale one sample; the clamp and store that follow are not visible here.
287 r = inputVector[number] * scalar;
298 #include <emmintrin.h>
/*
 * Scale a float vector by `scalar`, clamp to the int16_t range and write the
 * result as int16_t, processing 8 samples per iteration as two 4-float SSE
 * registers. Aligned variant: uses _mm_load_ps / _mm_store_si128, both of
 * which require 16-byte aligned addresses.
 *
 * outputVector  destination for the converted 16-bit samples (16-byte aligned)
 * inputVector   source float samples (16-byte aligned)
 * scalar        factor applied to every input sample before clamping
 * num_points    number of samples to convert
 *
 * NOTE(review): this listing omits several source lines (the declarations of
 * ret1, ret2 and r, the closing braces, and the body of the tail loop after
 * the multiply) -- confirm against the complete file.
 */
306 static inline void volk_32f_s32f_convert_16i_a_sse2(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
307 unsigned int number = 0;
// Number of complete 8-sample groups handled by the vector loop.
309 const unsigned int eighthPoints = num_points / 8;
311 const float* inputVectorPtr = (
const float*)inputVector;
312 int16_t* outputVectorPtr = outputVector;
// Clamp bounds: the lowest and highest values an int16_t can hold.
314 float min_val = -32768;
315 float max_val = 32767;
// Broadcast the scale factor and both clamp bounds to all 4 lanes.
318 __m128 vScalar = _mm_set_ps1(scalar);
319 __m128 inputVal1, inputVal2;
320 __m128i intInputVal1, intInputVal2;
322 __m128 vmin_val = _mm_set_ps1(min_val);
323 __m128 vmax_val = _mm_set_ps1(max_val);
325 for(;number < eighthPoints; number++){
// Two aligned 4-float loads, advancing the input pointer after each.
326 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
327 inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
// Scale, then clip: min() against the upper bound, max() against the lower.
330 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
331 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
// Convert each group of 4 floats to 32-bit integers (rounds per the current
// MXCSR mode).
333 intInputVal1 = _mm_cvtps_epi32(ret1);
334 intInputVal2 = _mm_cvtps_epi32(ret2);
// Narrow 2 x four 32-bit ints into eight 16-bit ints with signed saturation.
336 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
// Aligned store of the 8 packed int16 results, then step the output pointer.
338 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
339 outputVectorPtr += 8;
// Resume at the first sample not covered by the vector loop (presumably after
// the loop's closing brace, which the listing omits).
342 number = eighthPoints * 8;
// Scalar tail for the remaining (num_points % 8) samples.
343 for(; number < num_points; number++){
// Scale one sample; the clamp and store that follow are not visible here.
344 r = inputVector[number] * scalar;
355 #include <xmmintrin.h>
/*
 * Scale a float vector by `scalar`, clamp to the int16_t range and write the
 * result as int16_t, processing 4 samples per iteration with SSE.
 * The multiply and clamp are vectorised; the float-to-int16 conversion is done
 * per element with rintf() through a small float staging buffer.
 * Aligned variant: the input is read with _mm_load_ps, which requires a
 * 16-byte aligned address. The output is written with scalar stores.
 *
 * outputVector  destination for the converted 16-bit samples
 * inputVector   source float samples (16-byte aligned)
 * scalar        factor applied to every input sample before clamping
 * num_points    number of samples to convert
 *
 * NOTE(review): this listing omits several source lines (the declarations of
 * ret, outputFloatBuffer and r, any advance of inputVectorPtr inside the loop,
 * the closing braces, and the body of the tail loop after the multiply) --
 * confirm against the complete file.
 */
363 static inline void volk_32f_s32f_convert_16i_a_sse(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
364 unsigned int number = 0;
// Number of complete 4-sample groups handled by the vector loop.
366 const unsigned int quarterPoints = num_points / 4;
368 const float* inputVectorPtr = (
const float*)inputVector;
369 int16_t* outputVectorPtr = outputVector;
// Clamp bounds: the lowest and highest values an int16_t can hold.
371 float min_val = -32768;
372 float max_val = 32767;
// Broadcast the scale factor and both clamp bounds to all 4 lanes.
375 __m128 vScalar = _mm_set_ps1(scalar);
377 __m128 vmin_val = _mm_set_ps1(min_val);
378 __m128 vmax_val = _mm_set_ps1(max_val);
382 for(;number < quarterPoints; number++){
// Aligned load of 4 floats.
383 ret = _mm_load_ps(inputVectorPtr);
// Scale, then clip: min() against the upper bound, max() against the lower.
387 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
// Spill the 4 clamped floats to the staging buffer. _mm_store_ps is the
// aligned store, so outputFloatBuffer must be 16-byte aligned
// (its declaration is not visible in this listing -- TODO confirm).
389 _mm_store_ps(outputFloatBuffer, ret);
// Round each staged float with rintf(), narrow to int16_t, and append it to
// the output through the post-incremented pointer.
390 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[0]);
391 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[1]);
392 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[2]);
393 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[3]);
// Resume at the first sample not covered by the vector loop (presumably after
// the loop's closing brace, which the listing omits).
396 number = quarterPoints * 4;
// Scalar tail for the remaining (num_points % 4) samples.
397 for(; number < num_points; number++){
// Scale one sample; the clamp and store that follow are not visible here.
398 r = inputVector[number] * scalar;
408 #ifdef LV_HAVE_GENERIC
/*
 * Portable (non-SIMD) variant: walks the input one sample at a time and
 * multiplies each sample by `scalar`. The visible lines use no aligned
 * load/store intrinsics.
 *
 * outputVector  destination buffer (int16_t samples)
 * inputVector   source float samples
 * scalar        factor applied to every input sample
 * num_points    number of samples to process
 *
 * NOTE(review): the listing ends right after the multiply -- the declaration
 * of r, the use of min_val / max_val / outputVectorPtr, and the closing braces
 * are not visible here. Presumably r is clamped and stored as in the SIMD
 * variants; confirm against the complete file.
 */
416 static inline void volk_32f_s32f_convert_16i_a_generic(
int16_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
417 int16_t* outputVectorPtr = outputVector;
418 const float* inputVectorPtr = inputVector;
419 unsigned int number = 0;
// The lowest and highest values an int16_t can hold.
420 float min_val = -32768;
421 float max_val = 32767;
424 for(number = 0; number < num_points; number++){
// Read the next input sample (advancing the pointer) and scale it.
425 r = *inputVectorPtr++ * scalar;
signed short int16_t
Definition: stdint.h:76
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27
static float rintf(float x)
Definition: volk/cmake/msvc/config.h:30