GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_s32f_convert_16i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
24 #define INCLUDED_volk_32f_s32f_convert_16i_u_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 #include <math.h>
29 
30 #ifdef LV_HAVE_AVX
31 #include <immintrin.h>
32  /*!
33  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
34  \param inputVector The floating point input data buffer
35  \param outputVector The 16 bit output data buffer
36  \param scalar The value multiplied against each point in the input buffer
37  \param num_points The number of data values to be converted
38  */
39 static inline void volk_32f_s32f_convert_16i_u_avx(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
40  unsigned int number = 0;
41 
42  const unsigned int eighthPoints = num_points / 8;
43 
44  const float* inputVectorPtr = (const float*)inputVector;
45  int16_t* outputVectorPtr = outputVector;
46 
47  float min_val = -32768;
48  float max_val = 32767;
49  float r;
50 
51  __m256 vScalar = _mm256_set1_ps(scalar);
52  __m256 inputVal, ret;
53  __m256i intInputVal;
54  __m128i intInputVal1, intInputVal2;
55  __m256 vmin_val = _mm256_set1_ps(min_val);
56  __m256 vmax_val = _mm256_set1_ps(max_val);
57 
58  for(;number < eighthPoints; number++){
59  inputVal = _mm256_loadu_ps(inputVectorPtr); inputVectorPtr += 8;
60 
61  // Scale and clip
62  ret = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal, vScalar), vmax_val), vmin_val);
63 
64  intInputVal = _mm256_cvtps_epi32(ret);
65 
66  intInputVal1 = _mm256_extractf128_si256(intInputVal, 0);
67  intInputVal2 = _mm256_extractf128_si256(intInputVal, 1);
68 
69  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
70 
71  _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
72  outputVectorPtr += 8;
73  }
74 
75  number = eighthPoints * 8;
76  for(; number < num_points; number++){
77  r = inputVector[number] * scalar;
78  if(r > max_val)
79  r = max_val;
80  else if(r < min_val)
81  r = min_val;
82  outputVector[number] = (int16_t)rintf(r);
83  }
84 }
85 #endif /* LV_HAVE_AVX */
86 
87 #ifdef LV_HAVE_SSE2
88 #include <emmintrin.h>
89  /*!
90  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
91  \param inputVector The floating point input data buffer
92  \param outputVector The 16 bit output data buffer
93  \param scalar The value multiplied against each point in the input buffer
94  \param num_points The number of data values to be converted
95  \note Input buffer does NOT need to be properly aligned
96  */
97 static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
98  unsigned int number = 0;
99 
100  const unsigned int eighthPoints = num_points / 8;
101 
102  const float* inputVectorPtr = (const float*)inputVector;
103  int16_t* outputVectorPtr = outputVector;
104 
105  float min_val = -32768;
106  float max_val = 32767;
107  float r;
108 
109  __m128 vScalar = _mm_set_ps1(scalar);
110  __m128 inputVal1, inputVal2;
111  __m128i intInputVal1, intInputVal2;
112  __m128 ret1, ret2;
113  __m128 vmin_val = _mm_set_ps1(min_val);
114  __m128 vmax_val = _mm_set_ps1(max_val);
115 
116  for(;number < eighthPoints; number++){
117  inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
118  inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
119 
120  // Scale and clip
121  ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
122  ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
123 
124  intInputVal1 = _mm_cvtps_epi32(ret1);
125  intInputVal2 = _mm_cvtps_epi32(ret2);
126 
127  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
128 
129  _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
130  outputVectorPtr += 8;
131  }
132 
133  number = eighthPoints * 8;
134  for(; number < num_points; number++){
135  r = inputVector[number] * scalar;
136  if(r > max_val)
137  r = max_val;
138  else if(r < min_val)
139  r = min_val;
140  outputVector[number] = (int16_t)rintf(r);
141  }
142 }
143 #endif /* LV_HAVE_SSE2 */
144 
145 #ifdef LV_HAVE_SSE
146 #include <xmmintrin.h>
147  /*!
148  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
149  \param inputVector The floating point input data buffer
150  \param outputVector The 16 bit output data buffer
151  \param scalar The value multiplied against each point in the input buffer
152  \param num_points The number of data values to be converted
153  \note Input buffer does NOT need to be properly aligned
154  */
155 static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
156  unsigned int number = 0;
157 
158  const unsigned int quarterPoints = num_points / 4;
159 
160  const float* inputVectorPtr = (const float*)inputVector;
161  int16_t* outputVectorPtr = outputVector;
162 
163  float min_val = -32768;
164  float max_val = 32767;
165  float r;
166 
167  __m128 vScalar = _mm_set_ps1(scalar);
168  __m128 ret;
169  __m128 vmin_val = _mm_set_ps1(min_val);
170  __m128 vmax_val = _mm_set_ps1(max_val);
171 
172  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
173 
174  for(;number < quarterPoints; number++){
175  ret = _mm_loadu_ps(inputVectorPtr);
176  inputVectorPtr += 4;
177 
178  // Scale and clip
179  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
180 
181  _mm_store_ps(outputFloatBuffer, ret);
182  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
183  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
184  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
185  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
186  }
187 
188  number = quarterPoints * 4;
189  for(; number < num_points; number++){
190  r = inputVector[number] * scalar;
191  if(r > max_val)
192  r = max_val;
193  else if(r < min_val)
194  r = min_val;
195  outputVector[number] = (int16_t)rintf(r);
196  }
197 }
198 #endif /* LV_HAVE_SSE */
199 
200 #ifdef LV_HAVE_GENERIC
201  /*!
202  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
203  \param inputVector The floating point input data buffer
204  \param outputVector The 16 bit output data buffer
205  \param scalar The value multiplied against each point in the input buffer
206  \param num_points The number of data values to be converted
207  \note Input buffer does NOT need to be properly aligned
208  */
209 static inline void volk_32f_s32f_convert_16i_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
210  int16_t* outputVectorPtr = outputVector;
211  const float* inputVectorPtr = inputVector;
212  unsigned int number = 0;
213  float min_val = -32768;
214  float max_val = 32767;
215  float r;
216 
217  for(number = 0; number < num_points; number++){
218  r = *inputVectorPtr++ * scalar;
219  if(r > max_val)
220  r = max_val;
221  else if(r < min_val)
222  r = min_val;
223  *outputVectorPtr++ = (int16_t)rintf(r);
224  }
225 }
226 #endif /* LV_HAVE_GENERIC */
227 
228 
229 
230 
231 #endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */
232 #ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H
233 #define INCLUDED_volk_32f_s32f_convert_16i_a_H
234 
235 #include <volk/volk_common.h>
236 #include <inttypes.h>
237 #include <stdio.h>
238 #include <math.h>
239 
240 #ifdef LV_HAVE_AVX
241 #include <immintrin.h>
242  /*!
243  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
244  \param inputVector The floating point input data buffer
245  \param outputVector The 16 bit output data buffer
246  \param scalar The value multiplied against each point in the input buffer
247  \param num_points The number of data values to be converted
248  */
249 static inline void volk_32f_s32f_convert_16i_a_avx(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
250  unsigned int number = 0;
251 
252  const unsigned int eighthPoints = num_points / 8;
253 
254  const float* inputVectorPtr = (const float*)inputVector;
255  int16_t* outputVectorPtr = outputVector;
256 
257  float min_val = -32768;
258  float max_val = 32767;
259  float r;
260 
261  __m256 vScalar = _mm256_set1_ps(scalar);
262  __m256 inputVal, ret;
263  __m256i intInputVal;
264  __m128i intInputVal1, intInputVal2;
265  __m256 vmin_val = _mm256_set1_ps(min_val);
266  __m256 vmax_val = _mm256_set1_ps(max_val);
267 
268  for(;number < eighthPoints; number++){
269  inputVal = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
270 
271  // Scale and clip
272  ret = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal, vScalar), vmax_val), vmin_val);
273 
274  intInputVal = _mm256_cvtps_epi32(ret);
275 
276  intInputVal1 = _mm256_extractf128_si256(intInputVal, 0);
277  intInputVal2 = _mm256_extractf128_si256(intInputVal, 1);
278 
279  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
280 
281  _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
282  outputVectorPtr += 8;
283  }
284 
285  number = eighthPoints * 8;
286  for(; number < num_points; number++){
287  r = inputVector[number] * scalar;
288  if(r > max_val)
289  r = max_val;
290  else if(r < min_val)
291  r = min_val;
292  outputVector[number] = (int16_t)rintf(r);
293  }
294 }
295 #endif /* LV_HAVE_AVX */
296 
297 #ifdef LV_HAVE_SSE2
298 #include <emmintrin.h>
299  /*!
300  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
301  \param inputVector The floating point input data buffer
302  \param outputVector The 16 bit output data buffer
303  \param scalar The value multiplied against each point in the input buffer
304  \param num_points The number of data values to be converted
305  */
306 static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
307  unsigned int number = 0;
308 
309  const unsigned int eighthPoints = num_points / 8;
310 
311  const float* inputVectorPtr = (const float*)inputVector;
312  int16_t* outputVectorPtr = outputVector;
313 
314  float min_val = -32768;
315  float max_val = 32767;
316  float r;
317 
318  __m128 vScalar = _mm_set_ps1(scalar);
319  __m128 inputVal1, inputVal2;
320  __m128i intInputVal1, intInputVal2;
321  __m128 ret1, ret2;
322  __m128 vmin_val = _mm_set_ps1(min_val);
323  __m128 vmax_val = _mm_set_ps1(max_val);
324 
325  for(;number < eighthPoints; number++){
326  inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
327  inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
328 
329  // Scale and clip
330  ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
331  ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
332 
333  intInputVal1 = _mm_cvtps_epi32(ret1);
334  intInputVal2 = _mm_cvtps_epi32(ret2);
335 
336  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
337 
338  _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
339  outputVectorPtr += 8;
340  }
341 
342  number = eighthPoints * 8;
343  for(; number < num_points; number++){
344  r = inputVector[number] * scalar;
345  if(r > max_val)
346  r = max_val;
347  else if(r < min_val)
348  r = min_val;
349  outputVector[number] = (int16_t)rintf(r);
350  }
351 }
352 #endif /* LV_HAVE_SSE2 */
353 
354 #ifdef LV_HAVE_SSE
355 #include <xmmintrin.h>
356  /*!
357  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
358  \param inputVector The floating point input data buffer
359  \param outputVector The 16 bit output data buffer
360  \param scalar The value multiplied against each point in the input buffer
361  \param num_points The number of data values to be converted
362  */
363 static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
364  unsigned int number = 0;
365 
366  const unsigned int quarterPoints = num_points / 4;
367 
368  const float* inputVectorPtr = (const float*)inputVector;
369  int16_t* outputVectorPtr = outputVector;
370 
371  float min_val = -32768;
372  float max_val = 32767;
373  float r;
374 
375  __m128 vScalar = _mm_set_ps1(scalar);
376  __m128 ret;
377  __m128 vmin_val = _mm_set_ps1(min_val);
378  __m128 vmax_val = _mm_set_ps1(max_val);
379 
380  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
381 
382  for(;number < quarterPoints; number++){
383  ret = _mm_load_ps(inputVectorPtr);
384  inputVectorPtr += 4;
385 
386  // Scale and clip
387  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
388 
389  _mm_store_ps(outputFloatBuffer, ret);
390  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
391  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
392  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
393  *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
394  }
395 
396  number = quarterPoints * 4;
397  for(; number < num_points; number++){
398  r = inputVector[number] * scalar;
399  if(r > max_val)
400  r = max_val;
401  else if(r < min_val)
402  r = min_val;
403  outputVector[number] = (int16_t)rintf(r);
404  }
405 }
406 #endif /* LV_HAVE_SSE */
407 
408 #ifdef LV_HAVE_GENERIC
409  /*!
410  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
411  \param inputVector The floating point input data buffer
412  \param outputVector The 16 bit output data buffer
413  \param scalar The value multiplied against each point in the input buffer
414  \param num_points The number of data values to be converted
415  */
416 static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
417  int16_t* outputVectorPtr = outputVector;
418  const float* inputVectorPtr = inputVector;
419  unsigned int number = 0;
420  float min_val = -32768;
421  float max_val = 32767;
422  float r;
423 
424  for(number = 0; number < num_points; number++){
425  r = *inputVectorPtr++ * scalar;
426  if(r < min_val)
427  r = min_val;
428  else if(r > max_val)
429  r = max_val;
430  *outputVectorPtr++ = (int16_t)rintf(r);
431  }
432 }
433 #endif /* LV_HAVE_GENERIC */
434 
435 
436 
437 
438 #endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */
signed short int16_t
Definition: stdint.h:76
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27
static float rintf(float x)
Definition: volk/cmake/msvc/config.h:30