GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_16i_s32f_convert_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_16i_s32f_convert_32f_u_H
24 #define INCLUDED_volk_16i_s32f_convert_32f_u_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
  \brief Converts 16 bit signed integer samples to floating point and divides every converted sample by the scalar value (AVX kernel, unaligned buffers)
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted point is divided by
  \param num_points The number of data values to be converted
  \note Neither buffer needs to be aligned: only unaligned loads and stores are issued
  \note Full groups of eight points are multiplied by 1/scalar; the leftover points are divided by scalar directly
*/
static inline void volk_16i_s32f_convert_32f_u_avx(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  const unsigned int blockCount = num_points / 8;
  const __m128 reciprocal = _mm_set_ps1(1.0/scalar);
  const __m256 zeros = _mm256_setzero_ps();
  int16_t* src = (int16_t*)inputVector;
  float* dst = outputVector;
  unsigned int block;
  unsigned int idx;

  for(block = 0; block < blockCount; block++){
    // Fetch eight 16 bit samples with a single unaligned 128 bit load
    const __m128i packed = _mm_loadu_si128((__m128i*)src);

    // Sign-extend samples 0..3 directly; samples 4..7 are first moved
    // down into the low 64 bits by an 8 byte right shift
    const __m128i lowWords = _mm_cvtepi16_epi32(packed);
    const __m128i highWords = _mm_cvtepi16_epi32(_mm_srli_si128(packed, 8));

    // Convert to float and scale each half
    const __m128 lowScaled = _mm_mul_ps(_mm_cvtepi32_ps(lowWords), reciprocal);
    const __m128 highScaled = _mm_mul_ps(_mm_cvtepi32_ps(highWords), reciprocal);

    // Assemble the two 128 bit halves into one 256 bit result
    __m256 combined = _mm256_insertf128_ps(zeros, lowScaled, 0);
    combined = _mm256_insertf128_ps(combined, highScaled, 1);

    _mm256_storeu_ps(dst, combined);

    src += 8;
    dst += 8;
  }

  // Scalar clean-up for the points that do not fill a group of eight
  for(idx = blockCount * 8; idx < num_points; idx++){
    outputVector[idx] = ((float)(inputVector[idx])) / scalar;
  }
}
#endif /* LV_HAVE_AVX */
85 
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

/*!
  \brief Converts 16 bit signed integer samples to floating point and divides every converted sample by the scalar value (SSE4.1 kernel, unaligned buffers)
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted point is divided by
  \param num_points The number of data values to be converted
  \note Output buffer does NOT need to be properly aligned
  \note Full groups of eight points are multiplied by 1/scalar; the leftover points are divided by scalar directly
*/
static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  // Number of points covered by whole groups of eight
  const unsigned int simdPoints = (num_points / 8) * 8;
  const __m128 reciprocal = _mm_set_ps1(1.0/scalar);
  unsigned int idx;

  for(idx = 0; idx < simdPoints; idx += 8){
    // One unaligned 128 bit load brings in eight 16 bit samples
    const __m128i packed = _mm_loadu_si128((const __m128i*)(inputVector + idx));

    // Sign-extend the low four samples, then the high four after
    // shifting them down by 8 bytes
    const __m128i lowWords = _mm_cvtepi16_epi32(packed);
    const __m128i highWords = _mm_cvtepi16_epi32(_mm_srli_si128(packed, 8));

    // Convert, scale and write out each group of four floats
    _mm_storeu_ps(outputVector + idx, _mm_mul_ps(_mm_cvtepi32_ps(lowWords), reciprocal));
    _mm_storeu_ps(outputVector + idx + 4, _mm_mul_ps(_mm_cvtepi32_ps(highWords), reciprocal));
  }

  // Scalar clean-up for the points that do not fill a group of eight
  for(; idx < num_points; idx++){
    outputVector[idx] = ((float)(inputVector[idx])) / scalar;
  }
}
#endif /* LV_HAVE_SSE4_1 */
140 
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
  \brief Converts 16 bit signed integer samples to floating point and divides every converted sample by the scalar value (SSE kernel, unaligned buffers)
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted point is divided by
  \param num_points The number of data values to be converted
  \note Output buffer does NOT need to be properly aligned
  \note Full groups of four points are multiplied by 1/scalar; the leftover points are divided by scalar directly
*/
static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  const unsigned int blockCount = num_points / 4;
  const __m128 reciprocal = _mm_set_ps1(1.0/scalar);
  const int16_t* src = inputVector;
  float* dst = outputVector;
  unsigned int block;
  unsigned int idx;

  for(block = 0; block < blockCount; block++){
    // The integer to float conversion is done one sample at a time;
    // _mm_setr_ps places src[0] in the lowest lane
    const __m128 converted = _mm_setr_ps((float)(src[0]), (float)(src[1]), (float)(src[2]), (float)(src[3]));

    _mm_storeu_ps(dst, _mm_mul_ps(converted, reciprocal));

    src += 4;
    dst += 4;
  }

  // Scalar clean-up for the points that do not fill a group of four
  for(idx = blockCount * 4; idx < num_points; idx++){
    outputVector[idx] = (float)(inputVector[idx]) / scalar;
  }
}
#endif /* LV_HAVE_SSE */
177 
178 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts 16 bit signed integer samples to floating point and divides every converted sample by the scalar value (portable C kernel)
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted point is divided by
  \param num_points The number of data values to be converted
  \note Output buffer does NOT need to be properly aligned
*/
static inline void volk_16i_s32f_convert_32f_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  unsigned int idx;

  // One point per iteration: widen to float, then divide
  for(idx = 0; idx < num_points; idx++){
    outputVector[idx] = ((float)(inputVector[idx])) / scalar;
  }
}
196 #endif /* LV_HAVE_GENERIC */
197 
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*!
  \brief Converts 16 bit signed integer samples to floating point and divides every converted sample by the scalar value (ARM NEON kernel)
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted point is divided by
  \param num_points The number of data values to be converted
  \note Output buffer does NOT need to be properly aligned
  \note Full groups of eight points are multiplied by 1/scalar; the leftover points are divided by scalar directly
*/
static inline void volk_16i_s32f_convert_32f_neon(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  float* outputPtr = outputVector;
  const int16_t* inputPtr = inputVector;
  unsigned int number = 0;
  const unsigned int eighth_points = num_points / 8;
  const float32x4_t inv_scale = vdupq_n_f32(1.0/scalar);

  int16x4x2_t input16;
  float32x4x2_t output_float;

  // Original author's note: the generic kernel disassembles to a 128-bit
  // load with every instruction duplicated to work on 64 bits at a time.
  // The two-lane form used here was reported as faster than a plain
  // vld1_s16, but still slower than the generic code.
  for(number = 0; number < eighth_points; number++){
    // vld2 de-interleaves the eight samples: val[0] receives the
    // even-indexed ones, val[1] the odd-indexed ones
    input16 = vld2_s16(inputPtr);

    // widen 16-bit int to 32-bit int, convert to float, apply the scale
    output_float.val[0] = vmulq_f32(vcvtq_f32_s32(vmovl_s16(input16.val[0])), inv_scale);
    output_float.val[1] = vmulq_f32(vcvtq_f32_s32(vmovl_s16(input16.val[1])), inv_scale);

    // vst2q interleaves the two lanes again, restoring the input order
    vst2q_f32(outputPtr, output_float);

    inputPtr += 8;
    outputPtr += 8;
  }

  // Scalar clean-up; the pointers already sit just past the last full group
  for(number = eighth_points*8; number < num_points; number++){
    *outputPtr++ = ((float)(*inputPtr++)) / scalar;
  }
}
#endif /* LV_HAVE_NEON */
245 
246 
247 #endif /* INCLUDED_volk_16i_s32f_convert_32f_u_H */
248 #ifndef INCLUDED_volk_16i_s32f_convert_32f_a_H
249 #define INCLUDED_volk_16i_s32f_convert_32f_a_H
250 
251 #include <inttypes.h>
252 #include <stdio.h>
253 
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
  \brief Converts 16 bit signed integer samples to floating point and divides every converted sample by the scalar value (AVX kernel, aligned buffers)
  \param outputVector The floating point output data buffer; written with aligned 256 bit stores, so it must be 32 byte aligned
  \param inputVector The 16 bit input data buffer; read with aligned 128 bit loads, so it must be 16 byte aligned
  \param scalar The value each converted point is divided by
  \param num_points The number of data values to be converted
  \note Full groups of eight points are multiplied by 1/scalar; the leftover points are divided by scalar directly
*/
static inline void volk_16i_s32f_convert_32f_a_avx(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  const __m128 reciprocal = _mm_set_ps1(1.0/scalar);
  const __m256 zeros = _mm256_setzero_ps();
  int16_t* src = (int16_t*)inputVector;
  float* dst = outputVector;
  unsigned int done = 0;

  // Process whole groups of eight points while at least eight remain
  while(num_points - done >= 8){
    // Aligned 128 bit load of eight 16 bit samples
    const __m128i packed = _mm_load_si128((__m128i*)src);

    // Sign-extend samples 0..3 directly; samples 4..7 are first moved
    // down into the low 64 bits by an 8 byte right shift
    const __m128i lowWords = _mm_cvtepi16_epi32(packed);
    const __m128i highWords = _mm_cvtepi16_epi32(_mm_srli_si128(packed, 8));

    // Convert to float and scale each half
    const __m128 lowScaled = _mm_mul_ps(_mm_cvtepi32_ps(lowWords), reciprocal);
    const __m128 highScaled = _mm_mul_ps(_mm_cvtepi32_ps(highWords), reciprocal);

    // Assemble the two 128 bit halves into one 256 bit result
    __m256 combined = _mm256_insertf128_ps(zeros, lowScaled, 0);
    combined = _mm256_insertf128_ps(combined, highScaled, 1);

    _mm256_store_ps(dst, combined);

    src += 8;
    dst += 8;
    done += 8;
  }

  // Scalar clean-up for the points that do not fill a group of eight
  for(; done < num_points; done++){
    outputVector[done] = ((float)(inputVector[done])) / scalar;
  }
}
#endif /* LV_HAVE_AVX */
310 
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

/*!
  \brief Converts 16 bit signed integer samples to floating point and divides every converted sample by the scalar value (SSE4.1 kernel, aligned variant)
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted point is divided by
  \param num_points The number of data values to be converted
  \note Although this is the aligned variant, the loop issues unaligned loads and stores, so misaligned buffers do not fault here
  \note Full groups of eight points are multiplied by 1/scalar; the leftover points are divided by scalar directly
*/
static inline void volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  // Number of points covered by whole groups of eight
  const unsigned int simdPoints = (num_points / 8) * 8;
  const __m128 reciprocal = _mm_set_ps1(1.0/scalar);
  unsigned int idx;

  for(idx = 0; idx < simdPoints; idx += 8){
    // One 128 bit load brings in eight 16 bit samples
    const __m128i packed = _mm_loadu_si128((const __m128i*)(inputVector + idx));

    // Sign-extend the low four samples, then the high four after
    // shifting them down by 8 bytes
    const __m128i lowWords = _mm_cvtepi16_epi32(packed);
    const __m128i highWords = _mm_cvtepi16_epi32(_mm_srli_si128(packed, 8));

    // Convert, scale and write out each group of four floats
    _mm_storeu_ps(outputVector + idx, _mm_mul_ps(_mm_cvtepi32_ps(lowWords), reciprocal));
    _mm_storeu_ps(outputVector + idx + 4, _mm_mul_ps(_mm_cvtepi32_ps(highWords), reciprocal));
  }

  // Scalar clean-up for the points that do not fill a group of eight
  for(; idx < num_points; idx++){
    outputVector[idx] = ((float)(inputVector[idx])) / scalar;
  }
}
#endif /* LV_HAVE_SSE4_1 */
364 
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
  \brief Converts 16 bit signed integer samples to floating point and divides every converted sample by the scalar value (SSE kernel, aligned variant)
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted point is divided by
  \param num_points The number of data values to be converted
  \note Although this is the aligned variant, results are written with unaligned stores, so a misaligned output buffer does not fault here
  \note Full groups of four points are multiplied by 1/scalar; the leftover points are divided by scalar directly
*/
static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  const __m128 reciprocal = _mm_set_ps1(1.0/scalar);
  unsigned int done = 0;

  // Process whole groups of four points while at least four remain
  while(num_points - done >= 4){
    const int16_t* src = inputVector + done;

    // The integer to float conversion is done one sample at a time;
    // _mm_set_ps takes the highest lane first, so src[0] lands in lane 0
    const __m128 converted = _mm_set_ps((float)(src[3]), (float)(src[2]), (float)(src[1]), (float)(src[0]));

    _mm_storeu_ps(outputVector + done, _mm_mul_ps(converted, reciprocal));

    done += 4;
  }

  // Scalar clean-up for the points that do not fill a group of four
  for(; done < num_points; done++){
    outputVector[done] = (float)(inputVector[done]) / scalar;
  }
}
#endif /* LV_HAVE_SSE */
400 
401 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts 16 bit signed integer samples to floating point and divides every converted sample by the scalar value (portable C kernel, aligned variant)
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted point is divided by
  \param num_points The number of data values to be converted
  \note Plain element-by-element C; nothing in the body depends on buffer alignment
*/
static inline void volk_16i_s32f_convert_32f_a_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  const int16_t* src = inputVector;
  const int16_t* const srcEnd = inputVector + num_points;
  float* dst = outputVector;

  // Walk both buffers in lock-step until the input is exhausted
  while(src != srcEnd){
    *dst = ((float)(*src)) / scalar;
    src++;
    dst++;
  }
}
418 #endif /* LV_HAVE_GENERIC */
419 
420 
421 
422 
423 #endif /* INCLUDED_volk_16i_s32f_convert_32f_a_H */
signed short int16_t
Definition: stdint.h:76