23 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
24 #define INCLUDED_volk_32f_s32f_convert_32i_u_H
30 #include <emmintrin.h>
/*!
  \brief Multiplies each input float by a scalar and converts the product to a 32-bit signed integer (SSE2, unaligned buffers)
  \param outputVector Destination for the converted integers (num_points elements, no alignment required)
  \param inputVector Source floats (num_points elements, no alignment required)
  \param scalar Factor applied to every input value before conversion
  \param num_points Number of values to convert

  Products outside the int32_t range saturate to INT32_MIN / INT32_MAX.
  The vector loop converts with _mm_cvtps_epi32 (rounding follows the current
  MXCSR mode); the scalar tail uses a C cast and therefore truncates toward zero.
*/
static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = (const float*)inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* +/-2147483647 is not representable as float and rounds to +/-2^31,
     so the clamp bounds are written out exactly. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  __m128 vScalar = _mm_set_ps1(scalar);
  __m128 inputVal1;
  __m128i intInputVal1;
  __m128 vmin_val = _mm_set_ps1(min_val);
  __m128 vmax_val = _mm_set_ps1(max_val);

  for(;number < quarterPoints; number++){
    inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;

    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    intInputVal1 = _mm_cvtps_epi32(inputVal1);
    /* A lane clamped to 2^31 converts to 0x80000000; inverting every bit of
       exactly those lanes turns it into 0x7FFFFFFF (INT32_MAX). */
    intInputVal1 = _mm_xor_si128(intInputVal1, _mm_castps_si128(_mm_cmpge_ps(inputVal1, vmax_val)));

    _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
    outputVectorPtr += 4;
  }

  /* Scalar tail for the last num_points % 4 values */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val)){
      /* r >= 2^31 (or NaN): the cast would be undefined, pin to INT32_MAX */
      outputVector[number] = (int32_t)2147483647;
    }
    else if(r < min_val){
      outputVector[number] = (int32_t)(-2147483647 - 1);
    }
    else{
      outputVector[number] = (int32_t)(r);
    }
  }
}
80 #include <xmmintrin.h>
/*!
  \brief Multiplies each input float by a scalar and converts the product to a 32-bit signed integer (SSE, unaligned buffers)
  \param outputVector Destination for the converted integers (num_points elements, no alignment required)
  \param inputVector Source floats (num_points elements, no alignment required)
  \param scalar Factor applied to every input value before conversion
  \param num_points Number of values to convert

  SSE has no packed float-to-int conversion, so the scaled and clamped lanes
  are spilled to a small buffer and cast one by one (truncation toward zero).
  Products outside the int32_t range saturate to INT32_MIN / INT32_MAX.
*/
static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  unsigned int lane;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = (const float*)inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* +/-2147483647 is not representable as float and rounds to +/-2^31,
     so the clamp bounds are written out exactly. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  __m128 vScalar = _mm_set_ps1(scalar);
  __m128 ret;
  __m128 vmin_val = _mm_set_ps1(min_val);
  __m128 vmax_val = _mm_set_ps1(max_val);

  float outputFloatBuffer[4];

  for(;number < quarterPoints; number++){
    ret = _mm_loadu_ps(inputVectorPtr);
    inputVectorPtr += 4;

    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);

    /* Unaligned store: the spill buffer carries no alignment guarantee */
    _mm_storeu_ps(outputFloatBuffer, ret);
    for(lane = 0; lane < 4; lane++){
      /* Lanes are already within [-2^31, 2^31]; only the upper bound itself
         cannot be cast, so it is mapped to INT32_MAX explicitly. */
      if(outputFloatBuffer[lane] < max_val){
        *outputVectorPtr++ = (int32_t)(outputFloatBuffer[lane]);
      }
      else{
        *outputVectorPtr++ = (int32_t)2147483647;
      }
    }
  }

  /* Scalar tail for the last num_points % 4 values */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val)){
      /* r >= 2^31 (or NaN): the cast would be undefined, pin to INT32_MAX */
      outputVector[number] = (int32_t)2147483647;
    }
    else if(r < min_val){
      outputVector[number] = (int32_t)(-2147483647 - 1);
    }
    else{
      outputVector[number] = (int32_t)(r);
    }
  }
}
133 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each input float by a scalar and converts the product to a 32-bit signed integer (portable C)
  \param outputVector Destination for the converted integers (num_points elements)
  \param inputVector Source floats (num_points elements)
  \param scalar Factor applied to every input value before conversion
  \param num_points Number of values to convert

  Products are truncated toward zero. Products at or beyond the int32_t range
  saturate to INT32_MIN / INT32_MAX instead of overflowing the cast; a NaN
  product saturates to INT32_MAX.
*/
static inline void volk_32f_s32f_convert_32i_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  int32_t* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number = 0;
  /* +/-2147483647 is not representable as float and rounds to +/-2^31,
     so the clamp bounds are written out exactly. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  for(number = 0; number < num_points; number++){
    r = *inputVectorPtr++ * scalar;
    if(!(r < max_val)){
      /* r >= 2^31 (or NaN): the cast would be undefined, pin to INT32_MAX */
      *outputVectorPtr++ = (int32_t)2147483647;
    }
    else if(r < min_val){
      *outputVectorPtr++ = (int32_t)(-2147483647 - 1);
    }
    else{
      *outputVectorPtr++ = (int32_t)(r);
    }
  }
}
165 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
166 #define INCLUDED_volk_32f_s32f_convert_32i_a_H
173 #include <immintrin.h>
/*!
  \brief Multiplies each input float by a scalar and converts the product to a 32-bit signed integer (AVX, aligned buffers)
  \param outputVector Destination for the converted integers (num_points elements); written with _mm256_store_si256, so it must be 32-byte aligned
  \param inputVector Source floats (num_points elements); read with _mm256_load_ps, so it must be 32-byte aligned
  \param scalar Factor applied to every input value before conversion
  \param num_points Number of values to convert

  Products outside the int32_t range saturate to INT32_MIN / INT32_MAX.
  The vector loop converts with _mm256_cvtps_epi32 (rounding follows the
  current MXCSR mode); the scalar tail uses a C cast and truncates toward zero.
*/
static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int eighthPoints = num_points / 8;

  const float* inputVectorPtr = (const float*)inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* +/-2147483647 is not representable as float and rounds to +/-2^31,
     so the clamp bounds are written out exactly. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  __m256 vScalar = _mm256_set1_ps(scalar);
  __m256 inputVal1;
  __m256i intInputVal1;
  __m256 vmin_val = _mm256_set1_ps(min_val);
  __m256 vmax_val = _mm256_set1_ps(max_val);

  for(;number < eighthPoints; number++){
    inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;

    inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    intInputVal1 = _mm256_cvtps_epi32(inputVal1);
    /* A lane clamped to 2^31 converts to 0x80000000; inverting every bit of
       exactly those lanes yields 0x7FFFFFFF (INT32_MAX). The XOR is done in
       the float domain because 256-bit integer XOR requires AVX2. */
    intInputVal1 = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(intInputVal1), _mm256_cmp_ps(inputVal1, vmax_val, _CMP_GE_OQ)));

    _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
    outputVectorPtr += 8;
  }

  /* Scalar tail for the last num_points % 8 values */
  number = eighthPoints * 8;
  for(; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val)){
      /* r >= 2^31 (or NaN): the cast would be undefined, pin to INT32_MAX */
      outputVector[number] = (int32_t)2147483647;
    }
    else if(r < min_val){
      outputVector[number] = (int32_t)(-2147483647 - 1);
    }
    else{
      outputVector[number] = (int32_t)(r);
    }
  }
}
222 #include <emmintrin.h>
/*!
  \brief Multiplies each input float by a scalar and converts the product to a 32-bit signed integer (SSE2, aligned buffers)
  \param outputVector Destination for the converted integers (num_points elements); written with _mm_store_si128, so it must be 16-byte aligned
  \param inputVector Source floats (num_points elements); read with _mm_load_ps, so it must be 16-byte aligned
  \param scalar Factor applied to every input value before conversion
  \param num_points Number of values to convert

  Products outside the int32_t range saturate to INT32_MIN / INT32_MAX.
  The vector loop converts with _mm_cvtps_epi32 (rounding follows the current
  MXCSR mode); the scalar tail uses a C cast and therefore truncates toward zero.
*/
static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = (const float*)inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* +/-2147483647 is not representable as float and rounds to +/-2^31,
     so the clamp bounds are written out exactly. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  __m128 vScalar = _mm_set_ps1(scalar);
  __m128 inputVal1;
  __m128i intInputVal1;
  __m128 vmin_val = _mm_set_ps1(min_val);
  __m128 vmax_val = _mm_set_ps1(max_val);

  for(;number < quarterPoints; number++){
    inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;

    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    intInputVal1 = _mm_cvtps_epi32(inputVal1);
    /* A lane clamped to 2^31 converts to 0x80000000; inverting every bit of
       exactly those lanes turns it into 0x7FFFFFFF (INT32_MAX). */
    intInputVal1 = _mm_xor_si128(intInputVal1, _mm_castps_si128(_mm_cmpge_ps(inputVal1, vmax_val)));

    _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
    outputVectorPtr += 4;
  }

  /* Scalar tail for the last num_points % 4 values */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val)){
      /* r >= 2^31 (or NaN): the cast would be undefined, pin to INT32_MAX */
      outputVector[number] = (int32_t)2147483647;
    }
    else if(r < min_val){
      outputVector[number] = (int32_t)(-2147483647 - 1);
    }
    else{
      outputVector[number] = (int32_t)(r);
    }
  }
}
271 #include <xmmintrin.h>
/*!
  \brief Multiplies each input float by a scalar and converts the product to a 32-bit signed integer (SSE, aligned input)
  \param outputVector Destination for the converted integers (num_points elements); written one element at a time
  \param inputVector Source floats (num_points elements); read with _mm_load_ps, so it must be 16-byte aligned
  \param scalar Factor applied to every input value before conversion
  \param num_points Number of values to convert

  SSE has no packed float-to-int conversion, so the scaled and clamped lanes
  are spilled to a small buffer and cast one by one (truncation toward zero).
  Products outside the int32_t range saturate to INT32_MIN / INT32_MAX.
*/
static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  unsigned int lane;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = (const float*)inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* +/-2147483647 is not representable as float and rounds to +/-2^31,
     so the clamp bounds are written out exactly. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  __m128 vScalar = _mm_set_ps1(scalar);
  __m128 ret;
  __m128 vmin_val = _mm_set_ps1(min_val);
  __m128 vmax_val = _mm_set_ps1(max_val);

  float outputFloatBuffer[4];

  for(;number < quarterPoints; number++){
    ret = _mm_load_ps(inputVectorPtr);
    inputVectorPtr += 4;

    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);

    /* Unaligned store: the spill buffer carries no alignment guarantee */
    _mm_storeu_ps(outputFloatBuffer, ret);
    for(lane = 0; lane < 4; lane++){
      /* Lanes are already within [-2^31, 2^31]; only the upper bound itself
         cannot be cast, so it is mapped to INT32_MAX explicitly. */
      if(outputFloatBuffer[lane] < max_val){
        *outputVectorPtr++ = (int32_t)(outputFloatBuffer[lane]);
      }
      else{
        *outputVectorPtr++ = (int32_t)2147483647;
      }
    }
  }

  /* Scalar tail for the last num_points % 4 values */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val)){
      /* r >= 2^31 (or NaN): the cast would be undefined, pin to INT32_MAX */
      outputVector[number] = (int32_t)2147483647;
    }
    else if(r < min_val){
      outputVector[number] = (int32_t)(-2147483647 - 1);
    }
    else{
      outputVector[number] = (int32_t)(r);
    }
  }
}
323 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each input float by a scalar and converts the product to a 32-bit signed integer (portable C, aligned-kernel variant)
  \param outputVector Destination for the converted integers (num_points elements)
  \param inputVector Source floats (num_points elements)
  \param scalar Factor applied to every input value before conversion
  \param num_points Number of values to convert

  Products are truncated toward zero. Products at or beyond the int32_t range
  saturate to INT32_MIN / INT32_MAX instead of overflowing the cast; a NaN
  product saturates to INT32_MAX.
*/
static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  int32_t* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number = 0;
  /* +/-2147483647 is not representable as float and rounds to +/-2^31,
     so the clamp bounds are written out exactly. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  float r;

  for(number = 0; number < num_points; number++){
    r = *inputVectorPtr++ * scalar;
    if(!(r < max_val)){
      /* r >= 2^31 (or NaN): the cast would be undefined, pin to INT32_MAX */
      *outputVectorPtr++ = (int32_t)2147483647;
    }
    else if(r < min_val){
      *outputVectorPtr++ = (int32_t)(-2147483647 - 1);
    }
    else{
      *outputVectorPtr++ = (int32_t)(r);
    }
  }
}
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27
signed int int32_t
Definition: stdint.h:77