23 #ifndef INCLUDED_volk_32f_binary_slicer_32i_H
24 #define INCLUDED_volk_32f_binary_slicer_32i_H
27 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar variant of the float -> int binary slicer.
 *
 * cVector    - int output buffer.
 * aVector    - float input buffer; only read through a const pointer.
 * num_points - number of input samples the loop walks over.
 *
 * NOTE(review): the per-sample statement inside the loop and the output
 * cursor are not shown in the lines reviewed; presumably each sample is
 * mapped to 1 when it is >= 0 and to 0 otherwise - confirm.
 */
34 static inline void volk_32f_binary_slicer_32i_generic(
int* cVector,
const float* aVector,
unsigned int num_points){
/* Read cursor over the input samples. */
36 const float* aPtr = aVector;
37 unsigned int number = 0;
/* One iteration per input sample. */
39 for(number = 0; number < num_points; number++){
51 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar binary slicer that avoids an if/else per sample by
 * storing the result of the comparison directly.
 *
 * cVector    - int output buffer.
 * aVector    - float input buffer; only read through a const pointer.
 * num_points - number of samples processed.
 *
 * NOTE(review): cPtr is declared on a line not shown here; presumably it
 * is initialised to cVector - confirm.
 */
58 static inline void volk_32f_binary_slicer_32i_generic_branchless(
int* cVector,
const float* aVector,
unsigned int num_points){
/* Read cursor over the input samples. */
60 const float* aPtr = aVector;
61 unsigned int number = 0;
63 for(number = 0; number < num_points; number++){
/* A C relational expression evaluates to int 1 or 0, so this writes 1 for
 * samples >= 0 and 0 otherwise (a NaN sample compares false and gives 0),
 * then advances both cursors. */
64 *cPtr++ = (*aPtr++ >= 0);
71 #include <emmintrin.h>
/*
 * SSE2 binary slicer using aligned memory accesses.
 *
 * cVector    - int output buffer.
 * aVector    - float input buffer; only read through a const pointer.
 * num_points - number of samples processed.
 *
 * _mm_load_ps and _mm_store_si128 require 16-byte aligned addresses, so
 * the pointers handed to them must be 16-byte aligned.
 *
 * NOTE(review): the declarations of cPtr, a_val, res_f and zero_val, and
 * the pointer increments inside the loop, are on lines not shown here.
 */
78 static inline void volk_32f_binary_slicer_32i_a_sse2(
int* cVector,
const float* aVector,
unsigned int num_points){
/* Read cursor over the input samples. */
80 const float* aPtr = aVector;
81 unsigned int number = 0;
/* Number of complete groups of 4 floats (one 128-bit vector each). */
83 unsigned int quarter_points = num_points / 4;
85 __m128i res_i, binary_i;
/* Threshold: every lane holds 0.0f. */
87 zero_val = _mm_set1_ps (0.0f);
/* Vector loop: 4 samples per iteration. */
89 for(number = 0; number < quarter_points; number++){
90 a_val = _mm_load_ps(aPtr);
/* Each lane becomes all-ones where the sample is >= 0, all-zeros otherwise. */
92 res_f = _mm_cmpge_ps (a_val, zero_val);
/* The all-ones mask is a NaN bit pattern when read as a float; the
 * float -> int32 conversion turns it into 0x80000000 (the "integer
 * indefinite" value), while an all-zeros lane converts to 0.
 * NOTE(review): _mm_castps_si128 would reinterpret the mask without a
 * floating-point conversion; consider it if the conversion's invalid
 * operation flag matters. */
93 res_i = _mm_cvtps_epi32 (res_f);
/* Logical shift moves the top bit down to bit 0: lanes end up as 1 or 0. */
94 binary_i = _mm_srli_epi32 (res_i, 31);
/* Aligned store of 4 int results. */
97 _mm_store_si128((__m128i*)cPtr, binary_i);
/* Scalar tail: covers the num_points % 4 samples left after the vector loop. */
104 for(number = quarter_points * 4; number < num_points; number++){
117 #include <immintrin.h>
/*
 * AVX binary slicer using aligned memory accesses.
 *
 * cVector    - int output buffer.
 * aVector    - float input buffer; only read through a const pointer.
 * num_points - number of samples processed.
 *
 * _mm256_load_ps and _mm256_store_si256 require 32-byte aligned
 * addresses, so the pointers handed to them must be 32-byte aligned.
 *
 * NOTE(review): the declarations of cPtr and binary_i, and the pointer
 * increments inside the loop, are on lines not shown here.
 */
124 static inline void volk_32f_binary_slicer_32i_a_avx(
int* cVector,
const float* aVector,
unsigned int num_points){
/* Read cursor over the input samples. */
126 const float* aPtr = aVector;
127 unsigned int number = 0;
/* Despite its name this is the number of complete groups of 8 floats
 * (one 256-bit vector each), not a quarter of num_points. */
129 unsigned int quarter_points = num_points / 8;
130 __m256 a_val, res_f, binary_f;
132 __m256 zero_val, one_val;
/* Threshold (0.0f) and the value selected for lanes that pass it (1.0f). */
133 zero_val = _mm256_set1_ps (0.0f);
134 one_val = _mm256_set1_ps (1.0f);
/* Vector loop: 8 samples per iteration. */
136 for(number = 0; number < quarter_points; number++){
137 a_val = _mm256_load_ps(aPtr);
/* Predicate 13 is _CMP_GE_OS (greater-or-equal, ordered, signaling):
 * lanes become all-ones where the sample is >= 0, all-zeros otherwise. */
139 res_f = _mm256_cmp_ps (a_val, zero_val, 13);
/* Masking 1.0f with the compare result leaves 1.0f in passing lanes and
 * 0.0f in the others. */
140 binary_f = _mm256_and_ps (res_f, one_val);
/* 1.0f / 0.0f convert exactly to integer 1 / 0. */
141 binary_i = _mm256_cvtps_epi32(binary_f);
/* Aligned store of 8 int results. */
145 _mm256_store_si256((__m256i *)cPtr, binary_i);
/* Scalar tail: covers the num_points % 8 samples left after the vector loop. */
152 for(number = quarter_points * 8; number < num_points; number++){
165 #include <emmintrin.h>
/*
 * SSE2 binary slicer using unaligned memory accesses.
 *
 * cVector    - int output buffer.
 * aVector    - float input buffer; only read through a const pointer.
 * num_points - number of samples processed.
 *
 * _mm_loadu_ps and _mm_storeu_si128 carry no alignment requirement, so
 * the buffers may start at any address.
 *
 * NOTE(review): the declarations of cPtr, a_val, res_f and zero_val, and
 * the pointer increments inside the loop, are on lines not shown here.
 */
172 static inline void volk_32f_binary_slicer_32i_u_sse2(
int* cVector,
const float* aVector,
unsigned int num_points){
/* Read cursor over the input samples. */
174 const float* aPtr = aVector;
175 unsigned int number = 0;
/* Number of complete groups of 4 floats (one 128-bit vector each). */
177 unsigned int quarter_points = num_points / 4;
179 __m128i res_i, binary_i;
/* Threshold: every lane holds 0.0f. */
181 zero_val = _mm_set1_ps (0.0f);
/* Vector loop: 4 samples per iteration. */
183 for(number = 0; number < quarter_points; number++){
184 a_val = _mm_loadu_ps(aPtr);
/* Each lane becomes all-ones where the sample is >= 0, all-zeros otherwise. */
186 res_f = _mm_cmpge_ps (a_val, zero_val);
/* The all-ones mask is a NaN bit pattern when read as a float; the
 * float -> int32 conversion turns it into 0x80000000 (the "integer
 * indefinite" value), while an all-zeros lane converts to 0.
 * NOTE(review): _mm_castps_si128 would reinterpret the mask without a
 * floating-point conversion; consider it if the conversion's invalid
 * operation flag matters. */
187 res_i = _mm_cvtps_epi32 (res_f);
/* Logical shift moves the top bit down to bit 0: lanes end up as 1 or 0. */
188 binary_i = _mm_srli_epi32 (res_i, 31);
/* Unaligned store of 4 int results. */
191 _mm_storeu_si128((__m128i*)cPtr, binary_i);
/* Scalar tail: covers the num_points % 4 samples left after the vector loop. */
198 for(number = quarter_points * 4; number < num_points; number++){
211 #include <immintrin.h>
/*
 * AVX binary slicer using unaligned memory accesses.
 *
 * cVector    - int output buffer.
 * aVector    - float input buffer; only read through a const pointer.
 * num_points - number of samples processed.
 *
 * _mm256_loadu_ps and _mm256_storeu_si256 carry no alignment
 * requirement, so the buffers may start at any address.
 *
 * NOTE(review): the declarations of cPtr and binary_i, and the pointer
 * increments inside the loop, are on lines not shown here.
 */
218 static inline void volk_32f_binary_slicer_32i_u_avx(
int* cVector,
const float* aVector,
unsigned int num_points){
/* Read cursor over the input samples. */
220 const float* aPtr = aVector;
221 unsigned int number = 0;
/* Despite its name this is the number of complete groups of 8 floats
 * (one 256-bit vector each), not a quarter of num_points. */
223 unsigned int quarter_points = num_points / 8;
224 __m256 a_val, res_f, binary_f;
226 __m256 zero_val, one_val;
/* Threshold (0.0f) and the value selected for lanes that pass it (1.0f). */
227 zero_val = _mm256_set1_ps (0.0f);
228 one_val = _mm256_set1_ps (1.0f);
/* Vector loop: 8 samples per iteration. */
230 for(number = 0; number < quarter_points; number++){
231 a_val = _mm256_loadu_ps(aPtr);
/* Predicate 13 is _CMP_GE_OS (greater-or-equal, ordered, signaling):
 * lanes become all-ones where the sample is >= 0, all-zeros otherwise. */
233 res_f = _mm256_cmp_ps (a_val, zero_val, 13);
/* Masking 1.0f with the compare result leaves 1.0f in passing lanes and
 * 0.0f in the others. */
234 binary_f = _mm256_and_ps (res_f, one_val);
/* 1.0f / 0.0f convert exactly to integer 1 / 0. */
235 binary_i = _mm256_cvtps_epi32(binary_f);
/* Unaligned store of 8 int results. */
239 _mm256_storeu_si256((__m256i*)cPtr, binary_i);
/* Scalar tail: covers the num_points % 8 samples left after the vector loop. */
246 for(number = quarter_points * 8; number < num_points; number++){