23 #ifndef INCLUDED_volk_8i_s32f_convert_32f_u_H
24 #define INCLUDED_volk_8i_s32f_convert_32f_u_H
30 #include <smmintrin.h>
/*!
  \brief Converts signed 8-bit integers to floats and divides each result by scalar (SSE4.1, unaligned buffers).
  \param outputVector Receives num_points float results; accessed with unaligned stores.
  \param inputVector The num_points signed 8-bit input values; accessed with unaligned loads.
  \param scalar Divisor applied to every converted value (implemented as a multiply by 1/scalar).
  \param num_points Number of values to convert.
*/
static inline void volk_8i_s32f_convert_32f_u_sse4_1(
    float* outputVector,
    const int8_t* inputVector,
    const float scalar,
    unsigned int num_points){
  unsigned int number = 0;
  unsigned int quarter = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  float* outputVectorPtr = outputVector;
  const float iScalar = 1.0f / scalar;
  const __m128 invScalar = _mm_set_ps1(iScalar);
  const int8_t* inputVectorPtr = inputVector;
  __m128 ret;
  __m128i inputVal;
  __m128i interimVal;

  for(; number < sixteenthPoints; number++){
    /* One 128-bit load brings in 16 input bytes. */
    inputVal = _mm_loadu_si128((const __m128i*)inputVectorPtr);

    /* Each pass widens the low 4 bytes to 32-bit ints, converts them to
       floats, scales and stores them, then shifts the next 4 bytes down. */
    for(quarter = 0; quarter < 4; quarter++){
      interimVal = _mm_cvtepi8_epi32(inputVal);
      ret = _mm_cvtepi32_ps(interimVal);
      ret = _mm_mul_ps(ret, invScalar);
      _mm_storeu_ps(outputVectorPtr, ret);
      outputVectorPtr += 4;

      inputVal = _mm_srli_si128(inputVal, 4);
    }

    inputVectorPtr += 16;
  }

  /* Scalar tail for the num_points % 16 values the vector loop did not cover. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]) * iScalar;
  }
}
92 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts signed 8-bit integers to floats and divides each result by scalar (portable C).
  \param outputVector Receives num_points float results.
  \param inputVector The num_points signed 8-bit input values.
  \param scalar Divisor applied to every converted value (implemented as a multiply by 1/scalar).
  \param num_points Number of values to convert.
*/
static inline void volk_8i_s32f_convert_32f_generic(
    float* outputVector,
    const int8_t* inputVector,
    const float scalar,
    unsigned int num_points){
  /* The reciprocal is computed once so the loop only multiplies. */
  const float iScalar = 1.0f / scalar;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    outputVector[number] = ((float)inputVector[number]) * iScalar;
  }
}
117 #ifndef INCLUDED_volk_8i_s32f_convert_32f_a_H
118 #define INCLUDED_volk_8i_s32f_convert_32f_a_H
123 #ifdef LV_HAVE_SSE4_1
124 #include <smmintrin.h>
/*!
  \brief Converts signed 8-bit integers to floats and divides each result by scalar (SSE4.1, aligned buffers).
  \param outputVector Receives num_points float results; written with aligned 128-bit stores, so it must be 16-byte aligned.
  \param inputVector The num_points signed 8-bit input values; read with aligned 128-bit loads, so it must be 16-byte aligned.
  \param scalar Divisor applied to every converted value (implemented as a multiply by 1/scalar).
  \param num_points Number of values to convert.
*/
static inline void volk_8i_s32f_convert_32f_a_sse4_1(
    float* outputVector,
    const int8_t* inputVector,
    const float scalar,
    unsigned int num_points){
  unsigned int number = 0;
  unsigned int quarter = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  float* outputVectorPtr = outputVector;
  const float iScalar = 1.0f / scalar;
  const __m128 invScalar = _mm_set_ps1(iScalar);
  const int8_t* inputVectorPtr = inputVector;
  __m128 ret;
  __m128i inputVal;
  __m128i interimVal;

  for(; number < sixteenthPoints; number++){
    /* One aligned 128-bit load brings in 16 input bytes. */
    inputVal = _mm_load_si128((const __m128i*)inputVectorPtr);

    /* Each pass widens the low 4 bytes to 32-bit ints, converts them to
       floats, scales and stores them, then shifts the next 4 bytes down. */
    for(quarter = 0; quarter < 4; quarter++){
      interimVal = _mm_cvtepi8_epi32(inputVal);
      ret = _mm_cvtepi32_ps(interimVal);
      ret = _mm_mul_ps(ret, invScalar);
      _mm_store_ps(outputVectorPtr, ret);
      outputVectorPtr += 4;

      inputVal = _mm_srli_si128(inputVal, 4);
    }

    inputVectorPtr += 16;
  }

  /* Scalar tail for the num_points % 16 values the vector loop did not cover. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]) * iScalar;
  }
}
185 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts signed 8-bit integers to floats and divides each result by scalar (portable C).
  \param outputVector Receives num_points float results.
  \param inputVector The num_points signed 8-bit input values.
  \param scalar Divisor applied to every converted value (implemented as a multiply by 1/scalar).
  \param num_points Number of values to convert.
*/
static inline void volk_8i_s32f_convert_32f_a_generic(
    float* outputVector,
    const int8_t* inputVector,
    const float scalar,
    unsigned int num_points){
  /* The reciprocal is computed once so the loop only multiplies. */
  const float iScalar = 1.0f / scalar;
  const int8_t* src = inputVector;
  const int8_t* const srcEnd = inputVector + num_points;
  float* dst = outputVector;

  while(src != srcEnd){
    *dst++ = ((float)(*src++)) * iScalar;
  }
}
/*!
  \brief External conversion kernel; only declared here, its definition lives outside this file.
  \note The wrapper below passes 1/scalar in the \p scalar position, so this
        routine presumably multiplies by that argument -- confirm against its definition.
*/
extern void volk_8i_s32f_convert_32f_a_orc_impl(
    float* outputVector,
    const int8_t* inputVector,
    const float scalar,
    unsigned int num_points);

/*!
  \brief Computes the reciprocal of scalar and forwards the conversion to volk_8i_s32f_convert_32f_a_orc_impl.
  \param outputVector Forwarded unchanged as the destination buffer.
  \param inputVector Forwarded unchanged as the signed 8-bit source buffer.
  \param scalar Divisor requested by the caller; its reciprocal is what gets forwarded.
  \param num_points Forwarded unchanged as the number of values to convert.
*/
static inline void volk_8i_s32f_convert_32f_u_orc(
    float* outputVector,
    const int8_t* inputVector,
    const float scalar,
    unsigned int num_points){
  const float invscalar = 1.0f / scalar;
  volk_8i_s32f_convert_32f_a_orc_impl(outputVector, inputVector, invscalar, num_points);
}
/* int8_t is a typedef for signed char (definition: stdint.h, line 75). */