23 #ifndef INCLUDED_volk_32f_convert_64f_u_H
24 #define INCLUDED_volk_32f_convert_64f_u_H
30 #include <immintrin.h>
/*!
  \brief Converts single-precision floats to double precision using AVX (unaligned buffers).
  \param outputVector Receives num_points doubles; written with unaligned stores.
  \param inputVector The num_points floats to convert; read with unaligned loads.
  \param num_points Number of elements to convert.
*/
static inline void volk_32f_convert_64f_u_avx(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;
  __m256d ret;
  __m128 inputVal;

  /* Each iteration widens four floats into four doubles. */
  for(; number < quarterPoints; number++){
    inputVal = _mm_loadu_ps(inputVectorPtr);
    inputVectorPtr += 4;

    ret = _mm256_cvtps_pd(inputVal);
    _mm256_storeu_pd(outputVectorPtr, ret);

    /* Step past the four doubles just written; without this every
       iteration would overwrite the first four output elements. */
    outputVectorPtr += 4;
  }

  /* Scalar tail for the num_points % 4 elements the vector loop did not cover. */
  for(number = quarterPoints * 4; number < num_points; number++){
    outputVector[number] = (double)(inputVector[number]);
  }
}
66 #include <emmintrin.h>
/*!
  \brief Converts single-precision floats to double precision using SSE2 (unaligned buffers).
  \param outputVector Receives num_points doubles; written with unaligned stores.
  \param inputVector The num_points floats to convert; read with unaligned loads.
  \param num_points Number of elements to convert.
*/
static inline void volk_32f_convert_64f_u_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;
  __m128d ret;
  __m128 inputVal;

  /* Four floats are loaded per iteration and converted two at a time,
     because _mm_cvtps_pd only widens the two low lanes. */
  for(; number < quarterPoints; number++){
    inputVal = _mm_loadu_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* Low pair -> two doubles. */
    ret = _mm_cvtps_pd(inputVal);
    _mm_storeu_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;

    /* Move the high pair into the low lanes, then convert it too. */
    inputVal = _mm_movehl_ps(inputVal, inputVal);
    ret = _mm_cvtps_pd(inputVal);
    _mm_storeu_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;
  }

  /* Scalar tail for the num_points % 4 elements the vector loop did not cover. */
  for(number = quarterPoints * 4; number < num_points; number++){
    outputVector[number] = (double)(inputVector[number]);
  }
}
107 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts single-precision floats to double precision with plain scalar C.
  \param outputVector Receives num_points doubles.
  \param inputVector The num_points floats to convert.
  \param num_points Number of elements to convert.
*/
static inline void volk_32f_convert_64f_generic(double* outputVector, const float* inputVector, unsigned int num_points){
  double* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number;

  /* One element per iteration; both cursors advance together. */
  for(number = 0; number < num_points; number++){
    *outputVectorPtr++ = (double)(*inputVectorPtr++);
  }
}
131 #ifndef INCLUDED_volk_32f_convert_64f_a_H
132 #define INCLUDED_volk_32f_convert_64f_a_H
138 #include <immintrin.h>
/*!
  \brief Converts single-precision floats to double precision using AVX (aligned buffers).
  \param outputVector Receives num_points doubles; written with _mm256_store_pd, so it must be 32-byte aligned.
  \param inputVector The num_points floats to convert; read with _mm_load_ps, so it must be 16-byte aligned.
  \param num_points Number of elements to convert.
*/
static inline void volk_32f_convert_64f_a_avx(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;
  __m256d ret;
  __m128 inputVal;

  /* Each iteration widens four floats into four doubles. */
  for(; number < quarterPoints; number++){
    inputVal = _mm_load_ps(inputVectorPtr);
    inputVectorPtr += 4;

    ret = _mm256_cvtps_pd(inputVal);
    _mm256_store_pd(outputVectorPtr, ret);
    outputVectorPtr += 4;
  }

  /* Scalar tail for the num_points % 4 elements the vector loop did not cover. */
  for(number = quarterPoints * 4; number < num_points; number++){
    outputVector[number] = (double)(inputVector[number]);
  }
}
172 #include <emmintrin.h>
/*!
  \brief Converts single-precision floats to double precision using SSE2 (aligned buffers).
  \param outputVector Receives num_points doubles; written with _mm_store_pd, so it must be 16-byte aligned.
  \param inputVector The num_points floats to convert; read with _mm_load_ps, so it must be 16-byte aligned.
  \param num_points Number of elements to convert.
*/
static inline void volk_32f_convert_64f_a_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;
  __m128d ret;
  __m128 inputVal;

  /* Four floats are loaded per iteration and converted two at a time,
     because _mm_cvtps_pd only widens the two low lanes. */
  for(; number < quarterPoints; number++){
    inputVal = _mm_load_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* Low pair -> two doubles. */
    ret = _mm_cvtps_pd(inputVal);
    _mm_store_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;

    /* Move the high pair into the low lanes, then convert it too. */
    inputVal = _mm_movehl_ps(inputVal, inputVal);
    ret = _mm_cvtps_pd(inputVal);
    _mm_store_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;
  }

  /* Scalar tail for the num_points % 4 elements the vector loop did not cover. */
  for(number = quarterPoints * 4; number < num_points; number++){
    outputVector[number] = (double)(inputVector[number]);
  }
}
213 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts single-precision floats to double precision with plain scalar C.
  \param outputVector Receives num_points doubles.
  \param inputVector The num_points floats to convert.
  \param num_points Number of elements to convert.
*/
static inline void volk_32f_convert_64f_a_generic(double* outputVector, const float* inputVector, unsigned int num_points){
  double* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number;

  /* One element per iteration; both cursors advance together. */
  for(number = 0; number < num_points; number++){
    *outputVectorPtr++ = (double)(*inputVectorPtr++);
  }
}