23 #ifndef INCLUDED_volk_64f_convert_32f_u_H
24 #define INCLUDED_volk_64f_convert_32f_u_H
30 #include <emmintrin.h>
/*!
  \brief Converts an array of doubles to an array of floats using SSE2 (unaligned access)
  \param outputVector Destination buffer; receives num_points floats. No alignment is required.
  \param inputVector Source buffer holding num_points doubles. No alignment is required.
  \param num_points Number of values to convert
*/
static inline void volk_64f_convert_32f_u_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const double* inputVectorPtr = inputVector;
  float* outputVectorPtr = outputVector;
  __m128 ret, ret2;
  __m128d inputVal1, inputVal2;

  for(;number < quarterPoints; number++){
    /* Two unaligned loads fetch four consecutive doubles. */
    inputVal1 = _mm_loadu_pd(inputVectorPtr); inputVectorPtr += 2;
    inputVal2 = _mm_loadu_pd(inputVectorPtr); inputVectorPtr += 2;

    /* Each conversion leaves its two floats in the low half of the register. */
    ret = _mm_cvtpd_ps(inputVal1);
    ret2 = _mm_cvtpd_ps(inputVal2);

    /* Pack both low halves into one register: { ret[0], ret[1], ret2[0], ret2[1] }. */
    ret = _mm_movelh_ps(ret, ret2);

    _mm_storeu_ps(outputVectorPtr, ret);
    /* Advance past the four floats just written so the next store does not overwrite them. */
    outputVectorPtr += 4;
  }

  /* Convert the 0-3 values that did not fill a complete group of four. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]);
  }
}
68 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts an array of doubles to an array of floats, one element at a time
  \param outputVector Destination buffer; receives num_points floats
  \param inputVector Source buffer holding num_points doubles
  \param num_points Number of values to convert
*/
static inline void volk_64f_convert_32f_generic(float* outputVector, const double* inputVector, unsigned int num_points){
  float* outputVectorPtr = outputVector;
  const double* inputVectorPtr = inputVector;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    /* Narrow one double to float and step both cursors forward. */
    *outputVectorPtr++ = ((float)(*inputVectorPtr++));
  }
}
90 #ifndef INCLUDED_volk_64f_convert_32f_a_H
91 #define INCLUDED_volk_64f_convert_32f_a_H
97 #include <emmintrin.h>
/*!
  \brief Converts an array of doubles to an array of floats using SSE2 (aligned access)
  \param outputVector Destination buffer; receives num_points floats. Must be 16-byte aligned because it is written with _mm_store_ps.
  \param inputVector Source buffer holding num_points doubles. Must be 16-byte aligned because it is read with _mm_load_pd.
  \param num_points Number of values to convert
*/
static inline void volk_64f_convert_32f_a_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const double* inputVectorPtr = inputVector;
  float* outputVectorPtr = outputVector;
  __m128 ret, ret2;
  __m128d inputVal1, inputVal2;

  for(;number < quarterPoints; number++){
    /* Two aligned loads fetch four consecutive doubles. */
    inputVal1 = _mm_load_pd(inputVectorPtr); inputVectorPtr += 2;
    inputVal2 = _mm_load_pd(inputVectorPtr); inputVectorPtr += 2;

    /* Each conversion leaves its two floats in the low half of the register. */
    ret = _mm_cvtpd_ps(inputVal1);
    ret2 = _mm_cvtpd_ps(inputVal2);

    /* Pack both low halves into one register: { ret[0], ret[1], ret2[0], ret2[1] }. */
    ret = _mm_movelh_ps(ret, ret2);

    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;
  }

  /* Convert the 0-3 values that did not fill a complete group of four. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]);
  }
}
135 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts an array of doubles to an array of floats, one element at a time
  \param outputVector Destination buffer; receives num_points floats
  \param inputVector Source buffer holding num_points doubles
  \param num_points Number of values to convert
*/
static inline void volk_64f_convert_32f_a_generic(float* outputVector, const double* inputVector, unsigned int num_points){
  float* outputVectorPtr = outputVector;
  const double* inputVectorPtr = inputVector;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    /* Narrow one double to float and step both cursors forward. */
    *outputVectorPtr++ = ((float)(*inputVectorPtr++));
  }
}