23 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
24 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
31 #include <emmintrin.h>
40 static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(
lv_16sc_t* complexVector,
const float* iBuffer,
const float* qBuffer,
const float scalar,
unsigned int num_points){
41 unsigned int number = 0;
42 const float* iBufferPtr = iBuffer;
43 const float* qBufferPtr = qBuffer;
45 __m128 vScalar = _mm_set_ps1(scalar);
47 const unsigned int quarterPoints = num_points / 4;
49 __m128 iValue, qValue, cplxValue1, cplxValue2;
50 __m128i intValue1, intValue2;
54 for(;number < quarterPoints; number++){
55 iValue = _mm_load_ps(iBufferPtr);
56 qValue = _mm_load_ps(qBufferPtr);
59 cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
60 cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
63 cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
64 cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
66 intValue1 = _mm_cvtps_epi32(cplxValue1);
67 intValue2 = _mm_cvtps_epi32(cplxValue2);
69 intValue1 = _mm_packs_epi32(intValue1, intValue2);
71 _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
72 complexVectorPtr += 8;
78 number = quarterPoints * 4;
79 complexVectorPtr = (
int16_t*)(&complexVector[number]);
80 for(; number < num_points; number++){
81 *complexVectorPtr++ = (
int16_t)(*iBufferPtr++ * scalar);
82 *complexVectorPtr++ = (
int16_t)(*qBufferPtr++ * scalar);
89 #include <xmmintrin.h>
98 static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(
lv_16sc_t* complexVector,
const float* iBuffer,
const float* qBuffer,
const float scalar,
unsigned int num_points){
99 unsigned int number = 0;
100 const float* iBufferPtr = iBuffer;
101 const float* qBufferPtr = qBuffer;
103 __m128 vScalar = _mm_set_ps1(scalar);
105 const unsigned int quarterPoints = num_points / 4;
107 __m128 iValue, qValue, cplxValue;
113 for(;number < quarterPoints; number++){
114 iValue = _mm_load_ps(iBufferPtr);
115 qValue = _mm_load_ps(qBufferPtr);
118 cplxValue = _mm_unpacklo_ps(iValue, qValue);
119 cplxValue = _mm_mul_ps(cplxValue, vScalar);
121 _mm_store_ps(floatBuffer, cplxValue);
123 *complexVectorPtr++ = (
int16_t)(floatBuffer[0]);
124 *complexVectorPtr++ = (
int16_t)(floatBuffer[1]);
125 *complexVectorPtr++ = (
int16_t)(floatBuffer[2]);
126 *complexVectorPtr++ = (
int16_t)(floatBuffer[3]);
129 cplxValue = _mm_unpackhi_ps(iValue, qValue);
130 cplxValue = _mm_mul_ps(cplxValue, vScalar);
132 _mm_store_ps(floatBuffer, cplxValue);
134 *complexVectorPtr++ = (
int16_t)(floatBuffer[0]);
135 *complexVectorPtr++ = (
int16_t)(floatBuffer[1]);
136 *complexVectorPtr++ = (
int16_t)(floatBuffer[2]);
137 *complexVectorPtr++ = (
int16_t)(floatBuffer[3]);
143 number = quarterPoints * 4;
144 complexVectorPtr = (
int16_t*)(&complexVector[number]);
145 for(; number < num_points; number++){
146 *complexVectorPtr++ = (
int16_t)(*iBufferPtr++ * scalar);
147 *complexVectorPtr++ = (
int16_t)(*qBufferPtr++ * scalar);
153 #ifdef LV_HAVE_GENERIC
162 static inline void volk_32f_x2_s32f_interleave_16ic_generic(
lv_16sc_t* complexVector,
const float* iBuffer,
const float* qBuffer,
const float scalar,
unsigned int num_points){
164 const float* iBufferPtr = iBuffer;
165 const float* qBufferPtr = qBuffer;
166 unsigned int number = 0;
168 for(number = 0; number < num_points; number++){
169 *complexVectorPtr++ = (
int16_t)(*iBufferPtr++ * scalar);
170 *complexVectorPtr++ = (
int16_t)(*qBufferPtr++ * scalar);
/*
 * Cross-reference notes carried over from the generated source listing:
 *   lv_16sc_t              - `short complex`  (definition: volk_complex.h:53)
 *   int16_t                - `signed short`   (definition: stdint.h:76)
 *   __VOLK_ATTR_ALIGNED(x) - macro            (definition: volk_common.h:27)
 */