23 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
24 #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
31 #include <immintrin.h>
/*!
 * \brief Computes the squared magnitude (re*re + im*im) of each complex sample
 *        using AVX with unaligned loads and stores.
 *
 * \param magnitudeVector Output buffer; receives num_points floats.
 * \param complexVector   Input buffer of num_points complex samples, read as
 *                        interleaved (real, imag) float pairs.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_squared_32f_u_avx(
    float* magnitudeVector,
    const lv_32fc_t* complexVector,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    /* View the complex input as floats without discarding its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    __m256 cplxValue1, cplxValue2, complex1, complex2, result;
    for (; number < eighthPoints; number++) {
        /* Each 256-bit load holds 4 complex samples (8 floats). */
        cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
        complexVectorPtr += 8;

        cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /* Square every real and imaginary component. */
        cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
        cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);

        /* hadd operates per 128-bit lane, so regroup first: complex1 holds
         * both low halves, complex2 both high halves. The horizontal add then
         * produces the eight magnitudes in sample order. */
        complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
        complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);

        result = _mm256_hadd_ps(complex1, complex2);

        _mm256_storeu_ps(magnitudeVectorPtr, result);
        magnitudeVectorPtr += 8;
    }

    /* Scalar tail for the remaining num_points % 8 samples. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        float val1Real = *complexVectorPtr++;
        float val1Imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    }
}
75 #include <pmmintrin.h>
/*!
 * \brief Computes the squared magnitude (re*re + im*im) of each complex sample
 *        using SSE3 with unaligned loads and stores.
 *
 * \param magnitudeVector Output buffer; receives num_points floats.
 * \param complexVector   Input buffer of num_points complex samples, read as
 *                        interleaved (real, imag) float pairs.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_squared_32f_u_sse3(
    float* magnitudeVector,
    const lv_32fc_t* complexVector,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    /* View the complex input as floats without discarding its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    __m128 cplxValue1, cplxValue2, result;
    for (; number < quarterPoints; number++) {
        /* Each 128-bit load holds 2 complex samples (4 floats). */
        cplxValue1 = _mm_loadu_ps(complexVectorPtr);
        complexVectorPtr += 4;

        cplxValue2 = _mm_loadu_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* Square every real and imaginary component. */
        cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
        cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);

        /* Adjacent pairs are (re^2, im^2); the horizontal add sums each pair,
         * giving the four magnitudes in sample order. */
        result = _mm_hadd_ps(cplxValue1, cplxValue2);

        _mm_storeu_ps(magnitudeVectorPtr, result);
        magnitudeVectorPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 samples. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        float val1Real = *complexVectorPtr++;
        float val1Imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    }
}
116 #include <xmmintrin.h>
/*!
 * \brief Computes the squared magnitude (re*re + im*im) of each complex sample
 *        using SSE with unaligned loads and stores.
 *
 * \param magnitudeVector Output buffer; receives num_points floats.
 * \param complexVector   Input buffer of num_points complex samples, read as
 *                        interleaved (real, imag) float pairs.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_squared_32f_u_sse(
    float* magnitudeVector,
    const lv_32fc_t* complexVector,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    /* View the complex input as floats without discarding its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
    for (; number < quarterPoints; number++) {
        /* Each 128-bit load holds 2 complex samples (4 floats). */
        cplxValue1 = _mm_loadu_ps(complexVectorPtr);
        complexVectorPtr += 4;

        cplxValue2 = _mm_loadu_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* De-interleave: even float slots (real parts) into iValue ... */
        iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
        /* ... and odd float slots (imaginary parts) into qValue. */
        qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));

        iValue = _mm_mul_ps(iValue, iValue);
        qValue = _mm_mul_ps(qValue, qValue);

        result = _mm_add_ps(iValue, qValue);

        _mm_storeu_ps(magnitudeVectorPtr, result);
        magnitudeVectorPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 samples. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        float val1Real = *complexVectorPtr++;
        float val1Imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    }
}
161 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Computes the squared magnitude (re*re + im*im) of each complex sample
 *        with plain scalar C.
 *
 * \param magnitudeVector Output buffer; receives num_points floats.
 * \param complexVector   Input buffer of num_points complex samples, read as
 *                        interleaved (real, imag) float pairs.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_squared_32f_generic(
    float* magnitudeVector,
    const lv_32fc_t* complexVector,
    unsigned int num_points)
{
    /* View the complex input as floats without discarding its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    for (unsigned int number = 0; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (real * real) + (imag * imag);
    }
}
181 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
182 #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
189 #include <immintrin.h>
/*!
 * \brief Computes the squared magnitude (re*re + im*im) of each complex sample
 *        using AVX with aligned loads and stores.
 *
 * Uses _mm256_load_ps / _mm256_store_ps, so both buffers must be 32-byte
 * aligned.
 *
 * \param magnitudeVector Output buffer; receives num_points floats.
 * \param complexVector   Input buffer of num_points complex samples, read as
 *                        interleaved (real, imag) float pairs.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_squared_32f_a_avx(
    float* magnitudeVector,
    const lv_32fc_t* complexVector,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    /* View the complex input as floats without discarding its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    __m256 cplxValue1, cplxValue2, complex1, complex2, result;
    for (; number < eighthPoints; number++) {
        /* Each 256-bit load holds 4 complex samples (8 floats). */
        cplxValue1 = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        cplxValue2 = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /* Square every real and imaginary component. */
        cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
        cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);

        /* hadd operates per 128-bit lane, so regroup first: complex1 holds
         * both low halves, complex2 both high halves. The horizontal add then
         * produces the eight magnitudes in sample order. */
        complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
        complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);

        result = _mm256_hadd_ps(complex1, complex2);

        _mm256_store_ps(magnitudeVectorPtr, result);
        magnitudeVectorPtr += 8;
    }

    /* Scalar tail for the remaining num_points % 8 samples. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        float val1Real = *complexVectorPtr++;
        float val1Imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    }
}
233 #include <pmmintrin.h>
/*!
 * \brief Computes the squared magnitude (re*re + im*im) of each complex sample
 *        using SSE3 with aligned loads and stores.
 *
 * Uses _mm_load_ps / _mm_store_ps, so both buffers must be 16-byte aligned.
 *
 * \param magnitudeVector Output buffer; receives num_points floats.
 * \param complexVector   Input buffer of num_points complex samples, read as
 *                        interleaved (real, imag) float pairs.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_squared_32f_a_sse3(
    float* magnitudeVector,
    const lv_32fc_t* complexVector,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    /* View the complex input as floats without discarding its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    __m128 cplxValue1, cplxValue2, result;
    for (; number < quarterPoints; number++) {
        /* Each 128-bit load holds 2 complex samples (4 floats). */
        cplxValue1 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        cplxValue2 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* Square every real and imaginary component. */
        cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
        cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);

        /* Adjacent pairs are (re^2, im^2); the horizontal add sums each pair,
         * giving the four magnitudes in sample order. */
        result = _mm_hadd_ps(cplxValue1, cplxValue2);

        _mm_store_ps(magnitudeVectorPtr, result);
        magnitudeVectorPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 samples. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        float val1Real = *complexVectorPtr++;
        float val1Imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    }
}
274 #include <xmmintrin.h>
/*!
 * \brief Computes the squared magnitude (re*re + im*im) of each complex sample
 *        using SSE with aligned loads and stores.
 *
 * Uses _mm_load_ps / _mm_store_ps, so both buffers must be 16-byte aligned.
 *
 * \param magnitudeVector Output buffer; receives num_points floats.
 * \param complexVector   Input buffer of num_points complex samples, read as
 *                        interleaved (real, imag) float pairs.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_squared_32f_a_sse(
    float* magnitudeVector,
    const lv_32fc_t* complexVector,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    /* View the complex input as floats without discarding its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
    for (; number < quarterPoints; number++) {
        /* Each 128-bit load holds 2 complex samples (4 floats). */
        cplxValue1 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        cplxValue2 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* De-interleave: even float slots (real parts) into iValue ... */
        iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
        /* ... and odd float slots (imaginary parts) into qValue. */
        qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));

        iValue = _mm_mul_ps(iValue, iValue);
        qValue = _mm_mul_ps(qValue, qValue);

        result = _mm_add_ps(iValue, qValue);

        _mm_store_ps(magnitudeVectorPtr, result);
        magnitudeVectorPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 samples. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        float val1Real = *complexVectorPtr++;
        float val1Imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    }
}
320 #include <arm_neon.h>
/*!
 * \brief Computes the squared magnitude (re*re + im*im) of each complex sample
 *        using ARM NEON.
 *
 * \param magnitudeVector Output buffer; receives num_points floats.
 * \param complexVector   Input buffer of num_points complex samples, read as
 *                        interleaved (real, imag) float pairs.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_squared_32f_neon(
    float* magnitudeVector,
    const lv_32fc_t* complexVector,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    /* View the complex input as floats without discarding its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    float32x4x2_t cmplx_val;
    float32x4_t result; /* holds the four magnitudes produced per iteration */

    for (; number < quarterPoints; number++) {
        /* vld2q_f32 reads 8 floats and de-interleaves them:
         * val[0] gets the four real parts, val[1] the four imaginary parts. */
        cmplx_val = vld2q_f32(complexVectorPtr);
        complexVectorPtr += 8;

        cmplx_val.val[0] = vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]);
        cmplx_val.val[1] = vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]);

        result = vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]);

        vst1q_f32(magnitudeVectorPtr, result);
        magnitudeVectorPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 samples. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        float val1Real = *complexVectorPtr++;
        float val1Imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    }
}
361 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Computes the squared magnitude (re*re + im*im) of each complex sample
 *        with plain scalar C.
 *
 * \param magnitudeVector Output buffer; receives num_points floats.
 * \param complexVector   Input buffer of num_points complex samples, read as
 *                        interleaved (real, imag) float pairs.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_32fc_magnitude_squared_32f_a_generic(
    float* magnitudeVector,
    const lv_32fc_t* complexVector,
    unsigned int num_points)
{
    /* View the complex input as floats without discarding its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    for (unsigned int number = 0; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (real * real) + (imag * imag);
    }
}
/* Note: lv_32fc_t is a typedef for `float complex`, defined in volk_complex.h (line 56). */