GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32fc_s32fc_x2_rotator_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
24 #define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
25 
26 
27 #include <volk/volk_complex.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #define ROTATOR_RELOAD 512
31 
32 
33 #ifdef LV_HAVE_GENERIC
34 
35 /*!
36  \brief rotate input vector at fixed rate per sample from initial phase offset
37  \param outVector The vector where the results will be stored
38  \param inVector Vector to be rotated
39  \param phase_inc rotational velocity
40  \param phase initial phase offset
41  \param num_points The number of values in inVector to be rotated and stored into cVector
42 */
43 static inline void volk_32fc_s32fc_x2_rotator_32fc_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
44  unsigned int i = 0;
45  int j = 0;
46  for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) {
47  for(j = 0; j < ROTATOR_RELOAD; ++j) {
48  *outVector++ = *inVector++ * (*phase);
49  (*phase) *= phase_inc;
50  }
51 #ifdef __cplusplus
52  (*phase) /= std::abs((*phase));
53 #else
54  (*phase) /= cabsf((*phase));
55 #endif
56  }
57  for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) {
58  *outVector++ = *inVector++ * (*phase);
59  (*phase) *= phase_inc;
60  }
61 
62 }
63 
64 #endif /* LV_HAVE_GENERIC */
65 
66 
67 #ifdef LV_HAVE_SSE4_1
68 #include <smmintrin.h>
69 
70 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
71  lv_32fc_t* cPtr = outVector;
72  const lv_32fc_t* aPtr = inVector;
73  lv_32fc_t incr = 1;
74  lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};
75 
76  unsigned int i, j = 0;
77 
78  for(i = 0; i < 2; ++i) {
79  phase_Ptr[i] *= incr;
80  incr *= (phase_inc);
81  }
82 
83  /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
84  printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
85  printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
86  __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
87 
88  phase_Val = _mm_loadu_ps((float*)phase_Ptr);
89  inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
90 
91  const unsigned int halfPoints = num_points / 2;
92 
93 
94  for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
95  for(j = 0; j < ROTATOR_RELOAD; ++j) {
96 
97  aVal = _mm_load_ps((float*)aPtr);
98 
99  yl = _mm_moveldup_ps(phase_Val);
100  yh = _mm_movehdup_ps(phase_Val);
101  ylp = _mm_moveldup_ps(inc_Val);
102  yhp = _mm_movehdup_ps(inc_Val);
103 
104  tmp1 = _mm_mul_ps(aVal, yl);
105  tmp1p = _mm_mul_ps(phase_Val, ylp);
106 
107  aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
108  phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
109  tmp2 = _mm_mul_ps(aVal, yh);
110  tmp2p = _mm_mul_ps(phase_Val, yhp);
111 
112  z = _mm_addsub_ps(tmp1, tmp2);
113  phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
114 
115  _mm_store_ps((float*)cPtr, z);
116 
117  aPtr += 2;
118  cPtr += 2;
119  }
120  tmp1 = _mm_mul_ps(phase_Val, phase_Val);
121  tmp2 = _mm_hadd_ps(tmp1, tmp1);
122  tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
123  tmp2 = _mm_sqrt_ps(tmp1);
124  phase_Val = _mm_div_ps(phase_Val, tmp2);
125  }
126  for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
127  aVal = _mm_load_ps((float*)aPtr);
128 
129  yl = _mm_moveldup_ps(phase_Val);
130  yh = _mm_movehdup_ps(phase_Val);
131  ylp = _mm_moveldup_ps(inc_Val);
132  yhp = _mm_movehdup_ps(inc_Val);
133 
134  tmp1 = _mm_mul_ps(aVal, yl);
135 
136  tmp1p = _mm_mul_ps(phase_Val, ylp);
137 
138  aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
139  phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
140  tmp2 = _mm_mul_ps(aVal, yh);
141  tmp2p = _mm_mul_ps(phase_Val, yhp);
142 
143  z = _mm_addsub_ps(tmp1, tmp2);
144  phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
145 
146  _mm_store_ps((float*)cPtr, z);
147 
148  aPtr += 2;
149  cPtr += 2;
150  }
151 
152  _mm_storeu_ps((float*)phase_Ptr, phase_Val);
153  for(i = 0; i < num_points%2; ++i) {
154  *cPtr++ = *aPtr++ * phase_Ptr[0];
155  phase_Ptr[0] *= (phase_inc);
156  }
157 
158  (*phase) = phase_Ptr[0];
159 
160 }
161 
162 #endif /* LV_HAVE_SSE4_1 for aligned */
163 
164 
165 #ifdef LV_HAVE_SSE4_1
166 #include <smmintrin.h>
167 
168 /*!
169  \brief rotate input vector at fixed rate per sample from initial phase offset
170  \param outVector The vector where the results will be stored
171  \param inVector Vector to be rotated
172  \param phase_inc rotational velocity
173  \param phase initial phase offset
174  \param num_points The number of values in inVector to be rotated and stored into cVector
175 */
176 static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
177  lv_32fc_t* cPtr = outVector;
178  const lv_32fc_t* aPtr = inVector;
179  lv_32fc_t incr = 1;
180  lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};
181 
182  unsigned int i, j = 0;
183 
184  for(i = 0; i < 2; ++i) {
185  phase_Ptr[i] *= incr;
186  incr *= (phase_inc);
187  }
188 
189  /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
190  printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
191  printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
192  __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
193 
194  phase_Val = _mm_loadu_ps((float*)phase_Ptr);
195  inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
196 
197  const unsigned int halfPoints = num_points / 2;
198 
199 
200  for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
201  for(j = 0; j < ROTATOR_RELOAD; ++j) {
202 
203  aVal = _mm_loadu_ps((float*)aPtr);
204 
205  yl = _mm_moveldup_ps(phase_Val);
206  yh = _mm_movehdup_ps(phase_Val);
207  ylp = _mm_moveldup_ps(inc_Val);
208  yhp = _mm_movehdup_ps(inc_Val);
209 
210  tmp1 = _mm_mul_ps(aVal, yl);
211  tmp1p = _mm_mul_ps(phase_Val, ylp);
212 
213  aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
214  phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
215  tmp2 = _mm_mul_ps(aVal, yh);
216  tmp2p = _mm_mul_ps(phase_Val, yhp);
217 
218  z = _mm_addsub_ps(tmp1, tmp2);
219  phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
220 
221  _mm_storeu_ps((float*)cPtr, z);
222 
223  aPtr += 2;
224  cPtr += 2;
225  }
226  tmp1 = _mm_mul_ps(phase_Val, phase_Val);
227  tmp2 = _mm_hadd_ps(tmp1, tmp1);
228  tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
229  tmp2 = _mm_sqrt_ps(tmp1);
230  phase_Val = _mm_div_ps(phase_Val, tmp2);
231  }
232  for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
233  aVal = _mm_loadu_ps((float*)aPtr);
234 
235  yl = _mm_moveldup_ps(phase_Val);
236  yh = _mm_movehdup_ps(phase_Val);
237  ylp = _mm_moveldup_ps(inc_Val);
238  yhp = _mm_movehdup_ps(inc_Val);
239 
240  tmp1 = _mm_mul_ps(aVal, yl);
241 
242  tmp1p = _mm_mul_ps(phase_Val, ylp);
243 
244  aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
245  phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
246  tmp2 = _mm_mul_ps(aVal, yh);
247  tmp2p = _mm_mul_ps(phase_Val, yhp);
248 
249  z = _mm_addsub_ps(tmp1, tmp2);
250  phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
251 
252  _mm_storeu_ps((float*)cPtr, z);
253 
254  aPtr += 2;
255  cPtr += 2;
256  }
257 
258  _mm_storeu_ps((float*)phase_Ptr, phase_Val);
259  for(i = 0; i < num_points%2; ++i) {
260  *cPtr++ = *aPtr++ * phase_Ptr[0];
261  phase_Ptr[0] *= (phase_inc);
262  }
263 
264  (*phase) = phase_Ptr[0];
265 
266 }
267 
268 #endif /* LV_HAVE_SSE4_1 */
269 
270 
271 #ifdef LV_HAVE_AVX
272 #include <immintrin.h>
273 
274 /*!
275  \brief rotate input vector at fixed rate per sample from initial phase offset
276  \param outVector The vector where the results will be stored
277  \param inVector Vector to be rotated
278  \param phase_inc rotational velocity
279  \param phase initial phase offset
280  \param num_points The number of values in inVector to be rotated and stored into cVector
281 */
282 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
283  lv_32fc_t* cPtr = outVector;
284  const lv_32fc_t* aPtr = inVector;
285  lv_32fc_t incr = 1;
286  lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};
287 
288  unsigned int i, j = 0;
289 
290  for(i = 0; i < 4; ++i) {
291  phase_Ptr[i] *= incr;
292  incr *= (phase_inc);
293  }
294 
295  /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
296  printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
297  printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
298  printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
299  printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
300  __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
301 
302  phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
303  inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
304  const unsigned int fourthPoints = num_points / 4;
305 
306 
307  for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
308  for(j = 0; j < ROTATOR_RELOAD; ++j) {
309 
310  aVal = _mm256_load_ps((float*)aPtr);
311 
312  yl = _mm256_moveldup_ps(phase_Val);
313  yh = _mm256_movehdup_ps(phase_Val);
314  ylp = _mm256_moveldup_ps(inc_Val);
315  yhp = _mm256_movehdup_ps(inc_Val);
316 
317  tmp1 = _mm256_mul_ps(aVal, yl);
318  tmp1p = _mm256_mul_ps(phase_Val, ylp);
319 
320  aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
321  phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
322  tmp2 = _mm256_mul_ps(aVal, yh);
323  tmp2p = _mm256_mul_ps(phase_Val, yhp);
324 
325  z = _mm256_addsub_ps(tmp1, tmp2);
326  phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
327 
328  _mm256_store_ps((float*)cPtr, z);
329 
330  aPtr += 4;
331  cPtr += 4;
332  }
333  tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
334  tmp2 = _mm256_hadd_ps(tmp1, tmp1);
335  tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
336  tmp2 = _mm256_sqrt_ps(tmp1);
337  phase_Val = _mm256_div_ps(phase_Val, tmp2);
338  }
339  for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
340  aVal = _mm256_load_ps((float*)aPtr);
341 
342  yl = _mm256_moveldup_ps(phase_Val);
343  yh = _mm256_movehdup_ps(phase_Val);
344  ylp = _mm256_moveldup_ps(inc_Val);
345  yhp = _mm256_movehdup_ps(inc_Val);
346 
347  tmp1 = _mm256_mul_ps(aVal, yl);
348 
349  tmp1p = _mm256_mul_ps(phase_Val, ylp);
350 
351  aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
352  phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
353  tmp2 = _mm256_mul_ps(aVal, yh);
354  tmp2p = _mm256_mul_ps(phase_Val, yhp);
355 
356  z = _mm256_addsub_ps(tmp1, tmp2);
357  phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
358 
359  _mm256_store_ps((float*)cPtr, z);
360 
361  aPtr += 4;
362  cPtr += 4;
363  }
364 
365  _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
366  for(i = 0; i < num_points%4; ++i) {
367  *cPtr++ = *aPtr++ * phase_Ptr[0];
368  phase_Ptr[0] *= (phase_inc);
369  }
370 
371  (*phase) = phase_Ptr[0];
372 
373 }
374 
375 #endif /* LV_HAVE_AVX for aligned */
376 
377 
378 #ifdef LV_HAVE_AVX
379 #include <immintrin.h>
380 
381 /*!
382  \brief rotate input vector at fixed rate per sample from initial phase offset
383  \param outVector The vector where the results will be stored
384  \param inVector Vector to be rotated
385  \param phase_inc rotational velocity
386  \param phase initial phase offset
387  \param num_points The number of values in inVector to be rotated and stored into cVector
388 */
389 static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
390  lv_32fc_t* cPtr = outVector;
391  const lv_32fc_t* aPtr = inVector;
392  lv_32fc_t incr = 1;
393  lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};
394 
395  unsigned int i, j = 0;
396 
397  for(i = 0; i < 4; ++i) {
398  phase_Ptr[i] *= incr;
399  incr *= (phase_inc);
400  }
401 
402  /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
403  printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
404  printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
405  printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
406  printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
407  __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
408 
409  phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
410  inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
411  const unsigned int fourthPoints = num_points / 4;
412 
413 
414  for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
415  for(j = 0; j < ROTATOR_RELOAD; ++j) {
416 
417  aVal = _mm256_loadu_ps((float*)aPtr);
418 
419  yl = _mm256_moveldup_ps(phase_Val);
420  yh = _mm256_movehdup_ps(phase_Val);
421  ylp = _mm256_moveldup_ps(inc_Val);
422  yhp = _mm256_movehdup_ps(inc_Val);
423 
424  tmp1 = _mm256_mul_ps(aVal, yl);
425  tmp1p = _mm256_mul_ps(phase_Val, ylp);
426 
427  aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
428  phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
429  tmp2 = _mm256_mul_ps(aVal, yh);
430  tmp2p = _mm256_mul_ps(phase_Val, yhp);
431 
432  z = _mm256_addsub_ps(tmp1, tmp2);
433  phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
434 
435  _mm256_storeu_ps((float*)cPtr, z);
436 
437  aPtr += 4;
438  cPtr += 4;
439  }
440  tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
441  tmp2 = _mm256_hadd_ps(tmp1, tmp1);
442  tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
443  tmp2 = _mm256_sqrt_ps(tmp1);
444  phase_Val = _mm256_div_ps(phase_Val, tmp2);
445  }
446  for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
447  aVal = _mm256_loadu_ps((float*)aPtr);
448 
449  yl = _mm256_moveldup_ps(phase_Val);
450  yh = _mm256_movehdup_ps(phase_Val);
451  ylp = _mm256_moveldup_ps(inc_Val);
452  yhp = _mm256_movehdup_ps(inc_Val);
453 
454  tmp1 = _mm256_mul_ps(aVal, yl);
455 
456  tmp1p = _mm256_mul_ps(phase_Val, ylp);
457 
458  aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
459  phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
460  tmp2 = _mm256_mul_ps(aVal, yh);
461  tmp2p = _mm256_mul_ps(phase_Val, yhp);
462 
463  z = _mm256_addsub_ps(tmp1, tmp2);
464  phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
465 
466  _mm256_storeu_ps((float*)cPtr, z);
467 
468  aPtr += 4;
469  cPtr += 4;
470  }
471 
472  _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
473  for(i = 0; i < num_points%4; ++i) {
474  *cPtr++ = *aPtr++ * phase_Ptr[0];
475  phase_Ptr[0] *= (phase_inc);
476  }
477 
478  (*phase) = phase_Ptr[0];
479 
480 }
481 
482 #endif /* LV_HAVE_AVX */
483 
484 #endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */
float complex lv_32fc_t
Definition: volk_complex.h:56
#define lv_creal(x)
Definition: volk_complex.h:76
#define ROTATOR_RELOAD
Definition: volk_32fc_s32fc_x2_rotator_32fc.h:30
#define lv_cimag(x)
Definition: volk_complex.h:78
uint32_t i[4]
Definition: volk_common.h:80