GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32fc_s32fc_multiply_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H
24 #define INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 #include <volk/volk_complex.h>
29 #include <float.h>
30 
31 #ifdef LV_HAVE_AVX
32 #include <immintrin.h>
33  /*!
34  \brief Multiplies the two input complex vectors and stores their results in the third vector
35  \param cVector The vector where the results will be stored
36  \param aVector One of the vectors to be multiplied
37  \param bVector One of the vectors to be multiplied
38  \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
39  */
40 static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
41  unsigned int number = 0;
42  unsigned int i = 0;
43  const unsigned int quarterPoints = num_points / 4;
44  unsigned int isodd = num_points & 3;
45  __m256 x, yl, yh, z, tmp1, tmp2;
46  lv_32fc_t* c = cVector;
47  const lv_32fc_t* a = aVector;
48 
49  // Set up constant scalar vector
50  yl = _mm256_set1_ps(lv_creal(scalar));
51  yh = _mm256_set1_ps(lv_cimag(scalar));
52 
53  for(;number < quarterPoints; number++){
54  x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
55 
56  tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
57 
58  x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
59 
60  tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
61 
62  z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
63 
64  _mm256_storeu_ps((float*)c,z); // Store the results back into the C container
65 
66  a += 4;
67  c += 4;
68  }
69 
70  for(i = num_points-isodd; i < num_points; i++) {
71  *c++ = (*a++) * scalar;
72  }
73 
74 }
75 #endif /* LV_HAVE_AVX */
76 
77 #ifdef LV_HAVE_SSE3
78 #include <pmmintrin.h>
79 /*!
80  \brief Multiplies the input vector by a scalar and stores the results in the third vector
81  \param cVector The vector where the results will be stored
82  \param aVector The vector to be multiplied
83  \param scalar The complex scalar to multiply aVector
84  \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
85 */
86 static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
87  unsigned int number = 0;
88  const unsigned int halfPoints = num_points / 2;
89 
90  __m128 x, yl, yh, z, tmp1, tmp2;
91  lv_32fc_t* c = cVector;
92  const lv_32fc_t* a = aVector;
93 
94  // Set up constant scalar vector
95  yl = _mm_set_ps1(lv_creal(scalar));
96  yh = _mm_set_ps1(lv_cimag(scalar));
97 
98  for(;number < halfPoints; number++){
99 
100  x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
101 
102  tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
103 
104  x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
105 
106  tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
107 
108  z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
109 
110  _mm_storeu_ps((float*)c,z); // Store the results back into the C container
111 
112  a += 2;
113  c += 2;
114  }
115 
116  if((num_points % 2) != 0) {
117  *c = (*a) * scalar;
118  }
119 }
120 #endif /* LV_HAVE_SSE3 */
121 
122 #ifdef LV_HAVE_GENERIC
123 /*!
124  \brief Multiplies the input vector by a scalar and stores the results in the third vector
125  \param cVector The vector where the results will be stored
126  \param aVector The vector to be multiplied
127  \param scalar The complex scalar to multiply aVector
128  \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
129 */
130 static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
131  lv_32fc_t* cPtr = cVector;
132  const lv_32fc_t* aPtr = aVector;
133  unsigned int number = num_points;
134 
135  // unwrap loop
136  while (number >= 8){
137  *cPtr++ = (*aPtr++) * scalar;
138  *cPtr++ = (*aPtr++) * scalar;
139  *cPtr++ = (*aPtr++) * scalar;
140  *cPtr++ = (*aPtr++) * scalar;
141  *cPtr++ = (*aPtr++) * scalar;
142  *cPtr++ = (*aPtr++) * scalar;
143  *cPtr++ = (*aPtr++) * scalar;
144  *cPtr++ = (*aPtr++) * scalar;
145  number -= 8;
146  }
147 
148  // clean up any remaining
149  while (number-- > 0)
150  *cPtr++ = *aPtr++ * scalar;
151 }
152 #endif /* LV_HAVE_GENERIC */
153 
154 
155 #endif /* INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H */
156 #ifndef INCLUDED_volk_32fc_s32fc_multiply_32fc_a_H
157 #define INCLUDED_volk_32fc_s32fc_multiply_32fc_a_H
158 
159 #include <inttypes.h>
160 #include <stdio.h>
161 #include <volk/volk_complex.h>
162 #include <float.h>
163 
164 #ifdef LV_HAVE_AVX
165 #include <immintrin.h>
166  /*!
167  \brief Multiplies the two input complex vectors and stores their results in the third vector
168  \param cVector The vector where the results will be stored
169  \param aVector One of the vectors to be multiplied
170  \param bVector One of the vectors to be multiplied
171  \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
172  */
173 static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
174  unsigned int number = 0;
175  unsigned int i = 0;
176  const unsigned int quarterPoints = num_points / 4;
177  unsigned int isodd = num_points & 3;
178  __m256 x, yl, yh, z, tmp1, tmp2;
179  lv_32fc_t* c = cVector;
180  const lv_32fc_t* a = aVector;
181 
182  // Set up constant scalar vector
183  yl = _mm256_set1_ps(lv_creal(scalar));
184  yh = _mm256_set1_ps(lv_cimag(scalar));
185 
186  for(;number < quarterPoints; number++){
187  x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
188 
189  tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
190 
191  x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
192 
193  tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
194 
195  z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
196 
197  _mm256_store_ps((float*)c,z); // Store the results back into the C container
198 
199  a += 4;
200  c += 4;
201  }
202 
203  for(i = num_points-isodd; i < num_points; i++) {
204  *c++ = (*a++) * scalar;
205  }
206 
207 }
208 #endif /* LV_HAVE_AVX */
209 
210 #ifdef LV_HAVE_SSE3
211 #include <pmmintrin.h>
212  /*!
213  \brief Multiplies the two input complex vectors and stores their results in the third vector
214  \param cVector The vector where the results will be stored
215  \param aVector One of the vectors to be multiplied
216  \param bVector One of the vectors to be multiplied
217  \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
218  */
219 static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
220  unsigned int number = 0;
221  const unsigned int halfPoints = num_points / 2;
222 
223  __m128 x, yl, yh, z, tmp1, tmp2;
224  lv_32fc_t* c = cVector;
225  const lv_32fc_t* a = aVector;
226 
227  // Set up constant scalar vector
228  yl = _mm_set_ps1(lv_creal(scalar));
229  yh = _mm_set_ps1(lv_cimag(scalar));
230 
231  for(;number < halfPoints; number++){
232 
233  x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
234 
235  tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
236 
237  x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
238 
239  tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
240 
241  z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
242 
243  _mm_store_ps((float*)c,z); // Store the results back into the C container
244 
245  a += 2;
246  c += 2;
247  }
248 
249  if((num_points % 2) != 0) {
250  *c = (*a) * scalar;
251  }
252 }
253 #endif /* LV_HAVE_SSE3 */
254 
255 #ifdef LV_HAVE_NEON
256 #include <arm_neon.h>
/*!
  \brief Multiplies every element of a complex vector by a complex scalar (NEON)
  \param cVector The vector where the results will be stored
  \param aVector The complex input vector to be scaled
  \param scalar The complex value each element of aVector is multiplied by
  \param num_points The number of complex values read from aVector and written to cVector
*/
static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
    lv_32fc_t* cPtr = cVector;
    const lv_32fc_t* aPtr = aVector;
    unsigned int number;
    const unsigned int quarter_points = num_points / 4;

    float32x4x2_t a_val;   // val[0] = four real parts, val[1] = four imaginary parts
    float32x4x2_t product; // same de-interleaved layout for the results

    // Broadcast the scalar's components to every lane. The scalar is a single
    // complex value (two floats), so it must not be fetched with vld2q_f32,
    // which reads eight floats and would leave only lane 0 holding the scalar.
    const float32x4_t scalar_re = vdupq_n_f32(lv_creal(scalar));
    const float32x4_t scalar_im = vdupq_n_f32(lv_cimag(scalar));

    for(number = 0; number < quarter_points; ++number) {
        // De-interleaving load: reals go to val[0], imaginaries to val[1]
        a_val = vld2q_f32((const float*)aPtr);

        // Partial products with the scalar's real part
        product.val[0] = vmulq_f32(a_val.val[0], scalar_re); // ar*sr
        product.val[1] = vmulq_f32(a_val.val[1], scalar_re); // ai*sr

        // Complex product: re = ar*sr - ai*si, im = ai*sr + ar*si
        product.val[0] = vmlsq_f32(product.val[0], a_val.val[1], scalar_im);
        product.val[1] = vmlaq_f32(product.val[1], a_val.val[0], scalar_im);

        // Re-interleaving store through the same cursor the tail loop uses,
        // so the tail continues right after the last SIMD-written element
        vst2q_f32((float*)cPtr, product);

        aPtr += 4;
        cPtr += 4;
    }

    // Scalar fallback for the last (num_points mod 4) values
    for(number = quarter_points*4; number < num_points; number++){
        *cPtr++ = *aPtr++ * scalar;
    }
}
291 #endif /* LV_HAVE_NEON */
292 
293 #ifdef LV_HAVE_GENERIC
294  /*!
295  \brief Multiplies the two input complex vectors and stores their results in the third vector
296  \param cVector The vector where the results will be stored
297  \param aVector One of the vectors to be multiplied
298  \param bVector One of the vectors to be multiplied
299  \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
300  */
301 static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
302  lv_32fc_t* cPtr = cVector;
303  const lv_32fc_t* aPtr = aVector;
304  unsigned int number = num_points;
305 
306  // unwrap loop
307  while (number >= 8){
308  *cPtr++ = (*aPtr++) * scalar;
309  *cPtr++ = (*aPtr++) * scalar;
310  *cPtr++ = (*aPtr++) * scalar;
311  *cPtr++ = (*aPtr++) * scalar;
312  *cPtr++ = (*aPtr++) * scalar;
313  *cPtr++ = (*aPtr++) * scalar;
314  *cPtr++ = (*aPtr++) * scalar;
315  *cPtr++ = (*aPtr++) * scalar;
316  number -= 8;
317  }
318 
319  // clean up any remaining
320  while (number-- > 0)
321  *cPtr++ = *aPtr++ * scalar;
322 }
323 #endif /* LV_HAVE_GENERIC */
324 
325 #endif /* INCLUDED_volk_32fc_s32fc_multiply_32fc_a_H */
float complex lv_32fc_t
Definition: volk_complex.h:56
#define lv_creal(x)
Definition: volk_complex.h:76
#define lv_cimag(x)
Definition: volk_complex.h:78
uint32_t i[4]
Definition: volk_common.h:80