GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_x2_multiply_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
24 #define INCLUDED_volk_32f_x2_multiply_32f_u_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 
29 #ifdef LV_HAVE_SSE
30 #include <xmmintrin.h>
/*!
  \brief Multiplies two float vectors element by element using SSE (unaligned buffers accepted)
  \param cVector Output vector; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Largest multiple of 4 that does not exceed num_points */
  const unsigned int vectorEnd = num_points & ~3u;
  unsigned int idx;

  /* Main loop: four products per iteration through unaligned loads and stores */
  for(idx = 0; idx < vectorEnd; idx += 4){
    const __m128 lhs = _mm_loadu_ps(aVector + idx);
    const __m128 rhs = _mm_loadu_ps(bVector + idx);
    _mm_storeu_ps(cVector + idx, _mm_mul_ps(lhs, rhs));
  }

  /* Tail: the 0-3 elements that do not fill a whole SSE register */
  for(; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
66 #endif /* LV_HAVE_SSE */
67 
68 #ifdef LV_HAVE_AVX
69 #include <immintrin.h>
/*!
  \brief Multiplies two float vectors element by element using AVX (unaligned buffers accepted)
  \param cVector Output vector; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Number of leading elements that can be handled eight at a time */
  const unsigned int vectorEnd = num_points - (num_points % 8);
  unsigned int idx = 0;

  /* Main loop: eight products per iteration through unaligned loads and stores */
  while(idx < vectorEnd){
    const __m256 lhs = _mm256_loadu_ps(aVector + idx);
    const __m256 rhs = _mm256_loadu_ps(bVector + idx);
    _mm256_storeu_ps(cVector + idx, _mm256_mul_ps(lhs, rhs));
    idx += 8;
  }

  /* Tail: the 0-7 elements that do not fill a whole AVX register */
  while(idx < num_points){
    cVector[idx] = aVector[idx] * bVector[idx];
    ++idx;
  }
}
105 #endif /* LV_HAVE_AVX */
106 
107 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies two float vectors element by element (portable scalar version)
  \param cVector Output vector; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int idx;

  /* One product per iteration, in ascending index order */
  for(idx = 0; idx < num_points; ++idx){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
125 #endif /* LV_HAVE_GENERIC */
126 
127 
128 #endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
129 #ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
130 #define INCLUDED_volk_32f_x2_multiply_32f_a_H
131 
132 #include <inttypes.h>
133 #include <stdio.h>
134 
135 #ifdef LV_HAVE_SSE
136 #include <xmmintrin.h>
/*!
  \brief Multiplies two float vectors element by element using SSE aligned loads and stores
  \param cVector Output vector; element i receives aVector[i] * bVector[i]. Accessed with _mm_store_ps, so it must satisfy that intrinsic's 16-byte alignment requirement
  \param aVector First input vector, read with _mm_load_ps (16-byte aligned)
  \param bVector Second input vector, read with _mm_load_ps (16-byte aligned)
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  const float* lhs = aVector;
  const float* rhs = bVector;
  float* out = cVector;
  unsigned int blocksLeft;
  unsigned int tailLeft;

  /* Main loop: one 4-float block per iteration */
  for(blocksLeft = num_points / 4; blocksLeft > 0; --blocksLeft){
    const __m128 left = _mm_load_ps(lhs);
    const __m128 right = _mm_load_ps(rhs);

    _mm_store_ps(out, _mm_mul_ps(left, right));

    lhs += 4;
    rhs += 4;
    out += 4;
  }

  /* Tail: the 0-3 elements left over after the last full block */
  for(tailLeft = num_points % 4; tailLeft > 0; --tailLeft){
    *out++ = (*lhs++) * (*rhs++);
  }
}
172 #endif /* LV_HAVE_SSE */
173 
174 #ifdef LV_HAVE_AVX
175 #include <immintrin.h>
/*!
  \brief Multiplies two float vectors element by element using AVX aligned loads and stores
  \param cVector Output vector; element i receives aVector[i] * bVector[i]. Accessed with _mm256_store_ps, so it must satisfy that intrinsic's 32-byte alignment requirement
  \param aVector First input vector, read with _mm256_load_ps (32-byte aligned)
  \param bVector Second input vector, read with _mm256_load_ps (32-byte aligned)
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  const float* lhs = aVector;
  const float* rhs = bVector;
  float* out = cVector;
  unsigned int blocksLeft;
  unsigned int tailLeft;

  /* Main loop: one 8-float block per iteration */
  for(blocksLeft = num_points / 8; blocksLeft > 0; --blocksLeft){
    const __m256 left = _mm256_load_ps(lhs);
    const __m256 right = _mm256_load_ps(rhs);

    _mm256_store_ps(out, _mm256_mul_ps(left, right));

    lhs += 8;
    rhs += 8;
    out += 8;
  }

  /* Tail: the 0-7 elements left over after the last full block */
  for(tailLeft = num_points % 8; tailLeft > 0; --tailLeft){
    *out++ = (*lhs++) * (*rhs++);
  }
}
211 #endif /* LV_HAVE_AVX */
212 
213 #ifdef LV_HAVE_NEON
214 #include <arm_neon.h>
215 
/*!
  \brief Multiplies two float vectors element by element using NEON
  \param cVector Output vector; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_neon(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Largest multiple of 4 that does not exceed num_points */
  const unsigned int vectorEnd = num_points & ~3u;
  unsigned int idx;

  /* Main loop: four products per iteration */
  for(idx = 0; idx < vectorEnd; idx += 4){
    const float32x4_t lhs = vld1q_f32(aVector + idx);
    const float32x4_t rhs = vld1q_f32(bVector + idx);
    vst1q_f32(cVector + idx, vmulq_f32(lhs, rhs));
  }

  /* Tail: the 0-3 elements that do not fill a whole NEON register */
  for(; idx < num_points; ++idx){
    cVector[idx] = aVector[idx] * bVector[idx];
  }
}
240 #endif /* LV_HAVE_NEON */
241 
242 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies two float vectors element by element (portable scalar version)
  \param cVector Output vector; element i receives aVector[i] * bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int remaining;

  /* Walk all three vectors in lockstep until every element has been processed */
  for(remaining = num_points; remaining != 0; --remaining){
    *cVector++ = (*aVector++) * (*bVector++);
  }
}
260 #endif /* LV_HAVE_GENERIC */
261 
262 #ifdef LV_HAVE_ORC
/*!
  \brief Multiplies two float vectors element by element by forwarding to the ORC implementation
  \param cVector Output vector handed to volk_32f_x2_multiply_32f_a_orc_impl
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of elements to process; passed through unchanged
*/
extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector,
                                                const float* aVector,
                                                const float* bVector,
                                                unsigned int num_points);

static inline void volk_32f_x2_multiply_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* All work happens in the externally defined ORC routine; arguments are forwarded as-is */
  volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
274 #endif /* LV_HAVE_ORC */
275 
276 
277 #endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */