GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_s32f_multiply_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
24 #define INCLUDED_volk_32f_s32f_multiply_32f_u_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 
29 #ifdef LV_HAVE_SSE
30 #include <xmmintrin.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (SSE, unaligned loads/stores)
  \param cVector Output buffer; receives num_points products
  \param aVector Input buffer holding num_points floats
  \param scalar Factor applied to every element of aVector
  \param num_points Number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const unsigned int vectorBlocks = num_points / 4;
  const unsigned int leftover = num_points % 4;
  const __m128 factor = _mm_set_ps1(scalar); /* scalar replicated into all four lanes */
  const float* src = aVector;
  float* dst = cVector;
  unsigned int i;

  /* Main loop: four floats per iteration, no alignment assumed. */
  for(i = 0; i < vectorBlocks; i++, src += 4, dst += 4){
    _mm_storeu_ps(dst, _mm_mul_ps(_mm_loadu_ps(src), factor));
  }

  /* Tail: the last (num_points % 4) elements are done one at a time. */
  for(i = 0; i < leftover; i++){
    dst[i] = src[i] * scalar;
  }
}
64 #endif /* LV_HAVE_SSE */
65 
66 #ifdef LV_HAVE_AVX
67 #include <immintrin.h>
68 /*!
69  \brief Scalar float multiply
70  \param cVector The vector where the results will be stored
71  \param aVector One of the vectors to be multiplied
72  \param scalar the scalar value
73  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
74 */
75 static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
76  unsigned int number = 0;
77  const unsigned int eighthPoints = num_points / 8;
78 
79  float* cPtr = cVector;
80  const float* aPtr = aVector;
81 
82  __m256 aVal, bVal, cVal;
83  bVal = _mm256_set1_ps(scalar);
84  for(;number < eighthPoints; number++){
85 
86  aVal = _mm256_loadu_ps(aPtr);
87 
88  cVal = _mm256_mul_ps(aVal, bVal);
89 
90  _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
91 
92  aPtr += 8;
93  cPtr += 8;
94  }
95 
96  number = eighthPoints * 8;
97  for(;number < num_points; number++){
98  *cPtr++ = (*aPtr++) * scalar;
99  }
100 }
101 #endif /* LV_HAVE_AVX */
102 
103 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each element of a float vector by a scalar (portable C implementation)
  \param cVector Output buffer; receives num_points products
  \param aVector Input buffer holding num_points floats
  \param scalar Factor applied to every element of aVector
  \param num_points Number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int idx;

  /* One product per iteration, in increasing index order. */
  for(idx = 0; idx < num_points; idx++){
    cVector[idx] = aVector[idx] * scalar;
  }
}
121 #endif /* LV_HAVE_GENERIC */
122 
123 
124 #endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */
125 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
126 #define INCLUDED_volk_32f_s32f_multiply_32f_a_H
127 
128 #include <inttypes.h>
129 #include <stdio.h>
130 
131 #ifdef LV_HAVE_SSE
132 #include <xmmintrin.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (SSE, aligned loads/stores)

  Uses the aligned SSE load/store intrinsics, so both buffers must be 16-byte aligned.

  \param cVector Output buffer (16-byte aligned); receives num_points products
  \param aVector Input buffer (16-byte aligned) holding num_points floats
  \param scalar Factor applied to every element of aVector
  \param num_points Number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const unsigned int vectorBlocks = num_points / 4;
  const __m128 factor = _mm_set_ps1(scalar); /* scalar replicated into all four lanes */
  const float* src = aVector;
  float* dst = cVector;
  unsigned int remaining;

  /* Main loop: consume one group of four floats per pass. */
  for(remaining = vectorBlocks; remaining > 0; remaining--){
    const __m128 input = _mm_load_ps(src);
    _mm_store_ps(dst, _mm_mul_ps(input, factor));
    src += 4;
    dst += 4;
  }

  /* Tail: whatever is left after the full groups of four. */
  for(remaining = num_points - vectorBlocks * 4; remaining > 0; remaining--){
    *dst = *src * scalar;
    dst++;
    src++;
  }
}
166 #endif /* LV_HAVE_SSE */
167 
168 #ifdef LV_HAVE_AVX
169 #include <immintrin.h>
170 /*!
171  \brief Scalar float multiply
172  \param cVector The vector where the results will be stored
173  \param aVector One of the vectors to be multiplied
174  \param scalar the scalar value
175  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
176 */
177 static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
178  unsigned int number = 0;
179  const unsigned int eighthPoints = num_points / 8;
180 
181  float* cPtr = cVector;
182  const float* aPtr = aVector;
183 
184  __m256 aVal, bVal, cVal;
185  bVal = _mm256_set1_ps(scalar);
186  for(;number < eighthPoints; number++){
187 
188  aVal = _mm256_load_ps(aPtr);
189 
190  cVal = _mm256_mul_ps(aVal, bVal);
191 
192  _mm256_store_ps(cPtr,cVal); // Store the results back into the C container
193 
194  aPtr += 8;
195  cPtr += 8;
196  }
197 
198  number = eighthPoints * 8;
199  for(;number < num_points; number++){
200  *cPtr++ = (*aPtr++) * scalar;
201  }
202 }
203 #endif /* LV_HAVE_AVX */
204 
205 #ifdef LV_HAVE_NEON
206 #include <arm_neon.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (ARM NEON)
  \param cVector Output buffer; receives num_points products
  \param aVector Input buffer holding num_points floats
  \param scalar Factor applied to every element of aVector
  \param num_points Number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_neon(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const unsigned int vectorBlocks = num_points / 4;
  const float* src = aVector;
  float* dst = cVector;
  unsigned int i;

  /* Main loop: load four floats, multiply by the scalar, store four results. */
  for(i = 0; i < vectorBlocks; i++){
    const float32x4_t product = vmulq_n_f32(vld1q_f32(src), scalar);
    vst1q_f32(dst, product);
    src += 4;
    dst += 4;
  }

  /* Tail: the last (num_points % 4) elements are done one at a time. */
  for(i = vectorBlocks * 4; i < num_points; i++){
    *dst = *src * scalar;
    dst++;
    src++;
  }
}
233 #endif /* LV_HAVE_NEON */
234 
235 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each element of a float vector by a scalar (portable C implementation)
  \param cVector Output buffer; receives num_points products
  \param aVector Input buffer holding num_points floats
  \param scalar Factor applied to every element of aVector
  \param num_points Number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const float* src = aVector;
  float* dst = cVector;
  unsigned int remaining = num_points;

  /* Walk both buffers front to back until every element has been scaled. */
  while(remaining > 0){
    *dst++ = *src++ * scalar;
    remaining--;
  }
}
253 #endif /* LV_HAVE_GENERIC */
254 
255 #ifdef LV_HAVE_ORC
/* ORC kernel entry point; only declared here, its definition lives outside this header. */
extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst, const float* src, const float scalar, unsigned int num_points);

/*!
  \brief Scalar float multiply (ORC): forwards all arguments unchanged to volk_32f_s32f_multiply_32f_a_orc_impl
  \param cVector Output buffer, passed through as the destination
  \param aVector Input buffer, passed through as the source
  \param scalar The scalar value
  \param num_points Number of floats to process
*/
static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector, const float* aVector, const float scalar, unsigned int num_points)
{
  volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
267 #endif /* LV_HAVE_ORC */
268 
269 
270 #endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */