GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32fc_32f_multiply_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a_H
24 #define INCLUDED_volk_32fc_32f_multiply_32fc_a_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 
29 #ifdef LV_HAVE_AVX
30 #include <immintrin.h>
31  /*!
32  \brief Multiplies the input complex vector with the input float vector and store their results in the third vector
33  \param cVector The vector where the results will be stored
34  \param aVector The complex vector to be multiplied
35  \param bVector The vectors containing the float values to be multiplied against each complex value in aVector
36  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
37  */
38 static inline void volk_32fc_32f_multiply_32fc_a_avx(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points)
39 {
40  unsigned int number = 0;
41  const unsigned int eighthPoints = num_points / 8;
42 
43  lv_32fc_t* cPtr = cVector;
44  const lv_32fc_t* aPtr = aVector;
45  const float* bPtr= bVector;
46 
47  __m256 aVal1, aVal2, bVal, bVal1, bVal2, cVal1, cVal2;
48 
49  __m256i permute_mask = _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0);
50 
51  for(;number < eighthPoints; number++){
52 
53  aVal1 = _mm256_load_ps((float *)aPtr);
54  aPtr += 4;
55 
56  aVal2 = _mm256_load_ps((float *)aPtr);
57  aPtr += 4;
58 
59  bVal = _mm256_load_ps(bPtr); // b0|b1|b2|b3|b4|b5|b6|b7
60  bPtr += 8;
61 
62  bVal1 = _mm256_permute2f128_ps(bVal, bVal, 0x00); // b0|b1|b2|b3|b0|b1|b2|b3
63  bVal2 = _mm256_permute2f128_ps(bVal, bVal, 0x11); // b4|b5|b6|b7|b4|b5|b6|b7
64 
65  bVal1 = _mm256_permutevar_ps(bVal1, permute_mask); // b0|b0|b1|b1|b2|b2|b3|b3
66  bVal2 = _mm256_permutevar_ps(bVal2, permute_mask); // b4|b4|b5|b5|b6|b6|b7|b7
67 
68  cVal1 = _mm256_mul_ps(aVal1, bVal1);
69  cVal2 = _mm256_mul_ps(aVal2, bVal2);
70 
71  _mm256_store_ps((float*)cPtr,cVal1); // Store the results back into the C container
72  cPtr += 4;
73 
74  _mm256_store_ps((float*)cPtr,cVal2); // Store the results back into the C container
75  cPtr += 4;
76  }
77 
78  number = eighthPoints * 8;
79  for(;number < num_points; ++number){
80  *cPtr++ = (*aPtr++) * (*bPtr++);
81  }
82 }
83 #endif /* LV_HAVE_AVX */
84 
85 
86 
87 #ifdef LV_HAVE_SSE
88 #include <xmmintrin.h>
89  /*!
90  \brief Multiplies the input complex vector with the input float vector and store their results in the third vector
91  \param cVector The vector where the results will be stored
92  \param aVector The complex vector to be multiplied
93  \param bVector The vectors containing the float values to be multiplied against each complex value in aVector
94  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
95  */
96 static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points)
97 {
98  unsigned int number = 0;
99  const unsigned int quarterPoints = num_points / 4;
100 
101  lv_32fc_t* cPtr = cVector;
102  const lv_32fc_t* aPtr = aVector;
103  const float* bPtr= bVector;
104 
105  __m128 aVal1, aVal2, bVal, bVal1, bVal2, cVal;
106  for(;number < quarterPoints; number++){
107 
108  aVal1 = _mm_load_ps((const float*)aPtr);
109  aPtr += 2;
110 
111  aVal2 = _mm_load_ps((const float*)aPtr);
112  aPtr += 2;
113 
114  bVal = _mm_load_ps(bPtr);
115  bPtr += 4;
116 
117  bVal1 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(1,1,0,0));
118  bVal2 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(3,3,2,2));
119 
120  cVal = _mm_mul_ps(aVal1, bVal1);
121 
122  _mm_store_ps((float*)cPtr,cVal); // Store the results back into the C container
123  cPtr += 2;
124 
125  cVal = _mm_mul_ps(aVal2, bVal2);
126 
127  _mm_store_ps((float*)cPtr,cVal); // Store the results back into the C container
128 
129  cPtr += 2;
130  }
131 
132  number = quarterPoints * 4;
133  for(;number < num_points; number++){
134  *cPtr++ = (*aPtr++) * (*bPtr);
135  bPtr++;
136  }
137 }
138 #endif /* LV_HAVE_SSE */
139 
140 #ifdef LV_HAVE_GENERIC
141  /*!
142  \brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector
143  \param cVector The vector where the results will be stored
144  \param aVector The complex vector to be multiplied
145  \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector
146  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
147  */
148 static inline void volk_32fc_32f_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
149  lv_32fc_t* cPtr = cVector;
150  const lv_32fc_t* aPtr = aVector;
151  const float* bPtr= bVector;
152  unsigned int number = 0;
153 
154  for(number = 0; number < num_points; number++){
155  *cPtr++ = (*aPtr++) * (*bPtr++);
156  }
157 }
158 #endif /* LV_HAVE_GENERIC */
159 
160 #ifdef LV_HAVE_NEON
161 #include <arm_neon.h>
/*!
  \brief Scales each complex sample of aVector by the matching real (float) value of bVector and stores the products in cVector (NEON kernel)
  \param cVector The vector where the results will be stored
  \param aVector The complex vector to be multiplied
  \param bVector The vector of float values to be multiplied against each complex value in aVector
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_32fc_32f_multiply_32fc_neon(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
  lv_32fc_t* cPtr = cVector;
  const lv_32fc_t* aPtr = aVector;
  const float* bPtr = bVector;
  unsigned int number = 0;
  const unsigned int quarter_points = num_points / 4;

  float32x4x2_t inputVector, outputVector;
  float32x4_t tapsVector;

  // Four complex points per iteration.
  for(number = 0; number < quarter_points; number++){
    // vld2q_f32 de-interleaves: val[0] receives the even floats and val[1]
    // the odd floats of the eight loaded. The input is only read, so it is
    // accessed through a const-qualified pointer.
    inputVector = vld2q_f32((const float*)aPtr);
    tapsVector = vld1q_f32(bPtr);

    // Both halves are scaled by the same four taps, element by element.
    outputVector.val[0] = vmulq_f32(inputVector.val[0], tapsVector);
    outputVector.val[1] = vmulq_f32(inputVector.val[1], tapsVector);

    // vst2q_f32 re-interleaves val[0]/val[1] on the way out.
    vst2q_f32((float*)cPtr, outputVector);
    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  // Scalar clean-up for the last (num_points % 4) points.
  for(number = quarter_points * 4; number < num_points; number++){
    *cPtr++ = (*aPtr++) * (*bPtr++);
  }
}
195 #endif /* LV_HAVE_NEON */
196 
197 #ifdef LV_HAVE_ORC
198  /*!
199  \brief Multiplies the input complex vector with the input lv_32fc_t vector and store their results in the third vector
200  \param cVector The vector where the results will be stored
201  \param aVector The complex vector to be multiplied
202  \param bVector The vectors containing the lv_32fc_t values to be multiplied against each complex value in aVector
203  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
204  */
205 extern void volk_32fc_32f_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points);
206 static inline void volk_32fc_32f_multiply_32fc_u_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
207  volk_32fc_32f_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
208 }
209 #endif /* LV_HAVE_ORC */
210 
211 
212 
213 #endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a_H */
float complex lv_32fc_t
Definition: volk_complex.h:56