GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_x2_add_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32f_x2_add_32f_u_H
24 #define INCLUDED_volk_32f_x2_add_32f_u_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 
29 #ifdef LV_HAVE_SSE
30 #include <xmmintrin.h>
/*!
  \brief Computes the element-wise sum of two float vectors with SSE, using unaligned memory access
  \param cVector Destination buffer; receives aVector[i] + bVector[i] for every i below num_points
  \param aVector First addend vector
  \param bVector Second addend vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Largest multiple of four that does not exceed num_points. */
  const unsigned int vectorizedCount = num_points & ~3u;
  unsigned int i;

  /* Bulk of the work: one 4-float SSE register per iteration, loaded and
     stored with the unaligned intrinsics so no alignment is required. */
  for(i = 0; i < vectorizedCount; i += 4){
    const __m128 lhs = _mm_loadu_ps(aVector + i);
    const __m128 rhs = _mm_loadu_ps(bVector + i);

    _mm_storeu_ps(cVector + i, _mm_add_ps(lhs, rhs));
  }

  /* Scalar tail for the 0-3 elements that do not fill a whole register. */
  for(; i < num_points; i++){
    cVector[i] = aVector[i] + bVector[i];
  }
}
66 #endif /* LV_HAVE_SSE */
67 
68 #ifdef LV_HAVE_GENERIC
/*!
  \brief Computes the element-wise sum of two float vectors in portable scalar C
  \param cVector Destination buffer; receives aVector[i] + bVector[i] for every i below num_points
  \param aVector First addend vector
  \param bVector Second addend vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_add_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int i;

  /* One element per iteration, in ascending index order. */
  for(i = 0; i < num_points; i++){
    cVector[i] = aVector[i] + bVector[i];
  }
}
86 #endif /* LV_HAVE_GENERIC */
87 
88 #endif /* INCLUDED_volk_32f_x2_add_32f_u_H */
89 #ifndef INCLUDED_volk_32f_x2_add_32f_a_H
90 #define INCLUDED_volk_32f_x2_add_32f_a_H
91 
92 #include <inttypes.h>
93 #include <stdio.h>
94 
95 #ifdef LV_HAVE_SSE
96 #include <xmmintrin.h>
/*!
  \brief Computes the element-wise sum of two float vectors with SSE, using aligned memory access
  \param cVector Destination buffer; receives aVector[i] + bVector[i] for every i below num_points
  \param aVector First addend vector
  \param bVector Second addend vector
  \param num_points Number of elements read from aVector and bVector and written to cVector

  The vector loop uses the aligned load/store intrinsics, so all three buffers
  must start on a 16-byte boundary whenever num_points is at least 4.
*/
static inline void volk_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  /* Largest multiple of four that does not exceed num_points. */
  const unsigned int vectorizedCount = num_points & ~3u;
  unsigned int i;

  /* Bulk of the work: one 4-float SSE register per iteration. */
  for(i = 0; i < vectorizedCount; i += 4){
    const __m128 lhs = _mm_load_ps(aVector + i);
    const __m128 rhs = _mm_load_ps(bVector + i);

    _mm_store_ps(cVector + i, _mm_add_ps(lhs, rhs));
  }

  /* Scalar tail for the 0-3 elements that do not fill a whole register. */
  for(; i < num_points; i++){
    cVector[i] = aVector[i] + bVector[i];
  }
}
132 #endif /* LV_HAVE_SSE */
133 
134 #ifdef LV_HAVE_NEON
135 #include <arm_neon.h>
/*!
  \brief Computes the element-wise sum of two float vectors with NEON
  \param cVector Destination buffer; receives aVector[i] + bVector[i] for every i below num_points
  \param aVector First addend vector
  \param bVector Second addend vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_add_32f_u_neon(float* cVector, const float* aVector, const float* bVector, unsigned int num_points) {
  // Largest multiple of four that does not exceed num_points
  const unsigned int vectorizedCount = num_points & ~3u;
  unsigned int i;

  for(i = 0; i < vectorizedCount; i += 4){
    // Pull four floats from each input into NEON quad registers
    const float32x4_t lhs = vld1q_f32(aVector + i);
    const float32x4_t rhs = vld1q_f32(bVector + i);

    // Prefetch hint for the four floats that follow the ones just loaded
    __builtin_prefetch(aVector + i + 4);
    __builtin_prefetch(bVector + i + 4);

    // Add the two registers and write the four sums to the output
    vst1q_f32(cVector + i, vaddq_f32(lhs, rhs));
  }

  // Scalar tail for the 0-3 elements that do not fill a whole register
  for(; i < num_points; i++){
    cVector[i] = aVector[i] + bVector[i];
  }
}
174 
175 #endif /* LV_HAVE_NEON */
176 
177 #ifdef LV_HAVE_GENERIC
/*!
  \brief Computes the element-wise sum of two float vectors in portable scalar C
  \param cVector Destination buffer; receives aVector[i] + bVector[i] for every i below num_points
  \param aVector First addend vector
  \param bVector Second addend vector
  \param num_points Number of elements read from aVector and bVector and written to cVector
*/
static inline void volk_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int i;

  /* One element per iteration, in ascending index order. */
  for(i = 0; i < num_points; i++){
    cVector[i] = aVector[i] + bVector[i];
  }
}
195 #endif /* LV_HAVE_GENERIC */
196 
197 #ifdef LV_HAVE_ORC
/* ORC-backed implementation; declared here, defined outside this header. */
extern void volk_32f_x2_add_32f_a_orc_impl(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           unsigned int num_points);

/*!
  \brief Adds the two input vectors and stores the result in the third vector by delegating to the ORC implementation
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_32f_x2_add_32f_u_orc(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points){
  // All arguments are forwarded unchanged; no work is done in this wrapper.
  volk_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
209 #endif /* LV_HAVE_ORC */
210 
211 
212 #endif /* INCLUDED_volk_32f_x2_add_32f_a_H */