GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_invsqrt_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32f_invsqrt_32f_a_H
24 #define INCLUDED_volk_32f_invsqrt_32f_a_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 #include <math.h>
29 #include <string.h>
30 
/*!
  \brief Fast approximation of the reciprocal square root, 1/sqrt(number)

  The float's bit pattern is reinterpreted as a 32-bit integer through a
  union. Subtracting half of that integer from the constant 0x5f3759df gives
  the bit pattern of a first guess, which is then refined with a single
  Newton-Raphson step: y = y * (1.5 - (number / 2) * y * y).

  No special handling exists for zero, negative, infinite or NaN inputs.

  \param number Value whose inverse square root is wanted
  \return Approximation of 1/sqrt(number)
*/
static inline float Q_rsqrt( float number )
{
  union {
    float   f;
    int32_t i;
  } pun;
  const float halfNumber = number * 0.5F;
  float estimate;

  /* Bit-level initial guess. */
  pun.f = number;
  pun.i = 0x5f3759df - ( pun.i >> 1 );
  estimate = pun.f;

  /* One Newton-Raphson refinement; no further iterations are performed. */
  estimate = estimate * ( 1.5F - ( halfNumber * estimate * estimate ) );

  return estimate;
}
48 
49 #ifdef LV_HAVE_AVX
50 #include <immintrin.h>
/*!
  \brief Computes an approximate inverse square root, 1/sqrt(x), of every element of aVector (AVX, aligned buffers)

  Full groups of eight floats are processed with _mm256_rsqrt_ps using the
  aligned load/store intrinsics, so both buffers are expected to be 32-byte
  aligned. The remaining (num_points % 8) elements are computed one at a time
  with Q_rsqrt.

  \param cVector The vector where the results will be stored
  \param aVector The vector of input values to be inverse square rooted
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_invsqrt_32f_a_avx(float* cVector, const float* aVector, unsigned int num_points){
  const unsigned int blockCount = num_points / 8;
  const unsigned int tailCount = num_points % 8;
  const float* in = aVector;
  float* out = cVector;
  unsigned int i;

  /* Vectorized part: eight results per iteration. */
  for (i = 0; i < blockCount; i++) {
    const __m256 input = _mm256_load_ps(in);
    _mm256_store_ps(out, _mm256_rsqrt_ps(input));
    in += 8;
    out += 8;
  }

  /* Scalar part: whatever did not fill a complete group of eight. */
  for (i = 0; i < tailCount; i++) {
    out[i] = Q_rsqrt(in[i]);
  }
}
78 #endif /* LV_HAVE_AVX */
79 
80 #ifdef LV_HAVE_SSE
81 #include <xmmintrin.h>
/*!
  \brief Computes an approximate inverse square root, 1/sqrt(x), of every element of aVector (SSE, aligned buffers)

  Full groups of four floats are processed with _mm_rsqrt_ps using the
  aligned load/store intrinsics, so both buffers are expected to be 16-byte
  aligned. The remaining (num_points % 4) elements are computed one at a time
  with Q_rsqrt.

  \param cVector The vector where the results will be stored
  \param aVector The vector of input values to be inverse square rooted
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_invsqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){
  const float* src = aVector;
  float* dst = cVector;
  unsigned int remaining = num_points;

  /* Vectorized part: consume four inputs at a time while a full group is left. */
  while (remaining >= 4) {
    _mm_store_ps(dst, _mm_rsqrt_ps(_mm_load_ps(src)));
    src += 4;
    dst += 4;
    remaining -= 4;
  }

  /* Scalar part: at most three leftover elements. */
  while (remaining > 0) {
    *dst++ = Q_rsqrt(*src++);
    remaining--;
  }
}
113 #endif /* LV_HAVE_SSE */
114 
115 #ifdef LV_HAVE_NEON
116 #include <arm_neon.h>
/*!
  \brief Computes an approximate inverse square root, 1/sqrt(x), of every element of aVector (NEON)

  Full groups of four floats are processed with the vrsqrteq_f32 estimate
  intrinsic; no refinement step is applied to that estimate. The remaining
  (num_points % 4) elements are computed one at a time with Q_rsqrt.

  \param cVector The vector where the results will be stored
  \param aVector The vector of input values to be inverse square rooted
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_invsqrt_32f_neon(float* cVector, const float* aVector, unsigned int num_points){
  const unsigned int vectorizedPoints = (num_points / 4) * 4;
  unsigned int idx;

  /* Vectorized part: four results per iteration. */
  for (idx = 0; idx < vectorizedPoints; idx += 4) {
    const float32x4_t input = vld1q_f32(aVector + idx);
    vst1q_f32(cVector + idx, vrsqrteq_f32(input));
  }

  /* Scalar part: at most three leftover elements. */
  for (; idx < num_points; idx++) {
    cVector[idx] = Q_rsqrt(aVector[idx]);
  }
}
143 #endif /* LV_HAVE_NEON */
144 
145 
146 #ifdef LV_HAVE_GENERIC
/*!
  \brief Computes an approximate inverse square root, 1/sqrt(x), of every element of aVector (portable C)

  Each output element is produced by a call to Q_rsqrt on the matching
  input element.

  \param cVector The vector where the results will be stored
  \param aVector The vector of input values to be inverse square rooted
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_invsqrt_32f_generic(float* cVector, const float* aVector, unsigned int num_points){
  unsigned int idx;

  for (idx = 0; idx < num_points; ++idx) {
    cVector[idx] = Q_rsqrt(aVector[idx]);
  }
}
161 #endif /* LV_HAVE_GENERIC */
162 
163 #ifdef LV_HAVE_AVX
164 #include <immintrin.h>
/*!
  \brief Computes an approximate inverse square root, 1/sqrt(x), of every element of aVector (AVX, unaligned buffers)

  Full groups of eight floats are processed with _mm256_rsqrt_ps using the
  unaligned load/store intrinsics (_mm256_loadu_ps / _mm256_storeu_ps). The
  remaining (num_points % 8) elements are computed one at a time with Q_rsqrt.

  \param cVector The vector where the results will be stored
  \param aVector The vector of input values to be inverse square rooted
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_invsqrt_32f_u_avx(float* cVector, const float* aVector, unsigned int num_points){
  const float* src = aVector;
  float* dst = cVector;
  unsigned int blocksLeft = num_points / 8;
  unsigned int tailLeft = num_points % 8;

  /* Vectorized part: eight results per iteration. */
  for (; blocksLeft != 0; --blocksLeft) {
    const __m256 input = _mm256_loadu_ps(src);
    const __m256 result = _mm256_rsqrt_ps(input);
    _mm256_storeu_ps(dst, result);
    src += 8;
    dst += 8;
  }

  /* Scalar part: whatever did not fill a complete group of eight. */
  for (; tailLeft != 0; --tailLeft) {
    *dst++ = Q_rsqrt(*src++);
  }
}
192 #endif /* LV_HAVE_AVX */
193 
194 
195 
196 #endif /* INCLUDED_volk_32f_invsqrt_32f_a_H */
static float Q_rsqrt(float number)
Definition: volk_32f_invsqrt_32f.h:31
signed int int32_t
Definition: stdint.h:77