GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_stddev_and_mean_32f_x2.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
24 #define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
25 
26 #include <volk/volk_common.h>
27 #include <inttypes.h>
28 #include <stdio.h>
29 #include <math.h>
30 
31 #ifdef LV_HAVE_SSE4_1
32 #include <smmintrin.h>
33 /*!
34  \brief Calculates the standard deviation and mean of the input buffer
35  \param stddev The calculated standard deviation
36  \param mean The mean of the input buffer
37  \param inputBuffer The buffer of points to calculate the std deviation for
38  \param num_points The number of values in input buffer to used in the stddev and mean calculations
39 */
40 static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
41  float returnValue = 0;
42  float newMean = 0;
43  if(num_points > 0){
44  unsigned int number = 0;
45  const unsigned int sixteenthPoints = num_points / 16;
46 
47  const float* aPtr = inputBuffer;
48  __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
49  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
50 
51  __m128 accumulator = _mm_setzero_ps();
52  __m128 squareAccumulator = _mm_setzero_ps();
53  __m128 aVal1, aVal2, aVal3, aVal4;
54  __m128 cVal1, cVal2, cVal3, cVal4;
55  for(;number < sixteenthPoints; number++) {
56  aVal1 = _mm_load_ps(aPtr); aPtr += 4;
57  cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
58  accumulator = _mm_add_ps(accumulator, aVal1); // accumulator += x
59 
60  aVal2 = _mm_load_ps(aPtr); aPtr += 4;
61  cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
62  accumulator = _mm_add_ps(accumulator, aVal2); // accumulator += x
63 
64  aVal3 = _mm_load_ps(aPtr); aPtr += 4;
65  cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
66  accumulator = _mm_add_ps(accumulator, aVal3); // accumulator += x
67 
68  aVal4 = _mm_load_ps(aPtr); aPtr += 4;
69  cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
70  accumulator = _mm_add_ps(accumulator, aVal4); // accumulator += x
71 
72  cVal1 = _mm_or_ps(cVal1, cVal2);
73  cVal3 = _mm_or_ps(cVal3, cVal4);
74  cVal1 = _mm_or_ps(cVal1, cVal3);
75 
76  squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
77  }
78  _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
79  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
80  newMean = meanBuffer[0];
81  newMean += meanBuffer[1];
82  newMean += meanBuffer[2];
83  newMean += meanBuffer[3];
84  returnValue = squareBuffer[0];
85  returnValue += squareBuffer[1];
86  returnValue += squareBuffer[2];
87  returnValue += squareBuffer[3];
88 
89  number = sixteenthPoints * 16;
90  for(;number < num_points; number++){
91  returnValue += (*aPtr) * (*aPtr);
92  newMean += *aPtr++;
93  }
94  newMean /= num_points;
95  returnValue /= num_points;
96  returnValue -= (newMean * newMean);
97  returnValue = sqrtf(returnValue);
98  }
99  *stddev = returnValue;
100  *mean = newMean;
101 }
102 #endif /* LV_HAVE_SSE4_1 */
103 
104 #ifdef LV_HAVE_SSE
105 #include <xmmintrin.h>
106 /*!
107  \brief Calculates the standard deviation and mean of the input buffer
108  \param stddev The calculated standard deviation
109  \param mean The mean of the input buffer
110  \param inputBuffer The buffer of points to calculate the std deviation for
111  \param num_points The number of values in input buffer to used in the stddev and mean calculations
112 */
113 static inline void volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
114  float returnValue = 0;
115  float newMean = 0;
116  if(num_points > 0){
117  unsigned int number = 0;
118  const unsigned int quarterPoints = num_points / 4;
119 
120  const float* aPtr = inputBuffer;
121  __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
122  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
123 
124  __m128 accumulator = _mm_setzero_ps();
125  __m128 squareAccumulator = _mm_setzero_ps();
126  __m128 aVal = _mm_setzero_ps();
127  for(;number < quarterPoints; number++) {
128  aVal = _mm_load_ps(aPtr); // aVal = x
129  accumulator = _mm_add_ps(accumulator, aVal); // accumulator += x
130  aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2
131  squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
132  aPtr += 4;
133  }
134  _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
135  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
136  newMean = meanBuffer[0];
137  newMean += meanBuffer[1];
138  newMean += meanBuffer[2];
139  newMean += meanBuffer[3];
140  returnValue = squareBuffer[0];
141  returnValue += squareBuffer[1];
142  returnValue += squareBuffer[2];
143  returnValue += squareBuffer[3];
144 
145  number = quarterPoints * 4;
146  for(;number < num_points; number++){
147  returnValue += (*aPtr) * (*aPtr);
148  newMean += *aPtr++;
149  }
150  newMean /= num_points;
151  returnValue /= num_points;
152  returnValue -= (newMean * newMean);
153  returnValue = sqrtf(returnValue);
154  }
155  *stddev = returnValue;
156  *mean = newMean;
157 }
158 #endif /* LV_HAVE_SSE */
159 
160 #ifdef LV_HAVE_GENERIC
161 /*!
162  \brief Calculates the standard deviation and mean of the input buffer
163  \param stddev The calculated standard deviation
164  \param mean The mean of the input buffer
165  \param inputBuffer The buffer of points to calculate the std deviation for
166  \param num_points The number of values in input buffer to used in the stddev and mean calculations
167 */
168 static inline void volk_32f_stddev_and_mean_32f_x2_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
169  float returnValue = 0;
170  float newMean = 0;
171  if(num_points > 0){
172  const float* aPtr = inputBuffer;
173  unsigned int number = 0;
174 
175  for(number = 0; number < num_points; number++){
176  returnValue += (*aPtr) * (*aPtr);
177  newMean += *aPtr++;
178  }
179  newMean /= num_points;
180  returnValue /= num_points;
181  returnValue -= (newMean * newMean);
182  returnValue = sqrtf(returnValue);
183  }
184  *stddev = returnValue;
185  *mean = newMean;
186 }
187 #endif /* LV_HAVE_GENERIC */
188 
189 
190 
191 
192 #endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H */
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27