GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_s32f_stddev_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32f_s32f_stddev_32f_a_H
24 #define INCLUDED_volk_32f_s32f_stddev_32f_a_H
25 
26 #include <volk/volk_common.h>
27 #include <inttypes.h>
28 #include <stdio.h>
29 #include <math.h>
30 
31 #ifdef LV_HAVE_SSE4_1
32 #include <smmintrin.h>
33 /*!
34  \brief Calculates the standard deviation of the input buffer using the supplied mean
35  \param stddev The calculated standard deviation
36  \param inputBuffer The buffer of points to calculate the std deviation for
37  \param mean The mean of the input buffer
38  \param num_points The number of values in input buffer to used in the stddev calculation
39 */
40 static inline void volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
41  float returnValue = 0;
42  if(num_points > 0){
43  unsigned int number = 0;
44  const unsigned int sixteenthPoints = num_points / 16;
45 
46  const float* aPtr = inputBuffer;
47 
48  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
49 
50  __m128 squareAccumulator = _mm_setzero_ps();
51  __m128 aVal1, aVal2, aVal3, aVal4;
52  __m128 cVal1, cVal2, cVal3, cVal4;
53  for(;number < sixteenthPoints; number++) {
54  aVal1 = _mm_load_ps(aPtr); aPtr += 4;
55  cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
56 
57  aVal2 = _mm_load_ps(aPtr); aPtr += 4;
58  cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
59 
60  aVal3 = _mm_load_ps(aPtr); aPtr += 4;
61  cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
62 
63  aVal4 = _mm_load_ps(aPtr); aPtr += 4;
64  cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
65 
66  cVal1 = _mm_or_ps(cVal1, cVal2);
67  cVal3 = _mm_or_ps(cVal3, cVal4);
68  cVal1 = _mm_or_ps(cVal1, cVal3);
69 
70  squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
71  }
72  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
73  returnValue = squareBuffer[0];
74  returnValue += squareBuffer[1];
75  returnValue += squareBuffer[2];
76  returnValue += squareBuffer[3];
77 
78  number = sixteenthPoints * 16;
79  for(;number < num_points; number++){
80  returnValue += (*aPtr) * (*aPtr);
81  aPtr++;
82  }
83  returnValue /= num_points;
84  returnValue -= (mean * mean);
85  returnValue = sqrtf(returnValue);
86  }
87  *stddev = returnValue;
88 }
89 #endif /* LV_HAVE_SSE4_1 */
90 
91 #ifdef LV_HAVE_SSE
92 #include <xmmintrin.h>
93 /*!
94  \brief Calculates the standard deviation of the input buffer using the supplied mean
95  \param stddev The calculated standard deviation
96  \param inputBuffer The buffer of points to calculate the std deviation for
97  \param mean The mean of the input buffer
98  \param num_points The number of values in input buffer to used in the stddev calculation
99 */
100 static inline void volk_32f_s32f_stddev_32f_a_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
101  float returnValue = 0;
102  if(num_points > 0){
103  unsigned int number = 0;
104  const unsigned int quarterPoints = num_points / 4;
105 
106  const float* aPtr = inputBuffer;
107 
108  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
109 
110  __m128 squareAccumulator = _mm_setzero_ps();
111  __m128 aVal = _mm_setzero_ps();
112  for(;number < quarterPoints; number++) {
113  aVal = _mm_load_ps(aPtr); // aVal = x
114  aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2
115  squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
116  aPtr += 4;
117  }
118  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
119  returnValue = squareBuffer[0];
120  returnValue += squareBuffer[1];
121  returnValue += squareBuffer[2];
122  returnValue += squareBuffer[3];
123 
124  number = quarterPoints * 4;
125  for(;number < num_points; number++){
126  returnValue += (*aPtr) * (*aPtr);
127  aPtr++;
128  }
129  returnValue /= num_points;
130  returnValue -= (mean * mean);
131  returnValue = sqrtf(returnValue);
132  }
133  *stddev = returnValue;
134 }
135 #endif /* LV_HAVE_SSE */
136 
137 #ifdef LV_HAVE_GENERIC
138 /*!
139  \brief Calculates the standard deviation of the input buffer using the supplied mean
140  \param stddev The calculated standard deviation
141  \param inputBuffer The buffer of points to calculate the std deviation for
142  \param mean The mean of the input buffer
143  \param num_points The number of values in input buffer to used in the stddev calculation
144 */
145 static inline void volk_32f_s32f_stddev_32f_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
146  float returnValue = 0;
147  if(num_points > 0){
148  const float* aPtr = inputBuffer;
149  unsigned int number = 0;
150 
151  for(number = 0; number < num_points; number++){
152  returnValue += (*aPtr) * (*aPtr);
153  aPtr++;
154  }
155 
156  returnValue /= num_points;
157  returnValue -= (mean * mean);
158  returnValue = sqrtf(returnValue);
159  }
160  *stddev = returnValue;
161 }
162 #endif /* LV_HAVE_GENERIC */
163 
164 
165 
166 
167 #endif /* INCLUDED_volk_32f_s32f_stddev_32f_a_H */
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27