GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_32f_index_max_16u.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_32f_index_max_16u_a_H
24 #define INCLUDED_volk_32f_index_max_16u_a_H
25 
26 #include <volk/volk_common.h>
27 #include <volk/volk_common.h>
28 #include <inttypes.h>
29 #include <stdio.h>
30 
31 #ifdef LV_HAVE_SSE4_1
32 #include<smmintrin.h>
33 
34 static inline void volk_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
35  if(num_points > 0){
36  unsigned int number = 0;
37  const unsigned int quarterPoints = num_points / 4;
38 
39  float* inputPtr = (float*)src0;
40 
41  __m128 indexIncrementValues = _mm_set1_ps(4);
42  __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
43 
44  float max = src0[0];
45  float index = 0;
46  __m128 maxValues = _mm_set1_ps(max);
47  __m128 maxValuesIndex = _mm_setzero_ps();
48  __m128 compareResults;
49  __m128 currentValues;
50 
51  __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
52  __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
53 
54  for(;number < quarterPoints; number++){
55 
56  currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
57  currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
58 
59  compareResults = _mm_cmpgt_ps(maxValues, currentValues);
60 
61  maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
62  maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults);
63  }
64 
65  // Calculate the largest value from the remaining 4 points
66  _mm_store_ps(maxValuesBuffer, maxValues);
67  _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
68 
69  for(number = 0; number < 4; number++){
70  if(maxValuesBuffer[number] > max){
71  index = maxIndexesBuffer[number];
72  max = maxValuesBuffer[number];
73  }
74  }
75 
76  number = quarterPoints * 4;
77  for(;number < num_points; number++){
78  if(src0[number] > max){
79  index = number;
80  max = src0[number];
81  }
82  }
83  target[0] = (unsigned int)index;
84  }
85 }
86 
87 #endif /*LV_HAVE_SSE4_1*/
88 
89 #ifdef LV_HAVE_SSE
90 #include<xmmintrin.h>
91 
92 static inline void volk_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) {
93  if(num_points > 0){
94  unsigned int number = 0;
95  const unsigned int quarterPoints = num_points / 4;
96 
97  float* inputPtr = (float*)src0;
98 
99  __m128 indexIncrementValues = _mm_set1_ps(4);
100  __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
101 
102  float max = src0[0];
103  float index = 0;
104  __m128 maxValues = _mm_set1_ps(max);
105  __m128 maxValuesIndex = _mm_setzero_ps();
106  __m128 compareResults;
107  __m128 currentValues;
108 
109  __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
110  __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
111 
112  for(;number < quarterPoints; number++){
113 
114  currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
115  currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
116 
117  compareResults = _mm_cmpgt_ps(maxValues, currentValues);
118 
119  maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes));
120 
121  maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues));
122  }
123 
124  // Calculate the largest value from the remaining 4 points
125  _mm_store_ps(maxValuesBuffer, maxValues);
126  _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
127 
128  for(number = 0; number < 4; number++){
129  if(maxValuesBuffer[number] > max){
130  index = maxIndexesBuffer[number];
131  max = maxValuesBuffer[number];
132  }
133  }
134 
135  number = quarterPoints * 4;
136  for(;number < num_points; number++){
137  if(src0[number] > max){
138  index = number;
139  max = src0[number];
140  }
141  }
142  target[0] = (unsigned int)index;
143  }
144 }
145 
146 #endif /*LV_HAVE_SSE*/
147 
148 #ifdef LV_HAVE_GENERIC
149 static inline void volk_32f_index_max_16u_generic(unsigned int* target, const float* src0, unsigned int num_points) {
150  if(num_points > 0){
151  float max = src0[0];
152  unsigned int index = 0;
153 
154  unsigned int i = 1;
155 
156  for(; i < num_points; ++i) {
157 
158  if(src0[i] > max){
159  index = i;
160  max = src0[i];
161  }
162 
163  }
164  target[0] = index;
165  }
166 }
167 
168 #endif /*LV_HAVE_GENERIC*/
169 
170 
171 #endif /*INCLUDED_volk_32f_index_max_16u_a_H*/
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27