23 #ifndef INCLUDED_volk_32f_index_max_16u_a_H
24 #define INCLUDED_volk_32f_index_max_16u_a_H
/*
 * Finds the position of the largest float among the first num_points
 * elements of src0 and stores it in target[0].
 *
 * target     - output; the winning index is written to target[0].
 * src0       - input samples. They are read with _mm_load_ps, which
 *              requires a 16-byte aligned address.
 * num_points - number of floats to examine.
 *
 * NOTE(review): this listing omits several lines of the function. The
 * declarations of max, index, currentValues, maxValuesBuffer and
 * maxIndexesBuffer, the closing braces, and the body of the tail-loop `if`
 * are not visible here; confirm them against the full header.
 */
34 static inline void volk_32f_index_max_16u_a_sse4_1(
unsigned int* target,
const float* src0,
unsigned int num_points) {
36 unsigned int number = 0;
/* Number of complete 4-float groups handled by the vector loop. */
37 const unsigned int quarterPoints = num_points / 4;
39 float* inputPtr = (
float*)src0;
/*
 * Element indices are tracked as floats, one per lane. _mm_set_ps takes its
 * arguments highest lane first, so the lanes start at -4,-3,-2,-1 and become
 * 0,1,2,3 after the first increment by 4.
 * NOTE(review): a float represents every integer exactly only up to 2^24;
 * whether larger inputs can occur depends on the callers - confirm.
 */
41 __m128 indexIncrementValues = _mm_set1_ps(4);
42 __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
/* Every lane of the running maximum starts at max, every lane index at 0. */
46 __m128 maxValues = _mm_set1_ps(max);
47 __m128 maxValuesIndex = _mm_setzero_ps();
48 __m128 compareResults;
/* Vector loop: four input floats per iteration. */
54 for(;number < quarterPoints; number++){
56 currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
57 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
/* Lane mask is set where the running maximum is strictly greater. */
59 compareResults = _mm_cmpgt_ps(maxValues, currentValues);
/*
 * _mm_blendv_ps picks its second operand where the mask is set: lanes where
 * the old maximum still wins keep their value and index, all other lanes
 * (including ties) take the current value and index.
 */
61 maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
62 maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults);
/*
 * Spill the four per-lane maxima and their indices to scalar buffers
 * (_mm_store_ps needs 16-byte aligned destinations) and reduce them.
 */
66 _mm_store_ps(maxValuesBuffer, maxValues);
67 _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
69 for(number = 0; number < 4; number++){
70 if(maxValuesBuffer[number] > max){
71 index = maxIndexesBuffer[number];
72 max = maxValuesBuffer[number];
/* Scalar tail: the num_points % 4 elements the vector loop did not cover. */
76 number = quarterPoints * 4;
77 for(;number < num_points; number++){
78 if(src0[number] > max){
/* index is converted to unsigned int only here, on output. */
83 target[0] = (
unsigned int)index;
/*
 * Finds the position of the largest float among the first num_points
 * elements of src0 and stores it in target[0]. Per-lane selection is done
 * with and/andnot/or masking rather than a blend instruction.
 *
 * target     - output; the winning index is written to target[0].
 * src0       - input samples. They are read with _mm_load_ps, which
 *              requires a 16-byte aligned address.
 * num_points - number of floats to examine.
 *
 * NOTE(review): this listing omits several lines of the function. The
 * declarations of max, index, maxValuesBuffer and maxIndexesBuffer, the
 * closing braces, and the body of the tail-loop `if` are not visible here;
 * confirm them against the full header.
 */
92 static inline void volk_32f_index_max_16u_a_sse(
unsigned int* target,
const float* src0,
unsigned int num_points) {
94 unsigned int number = 0;
/* Number of complete 4-float groups handled by the vector loop. */
95 const unsigned int quarterPoints = num_points / 4;
97 float* inputPtr = (
float*)src0;
/*
 * Element indices are tracked as floats, one per lane. _mm_set_ps takes its
 * arguments highest lane first, so the lanes start at -4,-3,-2,-1 and become
 * 0,1,2,3 after the first increment by 4.
 * NOTE(review): a float represents every integer exactly only up to 2^24;
 * whether larger inputs can occur depends on the callers - confirm.
 */
99 __m128 indexIncrementValues = _mm_set1_ps(4);
100 __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
/* Every lane of the running maximum starts at max, every lane index at 0. */
104 __m128 maxValues = _mm_set1_ps(max);
105 __m128 maxValuesIndex = _mm_setzero_ps();
106 __m128 compareResults;
107 __m128 currentValues;
/* Vector loop: four input floats per iteration. */
112 for(;number < quarterPoints; number++){
114 currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
115 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
/* Lane mask is all ones where the running maximum is strictly greater. */
117 compareResults = _mm_cmpgt_ps(maxValues, currentValues);
/*
 * Bitwise select: (mask & old) | (~mask & current). Lanes where the old
 * maximum still wins keep their value and index; all other lanes
 * (including ties) take the current value and index.
 */
119 maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes));
121 maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues));
/*
 * Spill the four per-lane maxima and their indices to scalar buffers
 * (_mm_store_ps needs 16-byte aligned destinations) and reduce them.
 */
125 _mm_store_ps(maxValuesBuffer, maxValues);
126 _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
128 for(number = 0; number < 4; number++){
129 if(maxValuesBuffer[number] > max){
130 index = maxIndexesBuffer[number];
131 max = maxValuesBuffer[number];
/* Scalar tail: the num_points % 4 elements the vector loop did not cover. */
135 number = quarterPoints * 4;
136 for(;number < num_points; number++){
137 if(src0[number] > max){
/* index is converted to unsigned int only here, on output. */
142 target[0] = (
unsigned int)index;
148 #ifdef LV_HAVE_GENERIC
149 static inline void volk_32f_index_max_16u_generic(
unsigned int* target,
const float* src0,
unsigned int num_points) {
152 unsigned int index = 0;
156 for(; i < num_points; ++i) {
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27