GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_16i_max_star_16i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_16i_max_star_16i_a_H
24 #define INCLUDED_volk_16i_max_star_16i_a_H
25 
26 
27 #include<inttypes.h>
28 #include<stdio.h>
29 
30 
31 #ifdef LV_HAVE_SSSE3
32 
33 #include<xmmintrin.h>
34 #include<emmintrin.h>
35 #include<tmmintrin.h>
36 
/*
 * Finds the "max star" of a vector of 16-bit signed integers (aligned
 * SSE variant) and writes the single result to target[0].
 *
 * target     - output; only target[0] is written.
 * src0       - input samples; must be 16-byte aligned because the vector
 *              loop uses aligned loads. At least one element is read
 *              unconditionally (src0[0] seeds the search).
 * num_points - number of samples in src0.
 *
 * NOTE(review): the vector loop uses an ordinary signed compare per lane,
 * while the scalar reduction passes use the wrapped 16-bit difference test.
 * The two agree whenever no pairwise difference overflows 16 bits.
 */
static inline void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_points) {
    const unsigned int eighth_points = num_points / 8;
    const unsigned int leftovers = num_points % 8;
    const __m128i* p_src0 = (const __m128i*)src0;
    short candidate = src0[0];
    short cands[8];
    unsigned int i;

    /* Seed every lane with the first sample. Seeding with zero would make
     * zero win over an all-negative input. */
    __m128i best = _mm_set1_epi16(candidate);

    for (i = 0; i < eighth_points; ++i) {
        const __m128i in = _mm_load_si128(p_src0);
        /* keep is all-ones in lanes where the running best is strictly greater. */
        const __m128i keep = _mm_cmpgt_epi16(best, in);
        best = _mm_or_si128(_mm_and_si128(keep, best), _mm_andnot_si128(keep, in));
        ++p_src0;
    }

    /* cands has only the alignment of short, so use an unaligned store. */
    _mm_storeu_si128((__m128i*)cands, best);

    /* Horizontal reduction of the eight per-lane results. */
    for (i = 0; i < 8; ++i) {
        candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
    }

    /* Samples that did not fill a whole vector. */
    for (i = 0; i < leftovers; ++i) {
        const short sample = src0[eighth_points * 8 + i];
        candidate = ((short)(candidate - sample) > 0) ? candidate : sample;
    }

    target[0] = candidate;
}
107 
108 #endif /*LV_HAVE_SSSE3*/
109 
110 #ifdef LV_HAVE_NEON
111 #include <arm_neon.h>
/*
 * Finds the "max star" of a vector of 16-bit signed integers (NEON variant)
 * and writes the single result to target[0].
 *
 * target     - output; only target[0] is written.
 * src0       - input samples. At least one element is read unconditionally
 *              (src0[0] seeds the search).
 * num_points - number of samples in src0.
 *
 * Comparison is done on the wrapped 16-bit difference (candidate - sample),
 * both in the vector loop and in the scalar passes.
 */
static inline void volk_16i_max_star_16i_neon(short* target, short* src0, unsigned int num_points) {
    const unsigned int eighth_points = num_points / 8;
    const unsigned int tail_points = num_points % 8;
    unsigned int number;
    int16_t lanes[8];
    short candidate = src0[0];

    const int16x8_t zeros = vdupq_n_s16(0);
    /* Every lane starts from the first sample. The loop below then starts at
     * index 0 as well, so no element is skipped and nothing past the end of
     * the input is touched. */
    int16x8_t candidate_vec = vld1q_dup_s16(src0);

    for (number = 0; number < eighth_points; ++number) {
        const int16x8_t input_vec = vld1q_s16(src0);
        __builtin_prefetch(src0 + 16);
        const int16x8_t diff = vsubq_s16(candidate_vec, input_vec);
        /* Lanes where the wrapped difference is >= 0 keep the current best. */
        const uint16x8_t keep = vcgeq_s16(diff, zeros);
        candidate_vec = vbslq_s16(keep, candidate_vec, input_vec);
        src0 += 8;
    }

    /* Spill all eight lanes into a buffer large enough to hold them, then
     * reduce them to a single value. */
    vst1q_s16(lanes, candidate_vec);
    for (number = 0; number < 8; ++number) {
        candidate = ((int16_t)(candidate - lanes[number]) > 0) ? candidate : lanes[number];
    }

    /* Samples that did not fill a whole vector; src0 now points at them. */
    for (number = 0; number < tail_points; ++number) {
        candidate = ((int16_t)(candidate - src0[number]) > 0) ? candidate : src0[number];
    }

    target[0] = candidate;
}
146 #endif /*LV_HAVE_NEON*/
147 
148 #ifdef LV_HAVE_GENERIC
149 
/*
 * Finds the "max star" of a vector of 16-bit signed integers (portable
 * scalar variant) and writes the single result to target[0].
 *
 * target     - output; only target[0] is written.
 * src0       - input samples; src0[0] is read unconditionally as the seed.
 * num_points - number of samples in src0.
 *
 * A sample replaces the running best when the 16-bit wrapped difference
 * (best - sample) is not strictly positive.
 */
static inline void volk_16i_max_star_16i_generic(short* target, short* src0, unsigned int num_points) {
    /* Element count obtained from the byte length, as (num_points * 2) / 2. */
    const unsigned int byte_len = num_points * 2u;
    const int count = (int)(byte_len >> 1);
    short best = src0[0];
    int k;

    for (k = 1; k < count; ++k) {
        const short sample = src0[k];
        const short delta = (short)(best - sample);
        if (delta <= 0) {
            best = sample;
        }
    }

    *target = best;
}
165 
166 
167 #endif /*LV_HAVE_GENERIC*/
168 
169 
170 #endif /*INCLUDED_volk_16i_max_star_16i_a_H*/
signed short int16_t
Definition: stdint.h:76