GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_8u_x4_conv_k7_r2_8u.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
24 #define INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
25 
26 
27 
/*
 * Storage for the 64 decision bits (one per state) produced for a single
 * decoded bit, viewable as bytes, 16-bit words or unsigned ints.
 * On non-MSVC compilers the typedef additionally requests 16-byte alignment.
 */
typedef union {
    unsigned char t[8];   /* 64 states / 8 bits per decision element */
    unsigned int w[2];    /* 64 states / 32 */
    unsigned short s[4];  /* 64 states / 16 */
    unsigned char c[8];   /* 64 states / 8 */
}
#ifdef _MSC_VER
decision_t;
#else
decision_t __attribute__ ((aligned (16)));
#endif
38 
/*
 * Subtract the smallest of the 64 path metrics in X from every entry, so the
 * minimum becomes zero and all differences are preserved.
 *
 * X         - array of 64 metrics, modified in place.
 * threshold - accepted for interface compatibility but not consulted;
 *             the renormalization is always performed.
 */
static inline void renormalize(unsigned char* X, unsigned char threshold){
    enum { STATE_COUNT = 64 };
    unsigned char *const end = X + STATE_COUNT;
    unsigned char *p;
    unsigned char lowest = X[0];

    (void)threshold;

    /* Find the minimum metric (X[0] is already the initial candidate). */
    for (p = X + 1; p != end; ++p) {
        if (*p < lowest) {
            lowest = *p;
        }
    }

    /* Rebase every metric on that minimum. */
    for (p = X; p != end; ++p) {
        *p -= lowest;
    }
}
52 
53 
54 
55 //helper BFLY for GENERIC version
56 static inline void BFLY(int i, int s, unsigned char * syms, unsigned char *Y, unsigned char *X, decision_t * d, unsigned char* Branchtab) {
57  int j, decision0, decision1;
58  unsigned char metric,m0,m1,m2,m3;
59 
60  int NUMSTATES = 64;
61  int RATE = 2;
62  int METRICSHIFT = 1;
63  int PRECISIONSHIFT = 2;
64 
65 
66 
67  metric =0;
68  for(j=0;j<RATE;j++)
69  metric += (Branchtab[i+j*NUMSTATES/2] ^ syms[s*RATE+j])>>METRICSHIFT ;
70  metric=metric>>PRECISIONSHIFT;
71 
72  unsigned char max = ((RATE*((256 -1)>>METRICSHIFT))>>PRECISIONSHIFT);
73 
74  m0 = X[i] + metric;
75  m1 = X[i+NUMSTATES/2] + (max - metric);
76  m2 = X[i] + (max - metric);
77  m3 = X[i+NUMSTATES/2] + metric;
78 
79  decision0 = (signed int)(m0-m1) > 0;
80  decision1 = (signed int)(m2-m3) > 0;
81 
82  Y[2*i] = decision0 ? m1 : m0;
83  Y[2*i+1] = decision1 ? m3 : m2;
84 
85  d->w[i/(sizeof(unsigned int)*8/2)+s*(sizeof(decision_t)/sizeof(unsigned int))] |=
86  (decision0|decision1<<1) << ((2*i)&(sizeof(unsigned int)*8-1));
87 }
88 
89 
90 #if LV_HAVE_SSE3
91 
92 #include <pmmintrin.h>
93 #include <emmintrin.h>
94 #include <xmmintrin.h>
95 #include <mmintrin.h>
96 #include <stdio.h>
97 
/*
 * SSE implementation of the path-metric update / decision kernel.
 *
 * Y, X      - two 64-byte metric buffers. Each main-loop pass reads X and
 *             writes Y, then reads Y and writes X.
 * syms      - symbol bytes, two per decoded bit (four are read per loop pass).
 * dec       - decision output, written as 16-bit words: eight per loop pass,
 *             i.e. four per decoded bit.
 * framebits, excess - their sum is the number of bits processed.
 * Branchtab - 64-byte branch table, read as four 16-byte vectors.
 *
 * NOTE(review): X, Y and Branchtab are dereferenced through __m128i pointers
 * and dec through a short pointer; presumably the caller must supply suitably
 * aligned buffers - confirm against the call sites.
 */
98 static inline void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char* Y, unsigned char* X, unsigned char* syms, unsigned char* dec, unsigned int framebits, unsigned int excess, unsigned char* Branchtab) {
99  unsigned int i9;
 // Main loop: each pass handles two bits (two metric updates).
100  for(i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
101  unsigned char a75, a81;
102  int a73, a92;
103  short int s20, s21, s26, s27;
104  unsigned char *a74, *a80, *b6;
105  short int *a110, *a111, *a91, *a93, *a94;
106  __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83
107  , *a95, *a96, *a97, *a98, *a99;
108  __m128i a105, a106, a86, a87;
109  __m128i a100, a101, a103, a104, a107, a108, a109
110  , a76, a78, a79, a82, a84, a85, a88, a89
111  , a90, d10, d11, d12, d9, m23, m24, m25
112  , m26, m27, m28, m29, m30, s18, s19, s22
113  , s23, s24, s25, s28, s29, t13, t14, t15
114  , t16, t17, t18;
 // ---- First update of the pass: read X, write Y ----
 // First half: metrics X[0..15] and X[32..47].
115  a71 = ((__m128i *) X);
116  s18 = *(a71);
117  a72 = (a71 + 2);
118  s19 = *(a72);
 // Broadcast syms[4*i9] and XOR it with Branchtab[0..15].
119  a73 = (4 * i9);
120  a74 = (syms + a73);
121  a75 = *(a74);
122  a76 = _mm_set1_epi8(a75);
123  a77 = ((__m128i *) Branchtab);
124  a78 = *(a77);
125  a79 = _mm_xor_si128(a76, a78);
 // Broadcast syms[4*i9+1] and XOR it with Branchtab[32..47].
126  b6 = (a73 + syms);
127  a80 = (b6 + 1);
128  a81 = *(a80);
129  a82 = _mm_set1_epi8(a81);
130  a83 = (a77 + 2);
131  a84 = *(a83);
132  a85 = _mm_xor_si128(a82, a84);
 // Branch metric t14: byte-wise rounded average of the two XOR results,
 // shifted right by 2. The shift works on 16-bit lanes, so the AND with 63
 // removes bits carried over from the neighbouring byte.
133  t13 = _mm_avg_epu8(a79,a85);
134  a86 = ((__m128i ) t13);
135  a87 = _mm_srli_epi16(a86, 2);
136  a88 = ((__m128i ) a87);
137  t14 = _mm_and_si128(a88, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
138  , 63, 63, 63, 63, 63, 63, 63, 63
139  , 63));
 // Complementary metric t15 = 63 - t14.
140  t15 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
141  , 63, 63, 63, 63, 63, 63, 63, 63
142  , 63), t14);
 // Four candidate metrics, formed with saturating unsigned adds.
143  m23 = _mm_adds_epu8(s18, t14);
144  m24 = _mm_adds_epu8(s19, t15);
145  m25 = _mm_adds_epu8(s18, t15);
146  m26 = _mm_adds_epu8(s19, t14);
 // Survivor = byte-wise minimum; the decision mask is 0xFF where the
 // candidate built from s19 is the minimum (ties included).
147  a89 = _mm_min_epu8(m24, m23);
148  d9 = _mm_cmpeq_epi8(a89, m24);
149  a90 = _mm_min_epu8(m26, m25);
150  d10 = _mm_cmpeq_epi8(a90, m26);
 // Interleave the two decision masks and collect one bit per byte;
 // the two 16-bit results go to dec (viewed as short) at 8*i9 and 8*i9+1.
151  s20 = _mm_movemask_epi8(_mm_unpacklo_epi8(d9,d10));
152  a91 = ((short int *) dec);
153  a92 = (8 * i9);
154  a93 = (a91 + a92);
155  *(a93) = s20;
156  s21 = _mm_movemask_epi8(_mm_unpackhi_epi8(d9,d10));
157  a94 = (a93 + 1);
158  *(a94) = s21;
 // Interleave the survivors and store them to Y[0..31].
159  s22 = _mm_unpacklo_epi8(a89, a90);
160  s23 = _mm_unpackhi_epi8(a89, a90);
161  a95 = ((__m128i *) Y);
162  *(a95) = s22;
163  a96 = (a95 + 1);
164  *(a96) = s23;
 // Second half: same steps with X[16..31], X[48..63], Branchtab[16..31]
 // and Branchtab[48..63]; results go to Y[32..63] and dec shorts 8*i9+2, 8*i9+3.
165  a97 = (a71 + 1);
166  s24 = *(a97);
167  a98 = (a71 + 3);
168  s25 = *(a98);
169  a99 = (a77 + 1);
170  a100 = *(a99);
171  a101 = _mm_xor_si128(a76, a100);
172  a102 = (a77 + 3);
173  a103 = *(a102);
174  a104 = _mm_xor_si128(a82, a103);
175  t16 = _mm_avg_epu8(a101,a104);
176  a105 = ((__m128i ) t16);
177  a106 = _mm_srli_epi16(a105, 2);
178  a107 = ((__m128i ) a106);
179  t17 = _mm_and_si128(a107, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
180  , 63, 63, 63, 63, 63, 63, 63, 63
181  , 63));
182  t18 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
183  , 63, 63, 63, 63, 63, 63, 63, 63
184  , 63), t17);
185  m27 = _mm_adds_epu8(s24, t17);
186  m28 = _mm_adds_epu8(s25, t18);
187  m29 = _mm_adds_epu8(s24, t18);
188  m30 = _mm_adds_epu8(s25, t17);
189  a108 = _mm_min_epu8(m28, m27);
190  d11 = _mm_cmpeq_epi8(a108, m28);
191  a109 = _mm_min_epu8(m30, m29);
192  d12 = _mm_cmpeq_epi8(a109, m30);
193  s26 = _mm_movemask_epi8(_mm_unpacklo_epi8(d11,d12));
194  a110 = (a93 + 2);
195  *(a110) = s26;
196  s27 = _mm_movemask_epi8(_mm_unpackhi_epi8(d11,d12));
197  a111 = (a93 + 3);
198  *(a111) = s27;
199  s28 = _mm_unpacklo_epi8(a108, a109);
200  s29 = _mm_unpackhi_epi8(a108, a109);
201  a112 = (a95 + 2);
202  *(a112) = s28;
203  a113 = (a95 + 3);
204  *(a113) = s29;
 // Renormalize Y only when its first metric exceeds 210: reduce the four
 // vectors to a single minimum byte, broadcast it to all 16 lanes (m6) and
 // subtract it from every metric.
205  if ((((unsigned char *) Y)[0]>210)) {
206  __m128i m5, m6;
207  m5 = ((__m128i *) Y)[0];
208  m5 = _mm_min_epu8(m5, ((__m128i *) Y)[1]);
209  m5 = _mm_min_epu8(m5, ((__m128i *) Y)[2]);
210  m5 = _mm_min_epu8(m5, ((__m128i *) Y)[3]);
211  __m128i m7;
 // Horizontal minimum: fold 16 -> 8 -> 4 -> 2 -> 1 bytes into byte 0.
212  m7 = _mm_min_epu8(_mm_srli_si128(m5, 8), m5);
213  m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 32)), ((__m128i ) m7)));
214  m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 16)), ((__m128i ) m7)));
215  m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 8)), ((__m128i ) m7)));
 // Replicate byte 0 across the whole register.
216  m7 = _mm_unpacklo_epi8(m7, m7);
217  m7 = _mm_shufflelo_epi16(m7, _MM_SHUFFLE(0, 0, 0, 0));
218  m6 = _mm_unpacklo_epi64(m7, m7);
219  ((__m128i *) Y)[0] = _mm_subs_epu8(((__m128i *) Y)[0], m6);
220  ((__m128i *) Y)[1] = _mm_subs_epu8(((__m128i *) Y)[1], m6);
221  ((__m128i *) Y)[2] = _mm_subs_epu8(((__m128i *) Y)[2], m6);
222  ((__m128i *) Y)[3] = _mm_subs_epu8(((__m128i *) Y)[3], m6);
223  }
 // ---- Second update of the pass: read Y, write X ----
 // Uses syms[4*i9+2] and syms[4*i9+3]; decisions go to dec shorts
 // 8*i9+4 .. 8*i9+7. The structure mirrors the first update.
224  unsigned char a188, a194;
225  int a186, a205;
226  short int s48, s49, s54, s55;
227  unsigned char *a187, *a193, *b15;
228  short int *a204, *a206, *a207, *a223, *a224, *b16;
229  __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210
230  , *a211, *a212, *a215, *a225, *a226;
231  __m128i a199, a200, a218, a219;
232  __m128i a189, a191, a192, a195, a197, a198, a201
233  , a202, a203, a213, a214, a216, a217, a220, a221
234  , a222, d17, d18, d19, d20, m39, m40, m41
235  , m42, m43, m44, m45, m46, s46, s47, s50
236  , s51, s52, s53, s56, s57, t25, t26, t27
237  , t28, t29, t30;
 // First half: metrics Y[0..15] and Y[32..47].
238  a184 = ((__m128i *) Y);
239  s46 = *(a184);
240  a185 = (a184 + 2);
241  s47 = *(a185);
242  a186 = (4 * i9);
243  b15 = (a186 + syms);
244  a187 = (b15 + 2);
245  a188 = *(a187);
246  a189 = _mm_set1_epi8(a188);
247  a190 = ((__m128i *) Branchtab);
248  a191 = *(a190);
249  a192 = _mm_xor_si128(a189, a191);
250  a193 = (b15 + 3);
251  a194 = *(a193);
252  a195 = _mm_set1_epi8(a194);
253  a196 = (a190 + 2);
254  a197 = *(a196);
255  a198 = _mm_xor_si128(a195, a197);
256  t25 = _mm_avg_epu8(a192,a198);
257  a199 = ((__m128i ) t25);
258  a200 = _mm_srli_epi16(a199, 2);
259  a201 = ((__m128i ) a200);
260  t26 = _mm_and_si128(a201, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
261  , 63, 63, 63, 63, 63, 63, 63, 63
262  , 63));
263  t27 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
264  , 63, 63, 63, 63, 63, 63, 63, 63
265  , 63), t26);
266  m39 = _mm_adds_epu8(s46, t26);
267  m40 = _mm_adds_epu8(s47, t27);
268  m41 = _mm_adds_epu8(s46, t27);
269  m42 = _mm_adds_epu8(s47, t26);
270  a202 = _mm_min_epu8(m40, m39);
271  d17 = _mm_cmpeq_epi8(a202, m40);
272  a203 = _mm_min_epu8(m42, m41);
273  d18 = _mm_cmpeq_epi8(a203, m42);
274  s48 = _mm_movemask_epi8(_mm_unpacklo_epi8(d17,d18));
275  a204 = ((short int *) dec);
276  a205 = (8 * i9);
277  b16 = (a204 + a205);
278  a206 = (b16 + 4);
279  *(a206) = s48;
280  s49 = _mm_movemask_epi8(_mm_unpackhi_epi8(d17,d18));
281  a207 = (b16 + 5);
282  *(a207) = s49;
 // Survivors of the first half are stored to X[0..31].
283  s50 = _mm_unpacklo_epi8(a202, a203);
284  s51 = _mm_unpackhi_epi8(a202, a203);
285  a208 = ((__m128i *) X);
286  *(a208) = s50;
287  a209 = (a208 + 1);
288  *(a209) = s51;
 // Second half: metrics Y[16..31] and Y[48..63]; results go to X[32..63].
289  a210 = (a184 + 1);
290  s52 = *(a210);
291  a211 = (a184 + 3);
292  s53 = *(a211);
293  a212 = (a190 + 1);
294  a213 = *(a212);
295  a214 = _mm_xor_si128(a189, a213);
296  a215 = (a190 + 3);
297  a216 = *(a215);
298  a217 = _mm_xor_si128(a195, a216);
299  t28 = _mm_avg_epu8(a214,a217);
300  a218 = ((__m128i ) t28);
301  a219 = _mm_srli_epi16(a218, 2);
302  a220 = ((__m128i ) a219);
303  t29 = _mm_and_si128(a220, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
304  , 63, 63, 63, 63, 63, 63, 63, 63
305  , 63));
306  t30 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
307  , 63, 63, 63, 63, 63, 63, 63, 63
308  , 63), t29);
309  m43 = _mm_adds_epu8(s52, t29);
310  m44 = _mm_adds_epu8(s53, t30);
311  m45 = _mm_adds_epu8(s52, t30);
312  m46 = _mm_adds_epu8(s53, t29);
313  a221 = _mm_min_epu8(m44, m43);
314  d19 = _mm_cmpeq_epi8(a221, m44);
315  a222 = _mm_min_epu8(m46, m45);
316  d20 = _mm_cmpeq_epi8(a222, m46);
317  s54 = _mm_movemask_epi8(_mm_unpacklo_epi8(d19,d20));
318  a223 = (b16 + 6);
319  *(a223) = s54;
320  s55 = _mm_movemask_epi8(_mm_unpackhi_epi8(d19,d20));
321  a224 = (b16 + 7);
322  *(a224) = s55;
323  s56 = _mm_unpacklo_epi8(a221, a222);
324  s57 = _mm_unpackhi_epi8(a221, a222);
325  a225 = (a208 + 2);
326  *(a225) = s56;
327  a226 = (a208 + 3);
328  *(a226) = s57;
 // Same conditional renormalization as above, applied to X.
329  if ((((unsigned char *) X)[0]>210)) {
330  __m128i m12, m13;
331  m12 = ((__m128i *) X)[0];
332  m12 = _mm_min_epu8(m12, ((__m128i *) X)[1]);
333  m12 = _mm_min_epu8(m12, ((__m128i *) X)[2]);
334  m12 = _mm_min_epu8(m12, ((__m128i *) X)[3]);
335  __m128i m14;
336  m14 = _mm_min_epu8(_mm_srli_si128(m12, 8), m12);
337  m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 32)), ((__m128i ) m14)));
338  m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 16)), ((__m128i ) m14)));
339  m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 8)), ((__m128i ) m14)));
340  m14 = _mm_unpacklo_epi8(m14, m14);
341  m14 = _mm_shufflelo_epi16(m14, _MM_SHUFFLE(0, 0, 0, 0));
342  m13 = _mm_unpacklo_epi64(m14, m14);
343  ((__m128i *) X)[0] = _mm_subs_epu8(((__m128i *) X)[0], m13);
344  ((__m128i *) X)[1] = _mm_subs_epu8(((__m128i *) X)[1], m13);
345  ((__m128i *) X)[2] = _mm_subs_epu8(((__m128i *) X)[2], m13);
346  ((__m128i *) X)[3] = _mm_subs_epu8(((__m128i *) X)[3], m13);
347  }
348  }
349 
 // After the vector loop the current metrics are in X; rebase them
 // unconditionally with the scalar helper.
350  renormalize(X, 210);
351 
352  /*int ch;
353  for(ch = 0; ch < 64; ch++) {
354  printf("%d,", X[ch]);
355  }
356  printf("\n");*/
357 
 // Tail: when framebits+excess is odd, one bit is left over. It is handled
 // with the scalar butterfly (reading X, writing Y) at bit index
 // 2*((framebits+excess)>>1) + j, after which Y is renormalized.
358  unsigned int j;
359  for(j=0; j < (framebits + excess) % 2; ++j) {
360  int i;
361  for(i=0;i<64/2;i++){
362  BFLY(i, (((framebits+excess) >> 1) << 1) + j , syms, Y, X, (decision_t *)dec, Branchtab);
363  }
364 
365 
366  renormalize(Y, 210);
367 
368  /*printf("\n");
369  for(ch = 0; ch < 64; ch++) {
370  printf("%d,", Y[ch]);
371  }
372  printf("\n");*/
373 
374  }
375  /*skip*/
376  return;
377 }
378 
379 #endif /*LV_HAVE_SSE3*/
380 
381 
382 
383 
384 
385 
386 
387 #if LV_HAVE_GENERIC
388 
389 
390 static inline void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char* Y, unsigned char* X, unsigned char* syms, unsigned char* dec, unsigned int framebits, unsigned int excess, unsigned char* Branchtab) {
391  int nbits = framebits + excess;
392  int NUMSTATES = 64;
393  int RENORMALIZE_THRESHOLD = 210;
394 
395 
396  int s,i;
397 
398 
399 
400  for (s=0;s<nbits;s++){
401  void *tmp;
402  for(i=0;i<NUMSTATES/2;i++){
403  BFLY(i, s, syms, Y, X, (decision_t *)dec, Branchtab);
404  }
405 
406 
407 
408  renormalize(Y, RENORMALIZE_THRESHOLD);
409 
410  /// Swap pointers to old and new metrics
411  tmp = (void *)X;
412  X = Y;
413  Y = (unsigned char*)tmp;
414  }
415 
416 
417  return;
418 }
419 
420 #endif /* LV_HAVE_GENERIC */
421 
422 #endif /*INCLUDED_volk_8u_x4_conv_k7_r2_8u_H*/
float min(float a, float b)
unsigned int * w
Definition: cc_common.h:36
static void renormalize(unsigned char *X, unsigned char threshold)
Definition: volk_8u_x4_conv_k7_r2_8u.h:39
Definition: cc_common.h:33
static void BFLY(int i, int s, unsigned char *syms, unsigned char *Y, unsigned char *X, decision_t *d, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:56