GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_16i_branch_4_state_8.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_16i_branch_4_state_8_a_H
24 #define INCLUDED_volk_16i_branch_4_state_8_a_H
25 
26 
27 #include<inttypes.h>
28 #include<stdio.h>
29 
30 
31 
32 
33 #ifdef LV_HAVE_SSSE3
34 
35 #include<xmmintrin.h>
36 #include<emmintrin.h>
37 #include<tmmintrin.h>
38 
/*
 * SSSE3 kernel: builds four 8-lane "branch" vectors from one source vector.
 *
 * For branch b (0..3) and lane k (0..7) the value written to target[b*8 + k]
 * is, with 16-bit wrap-around arithmetic:
 *
 *     pshufb(src0, permuters[b])[k]
 *   + (b is 0 or 2 ? scalars[0] : 0)
 *   + (b is 0 or 1 ? scalars[1] : 0)
 *   + (cntl2[b*8 + k] & scalars[2])
 *   + (cntl3[b*8 + k] & scalars[3])
 *
 * target    - output, 32 shorts (four 128-bit vectors) are stored
 * src0      - input, one 128-bit vector (8 shorts) is loaded
 * permuters - four pointers, each to a 16-byte _mm_shuffle_epi8 control mask
 * cntl2     - input, 32 shorts, masked with scalars[2]
 * cntl3     - input, 32 shorts, masked with scalars[3]
 * scalars   - one 128-bit vector is loaded; only the first four shorts are used
 *
 * Every buffer above is accessed with _mm_load_si128 / _mm_store_si128, so
 * each pointer (including every permuters[b]) must be 16-byte aligned.
 */
static inline void volk_16i_branch_4_state_8_a_ssse3(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
  const __m128i* p_cntl2 = (const __m128i*)cntl2;
  const __m128i* p_cntl3 = (const __m128i*)cntl3;
  __m128i* p_target = (__m128i*)target;
  __m128i state[4];
  int i;

  /* Broadcast scalars[0..3] to all eight 16-bit lanes: shufflelo replicates
     the chosen word across the low 64 bits, shuffle_epi32 then replicates
     the low dword across the whole register. */
  const __m128i packed  = _mm_load_si128((const __m128i*)scalars);
  const __m128i scalar0 = _mm_shuffle_epi32(_mm_shufflelo_epi16(packed, 0x00), 0x00);
  const __m128i scalar1 = _mm_shuffle_epi32(_mm_shufflelo_epi16(packed, 0x55), 0x00);
  const __m128i mask2   = _mm_shuffle_epi32(_mm_shufflelo_epi16(packed, 0xaa), 0x00);
  const __m128i mask3   = _mm_shuffle_epi32(_mm_shufflelo_epi16(packed, 0xff), 0x00);

  const __m128i src = _mm_load_si128((const __m128i*)src0);

  /* Permute the source once per branch and add that branch's constant
     offset: branch 0 gets scalar0 + scalar1, branch 1 scalar1,
     branch 2 scalar0, branch 3 nothing. */
  state[0] = _mm_add_epi16(_mm_add_epi16(scalar0, scalar1),
                           _mm_shuffle_epi8(src, _mm_load_si128((const __m128i*)permuters[0])));
  state[1] = _mm_add_epi16(scalar1,
                           _mm_shuffle_epi8(src, _mm_load_si128((const __m128i*)permuters[1])));
  state[2] = _mm_add_epi16(scalar0,
                           _mm_shuffle_epi8(src, _mm_load_si128((const __m128i*)permuters[2])));
  state[3] = _mm_shuffle_epi8(src, _mm_load_si128((const __m128i*)permuters[3]));

  /* Add the masked control words. All inputs are read before anything is
     written to target, so the stores below cannot disturb the loads. */
  for(i = 0; i < 4; ++i) {
    const __m128i c2 = _mm_and_si128(_mm_load_si128(&p_cntl2[i]), mask2);
    const __m128i c3 = _mm_and_si128(_mm_load_si128(&p_cntl3[i]), mask3);
    state[i] = _mm_add_epi16(state[i], _mm_add_epi16(c2, c3));
  }

  /* This kernel handles exactly one block of four output vectors, so there
     is no outer loop and no pointer advance past the buffers. */
  for(i = 0; i < 4; ++i) {
    _mm_store_si128(&p_target[i], state[i]);
  }
}
158 
159 
160 #endif /*LV_HAVE_SSSE3*/
161 
162 #ifdef LV_HAVE_GENERIC
/*
 * Portable reference kernel: fills 4 branches x 8 lanes of target.
 *
 * For branch b (0..3) and lane k (0..7):
 *
 *   target[b*8 + k] = src0[permuters[b][2*k] / 2]
 *                   + ((b + 1) % 2)    * scalars[0]
 *                   + ((b >> 1) ^ 1)   * scalars[1]
 *                   + (cntl2[b*8 + k] & scalars[2])
 *                   + (cntl3[b*8 + k] & scalars[3])
 *
 * Only the even bytes of each permuter are read; each is halved to turn it
 * into an index into the short array src0.
 *
 * target    - output, 32 shorts are written
 * src0      - input, indexed through the permuters
 * permuters - four pointers to byte tables; bytes 0, 2, ..., 14 are read
 * cntl2     - input, 32 shorts, masked with scalars[2]
 * cntl3     - input, 32 shorts, masked with scalars[3]
 * scalars   - input, the first four shorts are used
 */
static inline void volk_16i_branch_4_state_8_generic(short* target, short* src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
  int branch;

  for(branch = 0; branch < 4; ++branch) {
    const char* perm = permuters[branch];
    const int use_scalar0 = (branch + 1) % 2;   /* 1 for branches 0 and 2 */
    const int use_scalar1 = (branch >> 1) ^ 1;  /* 1 for branches 0 and 1 */
    const int first = branch * 8;
    int lane;

    for(lane = 0; lane < 8; ++lane) {
      const int out = first + lane;

      target[out] = src0[perm[lane * 2] / 2]
        + (use_scalar0 * scalars[0])
        + (use_scalar1 * scalars[1])
        + (cntl2[out] & scalars[2])
        + (cntl3[out] & scalars[3]);
    }
  }
}
212 
213 #endif /*LV_HAVE_GENERIC*/
214 
215 
216 #endif /*INCLUDED_volk_16i_branch_4_state_8_a_H*/