GNU Radio Manual and C++ API Reference 3.7.4
The Free & Open Software Radio Ecosystem
volk_8u_x4_conv_k7_r2_8u.h
#ifndef INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
#define INCLUDED_volk_8u_x4_conv_k7_r2_8u_H


/* One decision bit per trellis state (NUMSTATES = 64): the four members are
   overlapping views of the same 8 bytes, so decisions can be accessed as
   bytes, shorts, or 32-bit words. */
typedef union {
    unsigned char/*DECISIONTYPE*/ t[64/*NUMSTATES*//8/*DECISIONTYPE_BITSIZE*/];
    unsigned int w[64/*NUMSTATES*//32];
    unsigned short s[64/*NUMSTATES*//16];
    unsigned char c[64/*NUMSTATES*//8];
#ifdef _MSC_VER
} decision_t;
#else
} decision_t __attribute__ ((aligned (16)));
#endif
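As a quick illustration (not part of the header), all four views of decision_t cover the same 64 decision bits, one per state; a hypothetical sanity check might look like this:

#include <assert.h>

/* Hypothetical check: every member of decision_t spans the same 8 bytes,
 * i.e. 64 decision bits, one per trellis state. */
static void check_decision_layout(void)
{
    assert(sizeof(((decision_t *)0)->t) == 8);
    assert(sizeof(((decision_t *)0)->w) == sizeof(((decision_t *)0)->t));
    assert(sizeof(((decision_t *)0)->s) == sizeof(((decision_t *)0)->t));
    assert(sizeof(((decision_t *)0)->c) == sizeof(((decision_t *)0)->t));
}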

/* Subtract the smallest path metric from all 64 states so the 8-bit metrics
   do not saturate. (The threshold test is commented out, so the threshold
   argument is currently unused.) */
static inline void renormalize(unsigned char* X, unsigned char threshold){
    int NUMSTATES = 64;
    int i;

    unsigned char min = X[0];
    //if(min > threshold) {
    for(i = 0; i < NUMSTATES; i++)
        if (min > X[i])
            min = X[i];
    for(i = 0; i < NUMSTATES; i++)
        X[i] -= min;
    //}
}


// Helper BFLY (add-compare-select butterfly) for the GENERIC version
static inline void BFLY(int i, int s, unsigned char * syms, unsigned char *Y, unsigned char *X, decision_t * d, unsigned char* Branchtab) {
    int j, decision0, decision1;
    unsigned char metric, m0, m1, m2, m3;

    int NUMSTATES = 64;
    int RATE = 2;
    int METRICSHIFT = 1;
    int PRECISIONSHIFT = 2;

    // Branch metric for butterfly i: XOR the received symbols with the
    // expected branch labels and scale down.
    metric = 0;
    for(j = 0; j < RATE; j++)
        metric += (Branchtab[i+j*NUMSTATES/2] ^ syms[s*RATE+j]) >> METRICSHIFT;
    metric = metric >> PRECISIONSHIFT;

    unsigned char max = ((RATE*((256 -1)>>METRICSHIFT))>>PRECISIONSHIFT);

    // Add-compare-select: two candidate path metrics for each successor state.
    m0 = X[i] + metric;
    m1 = X[i+NUMSTATES/2] + (max - metric);
    m2 = X[i] + (max - metric);
    m3 = X[i+NUMSTATES/2] + metric;

    decision0 = (signed int)(m0-m1) > 0;
    decision1 = (signed int)(m2-m3) > 0;

    Y[2*i] = decision0 ? m1 : m0;
    Y[2*i+1] = decision1 ? m3 : m2;

    // Pack the two survivor decisions for this butterfly into the decision
    // word belonging to bit period s.
    d->w[i/(sizeof(unsigned int)*8/2)+s*(sizeof(decision_t)/sizeof(unsigned int))] |=
        (decision0|decision1<<1) << ((2*i)&(sizeof(unsigned int)*8-1));
}
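For orientation, the final statement of BFLY stores two decision bits per butterfly, 64 bits per bit period. A minimal, hypothetical helper, not part of VOLK, that reads those bits back from the same dec buffer could look like this:

/* Hypothetical helper: recover the two decision bits that BFLY packed for
 * butterfly i (0..31) of bit period s. Bit 0 of the result is decision0,
 * bit 1 is decision1, mirroring the indexing used in BFLY above. */
static inline unsigned int read_butterfly_decisions(const unsigned char* dec, int s, int i)
{
    const unsigned int* w = (const unsigned int*)dec;
    unsigned int word = w[i/(sizeof(unsigned int)*8/2) +
                          s*(sizeof(decision_t)/sizeof(unsigned int))];
    return (word >> ((2*i)&(sizeof(unsigned int)*8-1))) & 3u;
}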


#if LV_HAVE_SSE3

#include <pmmintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>
#include <mmintrin.h>
#include <stdio.h>

static inline void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char* Y, unsigned char* X, unsigned char* syms, unsigned char* dec, unsigned int framebits, unsigned int excess, unsigned char* Branchtab) {
    unsigned int i9;
    /* Each loop iteration decodes two bit periods: the first pass reads path
       metrics from X and writes Y, the second reads Y and writes back into X. */
    for(i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
        unsigned char a75, a81;
        int a73, a92;
        short int s20, s21, s26, s27;
        unsigned char *a74, *a80, *b6;
        short int *a110, *a111, *a91, *a93, *a94;
        __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83,
                *a95, *a96, *a97, *a98, *a99;
        __m128i a105, a106, a86, a87;
        __m128i a100, a101, a103, a104, a107, a108, a109, a76, a78, a79,
                a82, a84, a85, a88, a89, a90, d10, d11, d12, d9,
                m23, m24, m25, m26, m27, m28, m29, m30,
                s18, s19, s22, s23, s24, s25, s28, s29,
                t13, t14, t15, t16, t17, t18;
        a71 = ((__m128i *) X);
        s18 = *(a71);
        a72 = (a71 + 2);
        s19 = *(a72);
        a73 = (4 * i9);
        a74 = (syms + a73);
        a75 = *(a74);
        a76 = _mm_set1_epi8(a75);
        a77 = ((__m128i *) Branchtab);
        a78 = *(a77);
        a79 = _mm_xor_si128(a76, a78);
        b6 = (a73 + syms);
        a80 = (b6 + 1);
        a81 = *(a80);
        a82 = _mm_set1_epi8(a81);
        a83 = (a77 + 2);
        a84 = *(a83);
        a85 = _mm_xor_si128(a82, a84);
        t13 = _mm_avg_epu8(a79, a85);
        a86 = ((__m128i) t13);
        a87 = _mm_srli_epi16(a86, 2);
        a88 = ((__m128i) a87);
        t14 = _mm_and_si128(a88, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63,
                                              63, 63, 63, 63, 63, 63, 63, 63));
        t15 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63,
                                         63, 63, 63, 63, 63, 63, 63, 63), t14);
        m23 = _mm_adds_epu8(s18, t14);
        m24 = _mm_adds_epu8(s19, t15);
        m25 = _mm_adds_epu8(s18, t15);
        m26 = _mm_adds_epu8(s19, t14);
        a89 = _mm_min_epu8(m24, m23);
        d9 = _mm_cmpeq_epi8(a89, m24);
        a90 = _mm_min_epu8(m26, m25);
        d10 = _mm_cmpeq_epi8(a90, m26);
        s20 = _mm_movemask_epi8(_mm_unpacklo_epi8(d9, d10));
        a91 = ((short int *) dec);
        a92 = (8 * i9);
        a93 = (a91 + a92);
        *(a93) = s20;
        s21 = _mm_movemask_epi8(_mm_unpackhi_epi8(d9, d10));
        a94 = (a93 + 1);
        *(a94) = s21;
        s22 = _mm_unpacklo_epi8(a89, a90);
        s23 = _mm_unpackhi_epi8(a89, a90);
        a95 = ((__m128i *) Y);
        *(a95) = s22;
        a96 = (a95 + 1);
        *(a96) = s23;
        a97 = (a71 + 1);
        s24 = *(a97);
        a98 = (a71 + 3);
        s25 = *(a98);
        a99 = (a77 + 1);
        a100 = *(a99);
        a101 = _mm_xor_si128(a76, a100);
        a102 = (a77 + 3);
        a103 = *(a102);
        a104 = _mm_xor_si128(a82, a103);
        t16 = _mm_avg_epu8(a101, a104);
        a105 = ((__m128i) t16);
        a106 = _mm_srli_epi16(a105, 2);
        a107 = ((__m128i) a106);
        t17 = _mm_and_si128(a107, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63,
                                               63, 63, 63, 63, 63, 63, 63, 63));
        t18 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63,
                                         63, 63, 63, 63, 63, 63, 63, 63), t17);
        m27 = _mm_adds_epu8(s24, t17);
        m28 = _mm_adds_epu8(s25, t18);
        m29 = _mm_adds_epu8(s24, t18);
        m30 = _mm_adds_epu8(s25, t17);
        a108 = _mm_min_epu8(m28, m27);
        d11 = _mm_cmpeq_epi8(a108, m28);
        a109 = _mm_min_epu8(m30, m29);
        d12 = _mm_cmpeq_epi8(a109, m30);
        s26 = _mm_movemask_epi8(_mm_unpacklo_epi8(d11, d12));
        a110 = (a93 + 2);
        *(a110) = s26;
        s27 = _mm_movemask_epi8(_mm_unpackhi_epi8(d11, d12));
        a111 = (a93 + 3);
        *(a111) = s27;
        s28 = _mm_unpacklo_epi8(a108, a109);
        s29 = _mm_unpackhi_epi8(a108, a109);
        a112 = (a95 + 2);
        *(a112) = s28;
        a113 = (a95 + 3);
        *(a113) = s29;
        /* If the first metric in Y has exceeded 210, find the minimum over
           all 64 metrics and subtract it from every state (renormalization). */
        if ((((unsigned char *) Y)[0] > 210)) {
            __m128i m5, m6;
            m5 = ((__m128i *) Y)[0];
            m5 = _mm_min_epu8(m5, ((__m128i *) Y)[1]);
            m5 = _mm_min_epu8(m5, ((__m128i *) Y)[2]);
            m5 = _mm_min_epu8(m5, ((__m128i *) Y)[3]);
            __m128i m7;
            m7 = _mm_min_epu8(_mm_srli_si128(m5, 8), m5);
            m7 = ((__m128i) _mm_min_epu8(((__m128i) _mm_srli_epi64(m7, 32)), ((__m128i) m7)));
            m7 = ((__m128i) _mm_min_epu8(((__m128i) _mm_srli_epi64(m7, 16)), ((__m128i) m7)));
            m7 = ((__m128i) _mm_min_epu8(((__m128i) _mm_srli_epi64(m7, 8)), ((__m128i) m7)));
            m7 = _mm_unpacklo_epi8(m7, m7);
            m7 = _mm_shufflelo_epi16(m7, _MM_SHUFFLE(0, 0, 0, 0));
            m6 = _mm_unpacklo_epi64(m7, m7);
            ((__m128i *) Y)[0] = _mm_subs_epu8(((__m128i *) Y)[0], m6);
            ((__m128i *) Y)[1] = _mm_subs_epu8(((__m128i *) Y)[1], m6);
            ((__m128i *) Y)[2] = _mm_subs_epu8(((__m128i *) Y)[2], m6);
            ((__m128i *) Y)[3] = _mm_subs_epu8(((__m128i *) Y)[3], m6);
        }
        /* Second bit period of this iteration: metrics flow from Y back to X. */
        unsigned char a188, a194;
        int a186, a205;
        short int s48, s49, s54, s55;
        unsigned char *a187, *a193, *b15;
        short int *a204, *a206, *a207, *a223, *a224, *b16;
        __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210,
                *a211, *a212, *a215, *a225, *a226;
        __m128i a199, a200, a218, a219;
        __m128i a189, a191, a192, a195, a197, a198, a201, a202, a203, a213,
                a214, a216, a217, a220, a221, a222, d17, d18, d19, d20,
                m39, m40, m41, m42, m43, m44, m45, m46,
                s46, s47, s50, s51, s52, s53, s56, s57,
                t25, t26, t27, t28, t29, t30;
        a184 = ((__m128i *) Y);
        s46 = *(a184);
        a185 = (a184 + 2);
        s47 = *(a185);
        a186 = (4 * i9);
        b15 = (a186 + syms);
        a187 = (b15 + 2);
        a188 = *(a187);
        a189 = _mm_set1_epi8(a188);
        a190 = ((__m128i *) Branchtab);
        a191 = *(a190);
        a192 = _mm_xor_si128(a189, a191);
        a193 = (b15 + 3);
        a194 = *(a193);
        a195 = _mm_set1_epi8(a194);
        a196 = (a190 + 2);
        a197 = *(a196);
        a198 = _mm_xor_si128(a195, a197);
        t25 = _mm_avg_epu8(a192, a198);
        a199 = ((__m128i) t25);
        a200 = _mm_srli_epi16(a199, 2);
        a201 = ((__m128i) a200);
        t26 = _mm_and_si128(a201, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63,
                                               63, 63, 63, 63, 63, 63, 63, 63));
        t27 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63,
                                         63, 63, 63, 63, 63, 63, 63, 63), t26);
        m39 = _mm_adds_epu8(s46, t26);
        m40 = _mm_adds_epu8(s47, t27);
        m41 = _mm_adds_epu8(s46, t27);
        m42 = _mm_adds_epu8(s47, t26);
        a202 = _mm_min_epu8(m40, m39);
        d17 = _mm_cmpeq_epi8(a202, m40);
        a203 = _mm_min_epu8(m42, m41);
        d18 = _mm_cmpeq_epi8(a203, m42);
        s48 = _mm_movemask_epi8(_mm_unpacklo_epi8(d17, d18));
        a204 = ((short int *) dec);
        a205 = (8 * i9);
        b16 = (a204 + a205);
        a206 = (b16 + 4);
        *(a206) = s48;
        s49 = _mm_movemask_epi8(_mm_unpackhi_epi8(d17, d18));
        a207 = (b16 + 5);
        *(a207) = s49;
        s50 = _mm_unpacklo_epi8(a202, a203);
        s51 = _mm_unpackhi_epi8(a202, a203);
        a208 = ((__m128i *) X);
        *(a208) = s50;
        a209 = (a208 + 1);
        *(a209) = s51;
        a210 = (a184 + 1);
        s52 = *(a210);
        a211 = (a184 + 3);
        s53 = *(a211);
        a212 = (a190 + 1);
        a213 = *(a212);
        a214 = _mm_xor_si128(a189, a213);
        a215 = (a190 + 3);
        a216 = *(a215);
        a217 = _mm_xor_si128(a195, a216);
        t28 = _mm_avg_epu8(a214, a217);
        a218 = ((__m128i) t28);
        a219 = _mm_srli_epi16(a218, 2);
        a220 = ((__m128i) a219);
        t29 = _mm_and_si128(a220, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63,
                                               63, 63, 63, 63, 63, 63, 63, 63));
        t30 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63,
                                         63, 63, 63, 63, 63, 63, 63, 63), t29);
        m43 = _mm_adds_epu8(s52, t29);
        m44 = _mm_adds_epu8(s53, t30);
        m45 = _mm_adds_epu8(s52, t30);
        m46 = _mm_adds_epu8(s53, t29);
        a221 = _mm_min_epu8(m44, m43);
        d19 = _mm_cmpeq_epi8(a221, m44);
        a222 = _mm_min_epu8(m46, m45);
        d20 = _mm_cmpeq_epi8(a222, m46);
        s54 = _mm_movemask_epi8(_mm_unpacklo_epi8(d19, d20));
        a223 = (b16 + 6);
        *(a223) = s54;
        s55 = _mm_movemask_epi8(_mm_unpackhi_epi8(d19, d20));
        a224 = (b16 + 7);
        *(a224) = s55;
        s56 = _mm_unpacklo_epi8(a221, a222);
        s57 = _mm_unpackhi_epi8(a221, a222);
        a225 = (a208 + 2);
        *(a225) = s56;
        a226 = (a208 + 3);
        *(a226) = s57;
        /* Same conditional renormalization as above, this time on X. */
        if ((((unsigned char *) X)[0] > 210)) {
            __m128i m12, m13;
            m12 = ((__m128i *) X)[0];
            m12 = _mm_min_epu8(m12, ((__m128i *) X)[1]);
            m12 = _mm_min_epu8(m12, ((__m128i *) X)[2]);
            m12 = _mm_min_epu8(m12, ((__m128i *) X)[3]);
            __m128i m14;
            m14 = _mm_min_epu8(_mm_srli_si128(m12, 8), m12);
            m14 = ((__m128i) _mm_min_epu8(((__m128i) _mm_srli_epi64(m14, 32)), ((__m128i) m14)));
            m14 = ((__m128i) _mm_min_epu8(((__m128i) _mm_srli_epi64(m14, 16)), ((__m128i) m14)));
            m14 = ((__m128i) _mm_min_epu8(((__m128i) _mm_srli_epi64(m14, 8)), ((__m128i) m14)));
            m14 = _mm_unpacklo_epi8(m14, m14);
            m14 = _mm_shufflelo_epi16(m14, _MM_SHUFFLE(0, 0, 0, 0));
            m13 = _mm_unpacklo_epi64(m14, m14);
            ((__m128i *) X)[0] = _mm_subs_epu8(((__m128i *) X)[0], m13);
            ((__m128i *) X)[1] = _mm_subs_epu8(((__m128i *) X)[1], m13);
            ((__m128i *) X)[2] = _mm_subs_epu8(((__m128i *) X)[2], m13);
            ((__m128i *) X)[3] = _mm_subs_epu8(((__m128i *) X)[3], m13);
        }
    }

    renormalize(X, 210);

    /*int ch;
    for(ch = 0; ch < 64; ch++) {
        printf("%d,", X[ch]);
    }
    printf("\n");*/

    /* If framebits + excess is odd, handle the final bit period with the
       generic butterfly. */
    unsigned int j;
    for(j = 0; j < (framebits + excess) % 2; ++j) {
        int i;
        for(i = 0; i < 64/2; i++){
            BFLY(i, (((framebits+excess) >> 1) << 1) + j, syms, Y, X, (decision_t *)dec, Branchtab);
        }

        renormalize(Y, 210);

        /*printf("\n");
        for(ch = 0; ch < 64; ch++) {
            printf("%d,", Y[ch]);
        }
        printf("\n");*/
    }
    /*skip*/
    return;
}

#endif /*LV_HAVE_SSE3*/


#if LV_HAVE_GENERIC

/* Portable reference implementation: one pass of 32 add-compare-select
   butterflies per bit period, followed by renormalization. */
static inline void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char* Y, unsigned char* X, unsigned char* syms, unsigned char* dec, unsigned int framebits, unsigned int excess, unsigned char* Branchtab) {
    int nbits = framebits + excess;
    int NUMSTATES = 64;
    int RENORMALIZE_THRESHOLD = 210;

    int s, i;

    for (s = 0; s < nbits; s++){
        void *tmp;
        for(i = 0; i < NUMSTATES/2; i++){
            BFLY(i, s, syms, Y, X, (decision_t *)dec, Branchtab);
        }

        renormalize(Y, RENORMALIZE_THRESHOLD);

        // Swap pointers to old and new metrics
        tmp = (void *)X;
        X = Y;
        Y = (unsigned char*)tmp;
    }

    return;
}

#endif /* LV_HAVE_GENERIC */

#endif /*INCLUDED_volk_8u_x4_conv_k7_r2_8u_H*/
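To show how the kernel signature is meant to be used, here is a rough, hypothetical driver for the generic variant (K = 7, rate 1/2, 64 states). The branch-table contents, buffer sizing, and the traceback stage are assumptions left to the caller and are not defined by this header:

#include <string.h>

/* Hypothetical driver sketch; not part of VOLK. Assumes framebits + excess
 * fits in MAX_PERIODS and that Branchtab holds the 64 branch-label bytes. */
#define MAX_PERIODS 4096

void decode_frame_sketch(unsigned char* syms, unsigned int framebits,
                         unsigned int excess, unsigned char* Branchtab)
{
    unsigned char X[64], Y[64];                /* old / new path metrics */
    static unsigned char dec[8 * MAX_PERIODS]; /* one decision_t per bit period */

    memset(X, 0, sizeof(X));                   /* start all path metrics equal */
    memset(Y, 0, sizeof(Y));
    memset(dec, 0, sizeof(dec));               /* BFLY ORs bits in, so clear first */

    volk_8u_x4_conv_k7_r2_8u_generic(Y, X, syms, dec, framebits, excess, Branchtab);

    /* A traceback over dec would follow here; it is outside the scope of this header. */
}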