GNU Radio Manual and C++ API Reference  3.7.5.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_16i_max_star_16i.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_16i_max_star_16i_a_H
2 #define INCLUDED_volk_16i_max_star_16i_a_H
3 
4 
5 #include<inttypes.h>
6 #include<stdio.h>
7 
8 
9 #ifdef LV_HAVE_SSSE3
10 
11 #include<xmmintrin.h>
12 #include<emmintrin.h>
13 #include<tmmintrin.h>
14 
/*
 * Aligned x86 SIMD kernel: reduces num_points 16-bit samples from src0 to a
 * single "max" value and writes it to target[0].
 *
 *  target     - receives the result in target[0]; left untouched when
 *               num_points is 0.
 *  src0       - input samples; must be 16-byte aligned because whole vectors
 *               are fetched with _mm_load_si128.
 *  num_points - number of samples in src0.
 *
 * Whole groups of eight samples are reduced lane-wise with a signed 16-bit
 * maximum. The eight lane results and the trailing (num_points % 8) samples
 * are then folded with the same scalar rule the other kernels in this file
 * use: keep the current candidate only when (short)(candidate - sample) > 0.
 *
 * The running vector is seeded with src0[0] rather than zero, so inputs that
 * are entirely negative (or shorter than one vector) are no longer clamped
 * to 0. Only SSE2-level intrinsics are needed by this body.
 */
static inline void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_points) {

  const unsigned int bound = num_points >> 3;    /* number of full 8-sample vectors */
  const unsigned int leftovers = num_points & 7; /* samples after the last full vector */
  const __m128i* p_src0 = (const __m128i*)src0;
  const short* tail = src0 + bound * 8u;
  short cands[8];
  short candidate;
  unsigned int i;
  __m128i running;

  /* Nothing to reduce; avoid reading src0[0] from an empty buffer. */
  if(num_points == 0) {
    return;
  }

  candidate = src0[0];

  /* Every lane starts at a real sample so the seed can never win unless it
     is an actual element of the input. */
  running = _mm_set1_epi16(candidate);

  for(i = 0; i < bound; ++i) {
    running = _mm_max_epi16(running, _mm_load_si128(p_src0 + i));
  }

  /* cands is a plain short array with no 16-byte alignment guarantee. */
  _mm_storeu_si128((__m128i*)cands, running);

  for(i = 0; i < 8; ++i) {
    candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
  }

  for(i = 0; i < leftovers; ++i) {
    candidate = ((short)(candidate - tail[i]) > 0) ? candidate : tail[i];
  }

  target[0] = candidate;
}
85 
86 #endif /*LV_HAVE_SSSE3*/
87 
88 #ifdef LV_HAVE_NEON
89 #include <arm_neon.h>
/*
 * NEON kernel: reduces num_points 16-bit samples from src0 to a single "max"
 * value and writes it to target[0].
 *
 *  target     - receives the result in target[0]; left untouched when
 *               num_points is 0.
 *  src0       - input samples.
 *  num_points - number of samples in src0.
 *
 * Each lane keeps its current candidate when the 16-bit difference
 * (candidate - sample) is >= 0 and otherwise takes the new sample. The eight
 * lane results and the trailing (num_points % 8) samples are then folded with
 * the scalar rule (short)(candidate - sample) > 0.
 *
 * The vector pass starts at src0[0] (which also seeds every lane), so exactly
 * num_points samples are read, and all eight lanes are stored to a properly
 * sized buffer before being folded.
 */
static inline void volk_16i_max_star_16i_neon(short* target, short* src0, unsigned int num_points) {
  const unsigned int eighth_points = num_points / 8;
  const unsigned int tail_points = num_points % 8;
  const int16x8_t zeros = vdupq_n_s16(0);
  int16x8_t candidate_vec;
  short lanes[8];
  short candidate;
  unsigned int number;

  /* Nothing to reduce; avoid reading src0[0] from an empty buffer. */
  if(num_points == 0) {
    return;
  }

  /* Seed every lane with the first sample. */
  candidate_vec = vld1q_dup_s16(src0);

  for(number = 0; number < eighth_points; ++number) {
    const int16x8_t input_vec = vld1q_s16(src0);
    __builtin_prefetch(src0 + 16);
    const int16x8_t diff = vsubq_s16(candidate_vec, input_vec);
    /* All-ones in lanes where the current candidate is kept. */
    const uint16x8_t keep = vcgeq_s16(diff, zeros);
    candidate_vec = vbslq_s16(keep, candidate_vec, input_vec);
    src0 += 8;
  }

  /* Spill all eight lanes and fold them into one scalar. */
  vst1q_s16(lanes, candidate_vec);
  candidate = lanes[0];
  for(number = 1; number < 8; ++number) {
    candidate = ((short)(candidate - lanes[number]) > 0) ? candidate : lanes[number];
  }

  /* src0 now points just past the last full vector. */
  for(number = 0; number < tail_points; ++number) {
    candidate = ((short)(candidate - src0[number]) > 0) ? candidate : src0[number];
  }

  target[0] = candidate;
}
124 #endif /*LV_HAVE_NEON*/
125 
126 #ifdef LV_HAVE_GENERIC
127 
/*
 * Portable kernel: reduces num_points 16-bit samples from src0 to a single
 * "max" value and writes it to target[0].
 *
 *  target     - receives the result in target[0]; left untouched when
 *               num_points is 0.
 *  src0       - input samples.
 *  num_points - number of samples in src0.
 *
 * Starting from src0[0], the current candidate is kept only when
 * (short)(candidate - sample) > 0; otherwise the sample replaces it. Because
 * the difference is narrowed to short before the sign test, this is a plain
 * maximum only while the samples span less than 32768 (the narrowing of
 * larger differences is implementation-defined in C).
 */
static inline void volk_16i_max_star_16i_generic(short* target, short* src0, unsigned int num_points) {

  unsigned int i;
  short candidate;

  /* Nothing to reduce; avoid reading src0[0] from an empty buffer. */
  if(num_points == 0) {
    return;
  }

  candidate = src0[0];

  /* Index with the same unsigned type as num_points so large counts are not
     truncated by a round trip through a byte count and a signed int. */
  for(i = 1; i < num_points; ++i) {
    candidate = ((short)(candidate - src0[i]) > 0) ? candidate : src0[i];
  }

  target[0] = candidate;
}
143 
144 
145 #endif /*LV_HAVE_GENERIC*/
146 
147 
148 #endif /*INCLUDED_volk_16i_max_star_16i_a_H*/
signed short int16_t
Definition: stdint.h:76