1 #ifndef INCLUDED_volk_16i_max_star_16i_a_H
2 #define INCLUDED_volk_16i_max_star_16i_a_H
/*
 * SSSE3 "max star" kernel: scans num_points 16-bit samples in src0 and writes
 * a single winning sample to target[0].
 *
 *   target      output; only target[0] is written.
 *   src0        input samples; read with _mm_load_si128, which requires a
 *               16-byte-aligned address. src0[0] is read unconditionally.
 *   num_points  number of 16-bit samples in src0.
 *
 * The input is handled as full groups of 8 samples in a vector loop, then the
 * 8 vector lanes are folded into one scalar, then the remaining samples
 * (fewer than 8) are folded in by a scalar tail loop.
 *
 * NOTE(review): in this excerpt the statements carry stray leading numbers,
 * and the declarations of p_src0, i and cands as well as the closing braces
 * are not visible -- confirm against the complete file.
 */
15 static inline void volk_16i_max_star_16i_a_ssse3(
short* target,
short* src0,
unsigned int num_points) {
17 const unsigned int num_bytes = num_points*2;
/* Seed the scalar result with the first sample. */
19 short candidate = src0[0];
21 __m128i xmm0, xmm1, xmm3, xmm4, xmm5, xmm6;
26 p_src0 = (__m128i*)src0;
/* bound: number of full 16-byte groups (8 samples each). */
28 int bound = num_bytes >> 4;
/* leftovers: samples remaining after the last full group of 8. */
29 int leftovers = (num_bytes >> 1) & 7;
34 xmm1 = _mm_setzero_si128();
35 xmm0 = _mm_setzero_si128();
/*
 * NOTE(review): both operands are zero here, so the shuffle leaves xmm0
 * all-zero; the running per-lane value therefore starts at 0 rather than at
 * src0[0]. Confirm that this starting value is intended.
 */
38 xmm0 = _mm_shuffle_epi8(xmm0, xmm1);
/* Vector loop: xmm0 holds the running per-lane result. */
41 for(i = 0; i < bound; ++i) {
/* NOTE(review): no advance of p_src0 is visible in this excerpt. */
42 xmm1 = _mm_load_si128(p_src0);
/* Per-lane masks: xmm3 = (xmm0 > xmm1), xmm4 = (xmm0 == xmm1), xmm5 = (xmm1 > xmm0). */
51 xmm3 = _mm_cmpgt_epi16(xmm0, xmm1);
52 xmm4 = _mm_cmpeq_epi16(xmm0, xmm1);
53 xmm5 = _mm_cmpgt_epi16(xmm1, xmm0);
/* The two masks never overlap, so the XOR acts as an OR: xmm6 = (xmm1 >= xmm0). */
55 xmm6 = _mm_xor_si128(xmm4, xmm5);
/* Keep the old value where it is strictly greater, the new value elsewhere. */
57 xmm3 = _mm_and_si128(xmm3, xmm0);
58 xmm4 = _mm_and_si128(xmm6, xmm1);
/* The selections are disjoint per lane, so the add merges them into the signed lane-wise maximum. */
60 xmm0 = _mm_add_epi16(xmm3, xmm4);
/* Spill the 8 lanes; _mm_store_si128 requires a 16-byte-aligned destination. */
65 _mm_store_si128((__m128i*)cands, xmm0);
/*
 * Fold the 8 lanes into the scalar result. The comparison tests the sign of
 * the difference after narrowing it to short, unlike the direct signed
 * compare used in the vector loop.
 */
67 for(i = 0; i < 8; ++i) {
68 candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
/* Scalar tail: samples after the last full group, starting at index bound * 8. */
73 for(i = 0; i < leftovers; ++i) {
75 candidate = ((short)(candidate - src0[(bound << 3) + i]) > 0) ? candidate : src0[(bound << 3) + i];
78 target[0] = candidate;
88 #ifdef LV_HAVE_GENERIC
/*
 * Portable C "max star" kernel: scans num_points 16-bit samples in src0 and
 * writes the winning sample to target[0].
 *
 *   target      output; only target[0] is written.
 *   src0        input samples; at least num_points elements are read.
 *   num_points  number of samples in src0. When it is 0 the function returns
 *               without reading src0 or writing target.
 *
 * Two samples are ranked by the sign of their difference after that
 * difference has been narrowed to short. For samples that are at most 32767
 * apart this is an ordinary signed maximum; for samples further apart the
 * narrowed difference no longer has the sign of the true difference (on the
 * usual two's-complement targets it wraps), so the ordering wraps around.
 * On a tie the later sample is taken, which yields the same value.
 */
static inline void volk_16i_max_star_16i_generic(
    short* target,
    short* src0,
    unsigned int num_points) {
    unsigned int i;
    short candidate;

    /* An empty input has no first sample to seed the scan with. */
    if (num_points == 0) {
        return;
    }

    candidate = src0[0];

    /*
     * Iterate over the sample count directly; deriving the bound from a
     * byte count (num_points * 2, then halved) would drop the top bit of
     * very large counts.
     */
    for (i = 1; i < num_points; ++i) {
        candidate = ((short)(candidate - src0[i]) > 0) ? candidate : src0[i];
    }

    target[0] = candidate;
}