1 #ifndef INCLUDED_volk_16i_max_star_16i_a_H
2 #define INCLUDED_volk_16i_max_star_16i_a_H
15 static inline void volk_16i_max_star_16i_a_ssse3(
short* target,
short* src0,
unsigned int num_points) {
17 const unsigned int num_bytes = num_points*2;
19 short candidate = src0[0];
21 __m128i xmm0, xmm1, xmm3, xmm4, xmm5, xmm6;
26 p_src0 = (__m128i*)src0;
28 int bound = num_bytes >> 4;
29 int leftovers = (num_bytes >> 1) & 7;
34 xmm1 = _mm_setzero_si128();
35 xmm0 = _mm_setzero_si128();
38 xmm0 = _mm_shuffle_epi8(xmm0, xmm1);
41 for(i = 0; i < bound; ++i) {
42 xmm1 = _mm_load_si128(p_src0);
51 xmm3 = _mm_cmpgt_epi16(xmm0, xmm1);
52 xmm4 = _mm_cmpeq_epi16(xmm0, xmm1);
53 xmm5 = _mm_cmpgt_epi16(xmm1, xmm0);
55 xmm6 = _mm_xor_si128(xmm4, xmm5);
57 xmm3 = _mm_and_si128(xmm3, xmm0);
58 xmm4 = _mm_and_si128(xmm6, xmm1);
60 xmm0 = _mm_add_epi16(xmm3, xmm4);
65 _mm_store_si128((__m128i*)cands, xmm0);
67 for(i = 0; i < 8; ++i) {
68 candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
73 for(i = 0; i < leftovers; ++i) {
75 candidate = ((short)(candidate - src0[(bound << 3) + i]) > 0) ? candidate : src0[(bound << 3) + i];
78 target[0] = candidate;
90 static inline void volk_16i_max_star_16i_neon(
short* target,
short* src0,
unsigned int num_points) {
91 const unsigned int eighth_points = num_points / 8;
94 int16x8_t diff, zeros;
95 uint16x8_t comp1, comp2;
96 zeros = veorq_s16(zeros, zeros);
100 int16x8_t candidate_vec = vld1q_dup_s16(src0 );
104 for(number=0; number < eighth_points; ++number) {
105 input_vec = vld1q_s16(src0);
106 __builtin_prefetch(src0+16);
107 diff = vsubq_s16(candidate_vec, input_vec);
108 comp1 = vcgeq_s16(diff, zeros);
109 comp2 = vcltq_s16(diff, zeros);
111 tmpvec.val[0] = vandq_s16(candidate_vec, (int16x8_t)comp1);
112 tmpvec.val[1] = vandq_s16(input_vec, (int16x8_t)comp2);
114 candidate_vec = vaddq_s16(tmpvec.val[0], tmpvec.val[1]);
117 vst1q_s16(&candidate, candidate_vec);
119 for(number=0; number < num_points%8; number++) {
120 candidate = ((
int16_t)(candidate - src0[number]) > 0) ? candidate : src0[number];
122 target[0] = candidate;
126 #ifdef LV_HAVE_GENERIC
/*
 * Portable C "max*" reduction over a vector of 16-bit integers.
 *
 * target     - receives the single result in target[0]; left untouched when
 *              num_points is 0.
 * src0       - input samples.
 * num_points - number of samples in src0.
 *
 * Starting from src0[0], each following sample replaces the current candidate
 * unless the 16-bit difference (candidate - sample) is strictly positive.
 * The difference is deliberately truncated to short, so the comparison is a
 * wrap-around one rather than a plain "greater than".
 */
static inline void volk_16i_max_star_16i_generic(short* target,
                                                 short* src0,
                                                 unsigned int num_points)
{
  unsigned int i;
  short candidate;

  if (num_points == 0) {
    return; /* nothing to reduce; do not read src0[0] */
  }

  candidate = src0[0];

  /* Iterate on num_points directly: converting to a byte count and back
   * through unsigned/int arithmetic can truncate very large counts. */
  for (i = 1; i < num_points; ++i) {
    candidate = ((short)(candidate - src0[i]) > 0) ? candidate : src0[i];
  }

  target[0] = candidate;
}
/* int16_t is a typedef for signed short (defined in stdint.h, line 76). */