GNU Radio 3.5.3.2 C++ API
|
#ifndef INCLUDED_volk_16i_max_star_16i_a_H
#define INCLUDED_volk_16i_max_star_16i_a_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSSE3

#include <xmmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>

/**
 * Reduce a buffer of 16-bit signed integers to a single "max star" value
 * (SIMD variant).
 *
 * Full groups of eight elements are folded lane-wise with a signed 16-bit
 * maximum. The eight lane results and any remaining elements are then folded
 * into the running candidate with the comparison `(short)(candidate - x) > 0`:
 * the candidate is kept only when that difference, converted to short, is
 * positive; otherwise x replaces it.
 *
 * NOTE(review): the lane-wise step uses a plain signed comparison while the
 * scalar steps use the difference-based comparison above. The two can disagree
 * when the difference does not fit in a short, so this variant is not
 * guaranteed to match the generic one for such inputs.
 *
 * @param target    Receives the result in target[0]. Left unwritten when the
 *                  input holds no complete element.
 * @param src0      Input samples. Read with aligned 128-bit loads, so it must
 *                  be 16-byte aligned.
 * @param num_bytes Size of the input in bytes (two bytes per element; a
 *                  trailing odd byte is ignored).
 */
static inline void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_bytes)
{
    const unsigned int bound = num_bytes >> 4;           /* number of full 8-element vectors */
    const unsigned int leftovers = (num_bytes >> 1) & 7; /* elements after the last full vector */
    const __m128i* p_src0 = (const __m128i*)src0;
    short cands[8];
    short candidate;
    __m128i best;
    unsigned int i;

    /* Nothing to reduce: avoid reading src0[0] from an empty buffer. */
    if ((num_bytes >> 1) == 0) {
        return;
    }

    candidate = src0[0];

    /*
     * Seed every lane with the first sample. Seeding with zero would let a
     * spurious 0 win over inputs that are entirely negative, or shorter than
     * one vector.
     */
    best = _mm_set1_epi16(candidate);

    for (i = 0; i < bound; ++i) {
        const __m128i cur = _mm_load_si128(p_src0);
        ++p_src0;
        best = _mm_max_epi16(best, cur);
    }

    /* cands is a plain stack array with no alignment guarantee. */
    _mm_storeu_si128((__m128i*)cands, best);

    for (i = 0; i < 8; ++i) {
        candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
    }

    for (i = 0; i < leftovers; ++i) {
        const short sample = src0[(bound << 3) + i];
        candidate = ((short)(candidate - sample) > 0) ? candidate : sample;
    }

    target[0] = candidate;
}

#endif /*LV_HAVE_SSSE3*/

#ifdef LV_HAVE_GENERIC

/**
 * Reduce a buffer of 16-bit signed integers to a single "max star" value
 * (portable variant).
 *
 * Starting from src0[0], each following element x replaces the running
 * candidate unless `(short)(candidate - x) > 0`.
 *
 * @param target    Receives the result in target[0]. Left unwritten when the
 *                  input holds no complete element.
 * @param src0      Input samples.
 * @param num_bytes Size of the input in bytes (two bytes per element; a
 *                  trailing odd byte is ignored).
 */
static inline void volk_16i_max_star_16i_a_generic(short* target, short* src0, unsigned int num_bytes)
{
    const unsigned int bound = num_bytes >> 1; /* element count */
    short candidate;
    unsigned int i;

    /* Nothing to reduce: avoid reading src0[0] from an empty buffer. */
    if (bound == 0) {
        return;
    }

    candidate = src0[0];
    for (i = 1; i < bound; ++i) {
        candidate = ((short)(candidate - src0[i]) > 0) ? candidate : src0[i];
    }
    target[0] = candidate;
}

#endif /*LV_HAVE_GENERIC*/

#endif /*INCLUDED_volk_16i_max_star_16i_a_H*/