GNU Radio 3.5.3.2 C++ API
volk_16i_max_star_16i_a.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_16i_max_star_16i_a_H
00002 #define INCLUDED_volk_16i_max_star_16i_a_H
00003 
00004 
00005 #include<inttypes.h>
00006 #include<stdio.h>       
00007 
00008 
00009 #ifdef LV_HAVE_SSSE3
00010 
00011 #include<xmmintrin.h>
00012 #include<emmintrin.h>
00013 #include<tmmintrin.h>
00014 
/*
 * Reduce a buffer of 16-bit signed samples to a single "largest" value and
 * store it in target[0].
 *
 * target    - receives the result in target[0].
 * src0      - input samples; read with _mm_load_si128, so it must be
 *             16-byte aligned. src0[0] is read unconditionally, so the
 *             buffer must hold at least one sample.
 * num_bytes - size of the input in BYTES (two bytes per sample).
 *
 * Full groups of eight samples are reduced with a per-lane signed maximum;
 * the eight lane results and any trailing samples are then folded in with
 * the same wrapping-difference comparison the scalar code uses.
 *
 * NOTE(review): the vector loop uses a plain signed compare while the scalar
 * folds compare via the sign of the 16-bit difference. The two agree as long
 * as the difference of any two samples fits in 16 bits - confirm callers
 * stay in that range.
 */
static inline void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_bytes) {

  short candidate = src0[0];
  short cands[8];
  __m128i running_max;
  __m128i chunk;
  const __m128i *p_src0 = (const __m128i*)src0;

  int bound = num_bytes >> 4;            /* number of full 8-sample vectors */
  int leftovers = (num_bytes >> 1) & 7;  /* samples after the last full vector */
  int i = 0;

  /* Seed every lane with the first sample. Seeding with zero would leak a
   * spurious 0 into the result whenever all samples are negative. */
  running_max = _mm_set1_epi16(candidate);

  for(i = 0; i < bound; ++i) {
    chunk = _mm_load_si128(p_src0);
    p_src0 += 1;
    running_max = _mm_max_epi16(running_max, chunk);
  }

  /* cands is an ordinary stack array with no 16-byte alignment guarantee,
   * so an unaligned store is required here. */
  _mm_storeu_si128((__m128i*)cands, running_max);

  /* Horizontal reduction of the eight lane maxima. */
  for(i = 0; i < 8; ++i) {
    candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
  }

  /* Samples that did not fill a whole vector. */
  for(i = 0; i < leftovers; ++i) {
    candidate = ((short)(candidate - src0[(bound << 3) + i]) > 0) ? candidate : src0[(bound << 3) + i];
  }

  target[0] = candidate;

}
00085  
00086 #endif /*LV_HAVE_SSSE3*/
00087 
00088 #ifdef LV_HAVE_GENERIC
00089 
/*
 * Scalar reference: scan the input and keep the "largest" 16-bit sample,
 * writing it to target[0].
 *
 * target    - receives the result in target[0].
 * src0      - input samples; src0[0] is read unconditionally.
 * num_bytes - size of the input in BYTES (two bytes per sample).
 *
 * Two samples are ordered by the sign of their difference truncated to
 * 16 bits, so the comparison wraps around rather than being a plain
 * signed compare. On a tie the later sample replaces the current one.
 */
static inline void volk_16i_max_star_16i_a_generic(short* target, short* src0, unsigned int num_bytes) {

        int count = num_bytes >> 1;   /* bytes -> number of samples */
        short best = src0[0];
        int idx;

        for(idx = 1; idx < count; ++idx) {
          short sample = src0[idx];

          /* Keep `best` only when it is strictly ahead of `sample`. */
          if((short)(best - sample) <= 0) {
            best = sample;
          }
        }

        *target = best;

}
00103 
00104 
00105 #endif /*LV_HAVE_GENERIC*/
00106 
00107 
00108 #endif /*INCLUDED_volk_16i_max_star_16i_a_H*/