GNU Radio 3.6.5 C++ API

volk_32f_index_max_16u_a.h

Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_index_max_16u_a_H
00002 #define INCLUDED_volk_32f_index_max_16u_a_H
00003 
00004 #include <volk/volk_common.h>
00005 #include <volk/volk_common.h>
00006 #include <inttypes.h>
00007 #include <stdio.h>
00008 
00009 #ifdef LV_HAVE_SSE4_1
00010 #include<smmintrin.h>
00011 
/*!
  \brief Finds the position of the largest value in an aligned float buffer (SSE4.1).

  Writes to target[0] the index of the first occurrence of the maximum of
  src0[0 .. num_points-1], i.e. the same answer a scalar left-to-right scan
  with a strict `>` comparison produces. Nothing is written when num_points
  is 0.

  \param target     Output; target[0] receives the index of the maximum.
  \param src0       Input values. Read with _mm_load_ps, so the buffer must be
                    16-byte aligned.
  \param num_points Number of floats available in src0.

  \note Candidate indexes of the vectorised part are carried in float lanes,
        so they are exact only for positions below 2^24.
*/
static inline void volk_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
  if(num_points > 0){
    const unsigned int quarterPoints = num_points / 4;
    const float* inputPtr = src0;
    unsigned int number;

    const __m128 indexIncrementValues = _mm_set1_ps(4);
    // Lanes start at -4..-1 so that the first in-loop increment yields 0..3.
    __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);

    float max = src0[0];
    unsigned int index = 0;
    __m128 maxValues = _mm_set1_ps(max);
    __m128 maxValuesIndex = _mm_setzero_ps();
    __m128 isNewMax;
    __m128 currentValues;

    float maxValuesBuffer[4];
    float maxIndexesBuffer[4];

    for(number = 0; number < quarterPoints; number++){
      currentValues  = _mm_load_ps(inputPtr);
      inputPtr += 4;
      currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);

      // Replace a lane only when the new value is strictly greater: equal
      // values keep the earlier index and NaN inputs never win.
      isNewMax = _mm_cmpgt_ps(currentValues, maxValues);

      maxValuesIndex = _mm_blendv_ps(maxValuesIndex, currentIndexes, isNewMax);
      maxValues      = _mm_blendv_ps(maxValues, currentValues, isNewMax);
    }

    // Reduce the four per-lane maxima to a single winner.
    _mm_storeu_ps(maxValuesBuffer, maxValues);
    _mm_storeu_ps(maxIndexesBuffer, maxValuesIndex);

    for(number = 0; number < 4; number++){
      const unsigned int laneIndex = (unsigned int)maxIndexesBuffer[number];
      // When two lanes hold the same maximum, the smaller index wins.
      if(maxValuesBuffer[number] > max ||
         (maxValuesBuffer[number] == max && laneIndex < index)){
        index = laneIndex;
        max = maxValuesBuffer[number];
      }
    }

    // Scalar pass over the up-to-three points that did not fill a vector.
    for(number = quarterPoints * 4; number < num_points; number++){
      if(src0[number] > max){
        index = number;
        max = src0[number];
      }
    }
    target[0] = index;
  }
}
00064 
00065 #endif /*LV_HAVE_SSE4_1*/
00066 
00067 #ifdef LV_HAVE_SSE
00068 #include<xmmintrin.h>
00069 
/*!
  \brief Finds the position of the largest value in an aligned float buffer (SSE).

  Writes to target[0] the index of the first occurrence of the maximum of
  src0[0 .. num_points-1], i.e. the same answer a scalar left-to-right scan
  with a strict `>` comparison produces. Nothing is written when num_points
  is 0.

  \param target     Output; target[0] receives the index of the maximum.
  \param src0       Input values. Read with _mm_load_ps, so the buffer must be
                    16-byte aligned.
  \param num_points Number of floats available in src0.

  \note Candidate indexes of the vectorised part are carried in float lanes,
        so they are exact only for positions below 2^24.
*/
static inline void volk_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) {
  if(num_points > 0){
    const unsigned int quarterPoints = num_points / 4;
    const float* inputPtr = src0;
    unsigned int number;

    const __m128 indexIncrementValues = _mm_set1_ps(4);
    // Lanes start at -4..-1 so that the first in-loop increment yields 0..3.
    __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);

    float max = src0[0];
    unsigned int index = 0;
    __m128 maxValues = _mm_set1_ps(max);
    __m128 maxValuesIndex = _mm_setzero_ps();
    __m128 isNewMax;
    __m128 currentValues;

    float maxValuesBuffer[4];
    float maxIndexesBuffer[4];

    for(number = 0; number < quarterPoints; number++){
      currentValues  = _mm_load_ps(inputPtr);
      inputPtr += 4;
      currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);

      // Replace a lane only when the new value is strictly greater: equal
      // values keep the earlier index and NaN inputs never win.
      isNewMax = _mm_cmpgt_ps(currentValues, maxValues);

      // SSE has no blend instruction: select with (mask & new) | (~mask & old).
      maxValuesIndex = _mm_or_ps(_mm_and_ps(isNewMax, currentIndexes),
                                 _mm_andnot_ps(isNewMax, maxValuesIndex));
      maxValues      = _mm_or_ps(_mm_and_ps(isNewMax, currentValues),
                                 _mm_andnot_ps(isNewMax, maxValues));
    }

    // Reduce the four per-lane maxima to a single winner.
    _mm_storeu_ps(maxValuesBuffer, maxValues);
    _mm_storeu_ps(maxIndexesBuffer, maxValuesIndex);

    for(number = 0; number < 4; number++){
      const unsigned int laneIndex = (unsigned int)maxIndexesBuffer[number];
      // When two lanes hold the same maximum, the smaller index wins.
      if(maxValuesBuffer[number] > max ||
         (maxValuesBuffer[number] == max && laneIndex < index)){
        index = laneIndex;
        max = maxValuesBuffer[number];
      }
    }

    // Scalar pass over the up-to-three points that did not fill a vector.
    for(number = quarterPoints * 4; number < num_points; number++){
      if(src0[number] > max){
        index = number;
        max = src0[number];
      }
    }
    target[0] = index;
  }
}
00123 
00124 #endif /*LV_HAVE_SSE*/
00125 
00126 #ifdef LV_HAVE_GENERIC
/*!
  \brief Finds the position of the largest value in a float buffer (portable C).

  Scans src0 from left to right and writes to target[0] the index of the
  first element that is strictly greater than every element before it and
  not exceeded by any element after it, i.e. the first occurrence of the
  maximum. Nothing is written when num_points is 0.

  \param target     Output; target[0] receives the index of the maximum.
  \param src0       Input values.
  \param num_points Number of floats available in src0.

  \note The comparison is a plain `>`, so a NaN element never replaces the
        current maximum; if src0[0] is NaN the result is 0.
*/
static inline void volk_32f_index_max_16u_a_generic(unsigned int* target, const float* src0, unsigned int num_points) {
  unsigned int best = 0;
  unsigned int i;

  if(num_points == 0){
    return;
  }

  // src0[best] is always the running maximum, so no separate copy is kept.
  for(i = 1; i < num_points; ++i){
    if(src0[i] > src0[best]){
      best = i;
    }
  }
  target[0] = best;
}
00145 
00146 #endif /*LV_HAVE_GENERIC*/
00147 
00148 
00149 #endif /*INCLUDED_volk_32f_index_max_16u_a_H*/