GNU Radio 3.5.3.2 C++ API
|
00001 #ifndef INCLUDED_volk_32f_index_max_16u_a_H 00002 #define INCLUDED_volk_32f_index_max_16u_a_H 00003 00004 #include <volk/volk_common.h> 00005 #include <volk/volk_common.h> 00006 #include <inttypes.h> 00007 #include <stdio.h> 00008 00009 #ifdef LV_HAVE_SSE4_1 00010 #include<smmintrin.h> 00011 00012 static inline void volk_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { 00013 if(num_points > 0){ 00014 unsigned int number = 0; 00015 const unsigned int quarterPoints = num_points / 4; 00016 00017 float* inputPtr = (float*)src0; 00018 00019 __m128 indexIncrementValues = _mm_set1_ps(4); 00020 __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4); 00021 00022 float max = src0[0]; 00023 float index = 0; 00024 __m128 maxValues = _mm_set1_ps(max); 00025 __m128 maxValuesIndex = _mm_setzero_ps(); 00026 __m128 compareResults; 00027 __m128 currentValues; 00028 00029 __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; 00030 __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; 00031 00032 for(;number < quarterPoints; number++){ 00033 00034 currentValues = _mm_load_ps(inputPtr); inputPtr += 4; 00035 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues); 00036 00037 compareResults = _mm_cmpgt_ps(maxValues, currentValues); 00038 00039 maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults); 00040 maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults); 00041 } 00042 00043 // Calculate the largest value from the remaining 4 points 00044 _mm_store_ps(maxValuesBuffer, maxValues); 00045 _mm_store_ps(maxIndexesBuffer, maxValuesIndex); 00046 00047 for(number = 0; number < 4; number++){ 00048 if(maxValuesBuffer[number] > max){ 00049 index = maxIndexesBuffer[number]; 00050 max = maxValuesBuffer[number]; 00051 } 00052 } 00053 00054 number = quarterPoints * 4; 00055 for(;number < num_points; number++){ 00056 if(src0[number] > max){ 00057 index = number; 00058 max = src0[number]; 00059 } 00060 } 00061 
target[0] = (unsigned int)index; 00062 } 00063 } 00064 00065 #endif /*LV_HAVE_SSE4_1*/ 00066 00067 #ifdef LV_HAVE_SSE 00068 #include<xmmintrin.h> 00069 00070 static inline void volk_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) { 00071 if(num_points > 0){ 00072 unsigned int number = 0; 00073 const unsigned int quarterPoints = num_points / 4; 00074 00075 float* inputPtr = (float*)src0; 00076 00077 __m128 indexIncrementValues = _mm_set1_ps(4); 00078 __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4); 00079 00080 float max = src0[0]; 00081 float index = 0; 00082 __m128 maxValues = _mm_set1_ps(max); 00083 __m128 maxValuesIndex = _mm_setzero_ps(); 00084 __m128 compareResults; 00085 __m128 currentValues; 00086 00087 __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; 00088 __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; 00089 00090 for(;number < quarterPoints; number++){ 00091 00092 currentValues = _mm_load_ps(inputPtr); inputPtr += 4; 00093 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues); 00094 00095 compareResults = _mm_cmpgt_ps(maxValues, currentValues); 00096 00097 maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes)); 00098 00099 maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues)); 00100 } 00101 00102 // Calculate the largest value from the remaining 4 points 00103 _mm_store_ps(maxValuesBuffer, maxValues); 00104 _mm_store_ps(maxIndexesBuffer, maxValuesIndex); 00105 00106 for(number = 0; number < 4; number++){ 00107 if(maxValuesBuffer[number] > max){ 00108 index = maxIndexesBuffer[number]; 00109 max = maxValuesBuffer[number]; 00110 } 00111 } 00112 00113 number = quarterPoints * 4; 00114 for(;number < num_points; number++){ 00115 if(src0[number] > max){ 00116 index = number; 00117 max = src0[number]; 00118 } 00119 } 00120 target[0] = (unsigned int)index; 00121 } 00122 } 00123 00124 #endif 
/*LV_HAVE_SSE*/

#ifdef LV_HAVE_GENERIC
/*!
  \brief Finds the index of the largest value in a float vector (portable C).

  Scans src0 once and writes to target[0] the index of the first occurrence of
  the maximum value. When num_points is 0 nothing is written.

  A sample replaces the running maximum only if it compares strictly greater,
  so NaN samples are never selected; if src0[0] itself is NaN the result is 0.

  \param target     Receives the index of the maximum in target[0].
  \param src0       Input vector of num_points floats.
  \param num_points Number of floats in src0.
*/
static inline void volk_32f_index_max_16u_a_generic(unsigned int* target, const float* src0, unsigned int num_points) {
  if(num_points > 0){
    float max = src0[0];
    unsigned int index = 0;
    unsigned int i;

    /* Element 0 seeds the running maximum, so the scan starts at 1. */
    for(i = 1; i < num_points; ++i) {
      if(src0[i] > max){
        index = i;
        max = src0[i];
      }
    }
    target[0] = index;
  }
}

#endif /*LV_HAVE_GENERIC*/


#endif /*INCLUDED_volk_32f_index_max_16u_a_H*/