1 #ifndef INCLUDED_volk_32f_index_max_16u_a_H
2 #define INCLUDED_volk_32f_index_max_16u_a_H
/*
 * SSE4.1 kernel, aligned-load variant: scans the num_points floats at src0
 * for the largest value and writes the position that was found to target[0].
 *
 *   target     - receives the resulting index in target[0]
 *   src0       - input samples; read with _mm_load_ps, which requires a
 *                16-byte aligned address
 *   num_points - number of floats in src0
 *
 * NOTE(review): this listing is an excerpt. The declarations of max, index,
 * currentValues, maxValuesBuffer and maxIndexesBuffer, and the closing
 * braces, are not shown here; confirm them against the full header.
 */
12 static inline void volk_32f_index_max_16u_a_sse4_1(
unsigned int* target,
const float* src0,
unsigned int num_points) {
14 unsigned int number = 0;
/* Number of complete 4-float groups consumed by the vector loop. */
15 const unsigned int quarterPoints = num_points / 4;
/* Drops the const qualifier; the lines shown here only read through it. */
17 float* inputPtr = (
float*)src0;
/* Every pass of the vector loop advances each lane's index by 4. */
19 __m128 indexIncrementValues = _mm_set1_ps(4);
/*
 * _mm_set_ps takes its arguments from the highest lane to the lowest, so
 * lane 0 starts at -4. The first add of 4 inside the loop turns the lanes
 * into 0,1,2,3, i.e. the positions of the first four samples.
 * NOTE(review): indexes are carried as floats, so positions above 2^24
 * cannot be represented exactly.
 */
20 __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
/* Seed every lane with the scalar max (its initial value is not shown). */
24 __m128 maxValues = _mm_set1_ps(max);
25 __m128 maxValuesIndex = _mm_setzero_ps();
26 __m128 compareResults;
32 for(;number < quarterPoints; number++){
/* Aligned load of the next four samples, then step the input pointer. */
34 currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
35 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
/* Mask lane is all-ones where the running max is strictly greater. */
37 compareResults = _mm_cmpgt_ps(maxValues, currentValues);
/*
 * _mm_blendv_ps(a, b, mask) selects b where the mask is set and a
 * otherwise: keep the stored index/value where the running max still wins,
 * take the new index/value elsewhere. Because the compare is strict, a
 * sample equal to the running max replaces it.
 */
39 maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
40 maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults);
/*
 * Spill the per-lane maxima and their indexes so they can be reduced in
 * scalar code. _mm_store_ps requires 16-byte aligned destinations.
 */
44 _mm_store_ps(maxValuesBuffer, maxValues);
45 _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
/* Reduce the four lanes: a lane wins only if it strictly exceeds max. */
47 for(number = 0; number < 4; number++){
48 if(maxValuesBuffer[number] > max){
49 index = maxIndexesBuffer[number];
50 max = maxValuesBuffer[number];
/* Scalar tail: the num_points % 4 samples the vector loop did not cover. */
54 number = quarterPoints * 4;
55 for(;number < num_points; number++){
56 if(src0[number] > max){
/* index is converted to unsigned int on output (presumably from float - confirm). */
61 target[0] = (
unsigned int)index;
/*
 * SSE kernel, aligned-load variant: scans the num_points floats at src0 for
 * the largest value and writes the position that was found to target[0].
 * The per-lane select is built from and/andnot/or operations.
 *
 *   target     - receives the resulting index in target[0]
 *   src0       - input samples; read with _mm_load_ps, which requires a
 *                16-byte aligned address
 *   num_points - number of floats in src0
 *
 * NOTE(review): this listing is an excerpt. The declarations of max, index,
 * currentValues, maxValuesBuffer and maxIndexesBuffer, and the closing
 * braces, are not shown here; confirm them against the full header.
 */
70 static inline void volk_32f_index_max_16u_a_sse(
unsigned int* target,
const float* src0,
unsigned int num_points) {
72 unsigned int number = 0;
/* Number of complete 4-float groups consumed by the vector loop. */
73 const unsigned int quarterPoints = num_points / 4;
/* Drops the const qualifier; the lines shown here only read through it. */
75 float* inputPtr = (
float*)src0;
/* Every pass of the vector loop advances each lane's index by 4. */
77 __m128 indexIncrementValues = _mm_set1_ps(4);
/*
 * _mm_set_ps takes its arguments from the highest lane to the lowest, so
 * lane 0 starts at -4. The first add of 4 inside the loop turns the lanes
 * into 0,1,2,3, i.e. the positions of the first four samples.
 * NOTE(review): indexes are carried as floats, so positions above 2^24
 * cannot be represented exactly.
 */
78 __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
/* Seed every lane with the scalar max (its initial value is not shown). */
82 __m128 maxValues = _mm_set1_ps(max);
83 __m128 maxValuesIndex = _mm_setzero_ps();
84 __m128 compareResults;
90 for(;number < quarterPoints; number++){
/* Aligned load of the next four samples, then step the input pointer. */
92 currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
93 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
/* Mask lane is all-ones where the running max is strictly greater. */
95 compareResults = _mm_cmpgt_ps(maxValues, currentValues);
/*
 * Bitwise select, (mask & old) | (~mask & new): keep the stored index/value
 * where the running max still wins, take the new index/value elsewhere.
 * Because the compare is strict, a sample equal to the running max
 * replaces it.
 */
97 maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes));
99 maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues));
/*
 * Spill the per-lane maxima and their indexes so they can be reduced in
 * scalar code. _mm_store_ps requires 16-byte aligned destinations.
 */
103 _mm_store_ps(maxValuesBuffer, maxValues);
104 _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
/* Reduce the four lanes: a lane wins only if it strictly exceeds max. */
106 for(number = 0; number < 4; number++){
107 if(maxValuesBuffer[number] > max){
108 index = maxIndexesBuffer[number];
109 max = maxValuesBuffer[number];
/* Scalar tail: the num_points % 4 samples the vector loop did not cover. */
113 number = quarterPoints * 4;
114 for(;number < num_points; number++){
115 if(src0[number] > max){
/* index is converted to unsigned int on output (presumably from float - confirm). */
120 target[0] = (
unsigned int)index;
/*
 * Generic (non-SIMD) variant, compiled only when LV_HAVE_GENERIC is defined.
 * Takes the same arguments as the SSE kernels: an output slot, the input
 * float array and its length.
 * NOTE(review): only the signature, one local and the loop header appear in
 * this listing; the loop body and the declaration of i are not shown.
 */
126 #ifdef LV_HAVE_GENERIC
127 static inline void volk_32f_index_max_16u_generic(
unsigned int* target,
const float* src0,
unsigned int num_points) {
/* Starts at position 0. */
130 unsigned int index = 0;
/* Walks the input up to num_points (starting value of i is not shown). */
134 for(; i < num_points; ++i) {
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27