Statistics
| Branch: | Tag: | Revision:

root / volk / include / volk / volk_32f_max_aligned16.h @ 74f206ed

History | View | Annotate | Download (2.2 kB)

1 23914465 Tom Rondeau
#ifndef INCLUDED_VOLK_32f_MAX_ALIGNED16_H
2 23914465 Tom Rondeau
#define INCLUDED_VOLK_32f_MAX_ALIGNED16_H
3 23914465 Tom Rondeau
4 23914465 Tom Rondeau
#include <inttypes.h>
5 23914465 Tom Rondeau
#include <stdio.h>
6 23914465 Tom Rondeau
7 23914465 Tom Rondeau
#if LV_HAVE_SSE
8 23914465 Tom Rondeau
#include <xmmintrin.h>
9 23914465 Tom Rondeau
/*!
  \brief Selects the larger value of each pair of entries in aVector and bVector and stores the results in cVector
  \param cVector The vector where the results will be stored
  \param aVector The first vector to be checked
  \param bVector The second vector to be checked
  \param num_points The number of values in aVector and bVector to be checked and stored into cVector

  The vectorized loop uses aligned SSE loads and stores (_mm_load_ps /
  _mm_store_ps), so cVector, aVector and bVector must be 16-byte aligned.
*/
static inline void volk_32f_max_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m128 aVal, bVal, cVal;

    /* Vectorized part: handle four floats per iteration */
    for(; number < quarterPoints; number++){
      aVal = _mm_load_ps(aPtr);
      bVal = _mm_load_ps(bPtr);

      cVal = _mm_max_ps(aVal, bVal);

      _mm_store_ps(cPtr, cVal); // Store the results back into the C container

      aPtr += 4;
      bPtr += 4;
      cPtr += 4;
    }

    /* Scalar tail: handle the remaining (num_points % 4) values */
    number = quarterPoints * 4;
    for(; number < num_points; number++){
      const float a = *aPtr++;
      const float b = *bPtr++;
      *cPtr++ = ( a > b ? a : b);
    }
}
46 23914465 Tom Rondeau
#endif /* LV_HAVE_SSE */
47 23914465 Tom Rondeau
48 23914465 Tom Rondeau
#if LV_HAVE_GENERIC
49 23914465 Tom Rondeau
/*!
  \brief Selects the larger value of each pair of entries in aVector and bVector and stores the results in cVector
  \param cVector The vector where the results will be stored
  \param aVector The first vector to be checked
  \param bVector The second vector to be checked
  \param num_points The number of values in aVector and bVector to be checked and stored into cVector
*/
static inline void volk_32f_max_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number;

    /* Plain scalar loop: one comparison per point */
    for(number = 0; number < num_points; number++){
      const float a = *aPtr++;
      const float b = *bPtr++;
      *cPtr++ = ( a > b ? a : b);
    }
}
68 23914465 Tom Rondeau
#endif /* LV_HAVE_GENERIC */
69 23914465 Tom Rondeau
70 23914465 Tom Rondeau
71 23914465 Tom Rondeau
#endif /* INCLUDED_VOLK_32f_MAX_ALIGNED16_H */