Statistics
| Branch: | Tag: | Revision:

root / volk / include / volk / volk_32f_divide_aligned16.h @ 108a594c

History | View | Annotate | Download (2.1 kB)

1 23914465 Tom Rondeau
#ifndef INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H
2 23914465 Tom Rondeau
#define INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H
3 23914465 Tom Rondeau
4 23914465 Tom Rondeau
#include <inttypes.h>
5 23914465 Tom Rondeau
#include <stdio.h>
6 23914465 Tom Rondeau
7 23914465 Tom Rondeau
#if LV_HAVE_SSE
8 23914465 Tom Rondeau
#include <xmmintrin.h>
9 23914465 Tom Rondeau
/*!
  \brief Divides the two input vectors element by element and stores the quotients in the third vector
  \param cVector The vector where the results will be stored
  \param aVector The vector of dividends (values to be divided)
  \param bVector The vector of divisors
  \param num_points The number of values in aVector and bVector to be divided and stored into cVector

  \note Values are processed four at a time with _mm_load_ps / _mm_store_ps,
        which require 16-byte aligned addresses, so cVector, aVector and
        bVector must all be 16-byte aligned. Any remaining (num_points % 4)
        values are divided one at a time.
*/
static inline void volk_32f_divide_aligned16_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m128 aVal, bVal, cVal;

    /* Vector part: one iteration handles four consecutive floats. */
    for(; number < quarterPoints; number++){
      aVal = _mm_load_ps(aPtr);
      bVal = _mm_load_ps(bPtr);

      cVal = _mm_div_ps(aVal, bVal);

      _mm_store_ps(cPtr, cVal); /* Store the results back into the C container */

      aPtr += 4;
      bPtr += 4;
      cPtr += 4;
    }

    /* Scalar tail: the pointers already sit just past the last full group of four. */
    for(number = quarterPoints * 4; number < num_points; number++){
      *cPtr++ = (*aPtr++) / (*bPtr++);
    }
}
44 23914465 Tom Rondeau
#endif /* LV_HAVE_SSE */
45 23914465 Tom Rondeau
46 23914465 Tom Rondeau
#if LV_HAVE_GENERIC
47 23914465 Tom Rondeau
/*!
  \brief Divides the two input vectors element by element and stores the quotients in the third vector
  \param cVector The vector where the results will be stored
  \param aVector The vector of dividends (values to be divided)
  \param bVector The vector of divisors
  \param num_points The number of values in aVector and bVector to be divided and stored into cVector
*/
static inline void volk_32f_divide_aligned16_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number;

    /* Plain scalar loop: one division per element, in order. */
    for(number = 0; number < num_points; number++){
      *cPtr++ = (*aPtr++) / (*bPtr++);
    }
}
64 23914465 Tom Rondeau
#endif /* LV_HAVE_GENERIC */
65 23914465 Tom Rondeau
66 23914465 Tom Rondeau
67 23914465 Tom Rondeau
68 23914465 Tom Rondeau
69 23914465 Tom Rondeau
#endif /* INCLUDED_VOLK_32f_DIVIDE_ALIGNED16_H */