GNU Radio 3.7.2 C++ API
volk_32f_invsqrt_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_invsqrt_32f_a_H
2 #define INCLUDED_volk_32f_invsqrt_32f_a_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 #include <math.h>
7 
/*!
  \brief Fast approximation of 1/sqrt(number)

  The IEEE-754 bit pattern of the input is reinterpreted as a 32-bit integer,
  turned into a first guess with the magic constant 0x5f3759df, and refined
  with a single Newton-Raphson step.

  The bits are exchanged through a union of a float and a uint32_t.  Reading
  the float through a `long *` is wrong wherever long is wider than 32 bits:
  the load runs past the float, and the shift then drags a stray neighbouring
  bit into the word that is read back as the result.  The unsigned
  type also keeps the shift and the subtraction free of signed overflow.

  \param number Value whose reciprocal square root is wanted; the approximation
                is only meaningful for positive, finite, normal inputs
  \return Approximation of 1/sqrt(number) after one refinement step
*/
static inline float Q_rsqrt( float number )
{
    union {
        float f;
        uint32_t u;
    } bits;
    const float threehalfs = 1.5F;
    const float x2 = number * 0.5F;

    bits.f = number;
    /* Initial guess: halve the exponent (shift) and negate it (subtract). */
    bits.u = 0x5f3759dfu - ( bits.u >> 1 );

    /* One Newton-Raphson iteration: y = y * (3/2 - (x/2) * y^2). */
    return bits.f * ( threehalfs - ( x2 * bits.f * bits.f ) );
}
24 
25 #ifdef LV_HAVE_SSE
26 #include <xmmintrin.h>
/*!
  \brief Computes an approximate reciprocal square root of each element of the input vector
  \param cVector The vector where the results will be stored
  \param aVector The vector of values to be inverse-square-rooted
  \param num_points The number of values in aVector to process and store into cVector
  \note The aligned SSE load/store intrinsics are used, so both buffers must be 16-byte aligned
*/
static inline void volk_32f_invsqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){
    const unsigned int vectorCount = num_points / 4;
    const unsigned int vectorizedPoints = vectorCount * 4;
    unsigned int idx;

    /* Main body: four floats per iteration using the hardware rsqrt estimate. */
    for(idx = 0; idx < vectorCount; idx++){
        const unsigned int offset = idx * 4;
        const __m128 input = _mm_load_ps(aVector + offset);
        _mm_store_ps(cVector + offset, _mm_rsqrt_ps(input));
    }

    /* Tail: the remaining 0-3 values go through the scalar approximation. */
    for(idx = vectorizedPoints; idx < num_points; idx++){
        cVector[idx] = Q_rsqrt(aVector[idx]);
    }
}
58 #endif /* LV_HAVE_SSE */
59 
60 #ifdef LV_HAVE_GENERIC
/*!
  \brief Computes an approximate reciprocal square root of each element of the input vector
  \param cVector The vector where the results will be stored
  \param aVector The vector of values to be inverse-square-rooted
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_invsqrt_32f_generic(float* cVector, const float* aVector, unsigned int num_points){
    unsigned int idx;

    /* Portable path: every element goes through the scalar approximation. */
    for(idx = 0; idx < num_points; idx++){
        cVector[idx] = Q_rsqrt(aVector[idx]);
    }
}
75 #endif /* LV_HAVE_GENERIC */
76 
77 #endif /* INCLUDED_volk_32f_invsqrt_32f_a_H */
static float Q_rsqrt(float number)
Definition: volk_32f_invsqrt_32f.h:8