GNU Radio 3.7.3 C++ API
volk_32f_invsqrt_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_invsqrt_32f_a_H
2 #define INCLUDED_volk_32f_invsqrt_32f_a_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 #include <math.h>
7 #include <string.h>
8 
/*!
  \brief Fast approximate reciprocal square root, 1/sqrt(number)
  \param number The value whose inverse square root is wanted; meaningful results are only produced for positive, finite inputs
  \return An approximation of 1/sqrt(number) after one Newton-Raphson refinement step

  The float is reinterpreted as a 32-bit integer through a union, an initial
  guess is derived from the bit pattern with the classic 0x5f3759df constant,
  and the guess is then refined once.
*/
static inline float Q_rsqrt( float number )
{
  union f32_to_u32 {
    uint32_t i;
    float f;
  } u;
  const float threehalfs = 1.5F;
  const float x2 = number * 0.5F;
  uint32_t half_bits;

  u.f = number;

  /* Halve the bit pattern while keeping the sign bit in place. Working on an
     unsigned value avoids the implementation-defined right shift of a negative
     int and the signed overflow that the subtraction below could otherwise hit
     for negative inputs; the wrap-around result is the same bit pattern a
     two's-complement arithmetic shift and subtraction would give. */
  half_bits = ( u.i >> 1 ) | ( u.i & 0x80000000u );

  /* Initial guess taken directly from the exponent/mantissa bits. */
  u.i = 0x5f3759dfu - half_bits;

  /* One Newton-Raphson step: y = y * (3/2 - (number/2) * y * y). */
  u.f = u.f * ( threehalfs - ( x2 * u.f * u.f ) );

  return u.f;
}
26 
27 #ifdef LV_HAVE_AVX
28 #include <immintrin.h>
/*!
  \brief Computes an approximate inverse square root of every element of aVector and stores the results in cVector (aligned AVX version)
  \param cVector The vector where the results will be stored; written with aligned 256-bit stores
  \param aVector The input vector whose elements are inverse-square-rooted; read with aligned 256-bit loads
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_invsqrt_32f_a_avx(float* cVector, const float* aVector, unsigned int num_points){
  const unsigned int vec_iters = num_points / 8;
  const float* in = aVector;
  float* out = cVector;
  unsigned int i;

  /* Main loop: eight floats per iteration through the AVX reciprocal-sqrt intrinsic. */
  for (i = 0; i < vec_iters; ++i) {
    const __m256 src = _mm256_load_ps(in);
    const __m256 res = _mm256_rsqrt_ps(src);
    _mm256_store_ps(out, res);
    in += 8;
    out += 8;
  }

  /* Remaining (num_points % 8) elements go through the scalar Q_rsqrt helper. */
  for (i = vec_iters * 8; i < num_points; ++i) {
    *out++ = Q_rsqrt(*in++);
  }
}
56 #endif /* LV_HAVE_AVX */
57 
58 #ifdef LV_HAVE_SSE
59 #include <xmmintrin.h>
/*!
  \brief Computes an approximate inverse square root of every element of aVector and stores the results in cVector (aligned SSE version)
  \param cVector The vector where the results will be stored; written with aligned 128-bit stores
  \param aVector The input vector whose elements are inverse-square-rooted; read with aligned 128-bit loads
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_invsqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){
  const unsigned int vec_iters = num_points / 4;
  const float* in = aVector;
  float* out = cVector;
  unsigned int i;

  /* Main loop: four floats per iteration through the SSE reciprocal-sqrt intrinsic. */
  for (i = 0; i < vec_iters; ++i) {
    const __m128 src = _mm_load_ps(in);
    const __m128 res = _mm_rsqrt_ps(src);
    _mm_store_ps(out, res);
    in += 4;
    out += 4;
  }

  /* Remaining (num_points % 4) elements go through the scalar Q_rsqrt helper. */
  for (i = vec_iters * 4; i < num_points; ++i) {
    *out++ = Q_rsqrt(*in++);
  }
}
91 #endif /* LV_HAVE_SSE */
92 
93 #ifdef LV_HAVE_GENERIC
/*!
  \brief Computes an approximate inverse square root of every element of aVector and stores the results in cVector (portable scalar version)
  \param cVector The vector where the results will be stored
  \param aVector The input vector whose elements are inverse-square-rooted
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_invsqrt_32f_generic(float* cVector, const float* aVector, unsigned int num_points){
  const float* in = aVector;
  float* out = cVector;
  unsigned int remaining = num_points;

  /* Walk both buffers in lock step, one element per Q_rsqrt call. */
  while (remaining > 0) {
    *out = Q_rsqrt(*in);
    ++out;
    ++in;
    --remaining;
  }
}
108 #endif /* LV_HAVE_GENERIC */
109 
110 #ifdef LV_HAVE_AVX
111 #include <immintrin.h>
/*!
  \brief Computes an approximate inverse square root of every element of aVector and stores the results in cVector (unaligned AVX version)
  \param cVector The vector where the results will be stored; written with unaligned 256-bit stores
  \param aVector The input vector whose elements are inverse-square-rooted; read with unaligned 256-bit loads
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_invsqrt_32f_u_avx(float* cVector, const float* aVector, unsigned int num_points){
  const unsigned int vec_iters = num_points / 8;
  const float* in = aVector;
  float* out = cVector;
  unsigned int i;

  /* Main loop: eight floats per iteration, using loadu/storeu so no alignment is required. */
  for (i = 0; i < vec_iters; ++i) {
    const __m256 src = _mm256_loadu_ps(in);
    const __m256 res = _mm256_rsqrt_ps(src);
    _mm256_storeu_ps(out, res);
    in += 8;
    out += 8;
  }

  /* Remaining (num_points % 8) elements go through the scalar Q_rsqrt helper. */
  for (i = vec_iters * 8; i < num_points; ++i) {
    *out++ = Q_rsqrt(*in++);
  }
}
139 #endif /* LV_HAVE_AVX */
140 
141 
142 
143 #endif /* INCLUDED_volk_32f_invsqrt_32f_a_H */
static float Q_rsqrt(float number)
Definition: volk_32f_invsqrt_32f.h:9
signed int int32_t
Definition: stdint.h:77