GNU Radio Manual and C++ API Reference  3.7.5.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32f_invsqrt_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_invsqrt_32f_a_H
2 #define INCLUDED_volk_32f_invsqrt_32f_a_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 #include <math.h>
7 #include <string.h>
8 
/*!
  \brief Fast approximate reciprocal square root, i.e. 1/sqrt(number), of one float

  The bit pattern of \p number is reinterpreted as a 32-bit integer, an initial
  guess is derived from the constant 0x5f3759df, and that guess is refined with
  a single Newton-Raphson step.

  The integer arithmetic is performed on an unsigned type so that the shift and
  the subtraction are well defined for every bit pattern (a signed type made
  the shift implementation-defined and the subtraction able to overflow when
  the sign bit of the input was set). For positive inputs the result is
  bit-for-bit the same as with the signed formulation.

  Assumes float is a 32-bit IEEE-754 value; this is not checked here.

  \param number Value whose reciprocal square root is wanted. Zero, negative
                and non-finite inputs are not special-cased.
  \return Approximation of 1/sqrt(number)
*/
static inline float Q_rsqrt( float number )
{
  const float three_halves = 1.5F;
  const float half_number = number * 0.5F;
  union {
    uint32_t bits;
    float value;
  } pun;

  pun.value = number;

  /* Initial estimate: shift the bit pattern right by one and subtract it
     from the magic constant. */
  pun.bits = 0x5f3759dfU - ( pun.bits >> 1 );

  /* One Newton-Raphson refinement: y = y * (3/2 - (x/2) * y * y). */
  pun.value = pun.value * ( three_halves - ( half_number * pun.value * pun.value ) );

  return pun.value;
}
26 
27 #ifdef LV_HAVE_AVX
28 #include <immintrin.h>
/*!
  \brief Writes an approximate reciprocal square root (1/sqrt(x)) of each input element to the output vector (AVX, aligned load/store)

  Complete groups of eight floats are processed with _mm256_load_ps,
  _mm256_rsqrt_ps and _mm256_store_ps. Elements left over after the last
  complete group are computed one at a time with Q_rsqrt, so their precision
  may differ from that of the vectorized part.

  \param cVector The vector where the results will be stored
  \param aVector The vector of input values
  \param num_points The number of values read from aVector and written to cVector
  \note The aligned load/store intrinsics expect 32-byte aligned addresses.
*/
static inline void volk_32f_invsqrt_32f_a_avx(float* cVector, const float* aVector, unsigned int num_points){
  const unsigned int vector_len = (num_points / 8) * 8;
  unsigned int idx;

  /* Vectorized part: eight results per iteration. */
  for (idx = 0; idx < vector_len; idx += 8) {
    const __m256 input = _mm256_load_ps(aVector + idx);
    _mm256_store_ps(cVector + idx, _mm256_rsqrt_ps(input));
  }

  /* Scalar tail: whatever did not fill a group of eight. */
  for (; idx < num_points; ++idx) {
    cVector[idx] = Q_rsqrt(aVector[idx]);
  }
}
56 #endif /* LV_HAVE_AVX */
57 
58 #ifdef LV_HAVE_SSE
59 #include <xmmintrin.h>
/*!
  \brief Writes an approximate reciprocal square root (1/sqrt(x)) of each input element to the output vector (SSE, aligned load/store)

  Complete groups of four floats are processed with _mm_load_ps, _mm_rsqrt_ps
  and _mm_store_ps. Elements left over after the last complete group are
  computed one at a time with Q_rsqrt, so their precision may differ from that
  of the vectorized part.

  \param cVector The vector where the results will be stored
  \param aVector The vector of input values
  \param num_points The number of values read from aVector and written to cVector
  \note The aligned load/store intrinsics expect 16-byte aligned addresses.
*/
static inline void volk_32f_invsqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){
  const unsigned int vector_len = (num_points / 4) * 4;
  unsigned int idx;

  /* Vectorized part: four results per iteration. */
  for (idx = 0; idx < vector_len; idx += 4) {
    const __m128 input = _mm_load_ps(aVector + idx);
    _mm_store_ps(cVector + idx, _mm_rsqrt_ps(input));
  }

  /* Scalar tail: whatever did not fill a group of four. */
  for (; idx < num_points; ++idx) {
    cVector[idx] = Q_rsqrt(aVector[idx]);
  }
}
91 #endif /* LV_HAVE_SSE */
92 
93 #ifdef LV_HAVE_NEON
94 #include <arm_neon.h>
95 /*!
96 \brief Sqrts the two input vectors and store their results in the third vector
97 \param cVector The vector where the results will be stored
98 \param aVector One of the vectors to be invsqrted
99 \param num_points The number of values in aVector and bVector to be invsqrted together and stored into cVector
100 */
101 static inline void volk_32f_invsqrt_32f_neon(float* cVector, const float* aVector, unsigned int num_points){
102  unsigned int number;
103  const unsigned int quarter_points = num_points / 4;
104 
105  float* cPtr = cVector;
106  const float* aPtr = aVector;
107  float32x4_t a_val, c_val;
108  for (number = 0; number < quarter_points; ++number)
109  {
110  a_val = vld1q_f32(aPtr);
111  c_val = vrsqrteq_f32(a_val);
112  vst1q_f32(cPtr, c_val);
113  aPtr += 4;
114  cPtr += 4;
115  }
116 
117  for(number=quarter_points * 4;number < num_points; number++)
118  *cPtr++ = Q_rsqrt(*aPtr++);
119 
120 }
121 #endif /* LV_HAVE_NEON */
122 
123 
124 #ifdef LV_HAVE_GENERIC
/*!
  \brief Writes an approximate reciprocal square root (1/sqrt(x)) of each input element to the output vector (portable scalar version)

  Every element is computed with Q_rsqrt.

  \param cVector The vector where the results will be stored
  \param aVector The vector of input values
  \param num_points The number of values read from aVector and written to cVector
*/
static inline void volk_32f_invsqrt_32f_generic(float* cVector, const float* aVector, unsigned int num_points){
  unsigned int idx;

  for (idx = 0; idx < num_points; ++idx) {
    cVector[idx] = Q_rsqrt(aVector[idx]);
  }
}
139 #endif /* LV_HAVE_GENERIC */
140 
141 #ifdef LV_HAVE_AVX
142 #include <immintrin.h>
/*!
  \brief Writes an approximate reciprocal square root (1/sqrt(x)) of each input element to the output vector (AVX, unaligned load/store)

  Complete groups of eight floats are processed with _mm256_loadu_ps,
  _mm256_rsqrt_ps and _mm256_storeu_ps. Elements left over after the last
  complete group are computed one at a time with Q_rsqrt, so their precision
  may differ from that of the vectorized part.

  \param cVector The vector where the results will be stored
  \param aVector The vector of input values
  \param num_points The number of values read from aVector and written to cVector
*/
static inline void volk_32f_invsqrt_32f_u_avx(float* cVector, const float* aVector, unsigned int num_points){
  const unsigned int vector_len = (num_points / 8) * 8;
  unsigned int idx;

  /* Vectorized part: eight results per iteration. */
  for (idx = 0; idx < vector_len; idx += 8) {
    const __m256 input = _mm256_loadu_ps(aVector + idx);
    _mm256_storeu_ps(cVector + idx, _mm256_rsqrt_ps(input));
  }

  /* Scalar tail: whatever did not fill a group of eight. */
  for (; idx < num_points; ++idx) {
    cVector[idx] = Q_rsqrt(aVector[idx]);
  }
}
170 #endif /* LV_HAVE_AVX */
171 
172 
173 
174 #endif /* INCLUDED_volk_32f_invsqrt_32f_a_H */
static float Q_rsqrt(float number)
Definition: volk_32f_invsqrt_32f.h:9
signed int int32_t
Definition: stdint.h:77