1 #ifndef INCLUDED_volk_32f_invsqrt_32f_a_H
2 #define INCLUDED_volk_32f_invsqrt_32f_a_H
/*
 * Q_rsqrt: scalar helper that, judging by its name and the invsqrt kernels
 * in this header, approximates 1/sqrt(number) using an integer bit-level
 * initial estimate followed by one multiplicative refinement.
 *
 * number - input value.
 *
 * NOTE(review): this listing is missing lines. The declarations and
 * initialisation of `u` and `x2`, and the return statement, are not visible;
 * confirm against the complete header before relying on these notes.
 */
9 static inline float Q_rsqrt(
float number )
12 const float threehalfs = 1.5F;
/* Initial estimate: magic constant minus the integer member u.i shifted right
 * by one. `u` appears to overlay a float and an integer (it has both .f and
 * .i members) - TODO confirm its declaration. */
20 u.i = 0x5f3759df - ( u.i >> 1 );
/* One refinement step of the form y * (1.5 - x2 * y * y).
 * Presumably x2 is number * 0.5 - TODO confirm, x2 is set on a line not shown. */
21 u.f = u.f * ( threehalfs - ( x2 * u.f * u.f ) );
28 #include <immintrin.h>
/*
 * AVX kernel, aligned variant: stores the approximate reciprocal square root
 * of aVector's floats into cVector, eight floats per vector iteration.
 *
 * cVector    - output buffer; written with _mm256_store_ps (aligned store).
 * aVector    - input buffer; read with _mm256_load_ps (aligned load).
 * num_points - number of floats to process.
 *
 * NOTE(review): this listing is missing lines. The declarations of aVal/cVal,
 * the per-iteration advance of aPtr/cPtr and the body of the tail loop are
 * not visible; confirm against the complete header.
 */
35 static inline void volk_32f_invsqrt_32f_a_avx(
float* cVector,
const float* aVector,
unsigned int num_points){
36 unsigned int number = 0;
/* Number of full 8-float groups. */
37 const unsigned int eighthPoints = num_points / 8;
39 float* cPtr = cVector;
40 const float* aPtr = aVector;
/* Vector loop: one 256-bit load, rsqrt estimate and store per iteration. */
42 for (; number < eighthPoints; number++)
44 aVal = _mm256_load_ps(aPtr);
/* _mm256_rsqrt_ps is an approximation, not a full-precision 1/sqrt. */
45 cVal = _mm256_rsqrt_ps(aVal);
46 _mm256_store_ps(cPtr, cVal);
/* Tail: resume at the first index not covered by the vector loop and walk the
 * remaining num_points % 8 elements (loop body not visible here). */
51 number = eighthPoints * 8;
52 for(;number < num_points; number++)
59 #include <xmmintrin.h>
/*
 * SSE kernel, aligned variant: stores the approximate reciprocal square root
 * of aVector's floats into cVector, four floats per vector iteration.
 *
 * cVector    - output buffer; written with _mm_store_ps (aligned store).
 * aVector    - input buffer; read with _mm_load_ps (aligned load).
 * num_points - number of floats to process.
 *
 * NOTE(review): this listing is missing lines. The declarations of aVal/cVal,
 * the per-iteration advance of aPtr/cPtr and the body of the tail loop are
 * not visible; confirm against the complete header.
 */
66 static inline void volk_32f_invsqrt_32f_a_sse(
float* cVector,
const float* aVector,
unsigned int num_points){
67 unsigned int number = 0;
/* Number of full 4-float groups. */
68 const unsigned int quarterPoints = num_points / 4;
70 float* cPtr = cVector;
71 const float* aPtr = aVector;
/* Vector loop: one 128-bit load, rsqrt estimate and store per iteration. */
74 for(;number < quarterPoints; number++){
76 aVal = _mm_load_ps(aPtr);
/* _mm_rsqrt_ps is an approximation, not a full-precision 1/sqrt. */
78 cVal = _mm_rsqrt_ps(aVal);
80 _mm_store_ps(cPtr,cVal);
/* Tail: resume at the first index not covered by the vector loop and walk the
 * remaining num_points % 4 elements (loop body not visible here). */
86 number = quarterPoints * 4;
87 for(;number < num_points; number++){
/*
 * NEON kernel: stores a reciprocal square-root estimate of aVector's floats
 * into cVector, four floats per vector iteration.
 *
 * cVector    - output buffer; written with vst1q_f32.
 * aVector    - input buffer; read with vld1q_f32.
 * num_points - number of floats to process.
 *
 * NOTE(review): this listing is missing lines. The declaration of `number`,
 * the per-iteration advance of aPtr/cPtr and the body of the tail loop are
 * not visible; confirm against the complete header.
 */
101 static inline void volk_32f_invsqrt_32f_neon(
float* cVector,
const float* aVector,
unsigned int num_points){
/* Number of full 4-float groups. */
103 const unsigned int quarter_points = num_points / 4;
105 float* cPtr = cVector;
106 const float* aPtr = aVector;
107 float32x4_t a_val, c_val;
/* Vector loop: one 4-lane load, estimate and store per iteration. */
108 for (number = 0; number < quarter_points; ++number)
110 a_val = vld1q_f32(aPtr);
/* vrsqrteq_f32 yields an estimate only; no refinement step is visible in
 * this listing. */
111 c_val = vrsqrteq_f32(a_val);
112 vst1q_f32(cPtr, c_val);
/* Tail: walk the remaining num_points % 4 elements (loop body not visible here). */
117 for(number=quarter_points * 4;number < num_points; number++)
124 #ifdef LV_HAVE_GENERIC
/*
 * Generic (non-SIMD) kernel: visits every one of the num_points elements in a
 * single scalar loop.
 *
 * cVector    - output buffer (aliased by cPtr).
 * aVector    - input buffer (aliased by aPtr).
 * num_points - number of floats to process.
 *
 * NOTE(review): the loop body is not visible in this listing. Presumably each
 * output is computed with Q_rsqrt on the matching input - TODO confirm
 * against the complete header.
 */
131 static inline void volk_32f_invsqrt_32f_generic(
float* cVector,
const float* aVector,
unsigned int num_points){
132 float* cPtr = cVector;
133 const float* aPtr = aVector;
134 unsigned int number = 0;
/* One iteration per element; there is no vector/tail split in this variant. */
135 for(number = 0; number < num_points; number++){
142 #include <immintrin.h>
/*
 * AVX kernel, unaligned variant: stores the approximate reciprocal square
 * root of aVector's floats into cVector, eight floats per vector iteration.
 * Uses the unaligned load/store intrinsics.
 *
 * cVector    - output buffer; written with _mm256_storeu_ps.
 * aVector    - input buffer; read with _mm256_loadu_ps.
 * num_points - number of floats to process.
 *
 * NOTE(review): this listing is missing lines. The declarations of aVal/cVal,
 * the per-iteration advance of aPtr/cPtr and the body of the tail loop are
 * not visible; confirm against the complete header.
 */
149 static inline void volk_32f_invsqrt_32f_u_avx(
float* cVector,
const float* aVector,
unsigned int num_points){
150 unsigned int number = 0;
/* Number of full 8-float groups. */
151 const unsigned int eighthPoints = num_points / 8;
153 float* cPtr = cVector;
154 const float* aPtr = aVector;
/* Vector loop: one 256-bit unaligned load, rsqrt estimate and unaligned store
 * per iteration. */
156 for (; number < eighthPoints; number++)
158 aVal = _mm256_loadu_ps(aPtr);
/* _mm256_rsqrt_ps is an approximation, not a full-precision 1/sqrt. */
159 cVal = _mm256_rsqrt_ps(aVal);
160 _mm256_storeu_ps(cPtr, cVal);
/* Tail: resume at the first index not covered by the vector loop and walk the
 * remaining num_points % 8 elements (loop body not visible here). */
165 number = eighthPoints * 8;
166 for(;number < num_points; number++)
static float Q_rsqrt(float number)
Definition: volk_32f_invsqrt_32f.h:9
signed int int32_t
Definition: stdint.h:77