GNU Radio Manual and C++ API Reference  3.7.5.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32f_sqrt_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_sqrt_32f_a_H
2 #define INCLUDED_volk_32f_sqrt_32f_a_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 #include <math.h>
7 
8 #ifdef LV_HAVE_SSE
9 #include <xmmintrin.h>
10 /*!
11  \brief Sqrts the two input vectors and store their results in the third vector
12  \param cVector The vector where the results will be stored
13  \param aVector One of the vectors to be sqrted
14  \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector
15 */
16 static inline void volk_32f_sqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){
17  unsigned int number = 0;
18  const unsigned int quarterPoints = num_points / 4;
19 
20  float* cPtr = cVector;
21  const float* aPtr = aVector;
22 
23  __m128 aVal, cVal;
24  for(;number < quarterPoints; number++){
25 
26  aVal = _mm_load_ps(aPtr);
27 
28  cVal = _mm_sqrt_ps(aVal);
29 
30  _mm_store_ps(cPtr,cVal); // Store the results back into the C container
31 
32  aPtr += 4;
33  cPtr += 4;
34  }
35 
36  number = quarterPoints * 4;
37  for(;number < num_points; number++){
38  *cPtr++ = sqrtf(*aPtr++);
39  }
40 }
41 #endif /* LV_HAVE_SSE */
42 
43 #ifdef LV_HAVE_NEON
44 #include <arm_neon.h>
45 
/*!
  \brief Computes the element-wise square root of a float vector (NEON version)

  On AArch64 the vector path uses the vsqrtq_f32 square-root intrinsic. On
  32-bit ARM NEON, which has no vector square-root instruction, the vector path
  falls back to a reciprocal of the reciprocal-square-root estimate; that
  fallback is only an approximation and is noticeably less accurate than
  sqrtf(). The scalar tail always uses sqrtf().

  \param cVector Output vector receiving sqrt(aVector[i]) for each element
  \param aVector Input vector whose elements are square-rooted
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_sqrt_32f_neon(float* cVector, const float* aVector, unsigned int num_points){
  float* cPtr = cVector;
  const float* aPtr = aVector;
  unsigned int number = 0;
  const unsigned int quarter_points = num_points / 4;
  float32x4_t in_vec, out_vec;

  for(number = 0; number < quarter_points; number++){
    in_vec = vld1q_f32(aPtr);
#if defined(__aarch64__) || defined(_M_ARM64)
    /* AArch64 provides a true vector square root. */
    out_vec = vsqrtq_f32(in_vec);
#else
    /* ARMv7 NEON: sqrt(x) ~= 1 / (1 / sqrt(x)), built from two estimate
       instructions, so the result is a low-precision approximation. */
    out_vec = vrecpeq_f32(vrsqrteq_f32(in_vec));
#endif
    vst1q_f32(cPtr, out_vec);
    aPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail for the remaining (num_points % 4) elements. */
  for(number = quarter_points * 4; number < num_points; number++){
    *cPtr++ = sqrtf(*aPtr++);
  }
}
72 #endif /* LV_HAVE_NEON */
73 
74 #ifdef LV_HAVE_GENERIC
75 /*!
76  \brief Sqrts the two input vectors and store their results in the third vector
77  \param cVector The vector where the results will be stored
78  \param aVector One of the vectors to be sqrted
79  \param num_points The number of values in aVector and bVector to be sqrted together and stored into cVector
80 */
81 static inline void volk_32f_sqrt_32f_generic(float* cVector, const float* aVector, unsigned int num_points){
82  float* cPtr = cVector;
83  const float* aPtr = aVector;
84  unsigned int number = 0;
85 
86  for(number = 0; number < num_points; number++){
87  *cPtr++ = sqrtf(*aPtr++);
88  }
89 }
90 #endif /* LV_HAVE_GENERIC */
91 
92 #ifdef LV_HAVE_ORC
/* Square-root kernel implemented outside this header; declared here so the
   wrapper below can call it. */
extern void volk_32f_sqrt_32f_a_orc_impl(float *, const float*, unsigned int);

/*!
  \brief Computes the element-wise square root of a float vector (ORC version)

  Thin wrapper that forwards all arguments unchanged to
  volk_32f_sqrt_32f_a_orc_impl().

  \param cVector Output vector where the results will be stored
  \param aVector Input vector whose elements are square-rooted
  \param num_points The number of values in aVector to process and store into cVector
*/
static inline void volk_32f_sqrt_32f_u_orc(float* cVector,
                                           const float* aVector,
                                           unsigned int num_points)
{
  volk_32f_sqrt_32f_a_orc_impl(cVector, aVector, num_points);
}
103 
104 #endif /* LV_HAVE_ORC */
105 
106 
107 
108 #endif /* INCLUDED_volk_32f_sqrt_32f_a_H */