1 #ifndef INCLUDED_volk_32f_sqrt_32f_a_H
2 #define INCLUDED_volk_32f_sqrt_32f_a_H
16 static inline void volk_32f_sqrt_32f_a_sse(
float* cVector,
const float* aVector,
unsigned int num_points){
17 unsigned int number = 0;
18 const unsigned int quarterPoints = num_points / 4;
20 float* cPtr = cVector;
21 const float* aPtr = aVector;
24 for(;number < quarterPoints; number++){
26 aVal = _mm_load_ps(aPtr);
28 cVal = _mm_sqrt_ps(aVal);
30 _mm_store_ps(cPtr,cVal);
36 number = quarterPoints * 4;
37 for(;number < num_points; number++){
38 *cPtr++ = sqrtf(*aPtr++);
/*!
 * \brief Computes the element-wise square root of a float vector with NEON.
 *
 * Groups of four floats are processed with NEON intrinsics; any remaining
 * one to three elements are handled by sqrtf().
 *
 * \param cVector    Output buffer receiving the square roots.
 * \param aVector    Input buffer.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_sqrt_32f_neon(float* cVector,
                                          const float* aVector,
                                          unsigned int num_points)
{
    const unsigned int quarter_points = num_points / 4;
    float* cPtr = cVector;
    const float* aPtr = aVector;
    unsigned int number;

    for (number = 0; number < quarter_points; number++) {
        const float32x4_t in_vec = vld1q_f32(aPtr);
#if defined(__aarch64__) || defined(_M_ARM64)
        /* AArch64 has a real vector square root; use it so the vector body
         * agrees with the sqrtf() tail below. */
        const float32x4_t out_vec = vsqrtq_f32(in_vec);
#else
        /* 32-bit NEON has no vector sqrt: take the reciprocal estimate of
         * the reciprocal-square-root estimate. Both steps are hardware
         * estimates, so this path is less accurate than sqrtf(). */
        const float32x4_t out_vec = vrecpeq_f32(vrsqrteq_f32(in_vec));
#endif
        vst1q_f32(cPtr, out_vec);

        /* Advance both cursors past the four lanes just processed. */
        aPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements the vector loop skipped. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *cPtr++ = sqrtf(*aPtr++);
    }
}
74 #ifdef LV_HAVE_GENERIC
81 static inline void volk_32f_sqrt_32f_generic(
float* cVector,
const float* aVector,
unsigned int num_points){
82 float* cPtr = cVector;
83 const float* aPtr = aVector;
84 unsigned int number = 0;
86 for(number = 0; number < num_points; number++){
87 *cPtr++ = sqrtf(*aPtr++);
/* Implemented outside this header; the name suggests the ORC-generated
 * kernel -- confirm against the build. */
extern void volk_32f_sqrt_32f_a_orc_impl(float* cVector,
                                         const float* aVector,
                                         unsigned int num_points);

/*!
 * \brief Square-root kernel entry point that forwards to the ORC implementation.
 *
 * Passes its arguments unchanged to volk_32f_sqrt_32f_a_orc_impl().
 *
 * \param cVector    Output buffer, forwarded as the first argument.
 * \param aVector    Input buffer, forwarded as the second argument.
 * \param num_points Number of floats to process, forwarded as the third argument.
 */
static inline void volk_32f_sqrt_32f_u_orc(float* cVector,
                                           const float* aVector,
                                           unsigned int num_points)
{
    volk_32f_sqrt_32f_a_orc_impl(cVector, aVector, num_points);
}