1 #ifndef INCLUDED_volk_32f_x2_max_32f_a_H
2 #define INCLUDED_volk_32f_x2_max_32f_a_H
/*!
 * \brief Element-wise maximum of two float vectors, SSE implementation.
 *
 * Stores the larger of aVector[i] and bVector[i] in cVector[i] for every
 * i in [0, num_points).
 *
 * The bulk of the data is handled four floats at a time with
 * _mm_load_ps / _mm_store_ps, which are the aligned SSE load/store
 * intrinsics; all three buffers are therefore accessed as 16-byte aligned
 * memory. The remaining (num_points % 4) elements are handled by a scalar
 * loop.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_x2_max_32f_a_sse(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < quarterPoints; number++) {
        const __m128 aVal = _mm_load_ps(aPtr);
        const __m128 bVal = _mm_load_ps(bPtr);
        const __m128 cVal = _mm_max_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal);

        /* Step to the next group of four floats; without this the loop
         * would keep rewriting the first four outputs. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the pointers already sit just past the vectorised part. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
57 static inline void volk_32f_x2_max_32f_neon(
float* cVector,
const float* aVector,
const float* bVector,
unsigned int num_points){
58 unsigned int quarter_points = num_points / 4;
59 float* cPtr = cVector;
60 const float* aPtr = aVector;
61 const float* bPtr= bVector;
62 unsigned int number = 0;
64 float32x4_t a_vec, b_vec, c_vec;
65 for(number = 0; number < quarter_points; number++){
66 a_vec = vld1q_f32(aPtr);
67 b_vec = vld1q_f32(bPtr);
68 c_vec = vmaxq_f32(a_vec, b_vec);
69 vst1q_f32(cPtr, c_vec);
75 for(number = quarter_points*4; number < num_points; number++){
76 const float a = *aPtr++;
77 const float b = *bPtr++;
78 *cPtr++ = ( a > b ? a : b);
84 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Element-wise maximum of two float vectors, portable C implementation.
 *
 * Stores the larger of aVector[i] and bVector[i] in cVector[i] for every
 * i in [0, num_points). When the two values compare equal, or the
 * comparison a > b is false for any other reason, the value from bVector
 * is stored.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of floats to process; 0 leaves cVector untouched.
 */
static inline void volk_32f_x2_max_32f_generic(float* cVector,
                                               const float* aVector,
                                               const float* bVector,
                                               unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
/*!
 * \brief Externally defined implementation that the ORC wrapper below
 *        forwards to. Its definition is not part of this header.
 */
extern void volk_32f_x2_max_32f_a_orc_impl(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           unsigned int num_points);

/*!
 * \brief ORC entry point for the element-wise float maximum kernel.
 *
 * Thin wrapper: passes all four arguments through unchanged to
 * volk_32f_x2_max_32f_a_orc_impl() and does no work of its own.
 *
 * \param cVector    Output buffer, forwarded as-is.
 * \param aVector    First input buffer, forwarded as-is.
 * \param bVector    Second input buffer, forwarded as-is.
 * \param num_points Number of floats to process, forwarded as-is.
 */
static inline void volk_32f_x2_max_32f_u_orc(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    volk_32f_x2_max_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}