1 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
2 #define INCLUDED_volk_32f_s32f_power_32f_a_H
11 #ifdef LV_HAVE_LIB_SIMDMATH
22 static inline void volk_32f_s32f_power_32f_a_sse4_1(
float* cVector,
const float* aVector,
const float power,
unsigned int num_points){
23 unsigned int number = 0;
25 float* cPtr = cVector;
26 const float* aPtr = aVector;
28 #ifdef LV_HAVE_LIB_SIMDMATH
29 const unsigned int quarterPoints = num_points / 4;
30 __m128 vPower = _mm_set_ps1(power);
31 __m128 zeroValue = _mm_setzero_ps();
34 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
35 __m128 onesMask = _mm_set_ps1(1);
38 for(;number < quarterPoints; number++){
40 aVal = _mm_load_ps(aPtr);
41 signMask = _mm_cmplt_ps(aVal, zeroValue);
42 negatedValues = _mm_sub_ps(zeroValue, aVal);
43 aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
46 cVal = powf4(aVal, vPower);
48 cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
50 _mm_store_ps(cPtr,cVal);
56 number = quarterPoints * 4;
59 for(;number < num_points; number++){
60 *cPtr++ = powf((*aPtr++), power);
66 #include <xmmintrin.h>
68 #ifdef LV_HAVE_LIB_SIMDMATH
79 static inline void volk_32f_s32f_power_32f_a_sse(
float* cVector,
const float* aVector,
const float power,
unsigned int num_points){
80 unsigned int number = 0;
82 float* cPtr = cVector;
83 const float* aPtr = aVector;
85 #ifdef LV_HAVE_LIB_SIMDMATH
86 const unsigned int quarterPoints = num_points / 4;
87 __m128 vPower = _mm_set_ps1(power);
88 __m128 zeroValue = _mm_setzero_ps();
91 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
92 __m128 onesMask = _mm_set_ps1(1);
95 for(;number < quarterPoints; number++){
97 aVal = _mm_load_ps(aPtr);
98 signMask = _mm_cmplt_ps(aVal, zeroValue);
99 negatedValues = _mm_sub_ps(zeroValue, aVal);
100 aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) );
103 cVal = powf4(aVal, vPower);
105 cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal);
107 _mm_store_ps(cPtr,cVal);
113 number = quarterPoints * 4;
116 for(;number < num_points; number++){
117 *cPtr++ = powf((*aPtr++), power);
122 #ifdef LV_HAVE_GENERIC
130 static inline void volk_32f_s32f_power_32f_generic(
float* cVector,
const float* aVector,
const float power,
unsigned int num_points){
131 float* cPtr = cVector;
132 const float* aPtr = aVector;
133 unsigned int number = 0;
135 for(number = 0; number < num_points; number++){
136 *cPtr++ = powf((*aPtr++), power);