GNU Radio 3.7.3 C++ API
volk_32f_s32f_power_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
2 #define INCLUDED_volk_32f_s32f_power_32f_a_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 #include <math.h>
7 
8 #ifdef LV_HAVE_SSE4_1
#include <tmmintrin.h>
#include <smmintrin.h> /* SSE4.1 intrinsics (_mm_blendv_ps) */
10 
11 #ifdef LV_HAVE_LIB_SIMDMATH
12 #include <simdmath.h>
13 #endif /* LV_HAVE_LIB_SIMDMATH */
14 
15 /*!
16  \brief Takes each the input vector value to the specified power and stores the results in the return vector
17  \param cVector The vector where the results will be stored
18  \param aVector The vector of values to be taken to a power
19  \param power The power value to be applied to each data point
20  \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
21 */
22 static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){
23  unsigned int number = 0;
24 
25  float* cPtr = cVector;
26  const float* aPtr = aVector;
27 
28 #ifdef LV_HAVE_LIB_SIMDMATH
29  const unsigned int quarterPoints = num_points / 4;
30  __m128 vPower = _mm_set_ps1(power);
31  __m128 zeroValue = _mm_setzero_ps();
32  __m128 signMask;
33  __m128 negatedValues;
34  __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
35  __m128 onesMask = _mm_set_ps1(1);
36 
37  __m128 aVal, cVal;
38  for(;number < quarterPoints; number++){
39 
40  aVal = _mm_load_ps(aPtr);
41  signMask = _mm_cmplt_ps(aVal, zeroValue);
42  negatedValues = _mm_sub_ps(zeroValue, aVal);
43  aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
44 
45  // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
46  cVal = powf4(aVal, vPower); // Takes each input value to the specified power
47 
48  cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
49 
50  _mm_store_ps(cPtr,cVal); // Store the results back into the C container
51 
52  aPtr += 4;
53  cPtr += 4;
54  }
55 
56  number = quarterPoints * 4;
57 #endif /* LV_HAVE_LIB_SIMDMATH */
58 
59  for(;number < num_points; number++){
60  *cPtr++ = powf((*aPtr++), power);
61  }
62 }
63 #endif /* LV_HAVE_SSE4_1 */
64 
65 #ifdef LV_HAVE_SSE
66 #include <xmmintrin.h>
67 
68 #ifdef LV_HAVE_LIB_SIMDMATH
69 #include <simdmath.h>
70 #endif /* LV_HAVE_LIB_SIMDMATH */
71 
72 /*!
73  \brief Takes each the input vector value to the specified power and stores the results in the return vector
74  \param cVector The vector where the results will be stored
75  \param aVector The vector of values to be taken to a power
76  \param power The power value to be applied to each data point
77  \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
78 */
79 static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){
80  unsigned int number = 0;
81 
82  float* cPtr = cVector;
83  const float* aPtr = aVector;
84 
85 #ifdef LV_HAVE_LIB_SIMDMATH
86  const unsigned int quarterPoints = num_points / 4;
87  __m128 vPower = _mm_set_ps1(power);
88  __m128 zeroValue = _mm_setzero_ps();
89  __m128 signMask;
90  __m128 negatedValues;
91  __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
92  __m128 onesMask = _mm_set_ps1(1);
93 
94  __m128 aVal, cVal;
95  for(;number < quarterPoints; number++){
96 
97  aVal = _mm_load_ps(aPtr);
98  signMask = _mm_cmplt_ps(aVal, zeroValue);
99  negatedValues = _mm_sub_ps(zeroValue, aVal);
100  aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) );
101 
102  // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
103  cVal = powf4(aVal, vPower); // Takes each input value to the specified power
104 
105  cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal);
106 
107  _mm_store_ps(cPtr,cVal); // Store the results back into the C container
108 
109  aPtr += 4;
110  cPtr += 4;
111  }
112 
113  number = quarterPoints * 4;
114 #endif /* LV_HAVE_LIB_SIMDMATH */
115 
116  for(;number < num_points; number++){
117  *cPtr++ = powf((*aPtr++), power);
118  }
119 }
120 #endif /* LV_HAVE_SSE */
121 
122 #ifdef LV_HAVE_GENERIC
123  /*!
124  \brief Takes each the input vector value to the specified power and stores the results in the return vector
125  \param cVector The vector where the results will be stored
126  \param aVector The vector of values to be taken to a power
127  \param power The power value to be applied to each data point
128  \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
129  */
130 static inline void volk_32f_s32f_power_32f_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){
131  float* cPtr = cVector;
132  const float* aPtr = aVector;
133  unsigned int number = 0;
134 
135  for(number = 0; number < num_points; number++){
136  *cPtr++ = powf((*aPtr++), power);
137  }
138 }
139 #endif /* LV_HAVE_GENERIC */
140 
141 
142 
143 
144 #endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */