/* GNU Radio 3.6.5 C++ API */
00001 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H 00002 #define INCLUDED_volk_32f_s32f_power_32f_a_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <math.h> 00007 00008 #ifdef LV_HAVE_SSE4_1 00009 #include <tmmintrin.h> 00010 00011 #ifdef LV_HAVE_LIB_SIMDMATH 00012 #include <simdmath.h> 00013 #endif /* LV_HAVE_LIB_SIMDMATH */ 00014 00015 /*! 00016 \brief Takes each the input vector value to the specified power and stores the results in the return vector 00017 \param cVector The vector where the results will be stored 00018 \param aVector The vector of values to be taken to a power 00019 \param power The power value to be applied to each data point 00020 \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector 00021 */ 00022 static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){ 00023 unsigned int number = 0; 00024 00025 float* cPtr = cVector; 00026 const float* aPtr = aVector; 00027 00028 #ifdef LV_HAVE_LIB_SIMDMATH 00029 const unsigned int quarterPoints = num_points / 4; 00030 __m128 vPower = _mm_set_ps1(power); 00031 __m128 zeroValue = _mm_setzero_ps(); 00032 __m128 signMask; 00033 __m128 negatedValues; 00034 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power)); 00035 __m128 onesMask = _mm_set_ps1(1); 00036 00037 __m128 aVal, cVal; 00038 for(;number < quarterPoints; number++){ 00039 00040 aVal = _mm_load_ps(aPtr); 00041 signMask = _mm_cmplt_ps(aVal, zeroValue); 00042 negatedValues = _mm_sub_ps(zeroValue, aVal); 00043 aVal = _mm_blendv_ps(aVal, negatedValues, signMask); 00044 00045 // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after 00046 cVal = powf4(aVal, vPower); // Takes each input value to the specified power 00047 00048 cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal); 00049 00050 _mm_store_ps(cPtr,cVal); 
// Store the results back into the C container 00051 00052 aPtr += 4; 00053 cPtr += 4; 00054 } 00055 00056 number = quarterPoints * 4; 00057 #endif /* LV_HAVE_LIB_SIMDMATH */ 00058 00059 for(;number < num_points; number++){ 00060 *cPtr++ = powf((*aPtr++), power); 00061 } 00062 } 00063 #endif /* LV_HAVE_SSE4_1 */ 00064 00065 #ifdef LV_HAVE_SSE 00066 #include <xmmintrin.h> 00067 00068 #ifdef LV_HAVE_LIB_SIMDMATH 00069 #include <simdmath.h> 00070 #endif /* LV_HAVE_LIB_SIMDMATH */ 00071 00072 /*! 00073 \brief Takes each the input vector value to the specified power and stores the results in the return vector 00074 \param cVector The vector where the results will be stored 00075 \param aVector The vector of values to be taken to a power 00076 \param power The power value to be applied to each data point 00077 \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector 00078 */ 00079 static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){ 00080 unsigned int number = 0; 00081 00082 float* cPtr = cVector; 00083 const float* aPtr = aVector; 00084 00085 #ifdef LV_HAVE_LIB_SIMDMATH 00086 const unsigned int quarterPoints = num_points / 4; 00087 __m128 vPower = _mm_set_ps1(power); 00088 __m128 zeroValue = _mm_setzero_ps(); 00089 __m128 signMask; 00090 __m128 negatedValues; 00091 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power)); 00092 __m128 onesMask = _mm_set_ps1(1); 00093 00094 __m128 aVal, cVal; 00095 for(;number < quarterPoints; number++){ 00096 00097 aVal = _mm_load_ps(aPtr); 00098 signMask = _mm_cmplt_ps(aVal, zeroValue); 00099 negatedValues = _mm_sub_ps(zeroValue, aVal); 00100 aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) ); 00101 00102 // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after 00103 cVal = powf4(aVal, vPower); // Takes each input 
value to the specified power 00104 00105 cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal); 00106 00107 _mm_store_ps(cPtr,cVal); // Store the results back into the C container 00108 00109 aPtr += 4; 00110 cPtr += 4; 00111 } 00112 00113 number = quarterPoints * 4; 00114 #endif /* LV_HAVE_LIB_SIMDMATH */ 00115 00116 for(;number < num_points; number++){ 00117 *cPtr++ = powf((*aPtr++), power); 00118 } 00119 } 00120 #endif /* LV_HAVE_SSE */ 00121 00122 #ifdef LV_HAVE_GENERIC 00123 /*! 00124 \brief Takes each the input vector value to the specified power and stores the results in the return vector 00125 \param cVector The vector where the results will be stored 00126 \param aVector The vector of values to be taken to a power 00127 \param power The power value to be applied to each data point 00128 \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector 00129 */ 00130 static inline void volk_32f_s32f_power_32f_a_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){ 00131 float* cPtr = cVector; 00132 const float* aPtr = aVector; 00133 unsigned int number = 0; 00134 00135 for(number = 0; number < num_points; number++){ 00136 *cPtr++ = powf((*aPtr++), power); 00137 } 00138 } 00139 #endif /* LV_HAVE_GENERIC */ 00140 00141 00142 00143 00144 #endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */