GNU Radio 3.5.3.2 C++ API
volk_32f_s32f_power_32f_a.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
00002 #define INCLUDED_volk_32f_s32f_power_32f_a_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <math.h>
00007 
00008 #ifdef LV_HAVE_SSE4_1
#include <tmmintrin.h>
#include <smmintrin.h> /* SSE4.1 intrinsics: declares _mm_blendv_ps used below */
00010 
00011 #ifdef LV_HAVE_LIB_SIMDMATH
00012 #include <simdmath.h>
00013 #endif /* LV_HAVE_LIB_SIMDMATH */
00014 
00015 /*!
00016   \brief Takes each the input vector value to the specified power and stores the results in the return vector
00017   \param cVector The vector where the results will be stored
00018   \param aVector The vector of values to be taken to a power
00019   \param power The power value to be applied to each data point
00020   \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
00021 */
00022 static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){
00023   unsigned int number = 0;
00024   
00025   float* cPtr = cVector;
00026   const float* aPtr = aVector;
00027 
00028 #ifdef LV_HAVE_LIB_SIMDMATH
00029   const unsigned int quarterPoints = num_points / 4;
00030   __m128 vPower = _mm_set_ps1(power);
00031   __m128 zeroValue = _mm_setzero_ps();
00032   __m128 signMask;
00033   __m128 negatedValues;
00034   __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
00035   __m128 onesMask = _mm_set_ps1(1);
00036   
00037   __m128 aVal, cVal;
00038   for(;number < quarterPoints; number++){
00039     
00040     aVal = _mm_load_ps(aPtr);
00041     signMask = _mm_cmplt_ps(aVal, zeroValue);
00042     negatedValues = _mm_sub_ps(zeroValue, aVal);
00043     aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
00044     
00045     // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
00046     cVal = powf4(aVal, vPower); // Takes each input value to the specified power
00047 
00048     cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
00049 
00050     _mm_store_ps(cPtr,cVal); // Store the results back into the C container
00051     
00052     aPtr += 4;
00053     cPtr += 4;
00054   }
00055 
00056   number = quarterPoints * 4;
00057 #endif /* LV_HAVE_LIB_SIMDMATH */
00058 
00059   for(;number < num_points; number++){
00060     *cPtr++ = powf((*aPtr++), power);
00061   }
00062 }
00063 #endif /* LV_HAVE_SSE4_1 */
00064 
00065 #ifdef LV_HAVE_SSE
00066 #include <xmmintrin.h>
00067 
00068 #ifdef LV_HAVE_LIB_SIMDMATH
00069 #include <simdmath.h>
00070 #endif /* LV_HAVE_LIB_SIMDMATH */
00071 
00072 /*!
00073   \brief Takes each the input vector value to the specified power and stores the results in the return vector
00074   \param cVector The vector where the results will be stored
00075   \param aVector The vector of values to be taken to a power
00076   \param power The power value to be applied to each data point
00077   \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
00078 */
00079 static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){
00080   unsigned int number = 0;
00081   
00082   float* cPtr = cVector;
00083   const float* aPtr = aVector;
00084 
00085 #ifdef LV_HAVE_LIB_SIMDMATH
00086   const unsigned int quarterPoints = num_points / 4;
00087   __m128 vPower = _mm_set_ps1(power);
00088   __m128 zeroValue = _mm_setzero_ps();
00089   __m128 signMask;
00090   __m128 negatedValues;
00091   __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
00092   __m128 onesMask = _mm_set_ps1(1);
00093   
00094   __m128 aVal, cVal;
00095   for(;number < quarterPoints; number++){
00096     
00097     aVal = _mm_load_ps(aPtr);
00098     signMask = _mm_cmplt_ps(aVal, zeroValue);
00099     negatedValues = _mm_sub_ps(zeroValue, aVal);
00100     aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) );
00101     
00102     // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
00103     cVal = powf4(aVal, vPower); // Takes each input value to the specified power
00104 
00105     cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal);
00106 
00107     _mm_store_ps(cPtr,cVal); // Store the results back into the C container
00108     
00109     aPtr += 4;
00110     cPtr += 4;
00111   }
00112 
00113   number = quarterPoints * 4;
00114 #endif /* LV_HAVE_LIB_SIMDMATH */
00115 
00116   for(;number < num_points; number++){
00117     *cPtr++ = powf((*aPtr++), power);
00118   }
00119 }
00120 #endif /* LV_HAVE_SSE */
00121 
00122 #ifdef LV_HAVE_GENERIC
00123   /*!
00124     \brief Takes each the input vector value to the specified power and stores the results in the return vector
00125     \param cVector The vector where the results will be stored
00126     \param aVector The vector of values to be taken to a power
00127     \param power The power value to be applied to each data point
00128     \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
00129   */
00130 static inline void volk_32f_s32f_power_32f_a_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){
00131   float* cPtr = cVector;
00132   const float* aPtr = aVector;
00133   unsigned int number = 0;
00134 
00135   for(number = 0; number < num_points; number++){
00136     *cPtr++ = powf((*aPtr++), power);
00137   }
00138 }
00139 #endif /* LV_HAVE_GENERIC */
00140 
00141 
00142 
00143 
00144 #endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */