doc/doxygen-3.6/volk__32f__s32f__power__32f__a_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
00002 #define INCLUDED_volk_32f_s32f_power_32f_a_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <math.h>
00007
00008 #ifdef LV_HAVE_SSE4_1
00009 #include <tmmintrin.h>
00010
00011 #ifdef LV_HAVE_LIB_SIMDMATH
00012 #include <simdmath.h>
00013 #endif /* LV_HAVE_LIB_SIMDMATH */
00014
00015 /*!
00016   \brief Takes each the input vector value to the specified power and stores the results in the return vector
00017   \param cVector The vector where the results will be stored
00018   \param aVector The vector of values to be taken to a power
00019   \param power The power value to be applied to each data point
00020   \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
00021 */
00022 static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float* aVector, const float power, unsigned int num_points){
00023   unsigned int number = 0;
00024
00025   float* cPtr = cVector;
00026   const float* aPtr = aVector;
00027
00028 #ifdef LV_HAVE_LIB_SIMDMATH
00029   const unsigned int quarterPoints = num_points / 4;
00030   __m128 vPower = _mm_set_ps1(power);
00031   __m128 zeroValue = _mm_setzero_ps();
00032   __m128 signMask;
00033   __m128 negatedValues;
00034   __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
00035   __m128 onesMask = _mm_set_ps1(1);
00036
00037   __m128 aVal, cVal;
00038   for(;number < quarterPoints; number++){
00039
00040     aVal = _mm_load_ps(aPtr);
00041     signMask = _mm_cmplt_ps(aVal, zeroValue);
00042     negatedValues = _mm_sub_ps(zeroValue, aVal);
00043     aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
00044
00045     // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
00046     cVal = powf4(aVal, vPower); // Takes each input value to the specified power
00047
00048     cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
00049
00050     _mm_store_ps(cPtr,cVal); // Store the results back into the C container
00051
00052     aPtr += 4;
00053     cPtr += 4;
00054   }
00055
00056   number = quarterPoints * 4;
00057 #endif /* LV_HAVE_LIB_SIMDMATH */
00058
00059   for(;number < num_points; number++){
00060     *cPtr++ = powf((*aPtr++), power);
00061   }
00062 }
00063 #endif /* LV_HAVE_SSE4_1 */
00064
00065 #ifdef LV_HAVE_SSE
00066 #include <xmmintrin.h>
00067
00068 #ifdef LV_HAVE_LIB_SIMDMATH
00069 #include <simdmath.h>
00070 #endif /* LV_HAVE_LIB_SIMDMATH */
00071
00072 /*!
00073   \brief Takes each the input vector value to the specified power and stores the results in the return vector
00074   \param cVector The vector where the results will be stored
00075   \param aVector The vector of values to be taken to a power
00076   \param power The power value to be applied to each data point
00077   \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
00078 */
00079 static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aVector, const float power, unsigned int num_points){
00080   unsigned int number = 0;
00081
00082   float* cPtr = cVector;
00083   const float* aPtr = aVector;
00084
00085 #ifdef LV_HAVE_LIB_SIMDMATH
00086   const unsigned int quarterPoints = num_points / 4;
00087   __m128 vPower = _mm_set_ps1(power);
00088   __m128 zeroValue = _mm_setzero_ps();
00089   __m128 signMask;
00090   __m128 negatedValues;
00091   __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
00092   __m128 onesMask = _mm_set_ps1(1);
00093
00094   __m128 aVal, cVal;
00095   for(;number < quarterPoints; number++){
00096
00097     aVal = _mm_load_ps(aPtr);
00098     signMask = _mm_cmplt_ps(aVal, zeroValue);
00099     negatedValues = _mm_sub_ps(zeroValue, aVal);
00100     aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) );
00101
00102     // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
00103     cVal = powf4(aVal, vPower); // Takes each input value to the specified power
00104
00105     cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal);
00106
00107     _mm_store_ps(cPtr,cVal); // Store the results back into the C container
00108
00109     aPtr += 4;
00110     cPtr += 4;
00111   }
00112
00113   number = quarterPoints * 4;
00114 #endif /* LV_HAVE_LIB_SIMDMATH */
00115
00116   for(;number < num_points; number++){
00117     *cPtr++ = powf((*aPtr++), power);
00118   }
00119 }
00120 #endif /* LV_HAVE_SSE */
00121
00122 #ifdef LV_HAVE_GENERIC
00123   /*!
00124     \brief Takes each the input vector value to the specified power and stores the results in the return vector
00125     \param cVector The vector where the results will be stored
00126     \param aVector The vector of values to be taken to a power
00127     \param power The power value to be applied to each data point
00128     \param num_points The number of values in aVector to be taken to the specified power level and stored into cVector
00129   */
00130 static inline void volk_32f_s32f_power_32f_a_generic(float* cVector, const float* aVector, const float power, unsigned int num_points){
00131   float* cPtr = cVector;
00132   const float* aPtr = aVector;
00133   unsigned int number = 0;
00134
00135   for(number = 0; number < num_points; number++){
00136     *cPtr++ = powf((*aPtr++), power);
00137   }
00138 }
00139 #endif /* LV_HAVE_GENERIC */
00140
00141
00142
00143
00144 #endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */