doc/doxygen-3.6/volk__32f__s32f__stddev__32f__a_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_stddev_32f_a_H
00002 #define INCLUDED_volk_32f_s32f_stddev_32f_a_H
00003
00004 #include <volk/volk_common.h>
00005 #include <inttypes.h>
00006 #include <stdio.h>
00007 #include <math.h>
00008
00009 #ifdef LV_HAVE_SSE4_1
00010 #include <smmintrin.h>
00011 /*!
00012   \brief Calculates the standard deviation of the input buffer using the supplied mean
00013   \param stddev The calculated standard deviation
00014   \param inputBuffer The buffer of points to calculate the std deviation for
00015   \param mean The mean of the input buffer
00016   \param num_points The number of values in input buffer to used in the stddev calculation
00017 */
00018 static inline void volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
00019   float returnValue = 0;
00020   if(num_points > 0){
00021     unsigned int number = 0;
00022     const unsigned int sixteenthPoints = num_points / 16;
00023
00024     const float* aPtr = inputBuffer;
00025
00026     __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
00027
00028     __m128 squareAccumulator = _mm_setzero_ps();
00029     __m128 aVal1, aVal2, aVal3, aVal4;
00030     __m128 cVal1, cVal2, cVal3, cVal4;
00031     for(;number < sixteenthPoints; number++) {
00032       aVal1 = _mm_load_ps(aPtr); aPtr += 4;
00033       cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
00034
00035       aVal2 = _mm_load_ps(aPtr); aPtr += 4;
00036       cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
00037
00038       aVal3 = _mm_load_ps(aPtr); aPtr += 4;
00039       cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
00040
00041       aVal4 = _mm_load_ps(aPtr); aPtr += 4;
00042       cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
00043
00044       cVal1 = _mm_or_ps(cVal1, cVal2);
00045       cVal3 = _mm_or_ps(cVal3, cVal4);
00046       cVal1 = _mm_or_ps(cVal1, cVal3);
00047
00048       squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
00049     }
00050     _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
00051     returnValue = squareBuffer[0];
00052     returnValue += squareBuffer[1];
00053     returnValue += squareBuffer[2];
00054     returnValue += squareBuffer[3];
00055
00056     number = sixteenthPoints * 16;
00057     for(;number < num_points; number++){
00058       returnValue += (*aPtr) * (*aPtr);
00059       aPtr++;
00060     }
00061     returnValue /= num_points;
00062     returnValue -= (mean * mean);
00063     returnValue = sqrtf(returnValue);
00064   }
00065   *stddev = returnValue;
00066 }
00067 #endif /* LV_HAVE_SSE4_1 */
00068
00069 #ifdef LV_HAVE_SSE
00070 #include <xmmintrin.h>
00071 /*!
00072   \brief Calculates the standard deviation of the input buffer using the supplied mean
00073   \param stddev The calculated standard deviation
00074   \param inputBuffer The buffer of points to calculate the std deviation for
00075   \param mean The mean of the input buffer
00076   \param num_points The number of values in input buffer to used in the stddev calculation
00077 */
00078 static inline void volk_32f_s32f_stddev_32f_a_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
00079   float returnValue = 0;
00080   if(num_points > 0){
00081     unsigned int number = 0;
00082     const unsigned int quarterPoints = num_points / 4;
00083
00084     const float* aPtr = inputBuffer;
00085
00086     __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
00087
00088     __m128 squareAccumulator = _mm_setzero_ps();
00089     __m128 aVal = _mm_setzero_ps();
00090     for(;number < quarterPoints; number++) {
00091       aVal = _mm_load_ps(aPtr);                     // aVal = x
00092       aVal = _mm_mul_ps(aVal, aVal);                // squareAccumulator += x^2
00093       squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
00094       aPtr += 4;
00095     }
00096     _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
00097     returnValue = squareBuffer[0];
00098     returnValue += squareBuffer[1];
00099     returnValue += squareBuffer[2];
00100     returnValue += squareBuffer[3];
00101
00102     number = quarterPoints * 4;
00103     for(;number < num_points; number++){
00104       returnValue += (*aPtr) * (*aPtr);
00105       aPtr++;
00106     }
00107     returnValue /= num_points;
00108     returnValue -= (mean * mean);
00109     returnValue = sqrtf(returnValue);
00110   }
00111   *stddev = returnValue;
00112 }
00113 #endif /* LV_HAVE_SSE */
00114
00115 #ifdef LV_HAVE_GENERIC
00116 /*!
00117   \brief Calculates the standard deviation of the input buffer using the supplied mean
00118   \param stddev The calculated standard deviation
00119   \param inputBuffer The buffer of points to calculate the std deviation for
00120   \param mean The mean of the input buffer
00121   \param num_points The number of values in input buffer to used in the stddev calculation
00122 */
00123 static inline void volk_32f_s32f_stddev_32f_a_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
00124   float returnValue = 0;
00125   if(num_points > 0){
00126     const float* aPtr = inputBuffer;
00127     unsigned int number = 0;
00128
00129     for(number = 0; number < num_points; number++){
00130       returnValue += (*aPtr) * (*aPtr);
00131       aPtr++;
00132     }
00133
00134     returnValue /= num_points;
00135     returnValue -= (mean * mean);
00136     returnValue = sqrtf(returnValue);
00137   }
00138   *stddev = returnValue;
00139 }
00140 #endif /* LV_HAVE_GENERIC */
00141
00142
00143
00144
00145 #endif /* INCLUDED_volk_32f_s32f_stddev_32f_a_H */