doc/doxygen-3.6/volk__32f__stddev__and__mean__32f__x2__a_8h_source.html

00001 #ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
00002 #define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
00003
00004 #include <volk/volk_common.h>
00005 #include <inttypes.h>
00006 #include <stdio.h>
00007 #include <math.h>
00008
00009 #ifdef LV_HAVE_SSE4_1
00010 #include <smmintrin.h>
00011 /*!
00012   \brief Calculates the standard deviation and mean of the input buffer
00013   \param stddev The calculated standard deviation
00014   \param mean The mean of the input buffer
00015   \param inputBuffer The buffer of points to calculate the std deviation for
00016   \param num_points The number of values in input buffer to used in the stddev and mean calculations
00017 */
00018 static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
00019   float returnValue = 0;
00020   float newMean = 0;
00021   if(num_points > 0){
00022     unsigned int number = 0;
00023     const unsigned int sixteenthPoints = num_points / 16;
00024
00025     const float* aPtr = inputBuffer;
00026     __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
00027     __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
00028
00029     __m128 accumulator = _mm_setzero_ps();
00030     __m128 squareAccumulator = _mm_setzero_ps();
00031     __m128 aVal1, aVal2, aVal3, aVal4;
00032     __m128 cVal1, cVal2, cVal3, cVal4;
00033     for(;number < sixteenthPoints; number++) {
00034       aVal1 = _mm_load_ps(aPtr); aPtr += 4;
00035       cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
00036       accumulator = _mm_add_ps(accumulator, aVal1);  // accumulator += x
00037
00038       aVal2 = _mm_load_ps(aPtr); aPtr += 4;
00039       cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
00040       accumulator = _mm_add_ps(accumulator, aVal2);  // accumulator += x
00041
00042       aVal3 = _mm_load_ps(aPtr); aPtr += 4;
00043       cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
00044       accumulator = _mm_add_ps(accumulator, aVal3);  // accumulator += x
00045
00046       aVal4 = _mm_load_ps(aPtr); aPtr += 4;
00047       cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
00048       accumulator = _mm_add_ps(accumulator, aVal4);  // accumulator += x
00049
00050       cVal1 = _mm_or_ps(cVal1, cVal2);
00051       cVal3 = _mm_or_ps(cVal3, cVal4);
00052       cVal1 = _mm_or_ps(cVal1, cVal3);
00053
00054       squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
00055     }
00056     _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
00057     _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
00058     newMean = meanBuffer[0];
00059     newMean += meanBuffer[1];
00060     newMean += meanBuffer[2];
00061     newMean += meanBuffer[3];
00062     returnValue = squareBuffer[0];
00063     returnValue += squareBuffer[1];
00064     returnValue += squareBuffer[2];
00065     returnValue += squareBuffer[3];
00066
00067     number = sixteenthPoints * 16;
00068     for(;number < num_points; number++){
00069       returnValue += (*aPtr) * (*aPtr);
00070       newMean += *aPtr++;
00071     }
00072     newMean /= num_points;
00073     returnValue /= num_points;
00074     returnValue -= (newMean * newMean);
00075     returnValue = sqrtf(returnValue);
00076   }
00077   *stddev = returnValue;
00078   *mean = newMean;
00079 }
00080 #endif /* LV_HAVE_SSE4_1 */
00081
00082 #ifdef LV_HAVE_SSE
00083 #include <xmmintrin.h>
00084 /*!
00085   \brief Calculates the standard deviation and mean of the input buffer
00086   \param stddev The calculated standard deviation
00087   \param mean The mean of the input buffer
00088   \param inputBuffer The buffer of points to calculate the std deviation for
00089   \param num_points The number of values in input buffer to used in the stddev and mean calculations
00090 */
00091 static inline void volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
00092   float returnValue = 0;
00093   float newMean = 0;
00094   if(num_points > 0){
00095     unsigned int number = 0;
00096     const unsigned int quarterPoints = num_points / 4;
00097
00098     const float* aPtr = inputBuffer;
00099     __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
00100     __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
00101
00102     __m128 accumulator = _mm_setzero_ps();
00103     __m128 squareAccumulator = _mm_setzero_ps();
00104     __m128 aVal = _mm_setzero_ps();
00105     for(;number < quarterPoints; number++) {
00106       aVal = _mm_load_ps(aPtr);                     // aVal = x
00107       accumulator = _mm_add_ps(accumulator, aVal);  // accumulator += x
00108       aVal = _mm_mul_ps(aVal, aVal);                // squareAccumulator += x^2
00109       squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
00110       aPtr += 4;
00111     }
00112     _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
00113     _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
00114     newMean = meanBuffer[0];
00115     newMean += meanBuffer[1];
00116     newMean += meanBuffer[2];
00117     newMean += meanBuffer[3];
00118     returnValue = squareBuffer[0];
00119     returnValue += squareBuffer[1];
00120     returnValue += squareBuffer[2];
00121     returnValue += squareBuffer[3];
00122
00123     number = quarterPoints * 4;
00124     for(;number < num_points; number++){
00125       returnValue += (*aPtr) * (*aPtr);
00126       newMean += *aPtr++;
00127     }
00128     newMean /= num_points;
00129     returnValue /= num_points;
00130     returnValue -= (newMean * newMean);
00131     returnValue = sqrtf(returnValue);
00132   }
00133   *stddev = returnValue;
00134   *mean = newMean;
00135 }
00136 #endif /* LV_HAVE_SSE */
00137
00138 #ifdef LV_HAVE_GENERIC
00139 /*!
00140   \brief Calculates the standard deviation and mean of the input buffer
00141   \param stddev The calculated standard deviation
00142   \param mean The mean of the input buffer
00143   \param inputBuffer The buffer of points to calculate the std deviation for
00144   \param num_points The number of values in input buffer to used in the stddev and mean calculations
00145 */
00146 static inline void volk_32f_stddev_and_mean_32f_x2_a_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
00147   float returnValue = 0;
00148   float newMean = 0;
00149   if(num_points > 0){
00150     const float* aPtr = inputBuffer;
00151     unsigned int number = 0;
00152
00153     for(number = 0; number < num_points; number++){
00154       returnValue += (*aPtr) * (*aPtr);
00155       newMean += *aPtr++;
00156     }
00157     newMean /= num_points;
00158     returnValue /= num_points;
00159     returnValue -= (newMean * newMean);
00160     returnValue = sqrtf(returnValue);
00161   }
00162   *stddev = returnValue;
00163   *mean = newMean;
00164 }
00165 #endif /* LV_HAVE_GENERIC */
00166
00167
00168
00169
00170 #endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H */