GNU Radio 3.5.3.2 C++ API
|
00001 #ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H 00002 #define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H 00003 00004 #include <volk/volk_common.h> 00005 #include <inttypes.h> 00006 #include <stdio.h> 00007 #include <math.h> 00008 00009 #ifdef LV_HAVE_SSE4_1 00010 #include <smmintrin.h> 00011 /*! 00012 \brief Calculates the standard deviation and mean of the input buffer 00013 \param stddev The calculated standard deviation 00014 \param mean The mean of the input buffer 00015 \param inputBuffer The buffer of points to calculate the std deviation for 00016 \param num_points The number of values in input buffer to used in the stddev and mean calculations 00017 */ 00018 static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ 00019 float returnValue = 0; 00020 float newMean = 0; 00021 if(num_points > 0){ 00022 unsigned int number = 0; 00023 const unsigned int sixteenthPoints = num_points / 16; 00024 00025 const float* aPtr = inputBuffer; 00026 __VOLK_ATTR_ALIGNED(16) float meanBuffer[4]; 00027 __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; 00028 00029 __m128 accumulator = _mm_setzero_ps(); 00030 __m128 squareAccumulator = _mm_setzero_ps(); 00031 __m128 aVal1, aVal2, aVal3, aVal4; 00032 __m128 cVal1, cVal2, cVal3, cVal4; 00033 for(;number < sixteenthPoints; number++) { 00034 aVal1 = _mm_load_ps(aPtr); aPtr += 4; 00035 cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1); 00036 accumulator = _mm_add_ps(accumulator, aVal1); // accumulator += x 00037 00038 aVal2 = _mm_load_ps(aPtr); aPtr += 4; 00039 cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2); 00040 accumulator = _mm_add_ps(accumulator, aVal2); // accumulator += x 00041 00042 aVal3 = _mm_load_ps(aPtr); aPtr += 4; 00043 cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4); 00044 accumulator = _mm_add_ps(accumulator, aVal3); // accumulator += x 00045 00046 aVal4 = _mm_load_ps(aPtr); aPtr += 4; 00047 cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8); 00048 accumulator = _mm_add_ps(accumulator, aVal4); // accumulator += x 00049 00050 cVal1 = _mm_or_ps(cVal1, cVal2); 00051 cVal3 = _mm_or_ps(cVal3, cVal4); 00052 cVal1 = _mm_or_ps(cVal1, cVal3); 00053 00054 squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2 00055 } 00056 _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container 00057 _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container 00058 newMean = meanBuffer[0]; 00059 newMean += meanBuffer[1]; 00060 newMean += meanBuffer[2]; 00061 newMean += meanBuffer[3]; 00062 returnValue = squareBuffer[0]; 00063 returnValue += squareBuffer[1]; 00064 returnValue += squareBuffer[2]; 00065 returnValue += squareBuffer[3]; 00066 00067 number = sixteenthPoints * 16; 00068 for(;number < num_points; number++){ 00069 returnValue += (*aPtr) * (*aPtr); 00070 newMean += *aPtr++; 00071 } 00072 newMean /= num_points; 00073 returnValue /= num_points; 00074 returnValue -= (newMean * newMean); 00075 returnValue = sqrtf(returnValue); 00076 } 00077 *stddev = returnValue; 00078 *mean = newMean; 00079 } 00080 #endif /* LV_HAVE_SSE4_1 */ 00081 00082 #ifdef LV_HAVE_SSE 00083 #include <xmmintrin.h> 00084 /*! 00085 \brief Calculates the standard deviation and mean of the input buffer 00086 \param stddev The calculated standard deviation 00087 \param mean The mean of the input buffer 00088 \param inputBuffer The buffer of points to calculate the std deviation for 00089 \param num_points The number of values in input buffer to used in the stddev and mean calculations 00090 */ 00091 static inline void volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ 00092 float returnValue = 0; 00093 float newMean = 0; 00094 if(num_points > 0){ 00095 unsigned int number = 0; 00096 const unsigned int quarterPoints = num_points / 4; 00097 00098 const float* aPtr = inputBuffer; 00099 __VOLK_ATTR_ALIGNED(16) float meanBuffer[4]; 00100 __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; 00101 00102 __m128 accumulator = _mm_setzero_ps(); 00103 __m128 squareAccumulator = _mm_setzero_ps(); 00104 __m128 aVal = _mm_setzero_ps(); 00105 for(;number < quarterPoints; number++) { 00106 aVal = _mm_load_ps(aPtr); // aVal = x 00107 accumulator = _mm_add_ps(accumulator, aVal); // accumulator += x 00108 aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2 00109 squareAccumulator = _mm_add_ps(squareAccumulator, aVal); 00110 aPtr += 4; 00111 } 00112 _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container 00113 _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container 00114 newMean = meanBuffer[0]; 00115 newMean += meanBuffer[1]; 00116 newMean += meanBuffer[2]; 00117 newMean += meanBuffer[3]; 00118 returnValue = squareBuffer[0]; 00119 returnValue += squareBuffer[1]; 00120 returnValue += squareBuffer[2]; 00121 returnValue += squareBuffer[3]; 00122 00123 number = quarterPoints * 4; 00124 for(;number < num_points; number++){ 00125 returnValue += (*aPtr) * (*aPtr); 00126 newMean += *aPtr++; 00127 } 00128 newMean /= num_points; 00129 returnValue /= num_points; 00130 returnValue -= (newMean * newMean); 00131 returnValue = sqrtf(returnValue); 00132 } 00133 *stddev = returnValue; 00134 *mean = newMean; 00135 } 00136 #endif /* LV_HAVE_SSE */ 00137 00138 #ifdef LV_HAVE_GENERIC 00139 /*! 00140 \brief Calculates the standard deviation and mean of the input buffer 00141 \param stddev The calculated standard deviation 00142 \param mean The mean of the input buffer 00143 \param inputBuffer The buffer of points to calculate the std deviation for 00144 \param num_points The number of values in input buffer to used in the stddev and mean calculations 00145 */ 00146 static inline void volk_32f_stddev_and_mean_32f_x2_a_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ 00147 float returnValue = 0; 00148 float newMean = 0; 00149 if(num_points > 0){ 00150 const float* aPtr = inputBuffer; 00151 unsigned int number = 0; 00152 00153 for(number = 0; number < num_points; number++){ 00154 returnValue += (*aPtr) * (*aPtr); 00155 newMean += *aPtr++; 00156 } 00157 newMean /= num_points; 00158 returnValue /= num_points; 00159 returnValue -= (newMean * newMean); 00160 returnValue = sqrtf(returnValue); 00161 } 00162 *stddev = returnValue; 00163 *mean = newMean; 00164 } 00165 #endif /* LV_HAVE_GENERIC */ 00166 00167 00168 00169 00170 #endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H */