GNU Radio 3.6.5 C++ API
|
00001 #ifndef INCLUDED_volk_32f_s32f_stddev_32f_a_H 00002 #define INCLUDED_volk_32f_s32f_stddev_32f_a_H 00003 00004 #include <volk/volk_common.h> 00005 #include <inttypes.h> 00006 #include <stdio.h> 00007 #include <math.h> 00008 00009 #ifdef LV_HAVE_SSE4_1 00010 #include <smmintrin.h> 00011 /*! 00012 \brief Calculates the standard deviation of the input buffer using the supplied mean 00013 \param stddev The calculated standard deviation 00014 \param inputBuffer The buffer of points to calculate the std deviation for 00015 \param mean The mean of the input buffer 00016 \param num_points The number of values in input buffer to used in the stddev calculation 00017 */ 00018 static inline void volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ 00019 float returnValue = 0; 00020 if(num_points > 0){ 00021 unsigned int number = 0; 00022 const unsigned int sixteenthPoints = num_points / 16; 00023 00024 const float* aPtr = inputBuffer; 00025 00026 __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; 00027 00028 __m128 squareAccumulator = _mm_setzero_ps(); 00029 __m128 aVal1, aVal2, aVal3, aVal4; 00030 __m128 cVal1, cVal2, cVal3, cVal4; 00031 for(;number < sixteenthPoints; number++) { 00032 aVal1 = _mm_load_ps(aPtr); aPtr += 4; 00033 cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1); 00034 00035 aVal2 = _mm_load_ps(aPtr); aPtr += 4; 00036 cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2); 00037 00038 aVal3 = _mm_load_ps(aPtr); aPtr += 4; 00039 cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4); 00040 00041 aVal4 = _mm_load_ps(aPtr); aPtr += 4; 00042 cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8); 00043 00044 cVal1 = _mm_or_ps(cVal1, cVal2); 00045 cVal3 = _mm_or_ps(cVal3, cVal4); 00046 cVal1 = _mm_or_ps(cVal1, cVal3); 00047 00048 squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2 00049 } 00050 _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container 00051 returnValue = squareBuffer[0]; 00052 returnValue += squareBuffer[1]; 00053 returnValue += squareBuffer[2]; 00054 returnValue += squareBuffer[3]; 00055 00056 number = sixteenthPoints * 16; 00057 for(;number < num_points; number++){ 00058 returnValue += (*aPtr) * (*aPtr); 00059 aPtr++; 00060 } 00061 returnValue /= num_points; 00062 returnValue -= (mean * mean); 00063 returnValue = sqrtf(returnValue); 00064 } 00065 *stddev = returnValue; 00066 } 00067 #endif /* LV_HAVE_SSE4_1 */ 00068 00069 #ifdef LV_HAVE_SSE 00070 #include <xmmintrin.h> 00071 /*! 00072 \brief Calculates the standard deviation of the input buffer using the supplied mean 00073 \param stddev The calculated standard deviation 00074 \param inputBuffer The buffer of points to calculate the std deviation for 00075 \param mean The mean of the input buffer 00076 \param num_points The number of values in input buffer to used in the stddev calculation 00077 */ 00078 static inline void volk_32f_s32f_stddev_32f_a_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ 00079 float returnValue = 0; 00080 if(num_points > 0){ 00081 unsigned int number = 0; 00082 const unsigned int quarterPoints = num_points / 4; 00083 00084 const float* aPtr = inputBuffer; 00085 00086 __VOLK_ATTR_ALIGNED(16) float squareBuffer[4]; 00087 00088 __m128 squareAccumulator = _mm_setzero_ps(); 00089 __m128 aVal = _mm_setzero_ps(); 00090 for(;number < quarterPoints; number++) { 00091 aVal = _mm_load_ps(aPtr); // aVal = x 00092 aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2 00093 squareAccumulator = _mm_add_ps(squareAccumulator, aVal); 00094 aPtr += 4; 00095 } 00096 _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container 00097 returnValue = squareBuffer[0]; 00098 returnValue += squareBuffer[1]; 00099 returnValue += squareBuffer[2]; 00100 returnValue += squareBuffer[3]; 00101 00102 number = quarterPoints * 4; 00103 for(;number < num_points; number++){ 00104 returnValue += (*aPtr) * (*aPtr); 00105 aPtr++; 00106 } 00107 returnValue /= num_points; 00108 returnValue -= (mean * mean); 00109 returnValue = sqrtf(returnValue); 00110 } 00111 *stddev = returnValue; 00112 } 00113 #endif /* LV_HAVE_SSE */ 00114 00115 #ifdef LV_HAVE_GENERIC 00116 /*! 00117 \brief Calculates the standard deviation of the input buffer using the supplied mean 00118 \param stddev The calculated standard deviation 00119 \param inputBuffer The buffer of points to calculate the std deviation for 00120 \param mean The mean of the input buffer 00121 \param num_points The number of values in input buffer to used in the stddev calculation 00122 */ 00123 static inline void volk_32f_s32f_stddev_32f_a_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ 00124 float returnValue = 0; 00125 if(num_points > 0){ 00126 const float* aPtr = inputBuffer; 00127 unsigned int number = 0; 00128 00129 for(number = 0; number < num_points; number++){ 00130 returnValue += (*aPtr) * (*aPtr); 00131 aPtr++; 00132 } 00133 00134 returnValue /= num_points; 00135 returnValue -= (mean * mean); 00136 returnValue = sqrtf(returnValue); 00137 } 00138 *stddev = returnValue; 00139 } 00140 #endif /* LV_HAVE_GENERIC */ 00141 00142 00143 00144 00145 #endif /* INCLUDED_volk_32f_s32f_stddev_32f_a_H */