GNU Radio 3.7.3 C++ API
volk_32f_stddev_and_mean_32f_x2.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
2 #define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
3 
4 #include <volk/volk_common.h>
5 #include <inttypes.h>
6 #include <stdio.h>
7 #include <math.h>
8 
9 #ifdef LV_HAVE_SSE4_1
10 #include <smmintrin.h>
11 /*!
12  \brief Calculates the standard deviation and mean of the input buffer
13  \param stddev The calculated standard deviation
14  \param mean The mean of the input buffer
15  \param inputBuffer The buffer of points to calculate the std deviation for
16  \param num_points The number of values in input buffer to used in the stddev and mean calculations
17 */
18 static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
19  float returnValue = 0;
20  float newMean = 0;
21  if(num_points > 0){
22  unsigned int number = 0;
23  const unsigned int sixteenthPoints = num_points / 16;
24 
25  const float* aPtr = inputBuffer;
26  __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
27  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
28 
29  __m128 accumulator = _mm_setzero_ps();
30  __m128 squareAccumulator = _mm_setzero_ps();
31  __m128 aVal1, aVal2, aVal3, aVal4;
32  __m128 cVal1, cVal2, cVal3, cVal4;
33  for(;number < sixteenthPoints; number++) {
34  aVal1 = _mm_load_ps(aPtr); aPtr += 4;
35  cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
36  accumulator = _mm_add_ps(accumulator, aVal1); // accumulator += x
37 
38  aVal2 = _mm_load_ps(aPtr); aPtr += 4;
39  cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
40  accumulator = _mm_add_ps(accumulator, aVal2); // accumulator += x
41 
42  aVal3 = _mm_load_ps(aPtr); aPtr += 4;
43  cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
44  accumulator = _mm_add_ps(accumulator, aVal3); // accumulator += x
45 
46  aVal4 = _mm_load_ps(aPtr); aPtr += 4;
47  cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
48  accumulator = _mm_add_ps(accumulator, aVal4); // accumulator += x
49 
50  cVal1 = _mm_or_ps(cVal1, cVal2);
51  cVal3 = _mm_or_ps(cVal3, cVal4);
52  cVal1 = _mm_or_ps(cVal1, cVal3);
53 
54  squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
55  }
56  _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
57  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
58  newMean = meanBuffer[0];
59  newMean += meanBuffer[1];
60  newMean += meanBuffer[2];
61  newMean += meanBuffer[3];
62  returnValue = squareBuffer[0];
63  returnValue += squareBuffer[1];
64  returnValue += squareBuffer[2];
65  returnValue += squareBuffer[3];
66 
67  number = sixteenthPoints * 16;
68  for(;number < num_points; number++){
69  returnValue += (*aPtr) * (*aPtr);
70  newMean += *aPtr++;
71  }
72  newMean /= num_points;
73  returnValue /= num_points;
74  returnValue -= (newMean * newMean);
75  returnValue = sqrtf(returnValue);
76  }
77  *stddev = returnValue;
78  *mean = newMean;
79 }
80 #endif /* LV_HAVE_SSE4_1 */
81 
82 #ifdef LV_HAVE_SSE
83 #include <xmmintrin.h>
84 /*!
85  \brief Calculates the standard deviation and mean of the input buffer
86  \param stddev The calculated standard deviation
87  \param mean The mean of the input buffer
88  \param inputBuffer The buffer of points to calculate the std deviation for
89  \param num_points The number of values in input buffer to used in the stddev and mean calculations
90 */
91 static inline void volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
92  float returnValue = 0;
93  float newMean = 0;
94  if(num_points > 0){
95  unsigned int number = 0;
96  const unsigned int quarterPoints = num_points / 4;
97 
98  const float* aPtr = inputBuffer;
99  __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
100  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
101 
102  __m128 accumulator = _mm_setzero_ps();
103  __m128 squareAccumulator = _mm_setzero_ps();
104  __m128 aVal = _mm_setzero_ps();
105  for(;number < quarterPoints; number++) {
106  aVal = _mm_load_ps(aPtr); // aVal = x
107  accumulator = _mm_add_ps(accumulator, aVal); // accumulator += x
108  aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2
109  squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
110  aPtr += 4;
111  }
112  _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
113  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
114  newMean = meanBuffer[0];
115  newMean += meanBuffer[1];
116  newMean += meanBuffer[2];
117  newMean += meanBuffer[3];
118  returnValue = squareBuffer[0];
119  returnValue += squareBuffer[1];
120  returnValue += squareBuffer[2];
121  returnValue += squareBuffer[3];
122 
123  number = quarterPoints * 4;
124  for(;number < num_points; number++){
125  returnValue += (*aPtr) * (*aPtr);
126  newMean += *aPtr++;
127  }
128  newMean /= num_points;
129  returnValue /= num_points;
130  returnValue -= (newMean * newMean);
131  returnValue = sqrtf(returnValue);
132  }
133  *stddev = returnValue;
134  *mean = newMean;
135 }
136 #endif /* LV_HAVE_SSE */
137 
138 #ifdef LV_HAVE_GENERIC
139 /*!
140  \brief Calculates the standard deviation and mean of the input buffer
141  \param stddev The calculated standard deviation
142  \param mean The mean of the input buffer
143  \param inputBuffer The buffer of points to calculate the std deviation for
144  \param num_points The number of values in input buffer to used in the stddev and mean calculations
145 */
146 static inline void volk_32f_stddev_and_mean_32f_x2_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){
147  float returnValue = 0;
148  float newMean = 0;
149  if(num_points > 0){
150  const float* aPtr = inputBuffer;
151  unsigned int number = 0;
152 
153  for(number = 0; number < num_points; number++){
154  returnValue += (*aPtr) * (*aPtr);
155  newMean += *aPtr++;
156  }
157  newMean /= num_points;
158  returnValue /= num_points;
159  returnValue -= (newMean * newMean);
160  returnValue = sqrtf(returnValue);
161  }
162  *stddev = returnValue;
163  *mean = newMean;
164 }
165 #endif /* LV_HAVE_GENERIC */
166 
167 
168 
169 
170 #endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H */
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27