GNU Radio Manual and C++ API Reference  3.7.2.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32f_s32f_stddev_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_s32f_stddev_32f_a_H
2 #define INCLUDED_volk_32f_s32f_stddev_32f_a_H
3 
4 #include <volk/volk_common.h>
5 #include <inttypes.h>
6 #include <stdio.h>
7 #include <math.h>
8 
9 #ifdef LV_HAVE_SSE4_1
10 #include <smmintrin.h>
11 /*!
12  \brief Calculates the standard deviation of the input buffer using the supplied mean
13  \param stddev The calculated standard deviation
14  \param inputBuffer The buffer of points to calculate the std deviation for
15  \param mean The mean of the input buffer
16  \param num_points The number of values in input buffer to used in the stddev calculation
17 */
18 static inline void volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
19  float returnValue = 0;
20  if(num_points > 0){
21  unsigned int number = 0;
22  const unsigned int sixteenthPoints = num_points / 16;
23 
24  const float* aPtr = inputBuffer;
25 
26  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
27 
28  __m128 squareAccumulator = _mm_setzero_ps();
29  __m128 aVal1, aVal2, aVal3, aVal4;
30  __m128 cVal1, cVal2, cVal3, cVal4;
31  for(;number < sixteenthPoints; number++) {
32  aVal1 = _mm_load_ps(aPtr); aPtr += 4;
33  cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
34 
35  aVal2 = _mm_load_ps(aPtr); aPtr += 4;
36  cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
37 
38  aVal3 = _mm_load_ps(aPtr); aPtr += 4;
39  cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
40 
41  aVal4 = _mm_load_ps(aPtr); aPtr += 4;
42  cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
43 
44  cVal1 = _mm_or_ps(cVal1, cVal2);
45  cVal3 = _mm_or_ps(cVal3, cVal4);
46  cVal1 = _mm_or_ps(cVal1, cVal3);
47 
48  squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
49  }
50  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
51  returnValue = squareBuffer[0];
52  returnValue += squareBuffer[1];
53  returnValue += squareBuffer[2];
54  returnValue += squareBuffer[3];
55 
56  number = sixteenthPoints * 16;
57  for(;number < num_points; number++){
58  returnValue += (*aPtr) * (*aPtr);
59  aPtr++;
60  }
61  returnValue /= num_points;
62  returnValue -= (mean * mean);
63  returnValue = sqrtf(returnValue);
64  }
65  *stddev = returnValue;
66 }
67 #endif /* LV_HAVE_SSE4_1 */
68 
69 #ifdef LV_HAVE_SSE
70 #include <xmmintrin.h>
71 /*!
72  \brief Calculates the standard deviation of the input buffer using the supplied mean
73  \param stddev The calculated standard deviation
74  \param inputBuffer The buffer of points to calculate the std deviation for
75  \param mean The mean of the input buffer
76  \param num_points The number of values in input buffer to used in the stddev calculation
77 */
78 static inline void volk_32f_s32f_stddev_32f_a_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
79  float returnValue = 0;
80  if(num_points > 0){
81  unsigned int number = 0;
82  const unsigned int quarterPoints = num_points / 4;
83 
84  const float* aPtr = inputBuffer;
85 
86  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
87 
88  __m128 squareAccumulator = _mm_setzero_ps();
89  __m128 aVal = _mm_setzero_ps();
90  for(;number < quarterPoints; number++) {
91  aVal = _mm_load_ps(aPtr); // aVal = x
92  aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2
93  squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
94  aPtr += 4;
95  }
96  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
97  returnValue = squareBuffer[0];
98  returnValue += squareBuffer[1];
99  returnValue += squareBuffer[2];
100  returnValue += squareBuffer[3];
101 
102  number = quarterPoints * 4;
103  for(;number < num_points; number++){
104  returnValue += (*aPtr) * (*aPtr);
105  aPtr++;
106  }
107  returnValue /= num_points;
108  returnValue -= (mean * mean);
109  returnValue = sqrtf(returnValue);
110  }
111  *stddev = returnValue;
112 }
113 #endif /* LV_HAVE_SSE */
114 
115 #ifdef LV_HAVE_GENERIC
116 /*!
117  \brief Calculates the standard deviation of the input buffer using the supplied mean
118  \param stddev The calculated standard deviation
119  \param inputBuffer The buffer of points to calculate the std deviation for
120  \param mean The mean of the input buffer
121  \param num_points The number of values in input buffer to used in the stddev calculation
122 */
123 static inline void volk_32f_s32f_stddev_32f_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){
124  float returnValue = 0;
125  if(num_points > 0){
126  const float* aPtr = inputBuffer;
127  unsigned int number = 0;
128 
129  for(number = 0; number < num_points; number++){
130  returnValue += (*aPtr) * (*aPtr);
131  aPtr++;
132  }
133 
134  returnValue /= num_points;
135  returnValue -= (mean * mean);
136  returnValue = sqrtf(returnValue);
137  }
138  *stddev = returnValue;
139 }
140 #endif /* LV_HAVE_GENERIC */
141 
142 
143 
144 
145 #endif /* INCLUDED_volk_32f_s32f_stddev_32f_a_H */
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27