GNU Radio Manual and C++ API Reference  3.7.5.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32fc_magnitude_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
2 #define INCLUDED_volk_32fc_magnitude_32f_u_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 #include <math.h>
7 
8 #ifdef LV_HAVE_SSE3
9 #include <pmmintrin.h>
10  /*!
11  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
12  \param complexVector The vector containing the complex input values
13  \param magnitudeVector The vector containing the real output values
14  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
15  */
16 static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
17  unsigned int number = 0;
18  const unsigned int quarterPoints = num_points / 4;
19 
20  const float* complexVectorPtr = (float*)complexVector;
21  float* magnitudeVectorPtr = magnitudeVector;
22 
23  __m128 cplxValue1, cplxValue2, result;
24  for(;number < quarterPoints; number++){
25  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
26  complexVectorPtr += 4;
27 
28  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
29  complexVectorPtr += 4;
30 
31  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
32  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
33 
34  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
35 
36  result = _mm_sqrt_ps(result);
37 
38  _mm_storeu_ps(magnitudeVectorPtr, result);
39  magnitudeVectorPtr += 4;
40  }
41 
42  number = quarterPoints * 4;
43  for(; number < num_points; number++){
44  float val1Real = *complexVectorPtr++;
45  float val1Imag = *complexVectorPtr++;
46  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
47  }
48 }
49 #endif /* LV_HAVE_SSE3 */
50 
51 #ifdef LV_HAVE_SSE
52 #include <xmmintrin.h>
53  /*!
54  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
55  \param complexVector The vector containing the complex input values
56  \param magnitudeVector The vector containing the real output values
57  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
58  */
59 static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
60  unsigned int number = 0;
61  const unsigned int quarterPoints = num_points / 4;
62 
63  const float* complexVectorPtr = (float*)complexVector;
64  float* magnitudeVectorPtr = magnitudeVector;
65 
66  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
67  for(;number < quarterPoints; number++){
68  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
69  complexVectorPtr += 4;
70 
71  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
72  complexVectorPtr += 4;
73 
74  // Arrange in i1i2i3i4 format
75  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
76  // Arrange in q1q2q3q4 format
77  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
78 
79  iValue = _mm_mul_ps(iValue, iValue); // Square the I values
80  qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
81 
82  result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
83 
84  result = _mm_sqrt_ps(result);
85 
86  _mm_storeu_ps(magnitudeVectorPtr, result);
87  magnitudeVectorPtr += 4;
88  }
89 
90  number = quarterPoints * 4;
91  for(; number < num_points; number++){
92  float val1Real = *complexVectorPtr++;
93  float val1Imag = *complexVectorPtr++;
94  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
95  }
96 }
97 #endif /* LV_HAVE_SSE */
98 
99 #ifdef LV_HAVE_GENERIC
100  /*!
101  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
102  \param complexVector The vector containing the complex input values
103  \param magnitudeVector The vector containing the real output values
104  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
105  */
106 static inline void volk_32fc_magnitude_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
107  const float* complexVectorPtr = (float*)complexVector;
108  float* magnitudeVectorPtr = magnitudeVector;
109  unsigned int number = 0;
110  for(number = 0; number < num_points; number++){
111  const float real = *complexVectorPtr++;
112  const float imag = *complexVectorPtr++;
113  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
114  }
115 }
116 #endif /* LV_HAVE_GENERIC */
117 
118 #endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
119 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
120 #define INCLUDED_volk_32fc_magnitude_32f_a_H
121 
122 #include <inttypes.h>
123 #include <stdio.h>
124 #include <math.h>
125 
126 #ifdef LV_HAVE_SSE3
127 #include <pmmintrin.h>
128  /*!
129  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
130  \param complexVector The vector containing the complex input values
131  \param magnitudeVector The vector containing the real output values
132  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
133  */
134 static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
135  unsigned int number = 0;
136  const unsigned int quarterPoints = num_points / 4;
137 
138  const float* complexVectorPtr = (float*)complexVector;
139  float* magnitudeVectorPtr = magnitudeVector;
140 
141  __m128 cplxValue1, cplxValue2, result;
142  for(;number < quarterPoints; number++){
143  cplxValue1 = _mm_load_ps(complexVectorPtr);
144  complexVectorPtr += 4;
145 
146  cplxValue2 = _mm_load_ps(complexVectorPtr);
147  complexVectorPtr += 4;
148 
149  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
150  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
151 
152  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
153 
154  result = _mm_sqrt_ps(result);
155 
156  _mm_store_ps(magnitudeVectorPtr, result);
157  magnitudeVectorPtr += 4;
158  }
159 
160  number = quarterPoints * 4;
161  for(; number < num_points; number++){
162  float val1Real = *complexVectorPtr++;
163  float val1Imag = *complexVectorPtr++;
164  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
165  }
166 }
167 #endif /* LV_HAVE_SSE3 */
168 
169 #ifdef LV_HAVE_SSE
170 #include <xmmintrin.h>
171  /*!
172  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
173  \param complexVector The vector containing the complex input values
174  \param magnitudeVector The vector containing the real output values
175  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
176  */
177 static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
178  unsigned int number = 0;
179  const unsigned int quarterPoints = num_points / 4;
180 
181  const float* complexVectorPtr = (float*)complexVector;
182  float* magnitudeVectorPtr = magnitudeVector;
183 
184  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
185  for(;number < quarterPoints; number++){
186  cplxValue1 = _mm_load_ps(complexVectorPtr);
187  complexVectorPtr += 4;
188 
189  cplxValue2 = _mm_load_ps(complexVectorPtr);
190  complexVectorPtr += 4;
191 
192  // Arrange in i1i2i3i4 format
193  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
194  // Arrange in q1q2q3q4 format
195  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
196 
197  iValue = _mm_mul_ps(iValue, iValue); // Square the I values
198  qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
199 
200  result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
201 
202  result = _mm_sqrt_ps(result);
203 
204  _mm_store_ps(magnitudeVectorPtr, result);
205  magnitudeVectorPtr += 4;
206  }
207 
208  number = quarterPoints * 4;
209  for(; number < num_points; number++){
210  float val1Real = *complexVectorPtr++;
211  float val1Imag = *complexVectorPtr++;
212  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
213  }
214 }
215 #endif /* LV_HAVE_SSE */
216 
217 #ifdef LV_HAVE_GENERIC
218  /*!
219  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
220  \param complexVector The vector containing the complex input values
221  \param magnitudeVector The vector containing the real output values
222  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
223  */
224 static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
225  const float* complexVectorPtr = (float*)complexVector;
226  float* magnitudeVectorPtr = magnitudeVector;
227  unsigned int number = 0;
228  for(number = 0; number < num_points; number++){
229  const float real = *complexVectorPtr++;
230  const float imag = *complexVectorPtr++;
231  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
232  }
233 }
234 #endif /* LV_HAVE_GENERIC */
235 
236 #ifdef LV_HAVE_NEON
237 #include <arm_neon.h>
238 
239  /*!
240  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
241  \param complexVector The vector containing the complex input values
242  \param magnitudeVector The vector containing the real output values
243  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
244  */
245 static inline void volk_32fc_magnitude_32f_neon(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
246  unsigned int number;
247  unsigned int quarter_points = num_points / 4;
248  const float* complexVectorPtr = (float*)complexVector;
249  float* magnitudeVectorPtr = magnitudeVector;
250 
251  float32x4x2_t complex_vec;
252  float32x4_t magnitude_vec;
253  for(number = 0; number < quarter_points; number++){
254  complex_vec = vld2q_f32(complexVectorPtr);
255  complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
256  magnitude_vec = vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);
257  magnitude_vec = vrsqrteq_f32(magnitude_vec);
258  magnitude_vec = vrecpeq_f32( magnitude_vec ); // no plain ol' sqrt
259  vst1q_f32(magnitudeVectorPtr, magnitude_vec);
260 
261  complexVectorPtr += 8;
262  magnitudeVectorPtr += 4;
263  }
264 
265  for(number = quarter_points*4; number < num_points; number++){
266  const float real = *complexVectorPtr++;
267  const float imag = *complexVectorPtr++;
268  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
269  }
270 }
271 #endif /* LV_HAVE_NEON */
272 
273 #ifdef LV_HAVE_NEON
274  /*!
275  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
276 
277  This is an approximation from "Streamlining Digital Signal Processing" by
278  Richard Lyons. Apparently max error is about 1% and mean error is about 0.6%.
279  The basic idea is to do a weighted sum of the abs. value of imag and real parts
280  where weight A is always assigned to max(imag, real) and B is always min(imag,real).
281  There are two pairs of cofficients chosen based on whether min <= 0.4142 max.
282  This method is called equiripple-error magnitude estimation proposed by Filip in '73
283 
284  \param complexVector The vector containing the complex input values
285  \param magnitudeVector The vector containing the real output values
286  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
287  */
288 static inline void volk_32fc_magnitude_32f_neon_fancy_sweet(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
289  unsigned int number;
290  unsigned int quarter_points = num_points / 4;
291  const float* complexVectorPtr = (float*)complexVector;
292  float* magnitudeVectorPtr = magnitudeVector;
293 
294  const float threshold = 0.4142135;
295 
296  float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
297  a_high = vdupq_n_f32( 0.84 );
298  b_high = vdupq_n_f32( 0.561);
299  a_low = vdupq_n_f32( 0.99 );
300  b_low = vdupq_n_f32( 0.197);
301 
302  uint32x4_t comp0, comp1;
303 
304  float32x4x2_t complex_vec;
305  float32x4_t min_vec, max_vec, magnitude_vec;
306  float32x4_t real_abs, imag_abs;
307  for(number = 0; number < quarter_points; number++){
308  complex_vec = vld2q_f32(complexVectorPtr);
309 
310  real_abs = vabsq_f32(complex_vec.val[0]);
311  imag_abs = vabsq_f32(complex_vec.val[1]);
312 
313  min_vec = vminq_f32(real_abs, imag_abs);
314  max_vec = vmaxq_f32(real_abs, imag_abs);
315 
316  // effective branch to choose coefficient pair.
317  comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
318  comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
319 
320  // and 0s or 1s with coefficients from previous effective branch
321  a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)a_high), vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
322  b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)b_high), vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
323 
324  // coefficients chosen, do the weighted sum
325  min_vec = vmulq_f32(min_vec, b_vec);
326  max_vec = vmulq_f32(max_vec, a_vec);
327 
328  magnitude_vec = vaddq_f32(min_vec, max_vec);
329  vst1q_f32(magnitudeVectorPtr, magnitude_vec);
330 
331  complexVectorPtr += 8;
332  magnitudeVectorPtr += 4;
333  }
334 
335  for(number = quarter_points*4; number < num_points; number++){
336  const float real = *complexVectorPtr++;
337  const float imag = *complexVectorPtr++;
338  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
339  }
340 }
341 #endif /* LV_HAVE_NEON */
342 
343 
344 #ifdef LV_HAVE_ORC
345  /*!
346  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
347  \param complexVector The vector containing the complex input values
348  \param magnitudeVector The vector containing the real output values
349  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
350  */
351 extern void volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points);
352 static inline void volk_32fc_magnitude_32f_u_orc(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
353  volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
354 }
355 #endif /* LV_HAVE_ORC */
356 
357 
358 #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
float complex lv_32fc_t
Definition: volk_complex.h:56