GNU Radio 3.7.3 C++ API
volk_32fc_s32f_atan2_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
2 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 #include <math.h>
7 
8 #ifdef LV_HAVE_SSE4_1
9 #include <smmintrin.h>
10 
11 #ifdef LV_HAVE_LIB_SIMDMATH
12 #include <simdmath.h>
13 #endif /* LV_HAVE_LIB_SIMDMATH */
14 
15 /*!
16  \brief performs the atan2 on the input vector and stores the results in the output vector.
17  \param outputVector The byte-aligned vector where the results will be stored.
18  \param inputVector The byte-aligned input vector containing interleaved IQ data (I = cos, Q = sin).
19  \param normalizeFactor The atan2 results will be divided by this normalization factor.
20  \param num_points The number of complex values in the input vector.
21 */
22 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
23  const float* complexVectorPtr = (float*)complexVector;
24  float* outPtr = outputVector;
25 
26  unsigned int number = 0;
27  const float invNormalizeFactor = 1.0 / normalizeFactor;
28 
29 #ifdef LV_HAVE_LIB_SIMDMATH
30  const unsigned int quarterPoints = num_points / 4;
31  __m128 testVector = _mm_set_ps1(2*M_PI);
32  __m128 correctVector = _mm_set_ps1(M_PI);
33  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
34  __m128 phase;
35  __m128 complex1, complex2, iValue, qValue;
36  __m128 keepMask;
37 
38  for (; number < quarterPoints; number++) {
39  // Load IQ data:
40  complex1 = _mm_load_ps(complexVectorPtr);
41  complexVectorPtr += 4;
42  complex2 = _mm_load_ps(complexVectorPtr);
43  complexVectorPtr += 4;
44  // Deinterleave IQ data:
45  iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
46  qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
47  // Arctan to get phase:
48  phase = atan2f4(qValue, iValue);
49  // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
50  // Compare to 2pi:
51  keepMask = _mm_cmpneq_ps(phase,testVector);
52  phase = _mm_blendv_ps(correctVector, phase, keepMask);
53  // done with above correction.
54  phase = _mm_mul_ps(phase, vNormalizeFactor);
55  _mm_store_ps((float*)outPtr, phase);
56  outPtr += 4;
57  }
58  number = quarterPoints * 4;
59 #endif /* LV_HAVE_SIMDMATH_H */
60 
61  for (; number < num_points; number++) {
62  const float real = *complexVectorPtr++;
63  const float imag = *complexVectorPtr++;
64  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
65  }
66 }
67 #endif /* LV_HAVE_SSE4_1 */
68 
69 
70 #ifdef LV_HAVE_SSE
71 #include <xmmintrin.h>
72 
73 #ifdef LV_HAVE_LIB_SIMDMATH
74 #include <simdmath.h>
75 #endif /* LV_HAVE_LIB_SIMDMATH */
76 
77 /*!
78  \brief performs the atan2 on the input vector and stores the results in the output vector.
79  \param outputVector The byte-aligned vector where the results will be stored.
80  \param inputVector The byte-aligned input vector containing interleaved IQ data (I = cos, Q = sin).
81  \param normalizeFactor The atan2 results will be divided by this normalization factor.
82  \param num_points The number of complex values in the input vector.
83 */
84 static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
85  const float* complexVectorPtr = (float*)complexVector;
86  float* outPtr = outputVector;
87 
88  unsigned int number = 0;
89  const float invNormalizeFactor = 1.0 / normalizeFactor;
90 
91 #ifdef LV_HAVE_LIB_SIMDMATH
92  const unsigned int quarterPoints = num_points / 4;
93  __m128 testVector = _mm_set_ps1(2*M_PI);
94  __m128 correctVector = _mm_set_ps1(M_PI);
95  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
96  __m128 phase;
97  __m128 complex1, complex2, iValue, qValue;
98  __m128 mask;
99  __m128 keepMask;
100 
101  for (; number < quarterPoints; number++) {
102  // Load IQ data:
103  complex1 = _mm_load_ps(complexVectorPtr);
104  complexVectorPtr += 4;
105  complex2 = _mm_load_ps(complexVectorPtr);
106  complexVectorPtr += 4;
107  // Deinterleave IQ data:
108  iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
109  qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
110  // Arctan to get phase:
111  phase = atan2f4(qValue, iValue);
112  // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
113  // Compare to 2pi:
114  keepMask = _mm_cmpneq_ps(phase,testVector);
115  phase = _mm_and_ps(phase, keepMask);
116  mask = _mm_andnot_ps(keepMask, correctVector);
117  phase = _mm_or_ps(phase, mask);
118  // done with above correction.
119  phase = _mm_mul_ps(phase, vNormalizeFactor);
120  _mm_store_ps((float*)outPtr, phase);
121  outPtr += 4;
122  }
123  number = quarterPoints * 4;
124 #endif /* LV_HAVE_SIMDMATH_H */
125 
126  for (; number < num_points; number++) {
127  const float real = *complexVectorPtr++;
128  const float imag = *complexVectorPtr++;
129  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
130  }
131 }
132 #endif /* LV_HAVE_SSE */
133 
134 #ifdef LV_HAVE_GENERIC
135 /*!
136  \brief performs the atan2 on the input vector and stores the results in the output vector.
137  \param outputVector The vector where the results will be stored.
138  \param inputVector Input vector containing interleaved IQ data (I = cos, Q = sin).
139  \param normalizeFactor The atan2 results will be divided by this normalization factor.
140  \param num_points The number of complex values in the input vector.
141 */
142 static inline void volk_32fc_s32f_atan2_32f_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){
143  float* outPtr = outputVector;
144  const float* inPtr = (float*)inputVector;
145  const float invNormalizeFactor = 1.0 / normalizeFactor;
146  unsigned int number;
147  for ( number = 0; number < num_points; number++) {
148  const float real = *inPtr++;
149  const float imag = *inPtr++;
150  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
151  }
152 }
153 #endif /* LV_HAVE_GENERIC */
154 
155 
156 
157 
158 #endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */
#define M_PI
Definition: gnuradio/volk/cmake/msvc/config.h:42
float complex lv_32fc_t
Definition: volk_complex.h:56