1 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
2 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
11 #ifdef LV_HAVE_LIB_SIMDMATH
22 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(
float* outputVector,
const lv_32fc_t* complexVector,
const float normalizeFactor,
unsigned int num_points){
23 const float* complexVectorPtr = (
float*)complexVector;
24 float* outPtr = outputVector;
26 unsigned int number = 0;
27 const float invNormalizeFactor = 1.0 / normalizeFactor;
29 #ifdef LV_HAVE_LIB_SIMDMATH
30 const unsigned int quarterPoints = num_points / 4;
31 __m128 testVector = _mm_set_ps1(2*
M_PI);
32 __m128 correctVector = _mm_set_ps1(
M_PI);
33 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
35 __m128 complex1, complex2, iValue, qValue;
38 for (; number < quarterPoints; number++) {
40 complex1 = _mm_load_ps(complexVectorPtr);
41 complexVectorPtr += 4;
42 complex2 = _mm_load_ps(complexVectorPtr);
43 complexVectorPtr += 4;
45 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
46 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
48 phase = atan2f4(qValue, iValue);
51 keepMask = _mm_cmpneq_ps(phase,testVector);
52 phase = _mm_blendv_ps(correctVector, phase, keepMask);
54 phase = _mm_mul_ps(phase, vNormalizeFactor);
55 _mm_store_ps((
float*)outPtr, phase);
58 number = quarterPoints * 4;
61 for (; number < num_points; number++) {
62 const float real = *complexVectorPtr++;
63 const float imag = *complexVectorPtr++;
64 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
71 #include <xmmintrin.h>
73 #ifdef LV_HAVE_LIB_SIMDMATH
84 static inline void volk_32fc_s32f_atan2_32f_a_sse(
float* outputVector,
const lv_32fc_t* complexVector,
const float normalizeFactor,
unsigned int num_points){
85 const float* complexVectorPtr = (
float*)complexVector;
86 float* outPtr = outputVector;
88 unsigned int number = 0;
89 const float invNormalizeFactor = 1.0 / normalizeFactor;
91 #ifdef LV_HAVE_LIB_SIMDMATH
92 const unsigned int quarterPoints = num_points / 4;
93 __m128 testVector = _mm_set_ps1(2*
M_PI);
94 __m128 correctVector = _mm_set_ps1(
M_PI);
95 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
97 __m128 complex1, complex2, iValue, qValue;
101 for (; number < quarterPoints; number++) {
103 complex1 = _mm_load_ps(complexVectorPtr);
104 complexVectorPtr += 4;
105 complex2 = _mm_load_ps(complexVectorPtr);
106 complexVectorPtr += 4;
108 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
109 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
111 phase = atan2f4(qValue, iValue);
114 keepMask = _mm_cmpneq_ps(phase,testVector);
115 phase = _mm_and_ps(phase, keepMask);
116 mask = _mm_andnot_ps(keepMask, correctVector);
117 phase = _mm_or_ps(phase, mask);
119 phase = _mm_mul_ps(phase, vNormalizeFactor);
120 _mm_store_ps((
float*)outPtr, phase);
123 number = quarterPoints * 4;
126 for (; number < num_points; number++) {
127 const float real = *complexVectorPtr++;
128 const float imag = *complexVectorPtr++;
129 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
134 #ifdef LV_HAVE_GENERIC
142 static inline void volk_32fc_s32f_atan2_32f_generic(
float* outputVector,
const lv_32fc_t* inputVector,
const float normalizeFactor,
unsigned int num_points){
143 float* outPtr = outputVector;
144 const float* inPtr = (
float*)inputVector;
145 const float invNormalizeFactor = 1.0 / normalizeFactor;
147 for ( number = 0; number < num_points; number++) {
148 const float real = *inPtr++;
149 const float imag = *inPtr++;
150 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
/*
 * Cross-references:
 *   M_PI      - macro; definition: gnuradio/volk/cmake/msvc/config.h:42
 *   lv_32fc_t - declared as `float complex lv_32fc_t`; definition: volk_complex.h:56
 */