/* GNU Radio 3.6.5 C++ API */
#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
#define INCLUDED_volk_32fc_s32f_atan2_32f_a_H

/*
 * Phase (atan2) kernels for interleaved complex float input.
 *
 * Each variant below is compiled only when its LV_HAVE_* feature macro is
 * defined.  The type lv_32fc_t is not declared in this header; it is expected
 * to be declared by the including code before this header is pulled in.
 */

#include <inttypes.h>
#include <stdio.h>
#include <math.h>

#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

#ifdef LV_HAVE_LIB_SIMDMATH
#include <simdmath.h>
#endif /* LV_HAVE_LIB_SIMDMATH */

/*!
  \brief Computes atan2(Q, I) for every complex input point, scales it by 1/normalizeFactor and stores the result (SSE4.1 variant).

  The vectorized path (four points per iteration) is only compiled when
  LV_HAVE_LIB_SIMDMATH is defined; otherwise every point goes through the
  scalar atan2f() loop.

  \param outputVector The vector where the results will be stored. Must be 16-byte aligned when the vectorized path is compiled in (it is written with _mm_store_ps).
  \param complexVector The input vector containing interleaved IQ data (I = cos, Q = sin). Must be 16-byte aligned when the vectorized path is compiled in (it is read with _mm_load_ps).
  \param normalizeFactor The atan2 results will be divided by this normalization factor (implemented as a multiplication by its reciprocal).
  \param num_points The number of complex values in the input vector.
*/
static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
  /* View the complex samples as a flat I,Q,I,Q,... float stream. */
  const float* complexVectorPtr = (const float*)complexVector;
  float* outPtr = outputVector;

  unsigned int number = 0;
  const float invNormalizeFactor = 1.0 / normalizeFactor;

#ifdef LV_HAVE_LIB_SIMDMATH
  const unsigned int quarterPoints = num_points / 4;
  __m128 testVector = _mm_set_ps1(2*M_PI);
  __m128 correctVector = _mm_set_ps1(M_PI);
  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
  __m128 phase;
  __m128 complex1, complex2, iValue, qValue;
  __m128 keepMask;

  for (; number < quarterPoints; number++) {
    // Load IQ data (two vectors = four complex points):
    complex1 = _mm_load_ps(complexVectorPtr);
    complexVectorPtr += 4;
    complex2 = _mm_load_ps(complexVectorPtr);
    complexVectorPtr += 4;
    // Deinterleave IQ data (even lanes are I, odd lanes are Q):
    iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
    qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
    // Arctan to get phase:
    phase = atan2f4(qValue, iValue);
    // Per the original author's note: when Q = 0 and I < 0, atan2f4
    // returns 2*pi instead of pi.  Find the lanes that are NOT 2*pi ...
    keepMask = _mm_cmpneq_ps(phase, testVector);
    // ... keep those, and replace the remaining lanes with pi.
    phase = _mm_blendv_ps(correctVector, phase, keepMask);
    // Done with the correction; apply the normalization.
    phase = _mm_mul_ps(phase, vNormalizeFactor);
    _mm_store_ps(outPtr, phase);
    outPtr += 4;
  }
  number = quarterPoints * 4;
#endif /* LV_HAVE_LIB_SIMDMATH */

  /* Scalar path: handles the tail, or everything when the vector path is not compiled in. */
  for (; number < num_points; number++) {
    const float real = *complexVectorPtr++;
    const float imag = *complexVectorPtr++;
    *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
  }
}
#endif /* LV_HAVE_SSE4_1 */


#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

#ifdef LV_HAVE_LIB_SIMDMATH
#include <simdmath.h>
#endif /* LV_HAVE_LIB_SIMDMATH */

/*!
  \brief Computes atan2(Q, I) for every complex input point, scales it by 1/normalizeFactor and stores the result (SSE variant).

  The vectorized path (four points per iteration) is only compiled when
  LV_HAVE_LIB_SIMDMATH is defined; otherwise every point goes through the
  scalar atan2f() loop.

  \param outputVector The vector where the results will be stored. Must be 16-byte aligned when the vectorized path is compiled in (it is written with _mm_store_ps).
  \param complexVector The input vector containing interleaved IQ data (I = cos, Q = sin). Must be 16-byte aligned when the vectorized path is compiled in (it is read with _mm_load_ps).
  \param normalizeFactor The atan2 results will be divided by this normalization factor (implemented as a multiplication by its reciprocal).
  \param num_points The number of complex values in the input vector.
*/
static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
  /* View the complex samples as a flat I,Q,I,Q,... float stream. */
  const float* complexVectorPtr = (const float*)complexVector;
  float* outPtr = outputVector;

  unsigned int number = 0;
  const float invNormalizeFactor = 1.0 / normalizeFactor;

#ifdef LV_HAVE_LIB_SIMDMATH
  const unsigned int quarterPoints = num_points / 4;
  __m128 testVector = _mm_set_ps1(2*M_PI);
  __m128 correctVector = _mm_set_ps1(M_PI);
  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
  __m128 phase;
  __m128 complex1, complex2, iValue, qValue;
  __m128 mask;
  __m128 keepMask;

  for (; number < quarterPoints; number++) {
    // Load IQ data (two vectors = four complex points):
    complex1 = _mm_load_ps(complexVectorPtr);
    complexVectorPtr += 4;
    complex2 = _mm_load_ps(complexVectorPtr);
    complexVectorPtr += 4;
    // Deinterleave IQ data (even lanes are I, odd lanes are Q):
    iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
    qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
    // Arctan to get phase:
    phase = atan2f4(qValue, iValue);
    // Per the original author's note: when Q = 0 and I < 0, atan2f4
    // returns 2*pi instead of pi.  Find the lanes that are NOT 2*pi ...
    keepMask = _mm_cmpneq_ps(phase, testVector);
    // ... and emulate a blend with and/andnot/or: keep those lanes,
    // put pi in the others.
    phase = _mm_and_ps(phase, keepMask);
    mask = _mm_andnot_ps(keepMask, correctVector);
    phase = _mm_or_ps(phase, mask);
    // Done with the correction; apply the normalization.
    phase = _mm_mul_ps(phase, vNormalizeFactor);
    _mm_store_ps(outPtr, phase);
    outPtr += 4;
  }
  number = quarterPoints * 4;
#endif /* LV_HAVE_LIB_SIMDMATH */

  /* Scalar path: handles the tail, or everything when the vector path is not compiled in. */
  for (; number < num_points; number++) {
    const float real = *complexVectorPtr++;
    const float imag = *complexVectorPtr++;
    *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
  }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Computes atan2(Q, I) for every complex input point, scales it by 1/normalizeFactor and stores the result (portable scalar variant).
  \param outputVector The vector where the results will be stored.
  \param inputVector Input vector containing interleaved IQ data (I = cos, Q = sin).
  \param normalizeFactor The atan2 results will be divided by this normalization factor (implemented as a multiplication by its reciprocal).
  \param num_points The number of complex values in the input vector.
*/
static inline void volk_32fc_s32f_atan2_32f_a_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){
  float* outPtr = outputVector;
  /* View the complex samples as a flat I,Q,I,Q,... float stream. */
  const float* inPtr = (const float*)inputVector;
  const float invNormalizeFactor = 1.0 / normalizeFactor;
  unsigned int number;
  for (number = 0; number < num_points; number++) {
    const float real = *inPtr++;
    const float imag = *inPtr++;
    *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
  }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */