doc/doxygen-3.6/volk__32fc__s32f__atan2__32f__a_8h_source.html

00001 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
00002 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <math.h>
00007
00008 #ifdef LV_HAVE_SSE4_1
00009 #include <smmintrin.h>
00010
00011 #ifdef LV_HAVE_LIB_SIMDMATH
00012 #include <simdmath.h>
00013 #endif /* LV_HAVE_LIB_SIMDMATH */
00014
00015 /*!
00016   \brief Computes atan2(Q, I) for every complex input sample, scales it by 1/normalizeFactor, and stores the result in the output vector (SSE4.1 variant).
00017   \param outputVector Vector where the num_points scaled phase values are stored; must be 16-byte aligned because the SIMD path writes it with _mm_store_ps.
00018   \param complexVector Input vector containing interleaved IQ data (I = cos, Q = sin); must be 16-byte aligned because the SIMD path reads it with _mm_load_ps.
00019   \param normalizeFactor The atan2 results will be divided by this normalization factor (implemented as a multiplication by its reciprocal).
00020   \param num_points The number of complex values in the input vector.
00021 */
00022 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,  const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
00023   const float* complexVectorPtr = (const float*)complexVector; // read the samples as interleaved floats: I0, Q0, I1, Q1, ...
00024   float* outPtr = outputVector;
00025
00026   unsigned int number = 0;
00027   const float invNormalizeFactor = 1.0 / normalizeFactor; // hoisted so the loops multiply instead of divide
00028
00029 #ifdef LV_HAVE_LIB_SIMDMATH
00030   const unsigned int quarterPoints = num_points / 4; // each SIMD iteration consumes four complex points
00031   __m128 testVector = _mm_set_ps1(2*M_PI);
00032   __m128 correctVector = _mm_set_ps1(M_PI);
00033   __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
00034   __m128 phase;
00035   __m128 complex1, complex2, iValue, qValue;
00036   __m128 keepMask;
00037
00038   for (; number < quarterPoints; number++) {
00039     // Load IQ data (two aligned loads = four complex points):
00040     complex1 = _mm_load_ps(complexVectorPtr);
00041     complexVectorPtr += 4;
00042     complex2 = _mm_load_ps(complexVectorPtr);
00043     complexVectorPtr += 4;
00044     // Deinterleave IQ data (even float slots are I, odd float slots are Q):
00045     iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
00046     qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
00047     // Arctan to get phase:
00048     phase = atan2f4(qValue, iValue);
00049     // When Q = 0 and I < 0, atan2f4 returns 2*pi instead of pi, so those lanes are patched below.
00050     // Flag every lane whose result differs from 2*pi (these lanes are kept as-is):
00051     keepMask = _mm_cmpneq_ps(phase,testVector);
00052     phase = _mm_blendv_ps(correctVector, phase, keepMask); // flagged lanes keep phase, the others receive pi
00053     // done with above correction.
00054     phase = _mm_mul_ps(phase, vNormalizeFactor);
00055     _mm_store_ps(outPtr, phase);
00056     outPtr += 4;
00057   }
00058   number = quarterPoints * 4; // convert the SIMD iteration count back into a point index
00059 #endif /* LV_HAVE_LIB_SIMDMATH */
00060
00061   for (; number < num_points; number++) { // scalar path: leftover points, or every point when the SIMD path is compiled out
00062     const float real = *complexVectorPtr++;
00063     const float imag = *complexVectorPtr++;
00064     *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
00065   }
00066 }
00067 #endif /* LV_HAVE_SSE4_1 */
00068
00069
00070 #ifdef LV_HAVE_SSE
00071 #include <xmmintrin.h>
00072
00073 #ifdef LV_HAVE_LIB_SIMDMATH
00074 #include <simdmath.h>
00075 #endif /* LV_HAVE_LIB_SIMDMATH */
00076
00077 /*!
00078   \brief Computes atan2(Q, I) for every complex input sample, scales it by 1/normalizeFactor, and stores the result in the output vector (SSE variant).
00079   \param outputVector Vector where the num_points scaled phase values are stored; must be 16-byte aligned because the SIMD path writes it with _mm_store_ps.
00080   \param complexVector Input vector containing interleaved IQ data (I = cos, Q = sin); must be 16-byte aligned because the SIMD path reads it with _mm_load_ps.
00081   \param normalizeFactor The atan2 results will be divided by this normalization factor (implemented as a multiplication by its reciprocal).
00082   \param num_points The number of complex values in the input vector.
00083 */
00084 static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,  const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
00085   const float* complexVectorPtr = (const float*)complexVector; // read the samples as interleaved floats: I0, Q0, I1, Q1, ...
00086   float* outPtr = outputVector;
00087
00088   unsigned int number = 0;
00089   const float invNormalizeFactor = 1.0 / normalizeFactor; // hoisted so the loops multiply instead of divide
00090
00091 #ifdef LV_HAVE_LIB_SIMDMATH
00092   const unsigned int quarterPoints = num_points / 4; // each SIMD iteration consumes four complex points
00093   __m128 testVector = _mm_set_ps1(2*M_PI);
00094   __m128 correctVector = _mm_set_ps1(M_PI);
00095   __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
00096   __m128 phase;
00097   __m128 complex1, complex2, iValue, qValue;
00098   __m128 mask;
00099   __m128 keepMask;
00100
00101   for (; number < quarterPoints; number++) {
00102     // Load IQ data (two aligned loads = four complex points):
00103     complex1 = _mm_load_ps(complexVectorPtr);
00104     complexVectorPtr += 4;
00105     complex2 = _mm_load_ps(complexVectorPtr);
00106     complexVectorPtr += 4;
00107     // Deinterleave IQ data (even float slots are I, odd float slots are Q):
00108     iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
00109     qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
00110     // Arctan to get phase:
00111     phase = atan2f4(qValue, iValue);
00112     // When Q = 0 and I < 0, atan2f4 returns 2*pi instead of pi, so those lanes are patched below.
00113     // Flag every lane whose result differs from 2*pi (these lanes are kept as-is):
00114     keepMask = _mm_cmpneq_ps(phase,testVector);
00115     phase = _mm_and_ps(phase, keepMask); // zero out the lanes that hit 2*pi
00116     mask = _mm_andnot_ps(keepMask, correctVector); // pi in exactly those lanes, zero elsewhere
00117     phase = _mm_or_ps(phase, mask); // merge: a bitwise select done with and/andnot/or
00118     // done with above correction.
00119     phase = _mm_mul_ps(phase, vNormalizeFactor);
00120     _mm_store_ps(outPtr, phase);
00121     outPtr += 4;
00122   }
00123   number = quarterPoints * 4; // convert the SIMD iteration count back into a point index
00124 #endif /* LV_HAVE_LIB_SIMDMATH */
00125
00126   for (; number < num_points; number++) { // scalar path: leftover points, or every point when the SIMD path is compiled out
00127     const float real = *complexVectorPtr++;
00128     const float imag = *complexVectorPtr++;
00129     *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
00130   }
00131 }
00132 #endif /* LV_HAVE_SSE */
00133
00134 #ifdef LV_HAVE_GENERIC
00135 /*!
00136   \brief Computes atan2(Q, I) for every complex input sample, scales it by 1/normalizeFactor, and stores the result in the output vector (portable scalar variant).
00137   \param outputVector The vector where the num_points scaled phase values will be stored.
00138   \param inputVector Input vector containing interleaved IQ data (I = cos, Q = sin).
00139   \param normalizeFactor The atan2 results will be divided by this normalization factor (implemented as a multiplication by its reciprocal).
00140   \param num_points The number of complex values in the input vector.
00141 */
00142 static inline void volk_32fc_s32f_atan2_32f_a_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){
00143   float* outPtr = outputVector;
00144   const float* inPtr = (const float*)inputVector; // read the samples as interleaved floats: I0, Q0, I1, Q1, ...
00145   const float invNormalizeFactor = 1.0 / normalizeFactor; // hoisted so the loop multiplies instead of divides
00146   unsigned int number;
00147   for ( number = 0; number < num_points; number++) {
00148     const float real = *inPtr++; // I component comes first in each pair
00149     const float imag = *inPtr++; // Q component follows
00150     *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
00151   }
00152 }
00153 #endif /* LV_HAVE_GENERIC */
00154
00155
00156
00157
00158 #endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */