doc/doxygen-3.6/volk__16ic__s32f__magnitude__32f__a_8h_source.html

00001 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
00002 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
00003
00004 #include <volk/volk_common.h>
00005 #include <inttypes.h>
00006 #include <stdio.h>
00007 #include <math.h>
00008
00009 #ifdef LV_HAVE_SSE3
00010 #include <pmmintrin.h>
00011 /*!
00012   \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
00013   \param complexVector The vector containing the complex input values
00014   \param magnitudeVector The vector containing the real output values
00015   \param scalar The data value to be divided against each input data value of the input complex vector
00016   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
00017 */
00018 static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
00019   unsigned int number = 0;
00020   const unsigned int quarterPoints = num_points / 4;
00021
00022   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
00023   float* magnitudeVectorPtr = magnitudeVector;
00024
00025   __m128 invScalar = _mm_set_ps1(1.0/scalar);
00026
00027   __m128 cplxValue1, cplxValue2, result;
00028
00029   __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
00030
00031   for(;number < quarterPoints; number++){
00032
00033     inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
00034     inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
00035     inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
00036     inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
00037
00038     inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
00039     inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
00040     inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
00041     inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
00042
00043     cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
00044     cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
00045
00046     complexVectorPtr += 8;
00047
00048     cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
00049     cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
00050
00051     cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
00052     cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
00053
00054     result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
00055
00056     result = _mm_sqrt_ps(result); // Square root the values
00057
00058     _mm_store_ps(magnitudeVectorPtr, result);
00059
00060     magnitudeVectorPtr += 4;
00061   }
00062
00063   number = quarterPoints * 4;
00064   magnitudeVectorPtr = &magnitudeVector[number];
00065   complexVectorPtr = (const int16_t*)&complexVector[number];
00066   for(; number < num_points; number++){
00067     float val1Real = (float)(*complexVectorPtr++) / scalar;
00068     float val1Imag = (float)(*complexVectorPtr++) / scalar;
00069     *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
00070   }
00071 }
00072 #endif /* LV_HAVE_SSE3 */
00073
00074 #ifdef LV_HAVE_SSE
00075 #include <xmmintrin.h>
00076 /*!
00077   \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
00078   \param complexVector The vector containing the complex input values
00079   \param magnitudeVector The vector containing the real output values
00080   \param scalar The data value to be divided against each input data value of the input complex vector
00081   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
00082 */
00083 static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
00084   unsigned int number = 0;
00085   const unsigned int quarterPoints = num_points / 4;
00086
00087   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
00088   float* magnitudeVectorPtr = magnitudeVector;
00089
00090   const float iScalar = 1.0 / scalar;
00091   __m128 invScalar = _mm_set_ps1(iScalar);
00092
00093   __m128 cplxValue1, cplxValue2, result, re, im;
00094
00095   __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
00096
00097   for(;number < quarterPoints; number++){
00098     inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
00099     inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
00100     inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
00101     inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
00102
00103     inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
00104     inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
00105     inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
00106     inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
00107
00108     cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
00109     cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
00110
00111     re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
00112     im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
00113
00114     complexVectorPtr += 8;
00115
00116     cplxValue1 = _mm_mul_ps(re, invScalar);
00117     cplxValue2 = _mm_mul_ps(im, invScalar);
00118
00119     cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
00120     cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
00121
00122     result = _mm_add_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
00123
00124     result = _mm_sqrt_ps(result); // Square root the values
00125
00126     _mm_store_ps(magnitudeVectorPtr, result);
00127
00128     magnitudeVectorPtr += 4;
00129   }
00130
00131   number = quarterPoints * 4;
00132   magnitudeVectorPtr = &magnitudeVector[number];
00133   complexVectorPtr = (const int16_t*)&complexVector[number];
00134   for(; number < num_points; number++){
00135     float val1Real = (float)(*complexVectorPtr++) * iScalar;
00136     float val1Imag = (float)(*complexVectorPtr++) * iScalar;
00137     *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
00138   }
00139 }
00140
00141
00142 #endif /* LV_HAVE_SSE */
00143
00144 #ifdef LV_HAVE_GENERIC
00145 /*!
00146   \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
00147   \param complexVector The vector containing the complex input values
00148   \param magnitudeVector The vector containing the real output values
00149   \param scalar The data value to be divided against each input data value of the input complex vector
00150   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
00151 */
00152 static inline void volk_16ic_s32f_magnitude_32f_a_generic(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
00153   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
00154   float* magnitudeVectorPtr = magnitudeVector;
00155   unsigned int number = 0;
00156   const float invScalar = 1.0 / scalar;
00157   for(number = 0; number < num_points; number++){
00158     float real = ( (float) (*complexVectorPtr++)) * invScalar;
00159     float imag = ( (float) (*complexVectorPtr++)) * invScalar;
00160     *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
00161   }
00162 }
00163 #endif /* LV_HAVE_GENERIC */
00164
00165 #ifdef LV_HAVE_ORC_DISABLED
00166 /*!
00167   \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
00168   \param complexVector The vector containing the complex input values
00169   \param magnitudeVector The vector containing the real output values
00170   \param scalar The data value to be divided against each input data value of the input complex vector
00171   \param num_points The number of complex values in complexVector to be calculated and stored into cVector
00172 */
00173 extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
00174 static inline void volk_16ic_s32f_magnitude_32f_a_orc(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
00175     volk_16ic_s32f_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
00176 }
00177 #endif /* LV_HAVE_ORC_DISABLED */
00178
00179
00180 #endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */