doc/doxygen-3.6/volk__32f__x2__s32f__interleave__16ic__a_8h_source.html

00001 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
00002 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
00003
00004 #include <volk/volk_common.h>
00005 #include <inttypes.h>
00006 #include <stdio.h>
00007
00008 #ifdef LV_HAVE_SSE2
00009 #include <emmintrin.h>
00010   /*!
00011     \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data.
00012     \param iBuffer The I buffer data to be interleaved
00013     \param qBuffer The Q buffer data to be interleaved
00014     \param complexVector The complex output vector
00015     \param scalar The scaling value being multiplied against each data point
00016     \param num_points The number of complex data values to be interleaved
00017   */
00018 static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
00019     unsigned int number = 0;
00020     const float* iBufferPtr = iBuffer;
00021     const float* qBufferPtr = qBuffer;
00022
00023     __m128 vScalar = _mm_set_ps1(scalar);
00024
00025     const unsigned int quarterPoints = num_points / 4;
00026
00027     __m128 iValue, qValue, cplxValue1, cplxValue2;
00028     __m128i intValue1, intValue2;
00029
00030     int16_t* complexVectorPtr = (int16_t*)complexVector;
00031
00032     for(;number < quarterPoints; number++){
00033       iValue = _mm_load_ps(iBufferPtr);
00034       qValue = _mm_load_ps(qBufferPtr);
00035
00036       // Interleaves the lower two values in the i and q variables into one buffer
00037       cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
00038       cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
00039
00040       // Interleaves the upper two values in the i and q variables into one buffer
00041       cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
00042       cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
00043
00044       intValue1 = _mm_cvtps_epi32(cplxValue1);
00045       intValue2 = _mm_cvtps_epi32(cplxValue2);
00046
00047       intValue1 = _mm_packs_epi32(intValue1, intValue2);
00048
00049       _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
00050       complexVectorPtr += 8;
00051
00052       iBufferPtr += 4;
00053       qBufferPtr += 4;
00054     }
00055
00056     number = quarterPoints * 4;
00057     complexVectorPtr = (int16_t*)(&complexVector[number]);
00058     for(; number < num_points; number++){
00059       *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
00060       *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
00061     }
00062
00063 }
00064 #endif /* LV_HAVE_SSE2 */
00065
00066 #ifdef LV_HAVE_SSE
00067 #include <xmmintrin.h>
00068   /*!
00069     \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data.
00070     \param iBuffer The I buffer data to be interleaved
00071     \param qBuffer The Q buffer data to be interleaved
00072     \param complexVector The complex output vector
00073     \param scalar The scaling value being multiplied against each data point
00074     \param num_points The number of complex data values to be interleaved
00075   */
00076 static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
00077     unsigned int number = 0;
00078     const float* iBufferPtr = iBuffer;
00079     const float* qBufferPtr = qBuffer;
00080
00081     __m128 vScalar = _mm_set_ps1(scalar);
00082
00083     const unsigned int quarterPoints = num_points / 4;
00084
00085     __m128 iValue, qValue, cplxValue;
00086
00087     int16_t* complexVectorPtr = (int16_t*)complexVector;
00088
00089     __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
00090
00091     for(;number < quarterPoints; number++){
00092       iValue = _mm_load_ps(iBufferPtr);
00093       qValue = _mm_load_ps(qBufferPtr);
00094
00095       // Interleaves the lower two values in the i and q variables into one buffer
00096       cplxValue = _mm_unpacklo_ps(iValue, qValue);
00097       cplxValue = _mm_mul_ps(cplxValue, vScalar);
00098
00099       _mm_store_ps(floatBuffer, cplxValue);
00100
00101       *complexVectorPtr++ = (int16_t)(floatBuffer[0]);
00102       *complexVectorPtr++ = (int16_t)(floatBuffer[1]);
00103       *complexVectorPtr++ = (int16_t)(floatBuffer[2]);
00104       *complexVectorPtr++ = (int16_t)(floatBuffer[3]);
00105
00106       // Interleaves the upper two values in the i and q variables into one buffer
00107       cplxValue = _mm_unpackhi_ps(iValue, qValue);
00108       cplxValue = _mm_mul_ps(cplxValue, vScalar);
00109
00110       _mm_store_ps(floatBuffer, cplxValue);
00111
00112       *complexVectorPtr++ = (int16_t)(floatBuffer[0]);
00113       *complexVectorPtr++ = (int16_t)(floatBuffer[1]);
00114       *complexVectorPtr++ = (int16_t)(floatBuffer[2]);
00115       *complexVectorPtr++ = (int16_t)(floatBuffer[3]);
00116
00117       iBufferPtr += 4;
00118       qBufferPtr += 4;
00119     }
00120
00121     number = quarterPoints * 4;
00122     complexVectorPtr = (int16_t*)(&complexVector[number]);
00123     for(; number < num_points; number++){
00124       *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
00125       *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
00126     }
00127
00128 }
00129 #endif /* LV_HAVE_SSE */
00130
00131 #ifdef LV_HAVE_GENERIC
00132   /*!
00133     \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data.
00134     \param iBuffer The I buffer data to be interleaved
00135     \param qBuffer The Q buffer data to be interleaved
00136     \param complexVector The complex output vector
00137     \param scalar The scaling value being multiplied against each data point
00138     \param num_points The number of complex data values to be interleaved
00139   */
00140 static inline void volk_32f_x2_s32f_interleave_16ic_a_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){
00141   int16_t* complexVectorPtr = (int16_t*)complexVector;
00142   const float* iBufferPtr = iBuffer;
00143   const float* qBufferPtr = qBuffer;
00144   unsigned int number = 0;
00145
00146   for(number = 0; number < num_points; number++){
00147     *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
00148     *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
00149   }
00150 }
00151 #endif /* LV_HAVE_GENERIC */
00152
00153
00154
00155
00156 #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */