doc/doxygen-3.6/volk__16ic__s32f__deinterleave__real__32f__a_8h_source.html

00001 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
00002 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
00003
00004 #include <volk/volk_common.h>
00005 #include <inttypes.h>
00006 #include <stdio.h>
00007
00008 #ifdef LV_HAVE_SSE4_1
00009 #include <smmintrin.h>
00010 /*!
00011   \brief SSE4.1 kernel: takes the first 16-bit value (the I part) of each complex sample, converts it to float and scales it by 1.0 / scalar
00012   \param iBuffer The I buffer output data (num_points floats); written with _mm_store_ps, which needs 16-byte alignment whenever num_points >= 4
00013   \param complexVector The complex input vector; read with _mm_load_si128, which needs 16-byte alignment whenever num_points >= 4
00014   \param scalar The scaling divisor: each output is the converted I value multiplied by (1.0 / scalar)
00015   \param num_points The number of complex data values to be deinterleaved
00016 */
00017 static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
00018   float* iBufferPtr = iBuffer;
00019
00020   unsigned int number = 0;
00021   const unsigned int quarterPoints = num_points / 4; /* number of full 4-sample SIMD iterations */
00022
00023   __m128 iFloatValue;
00024
00025   const float iScalar= 1.0 / scalar; /* reciprocal computed once so both loops multiply instead of divide */
00026   __m128 invScalar = _mm_set_ps1(iScalar); /* reciprocal broadcast to all four lanes */
00027   __m128i complexVal, iIntVal;
00028   int8_t* complexVectorPtr = (int8_t*)complexVector; /* byte cursor over the input; NOTE(review): the cast drops const although the data is only read here */
00029   /* Shuffle mask (arguments run from byte 15 down to byte 0): bytes 0,1,4,5,8,9,12,13 are packed into the low 64 bits; 0x80 entries zero the high 64 bits. */
00030   __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
00031
00032   for(;number < quarterPoints; number++){
00033     complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16; /* aligned load of 16 bytes, consumed as 4 complex samples */
00034     complexVal = _mm_shuffle_epi8(complexVal, moveMask); /* keep only the first int16 of each of the 4 samples */
00035
00036     iIntVal = _mm_cvtepi16_epi32(complexVal); /* sign-extend the four low int16 values to int32 */
00037     iFloatValue = _mm_cvtepi32_ps(iIntVal); /* int32 -> float */
00038
00039     iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
00040
00041     _mm_store_ps(iBufferPtr, iFloatValue); /* aligned store of 4 output floats */
00042
00043     iBufferPtr += 4;
00044   }
00045   /* Scalar tail: handles the num_points % 4 samples the SIMD loop did not cover. */
00046   number = quarterPoints * 4;
00047   int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
00048   for(; number < num_points; number++){
00049     *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
00050     sixteenTComplexVectorPtr++; /* step over the sample's second int16, which this kernel discards */
00051   }
00052
00053 }
00054 #endif /* LV_HAVE_SSE4_1 */
00055
00056 #ifdef LV_HAVE_SSE
00057 #include <xmmintrin.h>
00058 /*!
00059   \brief SSE kernel: takes the first 16-bit value (the I part) of each complex sample, converts it to float and scales it by 1.0 / scalar
00060   \param iBuffer The I buffer output data (num_points floats); written with _mm_store_ps, which needs 16-byte alignment whenever num_points >= 4
00061   \param complexVector The complex input vector; only read through scalar int16_t accesses in this variant
00062   \param scalar The scaling divisor: each output is the converted I value multiplied by (1.0 / scalar)
00063   \param num_points The number of complex data values to be deinterleaved
00064 */
00065 static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
00066   float* iBufferPtr = iBuffer;
00067
00068   unsigned int number = 0;
00069   const unsigned int quarterPoints = num_points / 4; /* number of full 4-sample SIMD iterations */
00070   __m128 iValue;
00071
00072   const float iScalar = 1.0/scalar; /* reciprocal computed once so both loops multiply instead of divide */
00073   __m128 invScalar = _mm_set_ps1(iScalar); /* reciprocal broadcast to all four lanes */
00074   int16_t* complexVectorPtr = (int16_t*)complexVector; /* NOTE(review): the cast drops const although the data is only read here */
00075   /* Aligned staging buffer: the int16 -> float conversion is done in scalar code, then loaded as one vector. */
00076   __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
00077
00078   for(;number < quarterPoints; number++){
00079     floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2; /* += 2 skips to the first int16 of the next sample */
00080     floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2;
00081     floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
00082     floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2;
00083
00084     iValue = _mm_load_ps(floatBuffer);
00085
00086     iValue = _mm_mul_ps(iValue, invScalar);
00087
00088     _mm_store_ps(iBufferPtr, iValue); /* aligned store of 4 output floats */
00089
00090     iBufferPtr += 4;
00091   }
00092   /* Scalar tail: handles the num_points % 4 samples the SIMD loop did not cover. */
00093   number = quarterPoints * 4;
00094   complexVectorPtr = (int16_t*)&complexVector[number]; /* re-anchor the cursor at the first unprocessed sample */
00095   for(; number < num_points; number++){
00096     *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
00097     complexVectorPtr++; /* step over the sample's second int16, which this kernel discards */
00098   }
00099
00100 }
00101 #endif /* LV_HAVE_SSE */
00102
00103 #ifdef LV_HAVE_GENERIC
00104 /*!
00105   \brief Portable kernel: takes the first 16-bit value (the I part) of each complex sample, converts it to float and scales it by 1.0 / scalar
00106   \param iBuffer The I buffer output data (num_points floats)
00107   \param complexVector The complex input vector, walked as consecutive int16_t values
00108   \param scalar The scaling divisor: each output is the converted I value multiplied by (1.0 / scalar)
00109   \param num_points The number of complex data values to be deinterleaved
00110 */
00111 static inline void volk_16ic_s32f_deinterleave_real_32f_a_generic(float* iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
00112   unsigned int number = 0;
00113   const int16_t* complexVectorPtr = (const int16_t*)complexVector;
00114   float* iBufferPtr = iBuffer;
00115   const float invScalar = 1.0 / scalar; /* reciprocal computed once so the loop multiplies instead of divides */
00116   for(number = 0; number < num_points; number++){
00117     *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
00118     complexVectorPtr++; /* step over the sample's second int16, which this kernel discards */
00119   }
00120 }
00121 #endif /* LV_HAVE_GENERIC */
00122
00123
00124
00125
00126 #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */