GNU Radio 3.5.3.2 C++ API
|
00001 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H 00002 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H 00003 00004 #include <volk/volk_common.h> 00005 #include <inttypes.h> 00006 #include <stdio.h> 00007 00008 #ifdef LV_HAVE_SSE2 00009 #include <emmintrin.h> 00010 /*! 00011 \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. 00012 \param iBuffer The I buffer data to be interleaved 00013 \param qBuffer The Q buffer data to be interleaved 00014 \param complexVector The complex output vector 00015 \param scalar The scaling value being multiplied against each data point 00016 \param num_points The number of complex data values to be interleaved 00017 */ 00018 static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ 00019 unsigned int number = 0; 00020 const float* iBufferPtr = iBuffer; 00021 const float* qBufferPtr = qBuffer; 00022 00023 __m128 vScalar = _mm_set_ps1(scalar); 00024 00025 const unsigned int quarterPoints = num_points / 4; 00026 00027 __m128 iValue, qValue, cplxValue1, cplxValue2; 00028 __m128i intValue1, intValue2; 00029 00030 int16_t* complexVectorPtr = (int16_t*)complexVector; 00031 00032 for(;number < quarterPoints; number++){ 00033 iValue = _mm_load_ps(iBufferPtr); 00034 qValue = _mm_load_ps(qBufferPtr); 00035 00036 // Interleaves the lower two values in the i and q variables into one buffer 00037 cplxValue1 = _mm_unpacklo_ps(iValue, qValue); 00038 cplxValue1 = _mm_mul_ps(cplxValue1, vScalar); 00039 00040 // Interleaves the upper two values in the i and q variables into one buffer 00041 cplxValue2 = _mm_unpackhi_ps(iValue, qValue); 00042 cplxValue2 = _mm_mul_ps(cplxValue2, vScalar); 00043 00044 intValue1 = _mm_cvtps_epi32(cplxValue1); 00045 intValue2 = _mm_cvtps_epi32(cplxValue2); 00046 00047 intValue1 = _mm_packs_epi32(intValue1, intValue2); 00048 00049 _mm_store_si128((__m128i*)complexVectorPtr, intValue1); 00050 complexVectorPtr += 8; 00051 00052 iBufferPtr += 4; 00053 qBufferPtr += 4; 00054 } 00055 00056 number = quarterPoints * 4; 00057 complexVectorPtr = (int16_t*)(&complexVector[number]); 00058 for(; number < num_points; number++){ 00059 *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar); 00060 *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar); 00061 } 00062 00063 } 00064 #endif /* LV_HAVE_SSE2 */ 00065 00066 #ifdef LV_HAVE_SSE 00067 #include <xmmintrin.h> 00068 /*! 00069 \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. 00070 \param iBuffer The I buffer data to be interleaved 00071 \param qBuffer The Q buffer data to be interleaved 00072 \param complexVector The complex output vector 00073 \param scalar The scaling value being multiplied against each data point 00074 \param num_points The number of complex data values to be interleaved 00075 */ 00076 static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ 00077 unsigned int number = 0; 00078 const float* iBufferPtr = iBuffer; 00079 const float* qBufferPtr = qBuffer; 00080 00081 __m128 vScalar = _mm_set_ps1(scalar); 00082 00083 const unsigned int quarterPoints = num_points / 4; 00084 00085 __m128 iValue, qValue, cplxValue; 00086 00087 int16_t* complexVectorPtr = (int16_t*)complexVector; 00088 00089 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; 00090 00091 for(;number < quarterPoints; number++){ 00092 iValue = _mm_load_ps(iBufferPtr); 00093 qValue = _mm_load_ps(qBufferPtr); 00094 00095 // Interleaves the lower two values in the i and q variables into one buffer 00096 cplxValue = _mm_unpacklo_ps(iValue, qValue); 00097 cplxValue = _mm_mul_ps(cplxValue, vScalar); 00098 00099 _mm_store_ps(floatBuffer, cplxValue); 00100 00101 *complexVectorPtr++ = (int16_t)(floatBuffer[0]); 00102 *complexVectorPtr++ = (int16_t)(floatBuffer[1]); 00103 *complexVectorPtr++ = (int16_t)(floatBuffer[2]); 00104 *complexVectorPtr++ = (int16_t)(floatBuffer[3]); 00105 00106 // Interleaves the upper two values in the i and q variables into one buffer 00107 cplxValue = _mm_unpackhi_ps(iValue, qValue); 00108 cplxValue = _mm_mul_ps(cplxValue, vScalar); 00109 00110 _mm_store_ps(floatBuffer, cplxValue); 00111 00112 *complexVectorPtr++ = (int16_t)(floatBuffer[0]); 00113 *complexVectorPtr++ = (int16_t)(floatBuffer[1]); 00114 *complexVectorPtr++ = (int16_t)(floatBuffer[2]); 00115 *complexVectorPtr++ = (int16_t)(floatBuffer[3]); 00116 00117 iBufferPtr += 4; 00118 qBufferPtr += 4; 00119 } 00120 00121 number = quarterPoints * 4; 00122 complexVectorPtr = (int16_t*)(&complexVector[number]); 00123 for(; number < num_points; number++){ 00124 *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar); 00125 *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar); 00126 } 00127 00128 } 00129 #endif /* LV_HAVE_SSE */ 00130 00131 #ifdef LV_HAVE_GENERIC 00132 /*! 00133 \brief Interleaves the I & Q vector data into the complex vector, scales the output values by the scalar, and converts to 16 bit data. 00134 \param iBuffer The I buffer data to be interleaved 00135 \param qBuffer The Q buffer data to be interleaved 00136 \param complexVector The complex output vector 00137 \param scalar The scaling value being multiplied against each data point 00138 \param num_points The number of complex data values to be interleaved 00139 */ 00140 static inline void volk_32f_x2_s32f_interleave_16ic_a_generic(lv_16sc_t* complexVector, const float* iBuffer, const float* qBuffer, const float scalar, unsigned int num_points){ 00141 int16_t* complexVectorPtr = (int16_t*)complexVector; 00142 const float* iBufferPtr = iBuffer; 00143 const float* qBufferPtr = qBuffer; 00144 unsigned int number = 0; 00145 00146 for(number = 0; number < num_points; number++){ 00147 *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar); 00148 *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar); 00149 } 00150 } 00151 #endif /* LV_HAVE_GENERIC */ 00152 00153 00154 00155 00156 #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */