GNU Radio 3.5.3.2 C++ API
volk_32f_s32f_convert_16i_u.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
00002 #define INCLUDED_volk_32f_s32f_convert_16i_u_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <math.h>
00007 
00008 #ifdef LV_HAVE_SSE2
00009 #include <emmintrin.h>
00010   /*!
00011     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
00012     \param inputVector The floating point input data buffer
00013     \param outputVector The 16 bit output data buffer
00014     \param scalar The value multiplied against each point in the input buffer
00015     \param num_points The number of data values to be converted
00016     \note Input buffer does NOT need to be properly aligned
00017   */
00018 static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00019   unsigned int number = 0;
00020 
00021   const unsigned int eighthPoints = num_points / 8;
00022     
00023   const float* inputVectorPtr = (const float*)inputVector;
00024   int16_t* outputVectorPtr = outputVector;
00025 
00026   float min_val = -32768;
00027   float max_val = 32767;
00028   float r;
00029 
00030   __m128 vScalar = _mm_set_ps1(scalar);
00031   __m128 inputVal1, inputVal2;
00032   __m128i intInputVal1, intInputVal2;
00033   __m128 ret1, ret2;
00034   __m128 vmin_val = _mm_set_ps1(min_val);
00035   __m128 vmax_val = _mm_set_ps1(max_val);
00036 
00037   for(;number < eighthPoints; number++){
00038     inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
00039     inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
00040 
00041     // Scale and clip
00042     ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
00043     ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
00044 
00045     intInputVal1 = _mm_cvtps_epi32(ret1);
00046     intInputVal2 = _mm_cvtps_epi32(ret2);
00047 
00048     intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
00049 
00050     _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
00051     outputVectorPtr += 8;
00052   }
00053 
00054   number = eighthPoints * 8;    
00055   for(; number < num_points; number++){
00056     r = inputVector[number] * scalar;
00057     if(r > max_val)
00058       r = max_val;
00059     else if(r < min_val)
00060       r = min_val;
00061     outputVector[number] = (int16_t)rintf(r);
00062   }
00063 }
00064 #endif /* LV_HAVE_SSE2 */
00065 
00066 #ifdef LV_HAVE_SSE
00067 #include <xmmintrin.h>
00068   /*!
00069     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
00070     \param inputVector The floating point input data buffer
00071     \param outputVector The 16 bit output data buffer
00072     \param scalar The value multiplied against each point in the input buffer
00073     \param num_points The number of data values to be converted
00074     \note Input buffer does NOT need to be properly aligned
00075   */
00076 static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00077   unsigned int number = 0;
00078 
00079   const unsigned int quarterPoints = num_points / 4;
00080     
00081   const float* inputVectorPtr = (const float*)inputVector;
00082   int16_t* outputVectorPtr = outputVector;
00083 
00084   float min_val = -32768;
00085   float max_val = 32767;
00086   float r;
00087 
00088   __m128 vScalar = _mm_set_ps1(scalar);
00089   __m128 ret;
00090   __m128 vmin_val = _mm_set_ps1(min_val);
00091   __m128 vmax_val = _mm_set_ps1(max_val);
00092 
00093   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
00094 
00095   for(;number < quarterPoints; number++){
00096     ret = _mm_loadu_ps(inputVectorPtr);
00097     inputVectorPtr += 4;
00098 
00099     // Scale and clip
00100     ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
00101 
00102     _mm_store_ps(outputFloatBuffer, ret);
00103     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
00104     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
00105     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
00106     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
00107   }
00108 
00109   number = quarterPoints * 4;    
00110   for(; number < num_points; number++){
00111     r = inputVector[number] * scalar;
00112     if(r > max_val)
00113       r = max_val;
00114     else if(r < min_val)
00115       r = min_val;
00116     outputVector[number] = (int16_t)rintf(r);
00117   }
00118 }
00119 #endif /* LV_HAVE_SSE */
00120 
00121 #ifdef LV_HAVE_GENERIC
00122   /*!
00123     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
00124     \param inputVector The floating point input data buffer
00125     \param outputVector The 16 bit output data buffer
00126     \param scalar The value multiplied against each point in the input buffer
00127     \param num_points The number of data values to be converted
00128     \note Input buffer does NOT need to be properly aligned
00129   */
00130 static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00131   int16_t* outputVectorPtr = outputVector;
00132   const float* inputVectorPtr = inputVector;
00133   unsigned int number = 0;
00134   float min_val = -32768;
00135   float max_val = 32767;
00136   float r;
00137 
00138   for(number = 0; number < num_points; number++){
00139     r = *inputVectorPtr++  * scalar;
00140     if(r > max_val)
00141       r = max_val;
00142     else if(r < min_val)
00143       r = min_val;
00144     *outputVectorPtr++ = (int16_t)rintf(r);
00145   }
00146 }
00147 #endif /* LV_HAVE_GENERIC */
00148 
00149 
00150 
00151 
00152 #endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */