doc/doxygen-3.6/volk__32f__s32f__convert__16i__u_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
00002 #define INCLUDED_volk_32f_s32f_convert_16i_u_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <math.h>
00007
00008 #ifdef LV_HAVE_SSE2
00009 #include <emmintrin.h>
00010   /*!
00011     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
00012     \param inputVector The floating point input data buffer
00013     \param outputVector The 16 bit output data buffer
00014     \param scalar The value multiplied against each point in the input buffer
00015     \param num_points The number of data values to be converted
00016     \note Input buffer does NOT need to be properly aligned
00017   */
00018 static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00019   unsigned int number = 0;
00020
00021   const unsigned int eighthPoints = num_points / 8;
00022
00023   const float* inputVectorPtr = (const float*)inputVector;
00024   int16_t* outputVectorPtr = outputVector;
00025
00026   float min_val = -32768;
00027   float max_val = 32767;
00028   float r;
00029
00030   __m128 vScalar = _mm_set_ps1(scalar);
00031   __m128 inputVal1, inputVal2;
00032   __m128i intInputVal1, intInputVal2;
00033   __m128 ret1, ret2;
00034   __m128 vmin_val = _mm_set_ps1(min_val);
00035   __m128 vmax_val = _mm_set_ps1(max_val);
00036
00037   for(;number < eighthPoints; number++){
00038     inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
00039     inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
00040
00041     // Scale and clip
00042     ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
00043     ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
00044
00045     intInputVal1 = _mm_cvtps_epi32(ret1);
00046     intInputVal2 = _mm_cvtps_epi32(ret2);
00047
00048     intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
00049
00050     _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
00051     outputVectorPtr += 8;
00052   }
00053
00054   number = eighthPoints * 8;
00055   for(; number < num_points; number++){
00056     r = inputVector[number] * scalar;
00057     if(r > max_val)
00058       r = max_val;
00059     else if(r < min_val)
00060       r = min_val;
00061     outputVector[number] = (int16_t)rintf(r);
00062   }
00063 }
00064 #endif /* LV_HAVE_SSE2 */
00065
00066 #ifdef LV_HAVE_SSE
00067 #include <xmmintrin.h>
00068   /*!
00069     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
00070     \param inputVector The floating point input data buffer
00071     \param outputVector The 16 bit output data buffer
00072     \param scalar The value multiplied against each point in the input buffer
00073     \param num_points The number of data values to be converted
00074     \note Input buffer does NOT need to be properly aligned
00075   */
00076 static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00077   unsigned int number = 0;
00078
00079   const unsigned int quarterPoints = num_points / 4;
00080
00081   const float* inputVectorPtr = (const float*)inputVector;
00082   int16_t* outputVectorPtr = outputVector;
00083
00084   float min_val = -32768;
00085   float max_val = 32767;
00086   float r;
00087
00088   __m128 vScalar = _mm_set_ps1(scalar);
00089   __m128 ret;
00090   __m128 vmin_val = _mm_set_ps1(min_val);
00091   __m128 vmax_val = _mm_set_ps1(max_val);
00092
00093   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
00094
00095   for(;number < quarterPoints; number++){
00096     ret = _mm_loadu_ps(inputVectorPtr);
00097     inputVectorPtr += 4;
00098
00099     // Scale and clip
00100     ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
00101
00102     _mm_store_ps(outputFloatBuffer, ret);
00103     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
00104     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
00105     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
00106     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
00107   }
00108
00109   number = quarterPoints * 4;
00110   for(; number < num_points; number++){
00111     r = inputVector[number] * scalar;
00112     if(r > max_val)
00113       r = max_val;
00114     else if(r < min_val)
00115       r = min_val;
00116     outputVector[number] = (int16_t)rintf(r);
00117   }
00118 }
00119 #endif /* LV_HAVE_SSE */
00120
00121 #ifdef LV_HAVE_GENERIC
00122   /*!
00123     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
00124     \param inputVector The floating point input data buffer
00125     \param outputVector The 16 bit output data buffer
00126     \param scalar The value multiplied against each point in the input buffer
00127     \param num_points The number of data values to be converted
00128     \note Input buffer does NOT need to be properly aligned
00129   */
00130 static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00131   int16_t* outputVectorPtr = outputVector;
00132   const float* inputVectorPtr = inputVector;
00133   unsigned int number = 0;
00134   float min_val = -32768;
00135   float max_val = 32767;
00136   float r;
00137
00138   for(number = 0; number < num_points; number++){
00139     r = *inputVectorPtr++  * scalar;
00140     if(r > max_val)
00141       r = max_val;
00142     else if(r < min_val)
00143       r = min_val;
00144     *outputVectorPtr++ = (int16_t)rintf(r);
00145   }
00146 }
00147 #endif /* LV_HAVE_GENERIC */
00148
00149
00150
00151
00152 #endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */