doc/doxygen-3.6/volk__32f__s32f__convert__16i__a_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H
00002 #define INCLUDED_volk_32f_s32f_convert_16i_a_H
00003
00004 #include <volk/volk_common.h>
00005 #include <inttypes.h>
00006 #include <stdio.h>
00007 #include <math.h>
00008
00009 #ifdef LV_HAVE_SSE2
00010 #include <emmintrin.h>
00011   /*!
00012     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
00013     \param inputVector The floating point input data buffer
00014     \param outputVector The 16 bit output data buffer
00015     \param scalar The value multiplied against each point in the input buffer
00016     \param num_points The number of data values to be converted
00017   */
00018 static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00019   unsigned int number = 0;
00020
00021   const unsigned int eighthPoints = num_points / 8;
00022
00023   const float* inputVectorPtr = (const float*)inputVector;
00024   int16_t* outputVectorPtr = outputVector;
00025
00026   float min_val = -32768;
00027   float max_val = 32767;
00028   float r;
00029
00030   __m128 vScalar = _mm_set_ps1(scalar);
00031   __m128 inputVal1, inputVal2;
00032   __m128i intInputVal1, intInputVal2;
00033   __m128 ret1, ret2;
00034   __m128 vmin_val = _mm_set_ps1(min_val);
00035   __m128 vmax_val = _mm_set_ps1(max_val);
00036
00037   for(;number < eighthPoints; number++){
00038     inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
00039     inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
00040
00041     // Scale and clip
00042     ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
00043     ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
00044
00045     intInputVal1 = _mm_cvtps_epi32(ret1);
00046     intInputVal2 = _mm_cvtps_epi32(ret2);
00047
00048     intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
00049
00050     _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
00051     outputVectorPtr += 8;
00052   }
00053
00054   number = eighthPoints * 8;
00055   for(; number < num_points; number++){
00056     r = inputVector[number] * scalar;
00057     if(r > max_val)
00058       r = max_val;
00059     else if(r < min_val)
00060       r = min_val;
00061     outputVector[number] = (int16_t)rintf(r);
00062   }
00063 }
00064 #endif /* LV_HAVE_SSE2 */
00065
00066 #ifdef LV_HAVE_SSE
00067 #include <xmmintrin.h>
00068   /*!
00069     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
00070     \param inputVector The floating point input data buffer
00071     \param outputVector The 16 bit output data buffer
00072     \param scalar The value multiplied against each point in the input buffer
00073     \param num_points The number of data values to be converted
00074   */
00075 static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00076   unsigned int number = 0;
00077
00078   const unsigned int quarterPoints = num_points / 4;
00079
00080   const float* inputVectorPtr = (const float*)inputVector;
00081   int16_t* outputVectorPtr = outputVector;
00082
00083   float min_val = -32768;
00084   float max_val = 32767;
00085   float r;
00086
00087   __m128 vScalar = _mm_set_ps1(scalar);
00088   __m128 ret;
00089   __m128 vmin_val = _mm_set_ps1(min_val);
00090   __m128 vmax_val = _mm_set_ps1(max_val);
00091
00092   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
00093
00094   for(;number < quarterPoints; number++){
00095     ret = _mm_load_ps(inputVectorPtr);
00096     inputVectorPtr += 4;
00097
00098     // Scale and clip
00099     ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
00100
00101     _mm_store_ps(outputFloatBuffer, ret);
00102     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
00103     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
00104     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
00105     *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
00106   }
00107
00108   number = quarterPoints * 4;
00109   for(; number < num_points; number++){
00110     r = inputVector[number] * scalar;
00111     if(r > max_val)
00112       r = max_val;
00113     else if(r < min_val)
00114       r = min_val;
00115     outputVector[number] = (int16_t)rintf(r);
00116   }
00117 }
00118 #endif /* LV_HAVE_SSE */
00119
00120 #ifdef LV_HAVE_GENERIC
00121   /*!
00122     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
00123     \param inputVector The floating point input data buffer
00124     \param outputVector The 16 bit output data buffer
00125     \param scalar The value multiplied against each point in the input buffer
00126     \param num_points The number of data values to be converted
00127   */
00128 static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00129   int16_t* outputVectorPtr = outputVector;
00130   const float* inputVectorPtr = inputVector;
00131   unsigned int number = 0;
00132   float min_val = -32768;
00133   float max_val = 32767;
00134   float r;
00135
00136   for(number = 0; number < num_points; number++){
00137     r  = *inputVectorPtr++ * scalar;
00138     if(r < min_val)
00139       r = min_val;
00140     else if(r > max_val)
00141       r = max_val;
00142     *outputVectorPtr++ = (int16_t)rintf(r);
00143   }
00144 }
00145 #endif /* LV_HAVE_GENERIC */
00146
00147
00148
00149
00150 #endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */