doc/doxygen-3.6/volk__32f__s32f__convert__32i__u_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
00002 #define INCLUDED_volk_32f_s32f_convert_32i_u_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006
00007 #ifdef LV_HAVE_SSE2
00008 #include <emmintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value
  \param outputVector The 32 bit output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
  \note Input buffer does NOT need to be properly aligned
  \note Products are rounded using the current SSE rounding mode (round-to-nearest-even unless MXCSR was changed), in the vector loop and in the scalar tail alike
  \note Products >= 2^31 and NaN products saturate to 2147483647; products <= -2^31 saturate to -2147483648
*/
static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* 2147483647 cannot be represented as a float (it rounds up to 2^31), so the
     clamp limits are written as the exact powers of two they actually are. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  /* Integer saturation values, spelled out so no limit macros are required. */
  const int32_t int_max = 2147483647;
  const int32_t int_min = -2147483647 - 1;
  float r;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vmin_val = _mm_set_ps1(min_val);
  const __m128 vmax_val = _mm_set_ps1(max_val);
  __m128 inputVal1;
  __m128i intInputVal1;
  __m128i overflowMask;

  for(;number < quarterPoints; number++){
    inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;

    /* Scale and clamp to [-2^31, 2^31]. _mm_min_ps hands back its second
       operand when the first is NaN, so NaN lanes end up at max_val. */
    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);

    /* A lane sitting at +2^31 does not fit in an int32: the conversion returns
       0x80000000 for it. XOR-ing those lanes with an all-ones mask turns that
       into 0x7FFFFFFF; every other lane is XOR-ed with zero and left alone. */
    overflowMask = _mm_castps_si128(_mm_cmpge_ps(inputVal1, vmax_val));
    intInputVal1 = _mm_xor_si128(_mm_cvtps_epi32(inputVal1), overflowMask);

    _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
    outputVectorPtr += 4;
  }

  /* Scalar tail for the 0-3 points that do not fill a whole vector. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    r = inputVector[number] * scalar;
    if(!(r < max_val))          /* also true for NaN */
      outputVector[number] = int_max;
    else if(r <= min_val)
      outputVector[number] = int_min;
    else                        /* same rounding as the vector loop above */
      outputVector[number] = _mm_cvtss_si32(_mm_set_ss(r));
  }
}
00055 #endif /* LV_HAVE_SSE2 */
00056
00057 #ifdef LV_HAVE_SSE
00058 #include <xmmintrin.h>
00059   /*!
00060     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value
00061     \param inputVector The floating point input data buffer
00062     \param outputVector The 32 bit output data buffer
00063     \param scalar The value multiplied against each point in the input buffer
00064     \param num_points The number of data values to be converted
00065     \note Input buffer does NOT need to be properly aligned
00066   */
00067 static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00068   unsigned int number = 0;
00069
00070   const unsigned int quarterPoints = num_points / 4;
00071
00072   const float* inputVectorPtr = (const float*)inputVector;
00073   int32_t* outputVectorPtr = outputVector;
00074
00075   float min_val = -2147483647;
00076   float max_val = 2147483647;
00077   float r;
00078
00079   __m128 vScalar = _mm_set_ps1(scalar);
00080   __m128 ret;
00081   __m128 vmin_val = _mm_set_ps1(min_val);
00082   __m128 vmax_val = _mm_set_ps1(max_val);
00083
00084   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
00085
00086   for(;number < quarterPoints; number++){
00087     ret = _mm_loadu_ps(inputVectorPtr);
00088     inputVectorPtr += 4;
00089
00090     ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
00091
00092     _mm_store_ps(outputFloatBuffer, ret);
00093     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
00094     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[1]);
00095     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[2]);
00096     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]);
00097   }
00098
00099   number = quarterPoints * 4;
00100   for(; number < num_points; number++){
00101     r = inputVector[number] * scalar;
00102     if(r > max_val)
00103       r = max_val;
00104     else if(r < min_val)
00105       r = min_val;
00106     outputVector[number] = (int32_t)(r);
00107   }
00108 }
00109 #endif /* LV_HAVE_SSE */
00110
00111 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value
  \param outputVector The 32 bit output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
  \note Input buffer does NOT need to be properly aligned
  \note Products are truncated toward zero when converted
  \note Products >= 2^31 and NaN products saturate to 2147483647; products <= -2^31 saturate to -2147483648
*/
static inline void volk_32f_s32f_convert_32i_u_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  int32_t* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number = 0;
  /* 2147483647 cannot be represented as a float (it rounds up to 2^31), so the
     limits are written as the exact powers of two they actually are. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;
  /* Integer saturation values, spelled out so no limit macros are required. */
  const int32_t int_max = 2147483647;
  const int32_t int_min = -2147483647 - 1;
  float r;

  for(number = 0; number < num_points; number++){
    r = *inputVectorPtr++ * scalar;
    /* Casting a float that is out of int32 range (or NaN) is undefined, so
       those cases are resolved before the cast is ever reached. */
    if(!(r < max_val))          /* also true for NaN */
      *outputVectorPtr++ = int_max;
    else if(r <= min_val)
      *outputVectorPtr++ = int_min;
    else
      *outputVectorPtr++ = (int32_t)(r);
  }
}
00137 #endif /* LV_HAVE_GENERIC */
00138
00139
00140
00141
00142 #endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */