doc/doxygen-3.6/volk__32f__s32f__convert__8i__a_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H
00002 #define INCLUDED_volk_32f_s32f_convert_8i_a_H
00003
00004 #include <volk/volk_common.h>
00005 #include <inttypes.h>
00006 #include <stdio.h>
00007
00008 #ifdef LV_HAVE_SSE2
00009 #include <emmintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, clamps the product to [-128, 127], then converts it into an 8 bit integer value (SSE2, aligned buffers)

  The float to integer conversion uses the current MXCSR rounding mode
  (round-to-nearest-even unless the caller changed it). The tail loop that
  handles the last num_points % 16 values runs the same multiply / min / max /
  convert sequence on a single lane, so the result for an element does not
  depend on where it sits in the buffer. A NaN product is written as 127.

  \param outputVector The 8 bit output data buffer; must be 16-byte aligned because it is written with aligned stores
  \param inputVector The floating point input data buffer; must be 16-byte aligned because it is read with aligned loads
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
*/
static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int sixteenthPoints = num_points / 16;

  const float* inputVectorPtr = inputVector;
  int8_t* outputVectorPtr = outputVector;

  const float min_val = -128;
  const float max_val = 127;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vmin_val = _mm_set_ps1(min_val);
  const __m128 vmax_val = _mm_set_ps1(max_val);
  __m128 inputVal1, inputVal2, inputVal3, inputVal4;
  __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;

  /* Main loop: 16 floats in, 16 int8 values out per iteration. */
  for(;number < sixteenthPoints; number++){
    inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
    inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
    inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
    inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;

    /* Scale, then clamp. The product is the FIRST operand of min so that a
       NaN lane yields vmax_val (min/max return the second operand on NaN). */
    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
    inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
    inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);

    intInputVal1 = _mm_cvtps_epi32(inputVal1);
    intInputVal2 = _mm_cvtps_epi32(inputVal2);
    intInputVal3 = _mm_cvtps_epi32(inputVal3);
    intInputVal4 = _mm_cvtps_epi32(inputVal4);

    /* Narrow 4 x (4 x int32) -> 2 x (8 x int16) -> 1 x (16 x int8). The
       values are already inside the int8 range, so the saturating packs
       never actually saturate. */
    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
    intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);

    intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);

    _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
    outputVectorPtr += 16;
  }

  /* Tail: same operations as one lane of the loop above, so rounding and
     NaN handling match the vectorised part exactly. */
  for(number = sixteenthPoints * 16; number < num_points; number++){
    __m128 val = _mm_mul_ss(_mm_load_ss(&inputVector[number]), vScalar);
    val = _mm_max_ss(_mm_min_ss(val, vmax_val), vmin_val);
    /* val is within [-128, 127] here, so the narrowing cast is lossless. */
    outputVector[number] = (int8_t)_mm_cvtss_si32(val);
  }
}
00070 #endif /* LV_HAVE_SSE2 */
00071
00072 #ifdef LV_HAVE_SSE
00073 #include <xmmintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, clamps the product to [-128, 127], then converts it into an 8 bit integer value (SSE, aligned input)

  The clamped float is converted with a C cast, so any fractional part is
  discarded (truncation toward zero). A NaN product is written as 127 both in
  the vectorised part and in the scalar tail.

  \param outputVector The 8 bit output data buffer
  \param inputVector The floating point input data buffer; must be 16-byte aligned because it is read with aligned loads
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
*/
static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int8_t* outputVectorPtr = outputVector;

  const float min_val = -128;
  const float max_val = 127;
  float r;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vmin_val = _mm_set_ps1(min_val);
  const __m128 vmax_val = _mm_set_ps1(max_val);
  __m128 ret;

  /* Scratch buffer used to move the four clamped lanes back to scalar code. */
  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];

  for(;number < quarterPoints; number++){
    ret = _mm_load_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* Scale, then clamp. The product is the FIRST operand of min so that a
       NaN lane yields vmax_val (min/max return the second operand on NaN). */
    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);

    _mm_store_ps(outputFloatBuffer, ret);
    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]);
    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]);
    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
  }

  /* Tail: the remaining num_points % 4 values. */
  for(number = quarterPoints * 4; number < num_points; number++){
    r = inputVector[number] * scalar;
    /* Written as !(r < max_val) rather than (r > max_val) so that NaN is
       clamped to max_val, as in the vector lanes, instead of reaching the
       integer cast, where converting NaN is undefined behaviour. */
    if(!(r < max_val))
      r = max_val;
    else if(r < min_val)
      r = min_val;
    outputVector[number] = (int8_t)(r);
  }
}
00123 #endif /* LV_HAVE_SSE */
00124
00125 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each point in the input buffer by the scalar value, clamps the product to [-128, 127], then converts it into an 8 bit integer value (portable C)

  The clamped float is converted with a C cast, so any fractional part is
  discarded (truncation toward zero). A NaN product is written as 127.

  \param outputVector The 8 bit output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
*/
static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  int8_t* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number;
  const float min_val = -128;
  const float max_val = 127;
  float r;

  for(number = 0; number < num_points; number++){
    r = *inputVectorPtr++ * scalar;
    /* Written as !(r < max_val) rather than (r > max_val) so that NaN is
       clamped to max_val instead of reaching the integer cast, where
       converting NaN is undefined behaviour. */
    if(!(r < max_val))
      r = max_val;
    else if(r < min_val)
      r = min_val;
    *outputVectorPtr++ = (int8_t)(r);
  }
}
00150 #endif /* LV_HAVE_GENERIC */
00151
00152
00153
00154
00155 #endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */