doc/doxygen-3.6/volk__32f__s32f__convert__8i__u_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H
00002 #define INCLUDED_volk_32f_s32f_convert_8i_u_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006
00007 #ifdef LV_HAVE_SSE2
00008 #include <emmintrin.h>
00009   /*!
00010     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
00011     \param inputVector The floating point input data buffer
00012     \param outputVector The 8 bit output data buffer
00013     \param scalar The value multiplied against each point in the input buffer
00014     \param num_points The number of data values to be converted
00015     \note Input buffer does NOT need to be properly aligned
00016   */
00017 static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00018   unsigned int number = 0;
00019
00020   const unsigned int sixteenthPoints = num_points / 16;
00021
00022   const float* inputVectorPtr = (const float*)inputVector;
00023   int8_t* outputVectorPtr = outputVector;
00024
00025   float min_val = -128;
00026   float max_val = 127;
00027   float r;
00028
00029   __m128 vScalar = _mm_set_ps1(scalar);
00030   __m128 inputVal1, inputVal2, inputVal3, inputVal4;
00031   __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
00032   __m128 vmin_val = _mm_set_ps1(min_val);
00033   __m128 vmax_val = _mm_set_ps1(max_val);
00034
00035   for(;number < sixteenthPoints; number++){
00036     inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
00037     inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
00038     inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
00039     inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
00040
00041     inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
00042     inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
00043     inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
00044     inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);
00045
00046     intInputVal1 = _mm_cvtps_epi32(inputVal1);
00047     intInputVal2 = _mm_cvtps_epi32(inputVal2);
00048     intInputVal3 = _mm_cvtps_epi32(inputVal3);
00049     intInputVal4 = _mm_cvtps_epi32(inputVal4);
00050
00051     intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
00052     intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
00053
00054     intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);
00055
00056     _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
00057     outputVectorPtr += 16;
00058   }
00059
00060   number = sixteenthPoints * 16;
00061   for(; number < num_points; number++){
00062     r = inputVector[number] * scalar;
00063     if(r > max_val)
00064       r = max_val;
00065     else if(r < min_val)
00066       r = min_val;
00067     outputVector[number] = (int16_t)(r);
00068   }
00069 }
00070 #endif /* LV_HAVE_SSE2 */
00071
00072 #ifdef LV_HAVE_SSE
00073 #include <xmmintrin.h>
00074   /*!
00075     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
00076     \param inputVector The floating point input data buffer
00077     \param outputVector The 8 bit output data buffer
00078     \param scalar The value multiplied against each point in the input buffer
00079     \param num_points The number of data values to be converted
00080     \note Input buffer does NOT need to be properly aligned
00081   */
00082 static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00083   unsigned int number = 0;
00084
00085   const unsigned int quarterPoints = num_points / 4;
00086
00087   const float* inputVectorPtr = (const float*)inputVector;
00088   int8_t* outputVectorPtr = outputVector;
00089
00090   float min_val = -128;
00091   float max_val = 127;
00092   float r;
00093
00094   __m128 vScalar = _mm_set_ps1(scalar);
00095   __m128 ret;
00096   __m128 vmin_val = _mm_set_ps1(min_val);
00097   __m128 vmax_val = _mm_set_ps1(max_val);
00098
00099   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
00100
00101   for(;number < quarterPoints; number++){
00102     ret = _mm_loadu_ps(inputVectorPtr);
00103     inputVectorPtr += 4;
00104
00105     ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
00106
00107     _mm_store_ps(outputFloatBuffer, ret);
00108     *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
00109     *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]);
00110     *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]);
00111     *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
00112   }
00113
00114   number = quarterPoints * 4;
00115   for(; number < num_points; number++){
00116     r = inputVector[number] * scalar;
00117     if(r > max_val)
00118       r = max_val;
00119     else if(r < min_val)
00120       r = min_val;
00121     outputVector[number] = (int16_t)(r);
00122   }
00123 }
00124 #endif /* LV_HAVE_SSE */
00125
00126 #ifdef LV_HAVE_GENERIC
00127   /*!
00128     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
00129     \param inputVector The floating point input data buffer
00130     \param outputVector The 8 bit output data buffer
00131     \param scalar The value multiplied against each point in the input buffer
00132     \param num_points The number of data values to be converted
00133     \note Input buffer does NOT need to be properly aligned
00134   */
00135 static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00136   int8_t* outputVectorPtr = outputVector;
00137   const float* inputVectorPtr = inputVector;
00138   unsigned int number = 0;
00139   float min_val = -128;
00140   float max_val = 127;
00141   float r;
00142
00143   for(number = 0; number < num_points; number++){
00144     r = *inputVectorPtr++ * scalar;
00145     if(r > max_val)
00146       r = max_val;
00147     else if(r < min_val)
00148       r = min_val;
00149     *outputVectorPtr++ = (int16_t)(r);
00150   }
00151 }
00152 #endif /* LV_HAVE_GENERIC */
00153
00154
00155
00156
00157 #endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */