doc/doxygen-3.6/volk__8i__s32f__convert__32f__u_8h_source.html

00001 #ifndef INCLUDED_volk_8i_s32f_convert_32f_u_H
00002 #define INCLUDED_volk_8i_s32f_convert_32f_u_H
00003
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006
00007 #ifdef LV_HAVE_SSE4_1
00008 #include <smmintrin.h>
00009
00010   /*!
00011     \brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
00012     \param inputVector The 8 bit input data buffer
00013     \param outputVector The floating point output data buffer
00014     \param scalar The value divided against each point in the output buffer
00015     \param num_points The number of data values to be converted
00016     \note Output buffer does NOT need to be properly aligned
00017   */
00018 static inline void volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
00019     unsigned int number = 0;
00020     const unsigned int sixteenthPoints = num_points / 16;
00021
00022     float* outputVectorPtr = outputVector;
00023     const float iScalar = 1.0 / scalar;
00024     __m128 invScalar = _mm_set_ps1( iScalar );
00025     const int8_t* inputVectorPtr = inputVector;
00026     __m128 ret;
00027     __m128i inputVal;
00028     __m128i interimVal;
00029
00030     for(;number < sixteenthPoints; number++){
00031       inputVal = _mm_loadu_si128((__m128i*)inputVectorPtr);
00032
00033       interimVal = _mm_cvtepi8_epi32(inputVal);
00034       ret = _mm_cvtepi32_ps(interimVal);
00035       ret = _mm_mul_ps(ret, invScalar);
00036       _mm_storeu_ps(outputVectorPtr, ret);
00037       outputVectorPtr += 4;
00038
00039       inputVal = _mm_srli_si128(inputVal, 4);
00040       interimVal = _mm_cvtepi8_epi32(inputVal);
00041       ret = _mm_cvtepi32_ps(interimVal);
00042       ret = _mm_mul_ps(ret, invScalar);
00043       _mm_storeu_ps(outputVectorPtr, ret);
00044       outputVectorPtr += 4;
00045
00046       inputVal = _mm_srli_si128(inputVal, 4);
00047       interimVal = _mm_cvtepi8_epi32(inputVal);
00048       ret = _mm_cvtepi32_ps(interimVal);
00049       ret = _mm_mul_ps(ret, invScalar);
00050       _mm_storeu_ps(outputVectorPtr, ret);
00051       outputVectorPtr += 4;
00052
00053       inputVal = _mm_srli_si128(inputVal, 4);
00054       interimVal = _mm_cvtepi8_epi32(inputVal);
00055       ret = _mm_cvtepi32_ps(interimVal);
00056       ret = _mm_mul_ps(ret, invScalar);
00057       _mm_storeu_ps(outputVectorPtr, ret);
00058       outputVectorPtr += 4;
00059
00060       inputVectorPtr += 16;
00061     }
00062
00063     number = sixteenthPoints * 16;
00064     for(; number < num_points; number++){
00065       outputVector[number] = (float)(inputVector[number]) * iScalar;
00066     }
00067 }
00068 #endif /* LV_HAVE_SSE4_1 */
00069
00070 #ifdef LV_HAVE_GENERIC
00071   /*!
00072     \brief Converts the input 8 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
00073     \param inputVector The 8 bit input data buffer
00074     \param outputVector The floating point output data buffer
00075     \param scalar The value divided against each point in the output buffer
00076     \param num_points The number of data values to be converted
00077     \note Output buffer does NOT need to be properly aligned
00078   */
00079 static inline void volk_8i_s32f_convert_32f_u_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
00080   float* outputVectorPtr = outputVector;
00081   const int8_t* inputVectorPtr = inputVector;
00082   unsigned int number = 0;
00083   const float iScalar = 1.0 / scalar;
00084
00085   for(number = 0; number < num_points; number++){
00086     *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
00087   }
00088 }
00089 #endif /* LV_HAVE_GENERIC */
00090
00091
00092
00093
00094 #endif /* INCLUDED_VOLK_8s_CONVERT_32f_UNALIGNED8_H */