doc/doxygen-3.6/volk__32f__s32f__convert__32i__a_8h_source.html

00001 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
00002 #define INCLUDED_volk_32f_s32f_convert_32i_a_H
00003
00004 #include <volk/volk_common.h>
00005 #include <inttypes.h>
00006 #include <stdio.h>
00007
00008 #ifdef LV_HAVE_AVX
00009 #include <immintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then
         converts the product into a saturated 32 bit signed integer.

  Products are rounded with the current SSE/AVX rounding mode (round to
  nearest even unless the caller changed MXCSR), both in the 8-wide vector
  body and in the scalar tail. Products at or above 2^31 are stored as
  INT32_MAX and products at or below -2^31 as INT32_MIN.

  Aligned load/store intrinsics are used, so both buffers must be 32-byte
  aligned.

  \param outputVector The 32 bit integer output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
*/
static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int eighthPoints = num_points / 8;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* INT32_MAX is not representable as a float (2147483647 rounds up to
     2147483648.0f), so the bounds are spelled as the exact powers of two and
     the positive overflow case is patched up after the conversion. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;

  const __m256 vScalar = _mm256_set1_ps(scalar);
  const __m256 vmin_val = _mm256_set1_ps(min_val);
  const __m256 vmax_val = _mm256_set1_ps(max_val);

  for(; number < eighthPoints; number++){
    const __m256 scaled = _mm256_mul_ps(_mm256_load_ps(inputVectorPtr), vScalar);
    const __m256 clamped = _mm256_max_ps(_mm256_min_ps(scaled, vmax_val), vmin_val);

    /* All-ones in every lane whose product reached 2^31. */
    const __m256 tooBig = _mm256_cmp_ps(scaled, vmax_val, _CMP_GE_OQ);

    /* The conversion yields 0x80000000 for those lanes; XOR with all-ones
       turns that into 0x7FFFFFFF (INT32_MAX). The XOR is done in the float
       domain because plain AVX has no 256-bit integer XOR. */
    const __m256i converted = _mm256_castps_si256(
        _mm256_xor_ps(_mm256_castsi256_ps(_mm256_cvtps_epi32(clamped)), tooBig));

    _mm256_store_si256((__m256i*)outputVectorPtr, converted);
    inputVectorPtr += 8;
    outputVectorPtr += 8;
  }

  /* Scalar tail: use the same hardware conversion as the vector body so the
     rounding does not depend on an element's position in the buffer. */
  for(number = eighthPoints * 8; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    if(r >= max_val){
      outputVector[number] = INT32_MAX;
    }
    else{
      /* Out-of-range negative products convert to 0x80000000 == INT32_MIN. */
      outputVector[number] = (int32_t)_mm_cvtss_si32(_mm_set_ss(r));
    }
  }
}
00055 #endif /* LV_HAVE_AVX */
00056
00057 #ifdef LV_HAVE_SSE2
00058 #include <emmintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then
         converts the product into a saturated 32 bit signed integer.

  Products are rounded with the current SSE rounding mode (round to nearest
  even unless the caller changed MXCSR), both in the 4-wide vector body and in
  the scalar tail. Products at or above 2^31 are stored as INT32_MAX and
  products at or below -2^31 as INT32_MIN.

  Aligned load/store intrinsics are used, so both buffers must be 16-byte
  aligned.

  \param outputVector The 32 bit integer output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
*/
static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* INT32_MAX is not representable as a float (2147483647 rounds up to
     2147483648.0f), so the bounds are spelled as the exact powers of two and
     the positive overflow case is patched up after the conversion. */
  const float min_val = -2147483648.0f;
  const float max_val = 2147483648.0f;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vmin_val = _mm_set_ps1(min_val);
  const __m128 vmax_val = _mm_set_ps1(max_val);

  for(; number < quarterPoints; number++){
    const __m128 scaled = _mm_mul_ps(_mm_load_ps(inputVectorPtr), vScalar);
    const __m128 clamped = _mm_max_ps(_mm_min_ps(scaled, vmax_val), vmin_val);

    /* All-ones in every lane whose product reached 2^31. */
    const __m128i tooBig = _mm_castps_si128(_mm_cmpge_ps(scaled, vmax_val));

    /* The conversion yields 0x80000000 for those lanes; XOR with all-ones
       turns that into 0x7FFFFFFF (INT32_MAX). */
    const __m128i converted = _mm_xor_si128(_mm_cvtps_epi32(clamped), tooBig);

    _mm_store_si128((__m128i*)outputVectorPtr, converted);
    inputVectorPtr += 4;
    outputVectorPtr += 4;
  }

  /* Scalar tail: use the same hardware conversion as the vector body so the
     rounding does not depend on an element's position in the buffer. */
  for(number = quarterPoints * 4; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    if(r >= max_val){
      outputVector[number] = INT32_MAX;
    }
    else{
      /* Out-of-range negative products convert to 0x80000000 == INT32_MIN. */
      outputVector[number] = (int32_t)_mm_cvtss_si32(_mm_set_ss(r));
    }
  }
}
00104 #endif /* LV_HAVE_SSE2 */
00105
00106 #ifdef LV_HAVE_SSE
00107 #include <xmmintrin.h>
00108   /*!
00109     \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value
00110     \param inputVector The floating point input data buffer
00111     \param outputVector The 32 bit output data buffer
00112     \param scalar The value multiplied against each point in the input buffer
00113     \param num_points The number of data values to be converted
00114   */
00115 static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
00116   unsigned int number = 0;
00117
00118   const unsigned int quarterPoints = num_points / 4;
00119
00120   const float* inputVectorPtr = (const float*)inputVector;
00121   int32_t* outputVectorPtr = outputVector;
00122
00123   float min_val = -2147483647;
00124   float max_val = 2147483647;
00125   float r;
00126
00127   __m128 vScalar = _mm_set_ps1(scalar);
00128   __m128 ret;
00129   __m128 vmin_val = _mm_set_ps1(min_val);
00130   __m128 vmax_val = _mm_set_ps1(max_val);
00131
00132   __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
00133
00134   for(;number < quarterPoints; number++){
00135     ret = _mm_load_ps(inputVectorPtr);
00136     inputVectorPtr += 4;
00137
00138     ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
00139
00140     _mm_store_ps(outputFloatBuffer, ret);
00141     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
00142     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[1]);
00143     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[2]);
00144     *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]);
00145   }
00146
00147   number = quarterPoints * 4;
00148   for(; number < num_points; number++){
00149     r = inputVector[number] * scalar;
00150     if(r > max_val)
00151       r = max_val;
00152     else if(r < min_val)
00153       r = min_val;
00154     outputVector[number] = (int32_t)(r);
00155   }
00156 }
00157 #endif /* LV_HAVE_SSE */
00158
00159 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each point in the input buffer by the scalar value, then
         converts the product into a saturated 32 bit signed integer.

  The product is truncated toward zero. Products at or above 2^31 are stored
  as INT32_MAX and products at or below -2^31 as INT32_MIN. A NaN product
  fails both range comparisons and is therefore stored as INT32_MIN.

  \param outputVector The 32 bit integer output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
*/
static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  /* INT32_MAX is not representable as a float: the literal 2147483647 rounds
     up to 2147483648.0f. Clamping to that value and then casting overflows
     int32_t, which is undefined behaviour. The bounds are therefore written
     as the exact powers of two and saturation is done on the integer side. */
  const float upper_bound = 2147483648.0f;
  const float lower_bound = -2147483648.0f;
  int32_t* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    const float r = *inputVectorPtr++ * scalar;
    if(r >= upper_bound){
      *outputVectorPtr++ = INT32_MAX;
    }
    else if(r > lower_bound){
      *outputVectorPtr++ = (int32_t)(r);  /* strictly inside the range */
    }
    else{
      *outputVectorPtr++ = INT32_MIN;     /* too small, or NaN */
    }
  }
}
00184 #endif /* LV_HAVE_GENERIC */
00185
00186
00187
00188
00189 #endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */