GNU Radio 3.6.5 C++ API
|
00001 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H 00002 #define INCLUDED_volk_32f_s32f_convert_16i_u_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <math.h> 00007 00008 #ifdef LV_HAVE_SSE2 00009 #include <emmintrin.h> 00010 /*! 00011 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00012 \param inputVector The floating point input data buffer 00013 \param outputVector The 16 bit output data buffer 00014 \param scalar The value multiplied against each point in the input buffer 00015 \param num_points The number of data values to be converted 00016 \note Input buffer does NOT need to be properly aligned 00017 */ 00018 static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00019 unsigned int number = 0; 00020 00021 const unsigned int eighthPoints = num_points / 8; 00022 00023 const float* inputVectorPtr = (const float*)inputVector; 00024 int16_t* outputVectorPtr = outputVector; 00025 00026 float min_val = -32768; 00027 float max_val = 32767; 00028 float r; 00029 00030 __m128 vScalar = _mm_set_ps1(scalar); 00031 __m128 inputVal1, inputVal2; 00032 __m128i intInputVal1, intInputVal2; 00033 __m128 ret1, ret2; 00034 __m128 vmin_val = _mm_set_ps1(min_val); 00035 __m128 vmax_val = _mm_set_ps1(max_val); 00036 00037 for(;number < eighthPoints; number++){ 00038 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00039 inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00040 00041 // Scale and clip 00042 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); 00043 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val); 00044 00045 intInputVal1 = _mm_cvtps_epi32(ret1); 00046 intInputVal2 = _mm_cvtps_epi32(ret2); 00047 00048 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); 00049 00050 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1); 00051 outputVectorPtr += 8; 00052 } 00053 00054 number = eighthPoints * 8; 00055 for(; number < num_points; number++){ 00056 r = inputVector[number] * scalar; 00057 if(r > max_val) 00058 r = max_val; 00059 else if(r < min_val) 00060 r = min_val; 00061 outputVector[number] = (int16_t)rintf(r); 00062 } 00063 } 00064 #endif /* LV_HAVE_SSE2 */ 00065 00066 #ifdef LV_HAVE_SSE 00067 #include <xmmintrin.h> 00068 /*! 00069 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00070 \param inputVector The floating point input data buffer 00071 \param outputVector The 16 bit output data buffer 00072 \param scalar The value multiplied against each point in the input buffer 00073 \param num_points The number of data values to be converted 00074 \note Input buffer does NOT need to be properly aligned 00075 */ 00076 static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00077 unsigned int number = 0; 00078 00079 const unsigned int quarterPoints = num_points / 4; 00080 00081 const float* inputVectorPtr = (const float*)inputVector; 00082 int16_t* outputVectorPtr = outputVector; 00083 00084 float min_val = -32768; 00085 float max_val = 32767; 00086 float r; 00087 00088 __m128 vScalar = _mm_set_ps1(scalar); 00089 __m128 ret; 00090 __m128 vmin_val = _mm_set_ps1(min_val); 00091 __m128 vmax_val = _mm_set_ps1(max_val); 00092 00093 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; 00094 00095 for(;number < quarterPoints; number++){ 00096 ret = _mm_loadu_ps(inputVectorPtr); 00097 inputVectorPtr += 4; 00098 00099 // Scale and clip 00100 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); 00101 00102 _mm_store_ps(outputFloatBuffer, ret); 00103 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]); 00104 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]); 00105 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]); 00106 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]); 00107 } 00108 00109 number = quarterPoints * 4; 00110 for(; number < num_points; number++){ 00111 r = inputVector[number] * scalar; 00112 if(r > max_val) 00113 r = max_val; 00114 else if(r < min_val) 00115 r = min_val; 00116 outputVector[number] = (int16_t)rintf(r); 00117 } 00118 } 00119 #endif /* LV_HAVE_SSE */ 00120 00121 #ifdef LV_HAVE_GENERIC 00122 /*! 00123 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00124 \param inputVector The floating point input data buffer 00125 \param outputVector The 16 bit output data buffer 00126 \param scalar The value multiplied against each point in the input buffer 00127 \param num_points The number of data values to be converted 00128 \note Input buffer does NOT need to be properly aligned 00129 */ 00130 static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00131 int16_t* outputVectorPtr = outputVector; 00132 const float* inputVectorPtr = inputVector; 00133 unsigned int number = 0; 00134 float min_val = -32768; 00135 float max_val = 32767; 00136 float r; 00137 00138 for(number = 0; number < num_points; number++){ 00139 r = *inputVectorPtr++ * scalar; 00140 if(r > max_val) 00141 r = max_val; 00142 else if(r < min_val) 00143 r = min_val; 00144 *outputVectorPtr++ = (int16_t)rintf(r); 00145 } 00146 } 00147 #endif /* LV_HAVE_GENERIC */ 00148 00149 00150 00151 00152 #endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */