GNU Radio 3.6.5 C++ API
|
00001 #ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H 00002 #define INCLUDED_volk_32f_s32f_convert_16i_a_H 00003 00004 #include <volk/volk_common.h> 00005 #include <inttypes.h> 00006 #include <stdio.h> 00007 #include <math.h> 00008 00009 #ifdef LV_HAVE_SSE2 00010 #include <emmintrin.h> 00011 /*! 00012 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00013 \param inputVector The floating point input data buffer 00014 \param outputVector The 16 bit output data buffer 00015 \param scalar The value multiplied against each point in the input buffer 00016 \param num_points The number of data values to be converted 00017 */ 00018 static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00019 unsigned int number = 0; 00020 00021 const unsigned int eighthPoints = num_points / 8; 00022 00023 const float* inputVectorPtr = (const float*)inputVector; 00024 int16_t* outputVectorPtr = outputVector; 00025 00026 float min_val = -32768; 00027 float max_val = 32767; 00028 float r; 00029 00030 __m128 vScalar = _mm_set_ps1(scalar); 00031 __m128 inputVal1, inputVal2; 00032 __m128i intInputVal1, intInputVal2; 00033 __m128 ret1, ret2; 00034 __m128 vmin_val = _mm_set_ps1(min_val); 00035 __m128 vmax_val = _mm_set_ps1(max_val); 00036 00037 for(;number < eighthPoints; number++){ 00038 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; 00039 inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; 00040 00041 // Scale and clip 00042 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); 00043 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val); 00044 00045 intInputVal1 = _mm_cvtps_epi32(ret1); 00046 intInputVal2 = _mm_cvtps_epi32(ret2); 00047 00048 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); 00049 00050 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1); 00051 outputVectorPtr += 8; 00052 } 00053 00054 number = eighthPoints * 8; 00055 for(; number < num_points; number++){ 00056 r = inputVector[number] * scalar; 00057 if(r > max_val) 00058 r = max_val; 00059 else if(r < min_val) 00060 r = min_val; 00061 outputVector[number] = (int16_t)rintf(r); 00062 } 00063 } 00064 #endif /* LV_HAVE_SSE2 */ 00065 00066 #ifdef LV_HAVE_SSE 00067 #include <xmmintrin.h> 00068 /*! 00069 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00070 \param inputVector The floating point input data buffer 00071 \param outputVector The 16 bit output data buffer 00072 \param scalar The value multiplied against each point in the input buffer 00073 \param num_points The number of data values to be converted 00074 */ 00075 static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00076 unsigned int number = 0; 00077 00078 const unsigned int quarterPoints = num_points / 4; 00079 00080 const float* inputVectorPtr = (const float*)inputVector; 00081 int16_t* outputVectorPtr = outputVector; 00082 00083 float min_val = -32768; 00084 float max_val = 32767; 00085 float r; 00086 00087 __m128 vScalar = _mm_set_ps1(scalar); 00088 __m128 ret; 00089 __m128 vmin_val = _mm_set_ps1(min_val); 00090 __m128 vmax_val = _mm_set_ps1(max_val); 00091 00092 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; 00093 00094 for(;number < quarterPoints; number++){ 00095 ret = _mm_load_ps(inputVectorPtr); 00096 inputVectorPtr += 4; 00097 00098 // Scale and clip 00099 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); 00100 00101 _mm_store_ps(outputFloatBuffer, ret); 00102 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]); 00103 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]); 00104 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]); 00105 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]); 00106 } 00107 00108 number = quarterPoints * 4; 00109 for(; number < num_points; number++){ 00110 r = inputVector[number] * scalar; 00111 if(r > max_val) 00112 r = max_val; 00113 else if(r < min_val) 00114 r = min_val; 00115 outputVector[number] = (int16_t)rintf(r); 00116 } 00117 } 00118 #endif /* LV_HAVE_SSE */ 00119 00120 #ifdef LV_HAVE_GENERIC 00121 /*! 00122 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00123 \param inputVector The floating point input data buffer 00124 \param outputVector The 16 bit output data buffer 00125 \param scalar The value multiplied against each point in the input buffer 00126 \param num_points The number of data values to be converted 00127 */ 00128 static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00129 int16_t* outputVectorPtr = outputVector; 00130 const float* inputVectorPtr = inputVector; 00131 unsigned int number = 0; 00132 float min_val = -32768; 00133 float max_val = 32767; 00134 float r; 00135 00136 for(number = 0; number < num_points; number++){ 00137 r = *inputVectorPtr++ * scalar; 00138 if(r < min_val) 00139 r = min_val; 00140 else if(r > max_val) 00141 r = max_val; 00142 *outputVectorPtr++ = (int16_t)rintf(r); 00143 } 00144 } 00145 #endif /* LV_HAVE_GENERIC */ 00146 00147 00148 00149 00150 #endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */